42 #ifndef STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
43 #define STOKHOS_CUDA_BLOCKCRSMATRIX_HPP
49 #include "Kokkos_Core.hpp"
56 template<
class BlockSpec ,
typename MatrixValue ,
typename VectorValue >
59 Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > ,
60 Kokkos::View< VectorValue** , Kokkos::LayoutLeft , Kokkos::Cuda > >
66 typedef Kokkos::View< VectorValue** ,Kokkos::LayoutLeft , Kokkos::Cuda >
block_vector_type ;
93 iBlock < blockCount ; iBlock += gridDim.x ) {
99 for ( ; iEntry < iEntryEnd ; ++iEntry ) {
100 const VectorValue *
const x = & m_x( 0 , m_A.
graph.entries(iEntry) );
101 const MatrixValue *
const a = & m_A.
values( 0 , iEntry );
106 if ( threadIdx.x + blockDim.x * threadIdx.y < m_A.
block.dimension() ) {
107 m_y(threadIdx.x,iBlock) =
y ;
117 Kokkos::Impl::cuda_internal_maximum_warp_count() * Kokkos::Impl::CudaTraits::WarpSize ;
119 const size_type row_count =
A.graph.row_map.extent(0) - 1 ;
122 std::min( row_count , Kokkos::Impl::cuda_internal_maximum_grid_count() ) , 1 , 1 );
128 if ( thread_max < block.x * block.y ) {
129 std::ostringstream msg ;
130 msg <<
"Kokkos::Impl::Multiply< BlockCrsMatrix< Block , Value , Cuda > , ... >"
131 <<
" ERROR: block dimension = " << block.x * block.y
132 <<
" > " << thread_max <<
"== maximum Cuda threads per block" ;
133 throw std::runtime_error(msg.str());
136 Kokkos::Impl::cuda_parallel_launch_local_memory<<< grid , block , shmem >>>(
Multiply(
A,
x,
y) );