46 #include <cusp/gallery/poisson.h>
47 #include <cusp/csr_matrix.h>
56 template <
typename Orientation,
57 typename IndexType,
typename ValueType,
typename MemorySpace>
59 const cusp::csr_matrix<IndexType, ValueType, MemorySpace>&
A,
60 IndexType nrhs, IndexType max_its, ValueType tol) {
63 cusp::array2d<ValueType, MemorySpace, Orientation>
x(
A.num_rows, nrhs, 0);
64 cusp::array2d<ValueType, MemorySpace, Orientation> b(
A.num_rows, nrhs, 1);
82 typedef int IndexType;
83 typedef double ValueType;
84 typedef cusp::device_memory MemorySpace;
// Description text registered on CLP via setDocString.
93 CLP.setDocString(
"This tests the performance of block multiply routines.\n");
// Register the "n" option on CLP, bound to the variable n.
95 CLP.setOption(
"n", &
n,
"Number of mesh points in each direction");
// Starting number of right-hand-sides (default 32), overridable through the
// "begin" option registered on CLP.
96 IndexType nrhs_begin = 32;
97 CLP.setOption(
"begin", &nrhs_begin,
98 "Starting number of right-hand-sides");
99 IndexType nrhs_end = 512;
100 CLP.setOption(
"end", &nrhs_end,
101 "Ending number of right-hand-sides");
102 IndexType nrhs_step = 32;
103 CLP.setOption(
"step", &nrhs_step,
104 "Increment in number of right-hand-sides");
105 IndexType max_its = 100;
106 CLP.setOption(
"max_iterations", &max_its,
107 "Maximum number of CG iterations");
109 CLP.setOption(
"tolerance", &tol,
"Convergence tolerance");
111 CLP.setOption(
"device", &device_id,
"CUDA device ID");
115 cudaSetDevice(device_id);
116 cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);
119 cusp::csr_matrix<IndexType, ValueType, MemorySpace>
A;
120 cusp::gallery::poisson27pt(
A,
n,
n,
n);
134 std::cout <<
"nrhs , num_rows , num_entries , "
135 <<
"row_cg , row_op , row_prec , row_prec_op , row_coarse , "
136 <<
"col_cg , col_op , col_prec , col_prec_op , col_coarse"
139 for (IndexType nrhs = nrhs_begin; nrhs <= nrhs_end; nrhs += nrhs_step) {
141 std::cout << nrhs <<
" , "
142 <<
A.num_rows <<
" , " <<
A.num_entries <<
" , ";
146 cusp_sa_block_cg<cusp::row_major>(
A, nrhs, max_its, tol);
156 cusp_sa_block_cg<cusp::column_major>(
A, nrhs, max_its, tol);