44 #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
45 #define KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
56 #include <Kokkos_Pair.hpp>
57 #include <Kokkos_UnorderedMap.hpp>
59 #include <impl/Kokkos_Timer.hpp>
76 Dim3(
const size_t x_,
const size_t y_ = 1,
const size_t z_ = 1) :
77 x(x_),
y(y_),
z(z_) {}
85 const size_t threads_per_block_x_ = 0,
86 const size_t threads_per_block_y_ = 0,
87 const size_t threads_per_block_z_ = 1) :
88 block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
94 template<
typename ValueType ,
class Space >
96 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Don't remove this until Kokkos has removed the deprecated code path probably around September 2018
97 typedef Kokkos::StaticCrsGraph< unsigned , Space , void , unsigned >
StaticCrsGraphType ;
110 ,
values(
"crs_matrix_values" , arg_graph.entries.extent(0) )
116 template <
typename ViewType,
typename Enabled =
void>
117 struct LocalViewTraits {
126 KOKKOS_INLINE_FUNCTION
128 const unsigned local_rank)
132 #if defined( KOKKOS_ENABLE_CUDA )
134 template <
typename ViewType>
135 struct LocalViewTraits<
137 typename std::enable_if< std::is_same<typename ViewType::execution_space,
138 Kokkos::Cuda>::value &&
139 Kokkos::is_view_mp_vector<ViewType>::value
146 KOKKOS_INLINE_FUNCTION
148 const unsigned local_rank)
150 return Kokkos::partition<1>(v, local_rank);
157 template <
typename ScalarType>
158 struct CreateDeviceConfigs {
167 template <
typename StorageType>
168 struct CreateDeviceConfigs<
Sacado::MP::Vector<StorageType> > {
172 static const unsigned VectorSize = StorageType::static_size;
173 #if defined( KOKKOS_ENABLE_CUDA )
174 enum { is_cuda = Kokkos::Impl::is_same< execution_space, Kokkos::Cuda >::value };
176 enum { is_cuda =
false };
189 template<
class ElemNodeIdView ,
class CrsGraphType ,
unsigned ElemNode >
190 class NodeNodeGraph {
196 typedef Kokkos::UnorderedMap< key_type, void , execution_space >
SetType ;
197 typedef typename CrsGraphType::row_map_type::non_const_type
RowMapType ;
201 typedef Kokkos::View< unsigned*[ElemNode][ElemNode] , execution_space >
236 const unsigned arg_node_count,
252 Kokkos::Impl::Timer wall_clock ;
258 size_t set_capacity = (((28ull *
node_count) / 2ull)*4ull)/3ull;
274 execution_space::fence();
276 results.fill_node_set = wall_clock.seconds();
290 unsigned graph_entry_count = 0 ;
296 graph.entries =
typename CrsGraphType::entries_type(
"graph_entries" , graph_entry_count );
301 execution_space::fence();
302 results.scan_node_count = wall_clock.seconds();
308 execution_space::fence();
309 results.fill_graph_entries = wall_clock.seconds();
325 execution_space::fence();
326 results.sort_graph_entries = wall_clock.seconds();
335 execution_space::fence();
336 results.fill_element_graph = wall_clock.seconds();
342 KOKKOS_INLINE_FUNCTION
346 for (
unsigned row_local_node = 0 ; row_local_node <
elem_node_id.extent(1) ; ++row_local_node ) {
348 const unsigned row_node =
elem_node_id( ielem , row_local_node );
350 for (
unsigned col_local_node = row_local_node ; col_local_node <
elem_node_id.extent(1) ; ++col_local_node ) {
352 const unsigned col_node =
elem_node_id( ielem , col_local_node );
358 const key_type key = (row_node < col_node) ? make_pair( row_node, col_node ) : make_pair( col_node, row_node ) ;
360 const typename SetType::insert_result result =
node_node_set.insert( key );
362 if ( result.success() ) {
371 KOKKOS_INLINE_FUNCTION
376 const unsigned row_node = key.first ;
377 const unsigned col_node = key.second ;
381 graph.entries( offset ) = col_node ;
384 if ( col_node <
row_count.extent(0) && col_node != row_node ) {
386 graph.entries( offset ) = row_node ;
391 KOKKOS_INLINE_FUNCTION
394 typedef typename CrsGraphType::size_type size_type;
395 const size_type row_beg =
graph.row_map( irow );
396 const size_type row_end =
graph.row_map( irow + 1 );
397 for ( size_type i = row_beg + 1 ; i < row_end ; ++i ) {
398 const typename CrsGraphType::data_type col =
graph.entries(i);
400 for ( ; row_beg <
j && col <
graph.entries(
j-1) ; --
j ) {
407 KOKKOS_INLINE_FUNCTION
410 typedef typename CrsGraphType::data_type entry_type;
411 for (
unsigned row_local_node = 0 ; row_local_node <
elem_node_id.extent(1) ; ++row_local_node ) {
413 const unsigned row_node =
elem_node_id( ielem , row_local_node );
415 for (
unsigned col_local_node = 0 ; col_local_node <
elem_node_id.extent(1) ; ++col_local_node ) {
417 const unsigned col_node =
elem_node_id( ielem , col_local_node );
419 entry_type entry = 0 ;
421 if ( row_node + 1 <
graph.row_map.extent(0) ) {
423 const entry_type entry_end = static_cast<entry_type> (
graph.row_map( row_node + 1 ));
425 entry =
graph.row_map( row_node );
427 for ( ; entry < entry_end &&
graph.entries(entry) != static_cast<entry_type> (col_node) ; ++entry );
429 if ( entry == entry_end ) entry = ~0u ;
432 elem_graph( ielem , row_local_node , col_local_node ) = entry ;
437 KOKKOS_INLINE_FUNCTION
459 KOKKOS_INLINE_FUNCTION
475 KOKKOS_INLINE_FUNCTION
478 KOKKOS_INLINE_FUNCTION
479 void join(
volatile unsigned &
update ,
const volatile unsigned & input )
const {
update += input ; }
495 struct ElementComputationConstantCoefficient {
500 KOKKOS_INLINE_FUNCTION
501 double operator()(
double pt[],
unsigned ensemble_rank)
const
512 template <
typename Scalar,
typename MeshScalar,
typename Device >
513 class ExponentialKLCoefficient {
517 template <
typename T1,
typename T2 = MeshScalar,
typename T3 = Device>
524 typedef typename RandomVariableView::size_type
size_type;
541 const MeshScalar mean ,
542 const MeshScalar variance ,
543 const MeshScalar correlation_length ,
552 solverParams.
set(
"Number of KL Terms",
int(num_rv));
553 solverParams.
set(
"Mean", mean);
554 solverParams.
set(
"Standard Deviation",
std::sqrt(variance));
557 correlation_lengths(ndim, correlation_length);
558 solverParams.
set(
"Domain Upper Bounds", domain_upper);
559 solverParams.
set(
"Domain Lower Bounds", domain_lower);
560 solverParams.
set(
"Correlation Lengths", correlation_lengths);
573 KOKKOS_INLINE_FUNCTION
576 KOKKOS_INLINE_FUNCTION
579 KOKKOS_INLINE_FUNCTION
592 template<
class FiniteElementMeshType ,
class SparseMatrixType
593 ,
class CoeffFunctionType = ElementComputationConstantCoefficient
595 class ElementComputation ;
599 typename ScalarType ,
class CoeffFunctionType >
603 , CoeffFunctionType >
618 typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space >
vector_type ;
627 static const bool use_team = local_vector_view_traits::use_team;
629 static const unsigned SpatialDim = element_data_type::spatial_dimension ;
630 static const unsigned TensorDim = SpatialDim * SpatialDim ;
631 static const unsigned ElemNodeCount = element_data_type::element_node_count ;
632 static const unsigned FunctionCount = element_data_type::function_count ;
633 static const unsigned IntegrationCount = element_data_type::integration_count ;
639 typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space >
elem_matrices_type ;
669 , elem_node_ids( rhs.elem_node_ids )
670 , node_coords( rhs.node_coords )
671 , elem_graph( rhs.elem_graph )
672 , elem_jacobians( rhs.elem_jacobians )
673 , elem_residuals( rhs.elem_residuals )
674 , solution( rhs.solution )
675 , residual( rhs.residual )
676 , jacobian( rhs.jacobian )
677 , coeff_function( rhs.coeff_function )
678 , dev_config( rhs.dev_config )
684 const CoeffFunctionType & arg_coeff_function ,
691 , elem_node_ids( arg_mesh.elem_node() )
692 , node_coords( arg_mesh.node_coord() )
693 , elem_graph( arg_elem_graph )
696 , solution( arg_solution )
697 , residual( arg_residual )
698 , jacobian( arg_jacobian )
699 , coeff_function( arg_coeff_function )
700 , dev_config( arg_dev_config )
707 const size_t nelem = elem_node_ids.extent(0);
710 const size_t league_size =
712 Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
713 parallel_for( config , *
this );
716 parallel_for( nelem , *
this );
722 static const unsigned FLOPS_transform_gradients =
723 FunctionCount * TensorDim * 2 +
727 KOKKOS_INLINE_FUNCTION
729 const double grad[][ FunctionCount ] ,
735 double dpsidz[] )
const
737 enum { j11 = 0 , j12 = 1 , j13 = 2 ,
738 j21 = 3 , j22 = 4 , j23 = 5 ,
739 j31 = 6 , j32 = 7 , j33 = 8 };
743 double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
745 for(
unsigned i = 0; i < FunctionCount ; ++i ) {
746 const double x1 =
x[i] ;
747 const double x2 =
y[i] ;
748 const double x3 = z[i] ;
750 const double g1 = grad[0][i] ;
751 const double g2 = grad[1][i] ;
752 const double g3 = grad[2][i] ;
769 double invJ[ TensorDim ] = {
770 static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
771 static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
772 static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
774 static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
775 static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
776 static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
778 static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
779 static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
780 static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
782 const double detJ = J[j11] * invJ[j11] +
786 const double detJinv = 1.0 / detJ ;
788 for (
unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
792 for(
unsigned i = 0; i < FunctionCount ; ++i ) {
793 const double g0 = grad[0][i];
794 const double g1 = grad[1][i];
795 const double g2 = grad[2][i];
797 dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
798 dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
799 dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
805 KOKKOS_INLINE_FUNCTION
808 const double dpsidx[] ,
809 const double dpsidy[] ,
810 const double dpsidz[] ,
813 const double integ_weight ,
814 const double bases_vals[] ,
823 for (
unsigned m = 0 ; m < FunctionCount ; m++ ) {
824 value_at_pt += dof_values[m] * bases_vals[m] ;
825 gradx_at_pt += dof_values[m] * dpsidx[m] ;
826 grady_at_pt += dof_values[m] * dpsidy[m] ;
827 gradz_at_pt += dof_values[m] * dpsidz[m] ;
831 const local_scalar_type res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
837 for (
unsigned m = 0; m < FunctionCount; ++m) {
839 const double bases_val_m = bases_vals[m];
840 const double dpsidx_m = dpsidx[m] ;
841 const double dpsidy_m = dpsidy[m] ;
842 const double dpsidz_m = dpsidz[m] ;
844 elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt +
845 dpsidy_m * grady_at_pt +
846 dpsidz_m * gradz_at_pt ) +
847 res_val * bases_val_m ;
849 for(
unsigned n = 0;
n < FunctionCount;
n++) {
851 mat[
n] += k_detJ_weight * ( dpsidx_m * dpsidx[
n] +
852 dpsidy_m * dpsidy[
n] +
853 dpsidz_m * dpsidz[
n] ) +
854 mat_val * bases_val_m * bases_vals[
n];
859 KOKKOS_INLINE_FUNCTION
860 void operator()(
const typename TeamPolicy< execution_space >::member_type & dev )
const
863 const unsigned num_ensemble_threads = dev_config.
block_dim.
x ;
864 const unsigned num_element_threads = dev_config.
block_dim.
y ;
865 const unsigned element_rank = dev.team_rank() / num_ensemble_threads ;
866 const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
868 const unsigned ielem =
869 dev.league_rank() * num_element_threads + element_rank;
871 if (ielem >= elem_node_ids.extent(0))
874 (*this)( ielem, ensemble_rank );
878 KOKKOS_INLINE_FUNCTION
880 const unsigned ensemble_rank = 0 )
const
883 local_vector_view_traits::create_local_view(solution,
886 local_vector_view_traits::create_local_view(residual,
889 local_matrix_view_traits::create_local_view(jacobian.
values,
894 double x[ FunctionCount ] ;
895 double y[ FunctionCount ] ;
896 double z[ FunctionCount ] ;
898 unsigned node_index[ ElemNodeCount ];
900 for (
unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
901 const unsigned ni = elem_node_ids( ielem , i );
905 x[i] = node_coords( ni , 0 );
906 y[i] = node_coords( ni , 1 );
907 z[i] = node_coords( ni , 2 );
909 val[i] = local_solution( ni );
916 for(
unsigned i = 0; i < FunctionCount ; i++ ) {
918 for(
unsigned j = 0;
j < FunctionCount ;
j++){
924 for (
unsigned i = 0 ; i < IntegrationCount ; ++i ) {
925 double dpsidx[ FunctionCount ] ;
926 double dpsidy[ FunctionCount ] ;
927 double dpsidz[ FunctionCount ] ;
932 double pt[] = {0.0, 0.0, 0.0};
936 if ( ! coeff_function.is_constant ) {
937 for (
unsigned j = 0 ;
j < FunctionCount ; ++
j ) {
940 pt[2] += z[
j] * elem_data.
values[i][
j] ;
945 coeff_k = coeff_function(pt, ensemble_rank);
949 transform_gradients( elem_data.
gradients[i] ,
x ,
y , z ,
950 dpsidx , dpsidy , dpsidz );
952 contributeResidualJacobian(
val , dpsidx , dpsidy , dpsidz ,
956 elem_vec , elem_mat );
959 for(
unsigned i = 0 ; i < FunctionCount ; i++ ) {
960 const unsigned row = node_index[i] ;
961 if ( row < residual.extent(0) ) {
962 atomic_add( & local_residual( row ) , elem_vec[i] );
964 for(
unsigned j = 0 ;
j < FunctionCount ;
j++ ) {
965 const unsigned entry = elem_graph( ielem , i ,
j );
966 if ( entry != ~0u ) {
967 atomic_add( & local_jacobian_values( entry ) , elem_mat[i][
j] );
977 template<
class FixtureType ,
class SparseMatrixType >
978 class DirichletComputation ;
981 typename ScalarType >
982 class DirichletComputation<
998 typedef Kokkos::View< scalar_type* , execution_space >
vector_type ;
1006 static const bool use_team = local_vector_view_traits::use_team;
1021 const unsigned bc_plane ;
1022 const unsigned node_count ;
1031 const unsigned arg_bc_plane ,
1035 : node_coords( arg_mesh.node_coord() )
1036 , solution( arg_solution )
1037 , jacobian( arg_jacobian )
1038 , residual( arg_residual )
1039 , bc_lower_value( arg_bc_lower_value )
1040 , bc_upper_value( arg_bc_upper_value )
1043 , bc_plane( arg_bc_plane )
1044 , node_count( arg_mesh.node_count_owned() )
1046 , dev_config( arg_dev_config )
1048 parallel_for( node_count , *
this );
1056 const size_t league_size =
1058 Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1059 parallel_for( config , *
this );
1062 parallel_for( node_count , *
this );
1067 KOKKOS_INLINE_FUNCTION
1068 void operator()(
const typename TeamPolicy< execution_space >::member_type & dev )
const
1071 const unsigned num_ensemble_threads = dev_config.
block_dim.
x ;
1072 const unsigned num_node_threads = dev_config.
block_dim.
y ;
1073 const unsigned node_rank = dev.team_rank() / num_ensemble_threads ;
1074 const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1076 const unsigned inode = dev.league_rank() * num_node_threads + node_rank;
1078 if (inode >= node_count)
1081 (*this)( inode, ensemble_rank );
1085 KOKKOS_INLINE_FUNCTION
1087 const unsigned ensemble_rank = 0)
const
1090 local_vector_view_traits::create_local_view(residual,
1093 local_matrix_view_traits::create_local_view(jacobian.
values,
1101 const unsigned iBeg = jacobian.
graph.row_map[inode];
1102 const unsigned iEnd = jacobian.
graph.row_map[inode+1];
1105 const bool bc_lower = c <= bc_lower_limit ;
1106 const bool bc_upper = bc_upper_limit <= c ;
1109 solution(inode) = bc_lower ? bc_lower_value : (
1110 bc_upper ? bc_upper_value : 0 );
1113 if ( bc_lower || bc_upper ) {
1115 local_residual(inode) = 0 ;
1120 for(
unsigned i = iBeg ; i < iEnd ; ++i ) {
1121 local_jacobian_values(i) =
int(inode) ==
int(jacobian.
graph.entries(i)) ? 1 : 0 ;
1129 for(
unsigned i = iBeg ; i < iEnd ; ++i ) {
1130 const unsigned cnode = jacobian.
graph.entries(i) ;
1133 if ( ( cc <= bc_lower_limit ) || ( bc_upper_limit <= cc ) ) {
1134 local_jacobian_values(i) = 0 ;
1142 template<
typename FixtureType ,
typename VectorType >
1184 Kokkos::parallel_reduce(
solution.extent(0) , *this , response );
1190 KOKKOS_INLINE_FUNCTION
1195 const double z[] )
const
1197 enum { j11 = 0 , j12 = 1 , j13 = 2 ,
1198 j21 = 3 , j22 = 4 , j23 = 5 ,
1199 j31 = 6 , j32 = 7 , j33 = 8 };
1203 double J[
TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1206 const double x1 =
x[i] ;
1207 const double x2 =
y[i] ;
1208 const double x3 = z[i] ;
1210 const double g1 = grad[0][i] ;
1211 const double g2 = grad[1][i] ;
1212 const double g3 = grad[2][i] ;
1230 static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
1231 static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
1232 static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
1234 static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
1235 static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
1236 static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
1238 static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
1239 static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
1240 static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
1242 const double detJ = J[j11] * invJ[j11] +
1243 J[j21] * invJ[j12] +
1244 J[j31] * invJ[j13] ;
1249 KOKKOS_INLINE_FUNCTION
1253 const double integ_weight ,
1254 const double bases_vals[] )
const
1260 value_at_pt += dof_values[m] * bases_vals[m] ;
1264 value_at_pt * value_at_pt * detJ * integ_weight ;
1266 return elem_response;
1300 KOKKOS_INLINE_FUNCTION
1304 response += (u * u) /
fixture.node_count_global();
1307 KOKKOS_INLINE_FUNCTION
1311 KOKKOS_INLINE_FUNCTION
1314 { response += input ; }
1325 #if defined( __CUDACC__ )