Sacado Package Browser (Single Doxygen Collection)
Version of the Day
test
performance
fenl_assembly_view
view/TestAssembly.cpp
Go to the documentation of this file.
1
// @HEADER
2
// ***********************************************************************
3
//
4
// Stokhos Package
5
// Copyright (2009) Sandia Corporation
6
//
7
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8
// license for use of this work by or on behalf of the U.S. Government.
9
//
10
// Redistribution and use in source and binary forms, with or without
11
// modification, are permitted provided that the following conditions are
12
// met:
13
//
14
// 1. Redistributions of source code must retain the above copyright
15
// notice, this list of conditions and the following disclaimer.
16
//
17
// 2. Redistributions in binary form must reproduce the above copyright
18
// notice, this list of conditions and the following disclaimer in the
19
// documentation and/or other materials provided with the distribution.
20
//
21
// 3. Neither the name of the Corporation nor the names of the
22
// contributors may be used to endorse or promote products derived from
23
// this software without specific prior written permission.
24
//
25
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
//
37
// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38
//
39
// ***********************************************************************
40
// @HEADER
41
42
#include <iostream>
43
44
// Tests
45
#include "
TestAssembly.hpp
"
46
47
// Devices
48
#include "Kokkos_Core.hpp"
49
50
// Utilities
51
#include "
Teuchos_CommandLineProcessor.hpp
"
52
#include "
Teuchos_StandardCatchMacros.hpp
"
53
#ifdef KOKKOS_ENABLE_CUDA
54
#include "cuda_runtime_api.h"
55
#endif
56
57
// For vtune
58
#include <sys/types.h>
59
#include <unistd.h>
60
61
int
main
(
int
argc,
char
*argv[])
62
{
63
bool
success =
true
;
64
bool
verbose =
false
;
65
try
{
66
67
const
size_t
num_sockets = Kokkos::hwloc::get_available_numa_count();
68
const
size_t
num_cores_per_socket =
69
Kokkos::hwloc::get_available_cores_per_numa();
70
const
size_t
num_threads_per_core =
71
Kokkos::hwloc::get_available_threads_per_core();
72
73
// Setup command line options
74
Teuchos::CommandLineProcessor
CLP;
75
CLP.
setDocString
(
76
"This test performance of MP::Vector FEM assembly.\n"
);
77
int
nGrid = 0;
78
CLP.
setOption
(
"n"
, &nGrid,
"Number of mesh points in each direction. Set to zero to use a range"
);
79
int
nGridBegin = 8;
80
CLP.
setOption
(
"n-begin"
, &nGridBegin,
"Beginning number of mesh points in each direction."
);
81
int
nGridEnd = 48;
82
CLP.
setOption
(
"n-end"
, &nGridEnd,
"Ending number of mesh points in each direction."
);
83
int
nGridStep = 8;
84
CLP.
setOption
(
"n-step"
, &nGridStep,
"Increment in number of mesh points in each direction."
);
85
int
nIter = 10;
86
CLP.
setOption
(
"ni"
, &nIter,
"Number of assembly iterations"
);
87
bool
print =
false
;
88
CLP.
setOption
(
"print"
,
"no-print"
, &print,
"Print debugging output"
);
89
bool
check
=
false
;
90
CLP.
setOption
(
"check"
,
"no-check"
, &
check
,
"Check correctness"
);
91
bool
quadratic =
false
;
92
CLP.
setOption
(
"quadratic"
,
"linear"
, &quadratic,
"Use quadratic basis functions"
);
93
int
num_cores = num_cores_per_socket * num_sockets;
94
CLP.
setOption
(
"cores"
, &num_cores,
95
"Number of CPU cores to use (defaults to all)"
);
96
int
num_hyper_threads = num_threads_per_core;
97
CLP.
setOption
(
"hyperthreads"
, &num_hyper_threads,
98
"Number of hyper threads per core to use (defaults to all)"
);
99
#ifdef KOKKOS_ENABLE_THREADS
100
bool
threads =
true
;
101
CLP.
setOption
(
"threads"
,
"no-threads"
, &threads,
"Enable Threads device"
);
102
#endif
103
#ifdef KOKKOS_ENABLE_OPENMP
104
bool
openmp =
true
;
105
CLP.
setOption
(
"openmp"
,
"no-openmp"
, &openmp,
"Enable OpenMP device"
);
106
#endif
107
#ifdef KOKKOS_ENABLE_CUDA
108
bool
cuda =
true
;
109
CLP.
setOption
(
"cuda"
,
"no-cuda"
, &cuda,
"Enable Cuda device"
);
110
int
device_id = 0;
111
CLP.
setOption
(
"device"
, &device_id,
"CUDA device ID."
);
112
#endif
113
bool
vtune =
false
;
114
CLP.
setOption
(
"vtune"
,
"no-vtune"
, &vtune,
"connect to vtune"
);
115
CLP.
parse
( argc, argv );
116
117
if
(nGrid > 0) {
118
nGridBegin = nGrid;
119
nGridEnd = nGrid;
120
}
121
122
// Connect to VTune if requested
123
if
(vtune) {
124
std::stringstream cmd;
125
pid_t my_os_pid=getpid();
126
const
std::string vtune_loc =
127
"amplxe-cl"
;
128
const
std::string output_dir =
"./vtune/vtune.0"
;
129
cmd << vtune_loc
130
<<
" -collect hotspots -result-dir "
<< output_dir
131
<<
" -target-pid "
<< my_os_pid <<
" &"
;
132
std::cout << cmd.str() << std::endl;
133
system(cmd.str().c_str());
134
system(
"sleep 10"
);
135
}
136
137
Kokkos::initialize(argc,argv);
138
#ifdef KOKKOS_ENABLE_THREADS
139
if
(threads) {
140
typedef
Kokkos::Threads Device;
141
142
std::cout << std::endl
143
<<
"Threads performance with "
<< num_cores*num_hyper_threads
144
<<
" threads:"
<< std::endl;
145
146
performance_test_driver<Device>(
147
print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check
);
148
}
149
#endif
150
151
#ifdef KOKKOS_ENABLE_OPENMP
152
if
(openmp) {
153
typedef
Kokkos::OpenMP Device;
154
155
std::cout << std::endl
156
<<
"OpenMP performance with "
<< num_cores*num_hyper_threads
157
<<
" threads:"
<< std::endl;
158
159
performance_test_driver<Device>(
160
print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check
);
161
}
162
#endif
163
164
#ifdef KOKKOS_ENABLE_CUDA
165
if
(cuda) {
166
typedef
Kokkos::Cuda Device;
167
168
cudaDeviceProp deviceProp;
169
cudaGetDeviceProperties(&deviceProp, device_id);
170
std::cout << std::endl
171
<<
"CUDA performance performance with device "
<< device_id
172
<<
" ("
173
<< deviceProp.name <<
"):"
174
<< std::endl;
175
176
performance_test_driver<Device>(
177
print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check
);
178
179
}
180
#endif
181
Kokkos::finalize();
182
}
183
TEUCHOS_STANDARD_CATCH_STATEMENTS
(verbose, std::cerr, success);
184
185
if
(success)
186
return
0;
187
return
-1;
188
}
main
int main(int argc, char *argv[])
Definition:
view/TestAssembly.cpp:61
Teuchos_StandardCatchMacros.hpp
Teuchos::CommandLineProcessor::setDocString
void setDocString(const char doc_string[])
Teuchos::CommandLineProcessor::setOption
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
Teuchos_CommandLineProcessor.hpp
TestAssembly.hpp
Teuchos::CommandLineProcessor
Teuchos::CommandLineProcessor::parse
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
check
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
TEUCHOS_STANDARD_CATCH_STATEMENTS
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
Generated by
1.8.16