Sacado Package Browser (Single Doxygen Collection)
Version of the Day
test
performance
fenl_assembly
TestAssembly.cpp
Go to the documentation of this file.
1
// @HEADER
2
// ***********************************************************************
3
//
4
// Stokhos Package
5
// Copyright (2009) Sandia Corporation
6
//
7
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8
// license for use of this work by or on behalf of the U.S. Government.
9
//
10
// Redistribution and use in source and binary forms, with or without
11
// modification, are permitted provided that the following conditions are
12
// met:
13
//
14
// 1. Redistributions of source code must retain the above copyright
15
// notice, this list of conditions and the following disclaimer.
16
//
17
// 2. Redistributions in binary form must reproduce the above copyright
18
// notice, this list of conditions and the following disclaimer in the
19
// documentation and/or other materials provided with the distribution.
20
//
21
// 3. Neither the name of the Corporation nor the names of the
22
// contributors may be used to endorse or promote products derived from
23
// this software without specific prior written permission.
24
//
25
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
//
37
// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38
//
39
// ***********************************************************************
40
// @HEADER
41
42
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>

// Tests
#include "TestAssembly.hpp"

// Devices
#include "Kokkos_Core.hpp"

// Utilities
#include "Teuchos_CommandLineProcessor.hpp"
#include "Teuchos_StandardCatchMacros.hpp"
#ifdef KOKKOS_ENABLE_CUDA
#include "cuda_runtime_api.h"
#endif

// For vtune
#include <sys/types.h>
#include <unistd.h>
60
61
int
main
(
int
argc,
char
*argv[])
62
{
63
bool
success =
true
;
64
bool
verbose =
false
;
65
try
{
66
67
// Setup command line options
68
Teuchos::CommandLineProcessor
CLP;
69
CLP.
setDocString
(
70
"This test performance of MP::Vector FEM assembly.\n"
);
71
int
nGrid = 0;
72
CLP.
setOption
(
"n"
, &nGrid,
"Number of mesh points in each direction. Set to zero to use a range"
);
73
int
nGridBegin = 8;
74
CLP.
setOption
(
"n-begin"
, &nGridBegin,
"Beginning number of mesh points in each direction."
);
75
int
nGridEnd = 48;
76
CLP.
setOption
(
"n-end"
, &nGridEnd,
"Ending number of mesh points in each direction."
);
77
int
nGridStep = 8;
78
CLP.
setOption
(
"n-step"
, &nGridStep,
"Increment in number of mesh points in each direction."
);
79
int
nIter = 10;
80
CLP.
setOption
(
"ni"
, &nIter,
"Number of assembly iterations"
);
81
bool
print =
false
;
82
CLP.
setOption
(
"print"
,
"no-print"
, &print,
"Print debugging output"
);
83
bool
check
=
false
;
84
CLP.
setOption
(
"check"
,
"no-check"
, &
check
,
"Check correctness"
);
85
bool
quadratic =
false
;
86
CLP.
setOption
(
"quadratic"
,
"linear"
, &quadratic,
"Use quadratic basis functions"
);
87
int
num_cores = -1;
88
CLP.
setOption
(
"cores"
, &num_cores,
89
"Number of CPU cores to use (defaults to all)"
);
90
int
num_hyper_threads = -1;
91
CLP.
setOption
(
"hyperthreads"
, &num_hyper_threads,
92
"Number of hyper threads per core to use (defaults to all)"
);
93
#ifdef KOKKOS_ENABLE_THREADS
94
bool
threads =
true
;
95
CLP.
setOption
(
"threads"
,
"no-threads"
, &threads,
"Enable Threads device"
);
96
#endif
97
#ifdef KOKKOS_ENABLE_OPENMP
98
bool
openmp =
true
;
99
CLP.
setOption
(
"openmp"
,
"no-openmp"
, &openmp,
"Enable OpenMP device"
);
100
#endif
101
#ifdef KOKKOS_ENABLE_CUDA
102
bool
cuda =
true
;
103
CLP.
setOption
(
"cuda"
,
"no-cuda"
, &cuda,
"Enable Cuda device"
);
104
int
device_id = 0;
105
CLP.
setOption
(
"device"
, &device_id,
"CUDA device ID."
);
106
#endif
107
bool
vtune =
false
;
108
CLP.
setOption
(
"vtune"
,
"no-vtune"
, &vtune,
"connect to vtune"
);
109
CLP.
parse
( argc, argv );
110
111
if
(nGrid > 0) {
112
nGridBegin = nGrid;
113
nGridEnd = nGrid;
114
}
115
116
// Connect to VTune if requested
117
if
(vtune) {
118
std::stringstream cmd;
119
pid_t my_os_pid=getpid();
120
const
std::string vtune_loc =
121
"amplxe-cl"
;
122
const
std::string output_dir =
"./vtune/vtune.0"
;
123
cmd << vtune_loc
124
<<
" -collect hotspots -result-dir "
<< output_dir
125
<<
" -target-pid "
<< my_os_pid <<
" &"
;
126
std::cout << cmd.str() << std::endl;
127
system(cmd.str().c_str());
128
system(
"sleep 10"
);
129
}
130
131
Kokkos::initialize(argc,argv);
132
#ifdef KOKKOS_ENABLE_THREADS
133
if
(threads) {
134
typedef
Kokkos::Threads Device;
135
136
std::cout << std::endl
137
<<
"Threads performance with "
<< Kokkos::Threads::concurrency()
138
<<
" threads:"
<< std::endl;
139
140
performance_test_driver<Device>(
141
print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check
);
142
}
143
#endif
144
145
#ifdef KOKKOS_ENABLE_OPENMP
146
if
(openmp) {
147
typedef
Kokkos::OpenMP Device;
148
149
std::cout << std::endl
150
<<
"OpenMP performance with "
<< Kokkos::OpenMP::concurrency()
151
<<
" threads:"
<< std::endl;
152
153
performance_test_driver<Device>(
154
print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check
);
155
156
}
157
#endif
158
159
#ifdef KOKKOS_ENABLE_CUDA
160
if
(cuda) {
161
typedef
Kokkos::Cuda Device;
162
163
cudaDeviceProp deviceProp;
164
cudaGetDeviceProperties(&deviceProp, device_id);
165
std::cout << std::endl
166
<<
"CUDA performance performance with device "
<< device_id
167
<<
" ("
168
<< deviceProp.name <<
"):"
169
<< std::endl;
170
171
performance_test_driver<Device>(
172
print, nIter, nGridBegin, nGridEnd, nGridStep, quadratic,
check
);
173
174
}
175
#endif
176
Kokkos::finalize();
177
}
178
TEUCHOS_STANDARD_CATCH_STATEMENTS
(verbose, std::cerr, success);
179
180
if
(success)
181
return
0;
182
return
-1;
183
}
main
int main(int argc, char *argv[])
Definition:
TestAssembly.cpp:61
Teuchos_StandardCatchMacros.hpp
Teuchos::CommandLineProcessor::setDocString
void setDocString(const char doc_string[])
Teuchos::CommandLineProcessor::setOption
void setOption(const char option_true[], const char option_false[], bool *option_val, const char documentation[]=NULL)
Teuchos_CommandLineProcessor.hpp
TestAssembly.hpp
Teuchos::CommandLineProcessor
Teuchos::CommandLineProcessor::parse
EParseCommandLineReturn parse(int argc, char *argv[], std::ostream *errout=&std::cerr) const
check
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
TEUCHOS_STANDARD_CATCH_STATEMENTS
#define TEUCHOS_STANDARD_CATCH_STATEMENTS(VERBOSE, ERR_STREAM, SUCCESS_FLAG)
Generated by
1.8.16