Epetra Package Browser (Single Doxygen Collection)  Development
test/BasicPerfTest_LL/cxx_main.cpp
Go to the documentation of this file.
1 //@HEADER
2 // ************************************************************************
3 //
4 // Epetra: Linear Algebra Services Package
5 // Copyright 2011 Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 //@HEADER
41 
42 
43 #define EPETRA_HAVE_JADMATRIX
44 #define EPETRA_VERY_SHORT_PERFTEST
45 #define EPETRA_HAVE_STATICPROFILE
46 #include "Epetra_Map.h"
47 #include "Epetra_LocalMap.h"
48 #include "Epetra_BlockMap.h"
49 #include "Epetra_Time.h"
50 #include "Epetra_CrsMatrix.h"
51 #include "Epetra_VbrMatrix.h"
52 #include "Epetra_Vector.h"
53 #include "Epetra_IntVector.h"
54 #include "Epetra_MultiVector.h"
57 #include "Epetra_Flops.h"
58 #ifdef EPETRA_MPI
59 #include "Epetra_MpiComm.h"
60 #include "mpi.h"
61 #else
62 #include "Epetra_SerialComm.h"
63 #endif
64 #include "../epetra_test_err.h"
65 #include "Epetra_Version.h"
66 #ifdef EPETRA_HAVE_JADMATRIX
67 #include "Epetra_JadMatrix.h"
68 #endif
69 
70 // prototypes
71 
72 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
73  int * xoff, int * yoff,
74  const Epetra_Comm &comm, bool verbose, bool summary,
75  Epetra_Map *& map,
77  Epetra_Vector *& b,
78  Epetra_Vector *& bt,
79  Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
80 
81 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
82  int * xoff, int * yoff, int nrhs,
83  const Epetra_Comm &comm, bool verbose, bool summary,
84  Epetra_Map *& map,
86  Epetra_MultiVector *& b,
87  Epetra_MultiVector *& bt,
88  Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
89 
90 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
91  int * xoff, int * yoff,
92  int nsizes, int * sizes,
93  const Epetra_Comm &comm, bool verbose, bool summary,
94  Epetra_BlockMap *& map,
96  Epetra_Vector *& b,
97  Epetra_Vector *& bt,
98  Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
99 
100 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
101  int * xoff, int * yoff,
102  int nsizes, int * sizes, int nrhs,
103  const Epetra_Comm &comm, bool verbose, bool summary,
104  Epetra_BlockMap *& map,
105  Epetra_VbrMatrix *& A,
106  Epetra_MultiVector *& b,
107  Epetra_MultiVector *& bt,
108  Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
109 
110 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
111  int myPID, long long * & myGlobalElements);
112 
114  Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
115 #ifdef EPETRA_HAVE_JADMATRIX
117  Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
118 #endif
121  bool StaticProfile, bool verbose, bool summary);
122 int main(int argc, char *argv[])
123 {
124  int ierr = 0;
125  double elapsed_time;
126  double total_flops;
127  double MFLOPs;
128 
129 
130 #ifdef EPETRA_MPI
131 
132  // Initialize MPI
133  MPI_Init(&argc,&argv);
134  Epetra_MpiComm comm( MPI_COMM_WORLD );
135 #else
136  Epetra_SerialComm comm;
137 #endif
138 
139  bool verbose = false;
140  bool summary = false;
141 
142  // Check if we should print verbose results to standard out
143  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true;
144 
145  // Check if we should print verbose results to standard out
146  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true;
147 
148  if(argc < 6) {
149  cerr << "Usage: " << argv[0]
150  << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl
151  << "where:" << endl
152  << "NumNodesX - Number of mesh nodes in X direction per processor" << endl
153  << "NumNodesY - Number of mesh nodes in Y direction per processor" << endl
154  << "NumProcX - Number of processors to use in X direction" << endl
155  << "NumProcY - Number of processors to use in Y direction" << endl
156  << "NumPoints - Number of points to use in stencil (5, 9 or 25 only)" << endl
157  << "-v|-s - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl
158  << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
159  << " Serial example:" << endl
160  << argv[0] << " 16 12 1 1 25 -v" << endl
161  << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl
162  << " MPI example:" << endl
163  << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl
164  << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl
165  << " in the X direction and 8 in the Y direction. Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl
166  << endl;
167  return(1);
168 
169  }
170  //char tmp;
171  //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl;
172  //if (comm.MyPID()==0) cin >> tmp;
173  //comm.Barrier();
174 
175  comm.SetTracebackMode(0); // This should shut down any error traceback reporting
176  if (verbose && comm.MyPID()==0)
177  cout << Epetra_Version() << endl << endl;
178  if (summary && comm.MyPID()==0) {
179  if (comm.NumProc()==1)
180  cout << Epetra_Version() << endl << endl;
181  else
182  cout << endl << endl; // Print two blank line to keep output columns lined up
183  }
184 
185  if (verbose) cout << comm <<endl;
186 
187 
188  // Redefine verbose to only print on PE 0
189 
190  if (verbose && comm.MyPID()!=0) verbose = false;
191  if (summary && comm.MyPID()!=0) summary = false;
192 
193  int numNodesX = atoi(argv[1]);
194  int numNodesY = atoi(argv[2]);
195  int numProcsX = atoi(argv[3]);
196  int numProcsY = atoi(argv[4]);
197  int numPoints = atoi(argv[5]);
198 
199  if (verbose || (summary && comm.NumProc()==1)) {
200  cout << " Number of local nodes in X direction = " << numNodesX << endl
201  << " Number of local nodes in Y direction = " << numNodesY << endl
202  << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl
203  << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
204  << " Number of local nonzero entries = " << numNodesX*numNodesY*numPoints << endl
205  << " Number of global nonzero entries = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
206  << " Number of Processors in X direction = " << numProcsX << endl
207  << " Number of Processors in Y direction = " << numProcsY << endl
208  << " Number of Points in stencil = " << numPoints << endl << endl;
209  }
210  // Print blank line to keep output columns lined up
211  if (summary && comm.NumProc()>1)
212  cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl;
213 
214  if (numProcsX*numProcsY!=comm.NumProc()) {
215  cerr << "Number of processors = " << comm.NumProc() << endl
216  << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl;
217  return(1);
218  }
219 
220  if (numPoints!=5 && numPoints!=9 && numPoints!=25) {
221  cerr << "Number of points specified = " << numPoints << endl
222  << " is not 5, 9, 25" << endl << endl;
223  return(1);
224  }
225 
226  if (numNodesX*numNodesY<=0) {
227  cerr << "Product of number of nodes is <= zero" << endl << endl;
228  return(1);
229  }
230 
231  Epetra_IntSerialDenseVector Xoff, XLoff, XUoff;
232  Epetra_IntSerialDenseVector Yoff, YLoff, YUoff;
233  if (numPoints==5) {
234 
235  // Generate a 5-point 2D Finite Difference matrix
236  Xoff.Size(5);
237  Yoff.Size(5);
238  Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0; Xoff[4] = 0;
239  Yoff[0] = 0; Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1;
240 
241  // Generate a 2-point 2D Lower triangular Finite Difference matrix
242  XLoff.Size(2);
243  YLoff.Size(2);
244  XLoff[0] = -1; XLoff[1] = 0;
245  YLoff[0] = 0; YLoff[1] = -1;
246 
247  // Generate a 3-point 2D upper triangular Finite Difference matrix
248  XUoff.Size(3);
249  YUoff.Size(3);
250  XUoff[0] = 0; XUoff[1] = 1; XUoff[2] = 0;
251  YUoff[0] = 0; YUoff[1] = 0; YUoff[2] = 1;
252  }
253  else if (numPoints==9) {
254  // Generate a 9-point 2D Finite Difference matrix
255  Xoff.Size(9);
256  Yoff.Size(9);
257  Xoff[0] = -1; Xoff[1] = 0; Xoff[2] = 1;
258  Yoff[0] = -1; Yoff[1] = -1; Yoff[2] = -1;
259  Xoff[3] = -1; Xoff[4] = 0; Xoff[5] = 1;
260  Yoff[3] = 0; Yoff[4] = 0; Yoff[5] = 0;
261  Xoff[6] = -1; Xoff[7] = 0; Xoff[8] = 1;
262  Yoff[6] = 1; Yoff[7] = 1; Yoff[8] = 1;
263 
264  // Generate a 5-point lower triangular 2D Finite Difference matrix
265  XLoff.Size(5);
266  YLoff.Size(5);
267  XLoff[0] = -1; XLoff[1] = 0; Xoff[2] = 1;
268  YLoff[0] = -1; YLoff[1] = -1; Yoff[2] = -1;
269  XLoff[3] = -1; XLoff[4] = 0;
270  YLoff[3] = 0; YLoff[4] = 0;
271 
272  // Generate a 4-point upper triangular 2D Finite Difference matrix
273  XUoff.Size(4);
274  YUoff.Size(4);
275  XUoff[0] = 1;
276  YUoff[0] = 0;
277  XUoff[1] = -1; XUoff[2] = 0; XUoff[3] = 1;
278  YUoff[1] = 1; YUoff[2] = 1; YUoff[3] = 1;
279 
280  }
281  else {
282  // Generate a 25-point 2D Finite Difference matrix
283  Xoff.Size(25);
284  Yoff.Size(25);
285  int xi = 0, yi = 0;
286  int xo = -2, yo = -2;
287  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
288  Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ;
289  xo = -2, yo++;
290  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
291  Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ;
292  xo = -2, yo++;
293  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
294  Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ;
295  xo = -2, yo++;
296  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
297  Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ;
298  xo = -2, yo++;
299  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
300  Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ;
301 
302  // Generate a 13-point lower triangular 2D Finite Difference matrix
303  XLoff.Size(13);
304  YLoff.Size(13);
305  xi = 0, yi = 0;
306  xo = -2, yo = -2;
307  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
308  YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ;
309  xo = -2, yo++;
310  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
311  YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ;
312  xo = -2, yo++;
313  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
314  YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ;
315 
316  // Generate a 13-point upper triangular 2D Finite Difference matrix
317  XUoff.Size(13);
318  YUoff.Size(13);
319  xi = 0, yi = 0;
320  xo = 0, yo = 0;
321  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
322  YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ;
323  xo = -2, yo++;
324  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
325  YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ;
326  xo = -2, yo++;
327  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
328  YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ;
329 
330  }
331 
332  Epetra_Map * map;
333  Epetra_Map * mapL;
334  Epetra_Map * mapU;
336  Epetra_CrsMatrix * L;
337  Epetra_CrsMatrix * U;
338  Epetra_MultiVector * b;
339  Epetra_MultiVector * bt;
340  Epetra_MultiVector * xexact;
341  Epetra_MultiVector * bL;
342  Epetra_MultiVector * btL;
343  Epetra_MultiVector * xexactL;
344  Epetra_MultiVector * bU;
345  Epetra_MultiVector * btU;
346  Epetra_MultiVector * xexactU;
347  Epetra_SerialDenseVector resvec(0);
348 
349  //Timings
350  Epetra_Flops flopcounter;
351  Epetra_Time timer(comm);
352 
353 #ifdef EPETRA_VERY_SHORT_PERFTEST
354  int jstop = 1;
355 #elif EPETRA_SHORT_PERFTEST
356  int jstop = 1;
357 #else
358  int jstop = 2;
359 #endif
360  for (int j=0; j<jstop; j++) {
361  for (int k=1; k<17; k++) {
362 #ifdef EPETRA_VERY_SHORT_PERFTEST
363  if (k<3 || (k%4==0 && k<9)) {
364 #elif EPETRA_SHORT_PERFTEST
365  if (k<6 || k%4==0) {
366 #else
367  if (k<7 || k%2==0) {
368 #endif
369  int nrhs=k;
370  if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with ";
371 
372  bool StaticProfile = (j!=0);
373  if (verbose) {
374  if (StaticProfile) cout << " static profile\n";
375  else cout << " dynamic profile\n";
376  }
377  GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
378  Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary,
379  map, A, b, bt, xexact, StaticProfile, false);
380 
381 
382 #ifdef EPETRA_HAVE_JADMATRIX
383 
384  timer.ResetStartTime();
385  Epetra_JadMatrix JA(*A);
386  elapsed_time = timer.ElapsedTime();
387  if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl;
388 
389  //cout << "A = " << *A << endl;
390  //cout << "JA = " << JA << endl;
391 
392  runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary);
393 
394 #endif
395  runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary);
396 
397  delete A;
398  delete b;
399  delete bt;
400  delete xexact;
401 
402  GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(),
403  XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary,
404  mapL, L, bL, btL, xexactL, StaticProfile, true);
405 
406 
407  GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(),
408  XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary,
409  mapU, U, bU, btU, xexactU, StaticProfile, true);
410 
411 
412  runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary);
413 
414  delete L;
415  delete bL;
416  delete btL;
417  delete xexactL;
418  delete mapL;
419 
420  delete U;
421  delete bU;
422  delete btU;
423  delete xexactU;
424  delete mapU;
425 
426  Epetra_MultiVector q(*map, nrhs);
427  Epetra_MultiVector z(q);
428  Epetra_MultiVector r(q);
429 
430  delete map;
431  q.SetFlopCounter(flopcounter);
432  z.SetFlopCounter(q);
433  r.SetFlopCounter(q);
434 
435  resvec.Resize(nrhs);
436 
437 
438  flopcounter.ResetFlops();
439  timer.ResetStartTime();
440 
441  //10 norms
442  for( int i = 0; i < 10; ++i )
443  q.Norm2( resvec.Values() );
444 
445  elapsed_time = timer.ElapsedTime();
446  total_flops = q.Flops();
447  MFLOPs = total_flops/elapsed_time/1000000.0;
448  if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl;
449 
450  if (summary) {
451  if (comm.NumProc()==1) cout << "Norm2" << '\t';
452  cout << MFLOPs << endl;
453  }
454 
455  flopcounter.ResetFlops();
456  timer.ResetStartTime();
457 
458  //10 dot's
459  for( int i = 0; i < 10; ++i )
460  q.Dot(z, resvec.Values());
461 
462  elapsed_time = timer.ElapsedTime();
463  total_flops = q.Flops();
464  MFLOPs = total_flops/elapsed_time/1000000.0;
465  if (verbose) cout << "Total MFLOPs for 10 Dot's = " << MFLOPs << endl;
466 
467  if (summary) {
468  if (comm.NumProc()==1) cout << "DotProd" << '\t';
469  cout << MFLOPs << endl;
470  }
471 
472  flopcounter.ResetFlops();
473  timer.ResetStartTime();
474 
475  //10 dot's
476  for( int i = 0; i < 10; ++i )
477  q.Update(1.0, z, 1.0, r, 0.0);
478 
479  elapsed_time = timer.ElapsedTime();
480  total_flops = q.Flops();
481  MFLOPs = total_flops/elapsed_time/1000000.0;
482  if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl;
483 
484  if (summary) {
485  if (comm.NumProc()==1) cout << "Update" << '\t';
486  cout << MFLOPs << endl;
487  }
488  }
489  }
490  }
491 #ifdef EPETRA_MPI
492  MPI_Finalize() ;
493 #endif
494 
495 return ierr ;
496 }
497 
498 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
499 //
500 // nx (In) - number of grid points in x direction
501 // ny (In) - number of grid points in y direction
502 // The total number of equations will be nx*ny ordered such that the x direction changes
503 // most rapidly:
504 // First equation is at point (0,0)
505 // Second at (1,0)
506 // ...
507 // nx equation at (nx-1,0)
508 // nx+1st equation at (0,1)
509 
510 // numPoints (In) - number of points in finite difference stencil
511 // xoff (In) - stencil offsets in x direction (of length numPoints)
512 // yoff (In) - stencil offsets in y direction (of length numPoints)
513 // A standard 5-point finite difference stencil would be described as:
514 // numPoints = 5
515 // xoff = [-1, 1, 0, 0, 0]
516 // yoff = [ 0, 0, 0, -1, 1]
517 
518 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
519 
520 // comm (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
521 // map (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
522 // A (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
523 // Off-diagonal values are random between 0 and 1. If diagonal is part of stencil,
524 // diagonal will be slightly diag dominant.
525 // b (Out) - Generated RHS. Values satisfy b = A*xexact
526 // bt (Out) - Generated RHS. Values satisfy b = A'*xexact
527 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
528 
529 // Note: Caller of this function is responsible for deleting all output objects.
530 
531 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
532  int * xoff, int * yoff,
533  const Epetra_Comm &comm, bool verbose, bool summary,
534  Epetra_Map *& map,
535  Epetra_CrsMatrix *& A,
536  Epetra_Vector *& b,
537  Epetra_Vector *& bt,
538  Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
539 
540  Epetra_MultiVector * b1, * bt1, * xexact1;
541 
542  GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
543  xoff, yoff, 1, comm, verbose, summary,
544  map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
545 
546  b = dynamic_cast<Epetra_Vector *>(b1);
547  bt = dynamic_cast<Epetra_Vector *>(bt1);
548  xexact = dynamic_cast<Epetra_Vector *>(xexact1);
549 
550  return;
551 }
552 
553 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
554  int * xoff, int * yoff, int nrhs,
555  const Epetra_Comm &comm, bool verbose, bool summary,
556  Epetra_Map *& map,
557  Epetra_CrsMatrix *& A,
558  Epetra_MultiVector *& b,
559  Epetra_MultiVector *& bt,
560  Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
561 
562  Epetra_Time timer(comm);
563  // Determine my global IDs
564  long long * myGlobalElements;
565  GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
566 
567  int numMyEquations = numNodesX*numNodesY;
568 
569  map = new Epetra_Map((long long)-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
570  delete [] myGlobalElements;
571 
572  long long numGlobalEquations = map->NumGlobalElements64();
573 
574  int profile = 0; if (StaticProfile) profile = numPoints;
575 
576 #ifdef EPETRA_HAVE_STATICPROFILE
577 
578  if (MakeLocalOnly)
579  A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap
580  else
581  A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix
582 
583 #else
584 
585  if (MakeLocalOnly)
586  A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap
587  else
588  A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix
589 
590 #endif
591 
592  long long * indices = new long long[numPoints];
593  double * values = new double[numPoints];
594 
595  double dnumPoints = (double) numPoints;
596  int nx = numNodesX*numProcsX;
597 
598  for (int i=0; i<numMyEquations; i++) {
599 
600  long long rowID = map->GID64(i);
601  int numIndices = 0;
602 
603  for (int j=0; j<numPoints; j++) {
604  long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
605  if (colID>-1 && colID<numGlobalEquations) {
606  indices[numIndices] = colID;
607  double value = - ((double) rand())/ ((double) RAND_MAX);
608  if (colID==rowID)
609  values[numIndices++] = dnumPoints - value; // Make diagonal dominant
610  else
611  values[numIndices++] = value;
612  }
613  }
614  //cout << "Building row " << rowID << endl;
615  A->InsertGlobalValues(rowID, numIndices, values, indices);
616  }
617 
618  delete [] indices;
619  delete [] values;
620  double insertTime = timer.ElapsedTime();
621  timer.ResetStartTime();
622  A->FillComplete(false);
623  double fillCompleteTime = timer.ElapsedTime();
624 
625  if (verbose)
626  cout << "Time to insert matrix values = " << insertTime << endl
627  << "Time to complete fill = " << fillCompleteTime << endl;
628  if (summary) {
629  if (comm.NumProc()==1) cout << "InsertTime" << '\t';
630  cout << insertTime << endl;
631  if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t';
632  cout << fillCompleteTime << endl;
633  }
634 
635  if (nrhs<=1) {
636  b = new Epetra_Vector(*map);
637  bt = new Epetra_Vector(*map);
638  xexact = new Epetra_Vector(*map);
639  }
640  else {
641  b = new Epetra_MultiVector(*map, nrhs);
642  bt = new Epetra_MultiVector(*map, nrhs);
643  xexact = new Epetra_MultiVector(*map, nrhs);
644  }
645 
646  xexact->Random(); // Fill xexact with random values
647 
648  A->Multiply(false, *xexact, *b);
649  A->Multiply(true, *xexact, *bt);
650 
651  return;
652 }
653 
654 
655 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
656 //
657 // nx (In) - number of grid points in x direction
658 // ny (In) - number of grid points in y direction
659 // The total number of equations will be nx*ny ordered such that the x direction changes
660 // most rapidly:
661 // First equation is at point (0,0)
662 // Second at (1,0)
663 // ...
664 // nx equation at (nx-1,0)
665 // nx+1st equation at (0,1)
666 
667 // numPoints (In) - number of points in finite difference stencil
668 // xoff (In) - stencil offsets in x direction (of length numPoints)
669 // yoff (In) - stencil offsets in y direction (of length numPoints)
670 // A standard 5-point finite difference stencil would be described as:
671 // numPoints = 5
672 // xoff = [-1, 1, 0, 0, 0]
673 // yoff = [ 0, 0, 0, -1, 1]
674 
675 // nsizes (In) - Length of element size list used to create variable block map and matrix
676 // sizes (In) - integer list of element sizes of length nsizes
677 // The map associated with this VbrMatrix will be created by cycling through the sizes list.
678 // For example, if nsize = 3 and sizes = [ 2, 4, 3], the block map will have elementsizes
679 // of 2, 4, 3, 2, 4, 3, ...
680 
681 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
682 
683 // comm (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
684 // map (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
685 // A (Out) - Epetra_VbrMatrix constructed for nx by ny grid using prescribed stencil
686 // Off-diagonal values are random between 0 and 1. If diagonal is part of stencil,
687 // diagonal will be slightly diag dominant.
688 // b (Out) - Generated RHS. Values satisfy b = A*xexact
689 // bt (Out) - Generated RHS. Values satisfy b = A'*xexact
690 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
691 
692 // Note: Caller of this function is responsible for deleting all output objects.
693 
694 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
695  int * xoff, int * yoff,
696  int nsizes, int * sizes,
697  const Epetra_Comm &comm, bool verbose, bool summary,
698  Epetra_BlockMap *& map,
699  Epetra_VbrMatrix *& A,
700  Epetra_Vector *& b,
701  Epetra_Vector *& bt,
702  Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
703 
704  Epetra_MultiVector * b1, * bt1, * xexact1;
705 
706  GenerateVbrProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
707  xoff, yoff, nsizes, sizes,
708  1, comm, verbose, summary, map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
709 
710  b = dynamic_cast<Epetra_Vector *>(b1);
711  bt = dynamic_cast<Epetra_Vector *>(bt1);
712  xexact = dynamic_cast<Epetra_Vector *>(xexact1);
713 
714  return;
715 }
716 
717 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
718  int * xoff, int * yoff,
719  int nsizes, int * sizes, int nrhs,
720  const Epetra_Comm &comm, bool verbose, bool summary,
721  Epetra_BlockMap *& map,
722  Epetra_VbrMatrix *& A,
723  Epetra_MultiVector *& b,
724  Epetra_MultiVector *& bt,
725  Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
726 
727  int i;
728 
729  // Determine my global IDs
730  long long * myGlobalElements;
731  GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
732 
733  int numMyElements = numNodesX*numNodesY;
734 
735  Epetra_Map ptMap((long long)-1, numMyElements, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
736  delete [] myGlobalElements;
737 
738  Epetra_IntVector elementSizes(ptMap); // This vector will have the list of element sizes
739  for (i=0; i<numMyElements; i++)
740  elementSizes[i] = sizes[ptMap.GID64(i)%nsizes]; // cycle through sizes array
741 
742  map = new Epetra_BlockMap((long long)-1, numMyElements, ptMap.MyGlobalElements64(), elementSizes.Values(),
743  ptMap.IndexBase64(), ptMap.Comm());
744 
745 
746 // FIXME: Won't compile until Epetra_VbrMatrix is modified.
747 #if 0
748  int profile = 0; if (StaticProfile) profile = numPoints;
749  int j;
750  long long numGlobalEquations = ptMap.NumGlobalElements64();
751 
752  if (MakeLocalOnly)
753  A = new Epetra_VbrMatrix(Copy, *map, *map, profile); // Construct matrix rowmap=colmap
754  else
755  A = new Epetra_VbrMatrix(Copy, *map, profile); // Construct matrix
756 
757  long long * indices = new long long[numPoints];
758 
759  // This section of code creates a vector of random values that will be used to create
760  // light-weight dense matrices to pass into the VbrMatrix construction process.
761 
762  int maxElementSize = 0;
763  for (i=0; i< nsizes; i++) maxElementSize = EPETRA_MAX(maxElementSize, sizes[i]);
764 
765  Epetra_LocalMap lmap((long long)maxElementSize*maxElementSize, ptMap.IndexBase(), ptMap.Comm());
766  Epetra_Vector randvec(lmap);
767  randvec.Random();
768  randvec.Scale(-1.0); // Make value negative
769  int nx = numNodesX*numProcsX;
770 
771 
772  for (i=0; i<numMyElements; i++) {
773  long long rowID = map->GID64(i);
774  int numIndices = 0;
775  int rowDim = sizes[rowID%nsizes];
776  for (j=0; j<numPoints; j++) {
777  long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
778  if (colID>-1 && colID<numGlobalEquations)
779  indices[numIndices++] = colID;
780  }
781 
782  A->BeginInsertGlobalValues(rowID, numIndices, indices);
783 
784  for (j=0; j < numIndices; j++) {
785  int colDim = sizes[indices[j]%nsizes];
786  A->SubmitBlockEntry(&(randvec[0]), rowDim, rowDim, colDim);
787  }
788  A->EndSubmitEntries();
789  }
790 
791  delete [] indices;
792 
793  A->FillComplete();
794 
795  // Compute the InvRowSums of the matrix rows
796  Epetra_Vector invRowSums(A->RowMap());
797  Epetra_Vector rowSums(A->RowMap());
798  A->InvRowSums(invRowSums);
799  rowSums.Reciprocal(invRowSums);
800 
801  // Jam the row sum values into the diagonal of the Vbr matrix (to make it diag dominant)
802  int numBlockDiagonalEntries;
803  int * rowColDims;
804  int * diagoffsets = map->FirstPointInElementList();
805  A->BeginExtractBlockDiagonalView(numBlockDiagonalEntries, rowColDims);
806  for (i=0; i< numBlockDiagonalEntries; i++) {
807  double * diagVals;
808  int diagLDA;
809  A->ExtractBlockDiagonalEntryView(diagVals, diagLDA);
810  int rowDim = map->ElementSize(i);
811  for (j=0; j<rowDim; j++) diagVals[j+j*diagLDA] = rowSums[diagoffsets[i]+j];
812  }
813 
814  if (nrhs<=1) {
815  b = new Epetra_Vector(*map);
816  bt = new Epetra_Vector(*map);
817  xexact = new Epetra_Vector(*map);
818  }
819  else {
820  b = new Epetra_MultiVector(*map, nrhs);
821  bt = new Epetra_MultiVector(*map, nrhs);
822  xexact = new Epetra_MultiVector(*map, nrhs);
823  }
824 
825  xexact->Random(); // Fill xexact with random values
826 
827  A->Multiply(false, *xexact, *b);
828  A->Multiply(true, *xexact, *bt);
829 
830 #endif // EPETRA_NO_32BIT_GLOBAL_INDICES
831 
832  return;
833 }
834 
835 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
836  int myPID, long long * & myGlobalElements) {
837 
838  myGlobalElements = new long long[numNodesX*numNodesY];
839  int myProcX = myPID%numProcsX;
840  int myProcY = myPID/numProcsX;
841  int curGID = myProcY*(numProcsX*numNodesX)*numNodesY+myProcX*numNodesX;
842  for (int j=0; j<numNodesY; j++) {
843  for (int i=0; i<numNodesX; i++) {
844  myGlobalElements[j*numNodesX+i] = curGID+i;
845  }
846  curGID+=numNodesX*numProcsX;
847  }
848  //for (int i=0; i<numNodesX*numNodesY; i++) cout << "MYPID " << myPID <<" GID "<< myGlobalElements[i] << endl;
849 
850  return;
851 }
852 
854  Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
855 
856  Epetra_MultiVector z(*b);
857  Epetra_MultiVector r(*b);
859 
860  //Timings
861  Epetra_Flops flopcounter;
862  A->SetFlopCounter(flopcounter);
863  Epetra_Time timer(A->Comm());
864  std::string statdyn = "dynamic";
865  if (StaticProfile) statdyn = "static ";
866 
867  for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
868 
869  bool TransA = (j==1 || j==3);
870  std::string contig = "without";
871  if (j>1) contig = "with ";
872 
873 #ifdef EPETRA_SHORT_PERFTEST
874  int kstart = 1;
875 #else
876  int kstart = 0;
877 #endif
878  for (int k=kstart; k<2; k++) { // Loop over old multiply vs. new multiply
879 
880  std::string oldnew = "old";
881  if (k>0) oldnew = "new";
882 
883  if (j==2) A->OptimizeStorage();
884 
885  flopcounter.ResetFlops();
886  timer.ResetStartTime();
887 
888  if (k==0) {
889  //10 matvecs
890 #ifndef EPETRA_SHORT_PERFTEST
891  for( int i = 0; i < 10; ++i )
892  A->Multiply1(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact using old Multiply method
893 #endif
894  }
895  else {
896  //10 matvecs
897  for( int i = 0; i < 10; ++i )
898  A->Multiply(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact
899  }
900 
901  double elapsed_time = timer.ElapsedTime();
902  double total_flops = A->Flops();
903 
904  // Compute residual
905  if (TransA)
906  r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
907  else
908  r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
909 
910  r.Norm2(resvec.Values());
911 
912  if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
913  double MFLOPs = total_flops/elapsed_time/1000000.0;
914  if (verbose) cout << "Total MFLOPs for 10 " << oldnew << " MatVec's with " << statdyn << " Profile (Trans = " << TransA
915  << ") and " << contig << " optimized storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
916  if (summary) {
917  if (A->Comm().NumProc()==1) {
918  if (TransA) cout << "TransMv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
919  else cout << "NoTransMv" << statdyn << "Prof" << contig << "OptStor" << '\t';
920  }
921  cout << MFLOPs << endl;
922  }
923  }
924  }
925  return;
926 }
927 #ifdef EPETRA_HAVE_JADMATRIX
929  Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
930 
931  Epetra_MultiVector z(*b);
932  Epetra_MultiVector r(*b);
934 
935  //Timings
936  Epetra_Flops flopcounter;
937  A->SetFlopCounter(flopcounter);
938  Epetra_Time timer(A->Comm());
939 
940  for (int j=0; j<2; j++) { // j = 0 is notrans, j = 1 is trans
941 
942  bool TransA = (j==1);
943  A->SetUseTranspose(TransA);
944  flopcounter.ResetFlops();
945  timer.ResetStartTime();
946 
947  //10 matvecs
948  for( int i = 0; i < 10; ++i )
949  A->Apply(*xexact, z); // Compute z = A*xexact or z = A'*xexact
950 
951  double elapsed_time = timer.ElapsedTime();
952  double total_flops = A->Flops();
953 
954  // Compute residual
955  if (TransA)
956  r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
957  else
958  r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
959 
960  r.Norm2(resvec.Values());
961 
962  if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
963  double MFLOPs = total_flops/elapsed_time/1000000.0;
964  if (verbose) cout << "Total MFLOPs for 10 " << " Jagged Diagonal MatVec's with (Trans = " << TransA
965  << ") " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
966  if (summary) {
967  if (A->Comm().NumProc()==1) {
968  if (TransA) cout << "TransMv" << '\t';
969  else cout << "NoTransMv" << '\t';
970  }
971  cout << MFLOPs << endl;
972  }
973  }
974  return;
975 }
976 #endif
977 //=========================================================================================
980  bool StaticProfile, bool verbose, bool summary) {
981 
982  if (L->NoDiagonal()) {
983  bL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to bL
984  btL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to btL
985  }
986  if (U->NoDiagonal()) {
987  bU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to bU
988  btU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to btU
989  }
990 
991  Epetra_MultiVector z(*bL);
992  Epetra_MultiVector r(*bL);
993  Epetra_SerialDenseVector resvec(bL->NumVectors());
994 
995  //Timings
996  Epetra_Flops flopcounter;
997  L->SetFlopCounter(flopcounter);
998  U->SetFlopCounter(flopcounter);
999  Epetra_Time timer(L->Comm());
1000  std::string statdyn = "dynamic";
1001  if (StaticProfile) statdyn = "static ";
1002 
1003  for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
1004 
1005  bool TransA = (j==1 || j==3);
1006  std::string contig = "without";
1007  if (j>1) contig = "with ";
1008 
1009  if (j==2) {
1010  L->OptimizeStorage();
1011  U->OptimizeStorage();
1012  }
1013 
1014  flopcounter.ResetFlops();
1015  timer.ResetStartTime();
1016 
1017  //10 lower solves
1018  bool Upper = false;
1019  bool UnitDiagonal = L->NoDiagonal(); // If no diagonal, then unit must be used
1020  Epetra_MultiVector * b = TransA ? btL : bL; // solve with the appropriate b vector
1021  for( int i = 0; i < 10; ++i )
1022  L->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
1023 
1024  double elapsed_time = timer.ElapsedTime();
1025  double total_flops = L->Flops();
1026 
1027  // Compute residual
1028  r.Update(-1.0, z, 1.0, *xexactL, 0.0); // r = bt - z
1029  r.Norm2(resvec.Values());
1030 
1031  if (resvec.NormInf()>0.000001) {
1032  cout << "resvec = " << resvec << endl;
1033  cout << "z = " << z << endl;
1034  cout << "xexactL = " << *xexactL << endl;
1035  cout << "r = " << r << endl;
1036  }
1037 
1038  if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
1039  double MFLOPs = total_flops/elapsed_time/1000000.0;
1040  if (verbose) cout << "Total MFLOPs for 10 " << " Lower solves " << statdyn << " Profile (Trans = " << TransA
1041  << ") and " << contig << " opt storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
1042  if (summary) {
1043  if (L->Comm().NumProc()==1) {
1044  if (TransA) cout << "TransLSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
1045  else cout << "NoTransLSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
1046  }
1047  cout << MFLOPs << endl;
1048  }
1049  flopcounter.ResetFlops();
1050  timer.ResetStartTime();
1051 
1052  //10 upper solves
1053  Upper = true;
1054  UnitDiagonal = U->NoDiagonal(); // If no diagonal, then unit must be used
1055  b = TransA ? btU : bU; // solve with the appropriate b vector
1056  for( int i = 0; i < 10; ++i )
1057  U->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
1058 
1059  elapsed_time = timer.ElapsedTime();
1060  total_flops = U->Flops();
1061 
1062  // Compute residual
1063  r.Update(-1.0, z, 1.0, *xexactU, 0.0); // r = bt - z
1064  r.Norm2(resvec.Values());
1065 
1066  if (resvec.NormInf()>0.001) {
1067  cout << "U = " << *U << endl;
1068  //cout << "resvec = " << resvec << endl;
1069  cout << "z = " << z << endl;
1070  cout << "xexactU = " << *xexactU << endl;
1071  //cout << "r = " << r << endl;
1072  cout << "b = " << *b << endl;
1073  }
1074 
1075 
1076  if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
1077  MFLOPs = total_flops/elapsed_time/1000000.0;
1078  if (verbose) cout << "Total MFLOPs for 10 " << " Upper solves " << statdyn << " Profile (Trans = " << TransA
1079  << ") and " << contig << " opt storage = " << MFLOPs <<endl;
1080  if (summary) {
1081  if (L->Comm().NumProc()==1) {
1082  if (TransA) cout << "TransUSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
1083  else cout << "NoTransUSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
1084  }
1085  cout << MFLOPs << endl;
1086  }
1087  }
1088  return;
1089 }
Epetra_Flops::ResetFlops
void ResetFlops()
Resets the number of floating point operations to zero for this multi-vector.
Definition: Epetra_Flops.h:77
Epetra_SerialDenseVector
Epetra_SerialDenseVector: A class for constructing and using dense vectors.
Definition: Epetra_SerialDenseVector.h:95
Epetra_BlockMap::Comm
const Epetra_Comm & Comm() const
Access function for Epetra_Comm communicator.
Definition: Epetra_BlockMap.h:770
Epetra_Time::ResetStartTime
void ResetStartTime(void)
Epetra_Time function to reset the start time for a timer object.
Definition: Epetra_Time.cpp:129
Epetra_IntSerialDenseVector.h
Epetra_Comm::NumProc
virtual int NumProc() const =0
Returns total number of processes.
Epetra_Version.h
Epetra_BlockMap::ElementSize
int ElementSize() const
Returns the size of elements in the map; only valid if map has constant element size.
Definition: Epetra_BlockMap.h:573
Epetra_SerialDenseVector.h
Epetra_MultiVector::Random
int Random()
Set multi-vector values to random numbers.
Definition: Epetra_MultiVector.cpp:490
Epetra_LocalMap.h
Epetra_BlockMap::IndexBase64
long long IndexBase64() const
Definition: Epetra_BlockMap.h:592
Epetra_CompObject::SetFlopCounter
void SetFlopCounter(const Epetra_Flops &FlopCounter_in)
Set the internal Epetra_Flops() pointer.
Definition: Epetra_CompObject.h:78
Epetra_CrsMatrix.h
Epetra_IntVector
Epetra_IntVector: A class for constructing and using dense integer vectors on a parallel computer.
Definition: Epetra_IntVector.h:124
Copy
Definition: Epetra_DataAccess.h:55
runLUMatrixTests
void runLUMatrixTests(Epetra_CrsMatrix *L, Epetra_MultiVector *bL, Epetra_MultiVector *btL, Epetra_MultiVector *xexactL, Epetra_CrsMatrix *U, Epetra_MultiVector *bU, Epetra_MultiVector *btU, Epetra_MultiVector *xexactU, bool StaticProfile, bool verbose, bool summary)
Definition: test/BasicPerfTest_LL/cxx_main.cpp:978
Epetra_JadMatrix.h
main
int main(int argc, char *argv[])
Definition: test/BasicPerfTest_LL/cxx_main.cpp:122
Epetra_IntSerialDenseVector::Values
int * Values()
Returns pointer to the values in vector.
Definition: Epetra_IntSerialDenseVector.h:211
runMatrixTests
void runMatrixTests(Epetra_CrsMatrix *A, Epetra_MultiVector *b, Epetra_MultiVector *bt, Epetra_MultiVector *xexact, bool StaticProfile, bool verbose, bool summary)
Definition: test/BasicPerfTest_LL/cxx_main.cpp:853
Epetra_Comm
Epetra_Comm: The Epetra Communication Abstract Base Class.
Definition: Epetra_Comm.h:73
Epetra_Vector.h
Epetra_SerialComm::MyPID
int MyPID() const
Return my process ID.
Definition: Epetra_SerialComm.h:432
Epetra_Version
std::string Epetra_Version()
Definition: Epetra_Version.h:47
Epetra_SerialComm::NumProc
int NumProc() const
Returns total number of processes (always returns 1 for SerialComm).
Definition: Epetra_SerialComm.h:435
Epetra_Flops
Epetra_Flops: The Epetra Floating Point Operations Class.
Definition: Epetra_Flops.h:58
Epetra_CrsMatrix::OptimizeStorage
int OptimizeStorage()
Make consecutive row index sections contiguous, minimize internal storage used for constructing graph...
Definition: Epetra_CrsMatrix.cpp:1279
Epetra_SerialComm.h
Epetra_CrsMatrix
Epetra_CrsMatrix: A class for constructing and using real-valued double-precision sparse compressed r...
Definition: Epetra_CrsMatrix.h:173
GenerateCrsProblem
void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int *xoff, int *yoff, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_Map *&map, Epetra_CrsMatrix *&A, Epetra_Vector *&b, Epetra_Vector *&bt, Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly)
Definition: test/BasicPerfTest_LL/cxx_main.cpp:531
Epetra_MpiComm.h
Epetra_SerialDenseVector::Resize
int Resize(int Length_in)
Resize a Epetra_SerialDenseVector object.
Definition: Epetra_SerialDenseVector.h:171
Epetra_JadMatrix
Epetra_JadMatrix: A class for constructing matrix objects optimized for common kernels.
Definition: Epetra_JadMatrix.h:68
Epetra_VbrMatrix.h
Epetra_CrsMatrix::Comm
const Epetra_Comm & Comm() const
Returns a pointer to the Epetra_Comm communicator associated with this matrix.
Definition: Epetra_CrsMatrix.h:1251
Epetra_CompObject::Flops
double Flops() const
Returns the number of floating point operations with this multi-vector.
Definition: Epetra_CompObject.h:93
Epetra_MultiVector::Norm2
int Norm2(double *Result) const
Compute 2-norm of each vector in multi-vector.
Definition: Epetra_MultiVector.cpp:1547
Epetra_MpiComm
Epetra_MpiComm: The Epetra MPI Communication Class.
Definition: Epetra_MpiComm.h:64
Epetra_SerialComm
Epetra_SerialComm: The Epetra Serial Communication Class.
Definition: Epetra_SerialComm.h:61
Epetra_IntVector.h
Epetra_BlockMap.h
Epetra_BlockMap
Epetra_BlockMap: A class for partitioning block element vectors and matrices.
Definition: Epetra_BlockMap.h:194
Epetra_Vector
Epetra_Vector: A class for constructing and using dense vectors on a parallel computer.
Definition: Epetra_Vector.h:142
runJadMatrixTests
void runJadMatrixTests(Epetra_JadMatrix *A, Epetra_MultiVector *b, Epetra_MultiVector *bt, Epetra_MultiVector *xexact, bool StaticProfile, bool verbose, bool summary)
Definition: test/BasicPerfTest_LL/cxx_main.cpp:928
Epetra_CrsMatrix::Solve
int Solve(bool Upper, bool Trans, bool UnitDiagonal, const Epetra_Vector &x, Epetra_Vector &y) const
Returns the result of a local solve using the Epetra_CrsMatrix on a Epetra_Vector x in y.
Definition: Epetra_CrsMatrix.cpp:1629
Epetra_IntSerialDenseVector::Size
int Size(int Length_in)
Set length of a Epetra_IntSerialDenseVector object; init values to zero.
Definition: Epetra_IntSerialDenseVector.h:139
Epetra_SerialDenseVector::NormInf
double NormInf() const
Compute Inf-norm of each vector in multi-vector.
Definition: Epetra_SerialDenseVector.cpp:128
Epetra_Time
Epetra_Time: The Epetra Timing Class.
Definition: Epetra_Time.h:75
GenerateVbrProblem
void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int *xoff, int *yoff, int nsizes, int *sizes, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_BlockMap *&map, Epetra_VbrMatrix *&A, Epetra_Vector *&b, Epetra_Vector *&bt, Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly)
Definition: test/BasicPerfTest_LL/cxx_main.cpp:694
Epetra_IntSerialDenseVector
Epetra_IntSerialDenseVector: A class for constructing and using dense vectors.
Definition: Epetra_IntSerialDenseVector.h:87
Epetra_IntVector::Values
int * Values() const
Returns a pointer to an array containing the values of this vector.
Definition: Epetra_IntVector.h:247
Epetra_BlockMap::FirstPointInElementList
int * FirstPointInElementList() const
Pointer to internal array containing a mapping between the local elements and the first local point n...
Definition: Epetra_BlockMap.cpp:971
Epetra_MultiVector
Epetra_MultiVector: A class for constructing and using dense multi-vectors, vectors and matrices in p...
Definition: Epetra_MultiVector.h:184
Epetra_MultiVector::Dot
int Dot(const Epetra_MultiVector &A, double *Result) const
Computes dot product of each corresponding pair of vectors.
Definition: Epetra_MultiVector.cpp:1123
Epetra_BlockMap::NumGlobalElements64
long long NumGlobalElements64() const
Definition: Epetra_BlockMap.h:552
A
Epetra_Flops.h
GenerateMyGlobalElements
void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs, int myPID, long long *&myGlobalElements)
Definition: test/BasicPerfTest_LL/cxx_main.cpp:835
Epetra_LocalMap
Epetra_LocalMap: A class for replicating vectors and matrices across multiple processors.
Definition: Epetra_LocalMap.h:89
Epetra_Time::ElapsedTime
double ElapsedTime(void) const
Epetra_Time elapsed time function.
Definition: Epetra_Time.cpp:135
Epetra_BlockMap::IndexBase
int IndexBase() const
Index base for this map.
Definition: Epetra_BlockMap.h:586
Epetra_Map.h
Epetra_BlockMap::MyGlobalElements64
long long * MyGlobalElements64() const
Definition: Epetra_BlockMap.cpp:908
Epetra_CrsMatrix::NoDiagonal
bool NoDiagonal() const
If matrix has no diagonal entries in global index space, this query returns true, otherwise it return...
Definition: Epetra_CrsMatrix.h:1028
Epetra_IntSerialDenseVector::Length
int Length() const
Returns length of vector.
Definition: Epetra_IntSerialDenseVector.h:208
Epetra_VbrMatrix
Epetra_VbrMatrix: A class for the construction and use of real-valued double-precision variable block...
Definition: Epetra_VbrMatrix.h:172
Epetra_Object::SetTracebackMode
static void SetTracebackMode(int TracebackModeValue)
Set the value of the Epetra_Object error traceback report mode.
Definition: Epetra_Object.cpp:77
Epetra_BlockMap::GID64
long long GID64(int LID) const
Definition: Epetra_BlockMap.cpp:1252
Epetra_Map
Epetra_Map: A class for partitioning vectors and matrices.
Definition: Epetra_Map.h:119
Epetra_MultiVector.h
Epetra_Comm::MyPID
virtual int MyPID() const =0
Return my process ID.
Epetra_SerialDenseVector::Values
double * Values() const
Returns pointer to the values in vector.
Definition: Epetra_SerialDenseVector.h:274
Epetra_Time.h
EPETRA_MAX
#define EPETRA_MAX(x, y)
Definition: Epetra_ConfigDefs.h:62
Epetra_MultiVector::NumVectors
int NumVectors() const
Returns the number of vectors in the multi-vector.
Definition: Epetra_MultiVector.h:925
Epetra_MultiVector::Update
int Update(double ScalarA, const Epetra_MultiVector &A, double ScalarThis)
Update multi-vector values with scaled values of A, this = ScalarThis*this + ScalarA*A.
Definition: Epetra_MultiVector.cpp:1276