54 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES // FIXME
65 constructedWithFilledGraph_(false),
66 matrixFillCompleteCalled_(false),
67 NumMyBlockRows_(rowMap.NumMyElements()),
72 squareFillCompleteCalled_(false)
95 constructedWithFilledGraph_(false),
96 matrixFillCompleteCalled_(false),
97 NumMyBlockRows_(rowMap.NumMyElements()),
99 squareFillCompleteCalled_(false)
122 constructedWithFilledGraph_(false),
123 matrixFillCompleteCalled_(false),
124 NumMyBlockRows_(rowMap.NumMyElements()),
126 squareFillCompleteCalled_(false)
150 constructedWithFilledGraph_(false),
151 matrixFillCompleteCalled_(false),
152 NumMyBlockRows_(rowMap.NumMyElements()),
154 squareFillCompleteCalled_(false)
177 constructedWithFilledGraph_(false),
178 matrixFillCompleteCalled_(false),
179 NumMyBlockRows_(graph.RowMap().NumMyElements()),
181 squareFillCompleteCalled_(false)
202 Allocated_(Source.Allocated_),
204 UseTranspose_(Source.UseTranspose_),
205 constructedWithFilledGraph_(Source.constructedWithFilledGraph_),
206 matrixFillCompleteCalled_(Source.matrixFillCompleteCalled_),
209 HavePointObjects_(false),
210 squareFillCompleteCalled_(false)
250 bool ConstantShape =
true;
251 const int NOTSETYET = -13 ;
252 int MyLda = NOTSETYET ;
253 int MyColDim = NOTSETYET ;
254 int MyRowDim = NOTSETYET ;
258 assert( ConstantShape ) ;
261 for (j=0; j < NumBlockEntries; j++) {
263 if ( MyLda == NOTSETYET ) {
264 MyLda = ThisBlock->
LDA() ;
265 MyColDim = ThisBlock->
ColDim() ;
266 MyRowDim = ThisBlock->
RowDim() ;
268 if ( MyLda != ThisBlock->
LDA() ) ConstantShape =
false ;
269 if ( MyRowDim != ThisBlock->
RowDim() ) ConstantShape =
false ;
270 if ( MyColDim != ThisBlock->
ColDim() ) ConstantShape =
false ;
274 if (
false && ConstantShape ) {
283 for (j=0; j < NumBlockEntries; j++) {
286 for (
int kk = 0 ; kk < MyColDim ; kk++ ) {
287 for (
int ll = 0 ; ll < MyRowDim ; ll++ ) {
293 MyLda, MyRowDim, MyColDim );
300 for (j=0; j < NumBlockEntries; j++) {
393 if (NumAllocatedBlockEntries > 0) {
395 for (j=0; j < NumAllocatedBlockEntries; j++) {
420 if (NumAllocatedBlockEntries >0) {
422 for (
int j=0; j < NumAllocatedBlockEntries; j++) {
484 for (
int j=0; j< NumBlockEntries; j++) {
487 for (
int col=0; col < ColDim; col++) {
488 double * Entries =
Entries_[i][j]->
A()+col*LDA;
489 for (
int row=0; row < RowDim; row++)
490 *Entries++ = ScalarConstant;
506 for (
int j=0; j< NumBlockEntries; j++) {
509 for (
int col=0; col < ColDim; col++) {
510 double * Entries =
Entries_[i][j]->
A()+col*LDA;
511 for (
int row=0; row < RowDim; row++)
512 *Entries++ *= ScalarConstant;
526 int LocalBlockRow =
LRID(BlockRow);
528 bool indicesAreLocal =
false;
539 bool indicesAreLocal =
true;
547 int * BlockIndices,
bool indicesAreLocal)
570 BlockRow =
LRID(BlockRow);
571 bool indicesAreLocal =
false;
581 bool indicesAreLocal =
true;
590 bool indicesAreLocal)
602 BlockRow =
LRID(BlockRow);
603 bool indicesAreLocal =
false;
611 bool indicesAreLocal =
true;
618 int *BlockIndices,
bool indicesAreLocal) {
630 bool indicesAreLocal,
655 const double *values,
int LDA,
656 int NumRows,
int NumCols,
bool sum_into)
659 int LocalBlockRow =
LRID(GlobalBlockRow);
663 if (
Entries_[LocalBlockRow][Loc]==0) {
670 target->
CopyMat(values, LDA, NumRows, NumCols,
671 target->
A_, target->
LDA_, sum_into);
683 int NumRows,
int NumCols)
692 NumRows, NumCols,
false);
743 bool need_to_delete_temp_entry =
true;
748 int ColDim = src->
N();
752 need_to_delete_temp_entry =
false;
758 target->
A_, target->
LDA_, SumInto);
765 if (need_to_delete_temp_entry) {
785 NumValidBlockIndices = 0;
793 ValidBlockIndices[ NumValidBlockIndices++ ] = j;
807 if (newNumAllocBlocks > oldNumAllocBlocks) {
811 for(j=oldNumBlocks; j<newNumAllocBlocks; ++j) tmp_Entries[j] = NULL;
816 int start = oldNumBlocks;
817 int stop = start + NumValidBlockIndices;
820 if (stop <= NumAllocatedEntries) {
821 for (j=start; j<stop; j++) {
836 delete [] ValidBlockIndices;
849 double *
B,
int LDB,
bool SumInto)
const
859 for (j=0; j<NumCols; j++) {
862 for (i=0; i<NumRows; i++) *ptr1++ += *ptr2++;
866 for (j=0; j<NumCols; j++) {
869 for (i=0; i<NumRows; i++) *ptr1++ = *ptr2++;
951 for (
int j=0; j<max; j++)
953 for (
int k=j; k>=0; k-=m)
955 if (Indices[k+m] >= Indices[k])
958 Entries[k+m] = Entries[k];
961 int itemp = Indices[k+m];
962 Indices[k+m] = Indices[k];
993 for (
int k=1; k<NumEntries; k++) {
994 if (Indices[k]==Indices[k-1]) {
995 if (curBlkEntry->
M() != Entries[curEntry]->
M() ||
996 curBlkEntry->
N() != Entries[curEntry]->
N() ||
997 curBlkEntry->
LDA() != Entries[curEntry]->
LDA()) {
998 std::cerr <<
"Epetra_VbrMatrix ERROR, two dense-matrix contributions to the same column-index have different sizes: ("<<curBlkEntry->
M()<<
"x"<<curBlkEntry->
N()<<
") and ("<<Entries[curEntry]->
M()<<
"x"<<Entries[curEntry]->
N()<<
")"<<std::endl;
1002 CopyMat(Entries[k]->
A(), Entries[k]->LDA(), RowDim, Entries[k]->N(),
1003 curBlkEntry->
A(), curBlkEntry->
LDA(), SumInto);
1006 curBlkEntry = Entries[++curEntry];
1008 curBlkEntry->
Shape(RowDim,Entries[k]->N());
1010 curBlkEntry->
A(), curBlkEntry->
LDA(),
false));
1028 bool ConstantShape =
true;
1030 const int NOTSETYET = -13 ;
1031 int MyLda = NOTSETYET ;
1032 int MyColDim = NOTSETYET ;
1033 int MyRowDim = NOTSETYET ;
1045 if( ConstantShape ) {
1048 for (j=0; j < NumBlockEntries; j++) {
1050 if ( MyLda == NOTSETYET ) {
1051 MyLda = ThisBlock->
LDA() ;
1052 MyColDim = ThisBlock->
ColDim() ;
1053 MyRowDim = ThisBlock->
RowDim() ;
1055 if ( MyLda != ThisBlock->
LDA() ) ConstantShape =
false ;
1056 if ( MyRowDim != ThisBlock->
RowDim() ) ConstantShape =
false ;
1057 if ( MyColDim != ThisBlock->
ColDim() ) ConstantShape =
false ;
1064 if ( ConstantShape ) {
1067 int coef_len = MyColDim*MyRowDim*numMyNonzeros;
1072 for (j=0; j < NumBlockEntries; j++) {
1075 for (
int kk = 0 ; kk < MyColDim ; kk++ ) {
1076 for (
int ll = 0 ; ll < MyRowDim ; ll++ ) {
1084 MyLda, MyRowDim, MyColDim );
1149 int * Indices)
const
1156 std::cout <<
"Must implement..." << std::endl;
1161 int & RowDim,
int & NumBlockEntries,
1165 bool indicesAreLocal =
false;
1167 Entries, indicesAreLocal));
1173 int & RowDim,
int & NumBlockEntries,
1177 bool indicesAreLocal =
true;
1179 Entries, indicesAreLocal));
1185 int & RowDim,
int & NumBlockEntries,
1188 bool indicesAreLocal)
const {
1190 if (!indicesAreLocal) {
1192 NumBlockEntries, BlockIndices);
1193 BlockRow =
LRID(BlockRow);
1197 NumBlockEntries, BlockIndices);
1212 int & RowDim,
int & NumBlockEntries,
1213 int * BlockIndices,
int * ColDims)
const {
1215 bool indicesAreLocal =
false;
1217 ColDims, indicesAreLocal));
1223 int & RowDim,
int & NumBlockEntries,
1224 int * BlockIndices,
int * ColDims)
const {
1226 bool indicesAreLocal =
true;
1228 ColDims, indicesAreLocal));
1234 int & RowDim,
int & NumBlockEntries,
1235 int * BlockIndices,
int * ColDims,
1236 bool indicesAreLocal)
const {
1238 if (!indicesAreLocal)
1244 bool ExtractView =
false;
1245 ierr =
SetupForExtracts(BlockRow, RowDim, NumBlockEntries, ExtractView, indicesAreLocal);
1254 bool indicesAreLocal)
const {
1256 if (!indicesAreLocal) BlockRow =
LRID(BlockRow);
1271 for (
int i=0; i<NumBlockEntries; i++) {
1289 int CurRowDim = CurEntries->
M();
1290 int CurLDA = CurEntries->
LDA();
1294 double* vals = CurEntries->
A();
1295 if (LDA==CurRowDim && CurLDA==CurRowDim)
1296 for (
int ii=0; ii<CurRowDim*CurColDim; ++ii)
1297 values[ii] = vals[ii];
1299 double * CurTargetCol = values;
1300 double * CurSourceCol = vals;
1302 for (
int jj=0; jj<CurColDim; jj++) {
1303 for (
int ii=0; ii<CurRowDim; ++ii)
1304 CurTargetCol[ii] = CurSourceCol[ii];
1305 CurTargetCol += LDA;
1306 CurSourceCol += CurLDA;
1315 int & RowDim,
int & NumBlockEntries,
1316 int * & BlockIndices)
const
1319 bool indicesAreLocal =
false;
1321 BlockIndices, indicesAreLocal));
1327 int * & BlockIndices)
const
1330 bool indicesAreLocal =
true;
1338 int * & BlockIndices,
1339 bool indicesAreLocal)
const
1342 if (!indicesAreLocal)
1348 bool ExtractView =
true;
1349 ierr =
SetupForExtracts(BlockRow, RowDim, NumBlockEntries, ExtractView, indicesAreLocal);
1366 int * & BlockIndices,
1371 bool indicesAreLocal =
false;
1380 int * & BlockIndices,
1385 bool indicesAreLocal =
true;
1396 double * diagptr = Diagonal.
Values();
1398 int BlockRow =
GRID64(i);
1402 for (
int j=0; j<NumEntries; j++) {
1403 int BlockCol =
GCID64(Indices[j]);
1404 if (BlockRow==BlockCol) {
1419 double * diagptr = (
double *) Diagonal.
Values();
1421 int BlockRow =
GRID64(i);
1425 bool DiagMissing =
true;
1426 for (
int j=0; j<NumEntries; j++) {
1427 int BlockCol =
GCID64(Indices[j]);
1428 if (BlockRow==BlockCol) {
1431 DiagMissing =
false;
1435 if (DiagMissing) ierr = 1;
1445 int & NumBlockDiagonalEntries,
int * RowColDims )
const{
1450 if (NumBlockDiagonalEntries>maxNumBlockDiagonalEntries)
EPETRA_CHK_ERR(-2);
1463 for (
int j=0; j<NumEntries; j++) {
1464 int Col = Indices[j];
1465 if (BlockRow==Col) {
1477 int * & RowColDims )
const {
1493 for (
int j=0; j<NumEntries; j++) {
1494 int Col = Indices[j];
1495 if (BlockRow==Col) {
1506 double * Diagonal)
const {
1509 double * ptr1 = Diagonal;
1513 for (i=0; i<ndiags; i++) {
1521 double * Diagonal) {
1524 double * ptr1 = Diagonal;
1528 for (i=0; i<ndiags; i++) {
1542 for (
int j=0; j<NumBlockEntries; j++) NumEntries +=
Entries_[i][j]->N();
1550 int BlockRow, BlockOffset;
1555 for (
int i=0; i<NumBlockEntries; i++) NumEntries +=
Entries_[BlockRow][i]->N();
1563 int * Indices)
const
1569 int BlockRow, BlockOffset;
1573 int RowDim, NumBlockEntries;
1577 BlockIndices, ValBlocks);
1585 for (
int i=0; i<NumBlockEntries; i++) {
1586 int ColDim = ValBlocks[i]->
N();
1587 NumEntries += ColDim;
1589 int LDA = ValBlocks[i]->
LDA();
1590 double *
A = ValBlocks[i]->
A()+BlockOffset;
1591 int Index = ColFirstPointInElementList[BlockIndices[i]];
1592 for (
int j=0; j < ColDim; j++) {
1595 *Indices++ = Index++;
1617 double * xp = (
double*)x.
Values();
1618 double *yp = (
double*)y.
Values();
1623 bool x_and_y_same =
false;
1626 x_and_y_same =
true;
1628 yp = (
double*)ytemp->
Values();
1663 for (i=0; i< NumMyRows_; i++) yp[i] = 0.0;
1666 int NumEntries = *NumBlockEntriesPerRow++;
1667 int * BlockRowIndices = *Indices++;
1669 double * cury = yp + *FirstPointInElement++;
1670 int RowDim = *ElementSize++;
1671 for (j=0; j < NumEntries; j++) {
1673 double *
A = BlockRowValues[j]->
A();
1674 int LDA = BlockRowValues[j]->
LDA();
1675 int Index = BlockRowIndices[j];
1676 double * curx = xp + ColFirstPointInElementList[Index];
1677 int ColDim = ColElementSizeList[Index];
1678 GEMV(
'N', RowDim, ColDim, 1.0,
A, LDA, curx, 1.0, cury);
1716 for (i=0; i < NumMyCols_; i++) yp[i] = 0.0;
1719 int NumEntries = *NumBlockEntriesPerRow++;
1720 int * BlockRowIndices = *Indices++;
1722 double * curx = xp + *FirstPointInElement++;
1723 int RowDim = *ElementSize++;
1724 for (j=0; j < NumEntries; j++) {
1726 double *
A = BlockRowValues[j]->
A();
1727 int LDA = BlockRowValues[j]->
LDA();
1728 int Index = BlockRowIndices[j];
1729 double * cury = yp + ColFirstPointInElementList[Index];
1730 int ColDim = ColElementSizeList[Index];
1731 GEMV(
'T', RowDim, ColDim, 1.0,
A, LDA, curx, 1.0, cury);
1743 if (x_and_y_same ==
true) {
1772 double **Xp = (
double**)X.
Pointers();
1773 double **Yp = (
double**)Y.
Pointers();
1775 bool x_and_y_same =
false;
1778 x_and_y_same =
true;
1824 if ( ! TransA && *RowElementSizeList <= 4 ) {
1826 int RowDim = *RowElementSizeList ;
1829 double *
A = Asub->
A_ ;
1831 if ( NumVectors == 1 ) {
1834 int NumEntries = *NumBlockEntriesPerRow++;
1835 int * BlockRowIndices = *Indices++;
1837 double * xptr = Xp[0];
1844 for (
int j=0; j < NumEntries; ++j) {
1845 int xoff = ColFirstPointInElementList[*BlockRowIndices++];
1847 y0 +=
A[0]*xptr[xoff];
1853 for (
int j=0; j < NumEntries; ++j) {
1854 int xoff = ColFirstPointInElementList[*BlockRowIndices++];
1855 y0 +=
A[0]*xptr[xoff] +
A[2]*xptr[xoff+1];
1856 y1 +=
A[1]*xptr[xoff] +
A[3]*xptr[xoff+1];
1862 for (
int j=0; j < NumEntries; ++j) {
1863 int xoff = ColFirstPointInElementList[*BlockRowIndices++];
1864 y0 +=
A[0]*xptr[xoff+0] +
A[3]*xptr[xoff+1] +
A[6]*xptr[xoff+2];
1865 y1 +=
A[1]*xptr[xoff+0] +
A[4]*xptr[xoff+1] +
A[7]*xptr[xoff+2];
1866 y2 +=
A[2]*xptr[xoff+0] +
A[5]*xptr[xoff+1] +
A[8]*xptr[xoff+2];
1872 for (
int j=0; j < NumEntries; ++j) {
1873 int xoff = ColFirstPointInElementList[*BlockRowIndices++];
1874 y0 +=
A[0]*xptr[xoff+0] +
A[4]*xptr[xoff+1] +
A[8]*xptr[xoff+2] +
A[12]*xptr[xoff+3];
1875 y1 +=
A[1]*xptr[xoff+0] +
A[5]*xptr[xoff+1] +
A[9]*xptr[xoff+2] +
A[13]*xptr[xoff+3];
1876 y2 +=
A[2]*xptr[xoff+0] +
A[6]*xptr[xoff+1] +
A[10]*xptr[xoff+2] +
A[14]*xptr[xoff+3];
1877 y3 +=
A[3]*xptr[xoff+0] +
A[7]*xptr[xoff+1] +
A[11]*xptr[xoff+2] +
A[15]*xptr[xoff+3];
1884 int yoff = *RowFirstPointInElementList++;
1887 *(Yp[0]+yoff+3) = y3;
1889 *(Yp[0]+yoff+2) = y2;
1891 *(Yp[0]+yoff+1) = y1;
1899 int NumEntries = *NumBlockEntriesPerRow++;
1900 int yoff = *RowFirstPointInElementList++;
1901 int * BRI = *Indices++;
1903 for (
int k=0; k<NumVectors; k++) {
1904 int * BlockRowIndices = BRI;
1906 double * xptr = Xp[k];
1913 for (
int j=0; j < NumEntries; ++j) {
1914 int xoff = ColFirstPointInElementList[*BlockRowIndices++];
1916 y0 +=
A[0]*xptr[xoff];
1922 for (
int j=0; j < NumEntries; ++j) {
1923 int xoff = ColFirstPointInElementList[*BlockRowIndices++];
1924 y0 +=
A[0]*xptr[xoff] +
A[2]*xptr[xoff+1];
1925 y1 +=
A[1]*xptr[xoff] +
A[3]*xptr[xoff+1];
1931 for (
int j=0; j < NumEntries; ++j) {
1932 int xoff = ColFirstPointInElementList[*BlockRowIndices++];
1933 y0 +=
A[0]*xptr[xoff+0] +
A[3]*xptr[xoff+1] +
A[6]*xptr[xoff+2];
1934 y1 +=
A[1]*xptr[xoff+0] +
A[4]*xptr[xoff+1] +
A[7]*xptr[xoff+2];
1935 y2 +=
A[2]*xptr[xoff+0] +
A[5]*xptr[xoff+1] +
A[8]*xptr[xoff+2];
1940 for (
int j=0; j < NumEntries; ++j) {
1941 int xoff = ColFirstPointInElementList[*BlockRowIndices++];
1942 y0 +=
A[0]*xptr[xoff+0] +
A[4]*xptr[xoff+1] +
A[8]*xptr[xoff+2] +
A[12]*xptr[xoff+3];
1943 y1 +=
A[1]*xptr[xoff+0] +
A[5]*xptr[xoff+1] +
A[9]*xptr[xoff+2] +
A[13]*xptr[xoff+3];
1944 y2 +=
A[2]*xptr[xoff+0] +
A[6]*xptr[xoff+1] +
A[10]*xptr[xoff+2] +
A[14]*xptr[xoff+3];
1945 y3 +=
A[3]*xptr[xoff+0] +
A[7]*xptr[xoff+1] +
A[11]*xptr[xoff+2] +
A[15]*xptr[xoff+3];
1954 *(Yp[k]+yoff+3) = y3;
1956 *(Yp[k]+yoff+2) = y2;
1958 *(Yp[k]+yoff+1) = y1;
1970 int NumEntries = *NumBlockEntriesPerRow++;
1971 int * BlockRowIndices = *Indices++;
1973 int yoff = *RowFirstPointInElementList++;
1974 int RowDim = *RowElementSizeList++;
1977 ColFirstPointInElementList, ColElementSizeList,
1978 BlockRowValues, Xp, Yp, NumVectors);
1983 int NumEntries = *NumBlockEntriesPerRow++;
1984 int * BlockRowIndices = *Indices++;
1986 int yoff = *RowFirstPointInElementList++;
1987 int RowDim = *RowElementSizeList++;
1989 ColFirstPointInElementList, ColElementSizeList,
1990 BlockRowValues, Xp, Yp, NumVectors);
2039 int NumEntries = *NumBlockEntriesPerRow++;
2040 int * BlockRowIndices = *Indices++;
2042 int xoff = *RowFirstPointInElementList++;
2043 int RowDim = *RowElementSizeList++;
2045 ColFirstPointInElementList, ColElementSizeList,
2046 BlockRowValues, Xp, Yp, NumVectors);
2050 int NumEntries = *NumBlockEntriesPerRow++;
2051 int * BlockRowIndices = *Indices++;
2053 int xoff = *RowFirstPointInElementList++;
2054 int RowDim = *RowElementSizeList++;
2056 ColFirstPointInElementList, ColElementSizeList,
2057 BlockRowValues, Xp, Yp, NumVectors);
2071 if (x_and_y_same ==
true) {
2084 int * FirstPointInElementList,
2085 int * ElementSizeList,
2091 int NumVectors)
const
2098 for (j=0; j < NumEntries; j++) {
2100 double *
A = Asub->
A();
2101 int LDA = Asub->
LDA();
2102 int BlockIndex = BlockIndices[j];
2103 int xoff = FirstPointInElementList[BlockIndex];
2104 int ColDim = ElementSizeList[BlockIndex];
2106 for (k=0; k<NumVectors; k++) {
2107 double * curx = X[k] + xoff;
2108 double * cury = Y[k] + RowOff;
2110 GEMV(
'N', RowDim, ColDim, Alpha,
A, LDA, curx, Beta, cury);
2115 for (j=0; j < NumEntries; j++) {
2116 double *
A = As[j]->
A();
2117 int LDA = As[j]->
LDA();
2118 int BlockIndex = BlockIndices[j];
2119 int yoff = FirstPointInElementList[BlockIndex];
2120 int ColDim = ElementSizeList[BlockIndex];
2121 for (k=0; k<NumVectors; k++) {
2122 double * curx = X[k] + RowOff;
2123 double * cury = Y[k] + yoff;
2124 GEMV(
'T', RowDim, ColDim, Alpha,
A, LDA, curx, Beta, cury);
2135 int * BlockRowIndices,
2137 int * ColFirstPointInElementList,
2138 int * ColElementSizeList,
2142 int NumVectors)
const
2146 for (k=0; k<NumVectors; k++) {
2147 double * y = Yp[k] + yoff;
2148 double * xptr = Xp[k];
2151 double *OrigA = Asub->
A_;
2152 int LDA = Asub->
LDA_;
2153 int OrigBlockIndex = BlockRowIndices[0];
2154 int ColDim = ColElementSizeList[OrigBlockIndex];
2156 assert( RowDim == ColDim ) ;
2157 assert( RowDim == LDA ) ;
2165 for (j=0; j < NumEntries; ++j) {
2167 int BlockIndex = BlockRowIndices[j];
2168 int xoff = ColFirstPointInElementList[BlockIndex];
2170 double *
A = OrigA + j * ColDim * LDA ;
2171 double *x = xptr + xoff;
2180 for (j=0; j < NumEntries; ++j) {
2182 int BlockIndex = BlockRowIndices[j];
2183 int xoff = ColFirstPointInElementList[BlockIndex];
2185 y0 +=
A[0]*xptr[xoff] +
A[2]*xptr[xoff+1];
2186 y1 +=
A[1]*xptr[xoff] +
A[3]*xptr[xoff+1];
2194 for (j=0; j < NumEntries; ++j) {
2196 int BlockIndex = BlockRowIndices[j];
2197 int xoff = ColFirstPointInElementList[BlockIndex];
2199 double *
A = OrigA + j * ColDim * LDA ;
2200 double *x = xptr + xoff;
2201 y[0] +=
A[0]*x[0] +
A[3]*x[1] +
A[6]*x[2];
2202 y[1] +=
A[1]*x[0] +
A[4]*x[1] +
A[7]*x[2];
2203 y[2] +=
A[2]*x[0] +
A[5]*x[1] +
A[8]*x[2];
2208 for (j=0; j < NumEntries; ++j) {
2210 int BlockIndex = BlockRowIndices[j];
2211 int xoff = ColFirstPointInElementList[BlockIndex];
2213 double *
A = OrigA + j * ColDim * LDA ;
2214 double *x = xptr + xoff;
2215 y[0] +=
A[0]*x[0] +
A[4]*x[1] +
A[8]*x[2] +
A[12]*x[3];
2216 y[1] +=
A[1]*x[0] +
A[5]*x[1] +
A[9]*x[2] +
A[13]*x[3];
2217 y[2] +=
A[2]*x[0] +
A[6]*x[1] +
A[10]*x[2] +
A[14]*x[3];
2218 y[3] +=
A[3]*x[0] +
A[7]*x[1] +
A[11]*x[2] +
A[15]*x[3];
2223 for (j=0; j < NumEntries; ++j) {
2225 int BlockIndex = BlockRowIndices[j];
2226 int xoff = ColFirstPointInElementList[BlockIndex];
2228 double *
A = OrigA + j * ColDim * LDA ;
2229 double *x = xptr + xoff;
2230 y[0] +=
A[0]*x[0] +
A[5]*x[1] +
A[10]*x[2] +
A[15]*x[3] +
A[20]*x[4];
2231 y[1] +=
A[1]*x[0] +
A[6]*x[1] +
A[11]*x[2] +
A[16]*x[3] +
A[21]*x[4];
2232 y[2] +=
A[2]*x[0] +
A[7]*x[1] +
A[12]*x[2] +
A[17]*x[3] +
A[22]*x[4];
2233 y[3] +=
A[3]*x[0] +
A[8]*x[1] +
A[13]*x[2] +
A[18]*x[3] +
A[23]*x[4];
2234 y[4] +=
A[4]*x[0] +
A[9]*x[1] +
A[14]*x[2] +
A[19]*x[3] +
A[24]*x[4];
2239 for (j=0; j < NumEntries; ++j) {
2241 int BlockIndex = BlockRowIndices[j];
2242 int xoff = ColFirstPointInElementList[BlockIndex];
2244 double *
A = OrigA + j * ColDim * LDA ;
2245 double *x = xptr + xoff;
2246 y[0] +=
A[0]*x[0] +
A[6]*x[1] +
A[12]*x[2] +
A[18]*x[3] +
A[24]*x[4]
2248 y[1] +=
A[1]*x[0] +
A[7]*x[1] +
A[13]*x[2] +
A[19]*x[3] +
A[25]*x[4]
2250 y[2] +=
A[2]*x[0] +
A[8]*x[1] +
A[14]*x[2] +
A[20]*x[3] +
A[26]*x[4]
2252 y[3] +=
A[3]*x[0] +
A[9]*x[1] +
A[15]*x[2] +
A[21]*x[3] +
A[27]*x[4]
2254 y[4] +=
A[4]*x[0] +
A[10]*x[1] +
A[16]*x[2] +
A[22]*x[3] +
A[28]*x[4]
2256 y[5] +=
A[5]*x[0] +
A[11]*x[1] +
A[17]*x[2] +
A[23]*x[3] +
A[29]*x[4]
2262 for (j=0; j < NumEntries; ++j) {
2264 int BlockIndex = BlockRowIndices[j];
2265 int xoff = ColFirstPointInElementList[BlockIndex];
2267 double *
A = OrigA + j * ColDim * LDA ;
2268 double *x = xptr + xoff;
2269 GEMV(
'N', RowDim, ColDim, 1.0,
A, LDA, x, 1.0, y);
2276 for (j=0; j < NumEntries; j++) {
2277 double *
A = BlockRowValues[j]->
A();
2278 int LDA = BlockRowValues[j]->
LDA();
2279 int BlockIndex = BlockRowIndices[j];
2280 int Yyoff = ColFirstPointInElementList[BlockIndex];
2281 int ColDim = ColElementSizeList[BlockIndex];
2282 for (k=0; k<NumVectors; k++) {
2283 double * x = Xp[k] + yoff;
2284 double * y = Yp[k] + Yyoff;
2285 GEMV(
'T', RowDim, ColDim, 1.0,
A, LDA, x, 1.0, y);
2296 int * FirstPointInElementList,
2297 int * ElementSizeList,
2301 int NumVectors)
const
2313 for (k=0; k<NumVectors; k++) {
2314 double * y = Y[k] + RowOff;
2315 double * xptr = X[k];
2317 for (j=0; j < NumEntries; ++j) {
2319 double *
A = Asub->
A_;
2320 int LDA = Asub->
LDA_;
2321 int BlockIndex = BlockIndices[j];
2322 int xoff = FirstPointInElementList[BlockIndex];
2323 int ColDim = ElementSizeList[BlockIndex];
2325 double * x = xptr + xoff;
2328 if (LDA != RowDim || ColDim != RowDim) {
2329 GEMV(
'N', RowDim, ColDim, 1.0,
A, LDA, x, 1.0, y);
2342 y[0] +=
A[0]*x[0] +
A[2]*x[1];
2343 y[1] +=
A[1]*x[0] +
A[3]*x[1];
2347 y[0] +=
A[0]*x[0] +
A[3]*x[1] +
A[6]*x[2];
2348 y[1] +=
A[1]*x[0] +
A[4]*x[1] +
A[7]*x[2];
2349 y[2] +=
A[2]*x[0] +
A[5]*x[1] +
A[8]*x[2];
2353 y[0] +=
A[0]*x[0] +
A[4]*x[1] +
A[8]*x[2] +
A[12]*x[3];
2354 y[1] +=
A[1]*x[0] +
A[5]*x[1] +
A[9]*x[2] +
A[13]*x[3];
2355 y[2] +=
A[2]*x[0] +
A[6]*x[1] +
A[10]*x[2] +
A[14]*x[3];
2356 y[3] +=
A[3]*x[0] +
A[7]*x[1] +
A[11]*x[2] +
A[15]*x[3];
2360 y[0] +=
A[0]*x[0] +
A[5]*x[1] +
A[10]*x[2] +
A[15]*x[3] +
A[20]*x[4];
2361 y[1] +=
A[1]*x[0] +
A[6]*x[1] +
A[11]*x[2] +
A[16]*x[3] +
A[21]*x[4];
2362 y[2] +=
A[2]*x[0] +
A[7]*x[1] +
A[12]*x[2] +
A[17]*x[3] +
A[22]*x[4];
2363 y[3] +=
A[3]*x[0] +
A[8]*x[1] +
A[13]*x[2] +
A[18]*x[3] +
A[23]*x[4];
2364 y[4] +=
A[4]*x[0] +
A[9]*x[1] +
A[14]*x[2] +
A[19]*x[3] +
A[24]*x[4];
2368 y[0] +=
A[0]*x[0] +
A[6]*x[1] +
A[12]*x[2] +
A[18]*x[3] +
A[24]*x[4]
2370 y[1] +=
A[1]*x[0] +
A[7]*x[1] +
A[13]*x[2] +
A[19]*x[3] +
A[25]*x[4]
2372 y[2] +=
A[2]*x[0] +
A[8]*x[1] +
A[14]*x[2] +
A[20]*x[3] +
A[26]*x[4]
2374 y[3] +=
A[3]*x[0] +
A[9]*x[1] +
A[15]*x[2] +
A[21]*x[3] +
A[27]*x[4]
2376 y[4] +=
A[4]*x[0] +
A[10]*x[1] +
A[16]*x[2] +
A[22]*x[3] +
A[28]*x[4]
2378 y[5] +=
A[5]*x[0] +
A[11]*x[1] +
A[17]*x[2] +
A[23]*x[3] +
A[29]*x[4]
2383 GEMV(
'N', RowDim, ColDim, 1.0,
A, LDA, x, 1.0, y);
2389 for (j=0; j < NumEntries; j++) {
2390 double *
A = As[j]->
A();
2391 int LDA = As[j]->
LDA();
2392 int BlockIndex = BlockIndices[j];
2393 int yoff = FirstPointInElementList[BlockIndex];
2394 int ColDim = ElementSizeList[BlockIndex];
2395 for (k=0; k<NumVectors; k++) {
2396 double * x = X[k] + RowOff;
2397 double * y = Y[k] + yoff;
2398 GEMV(
'T', RowDim, ColDim, 1.0,
A, LDA, x, 1.0, y);
2441 double **Yp = (
double**)Y.
Pointers();
2447 bool Case1 = (((!TransA) && Upper) || (TransA && !Upper));
2451 int NumEntries = *NumBlockEntriesPerRow--;
2452 int * BlockRowIndices = *Indices--;
2454 int yoff = *FirstPointInElement--;
2455 int RowDim = *ElementSize--;
2457 ColFirstPointInElementList, ColElementSizeList,
2458 1.0, BlockRowValues, Yp, -1.0, Yp, NumVectors);
2463 int NumEntries = *NumBlockEntriesPerRow++;
2464 int * BlockRowIndices = *Indices++;
2466 int yoff = *FirstPointInElement++;
2467 int RowDim = *ElementSize++;
2469 ColFirstPointInElementList, ColElementSizeList,
2470 1.0, BlockRowValues, Yp, -1.0, Yp, NumVectors);
2502 bool hasOperatorMap =
false;
2506 if( !hasOperatorMap )
2513 if( !hasOperatorMap )
2533 double * xp = (
double*)x.
Values();
2540 xp = (
double*)x_tmp->
Values();
2545 int NumEntries = *NumBlockEntriesPerRow++;
2546 int * BlockRowIndices = *Indices++;
2548 int xoff = *RowFirstPointInElementList++;
2549 int RowDim = *RowElementSizeList++;
2551 for (
int ii=0; ii < NumEntries; ii++) {
2552 double * xptr = xp+xoff;
2553 double *
A = BlockRowValues[ii]->
A();
2554 int LDA = BlockRowValues[ii]->
LDA();
2555 int BlockIndex = BlockRowIndices[ii];
2556 int ColDim = ColElementSizeList[BlockIndex];
2557 for (
int j=0; j<ColDim; j++) {
2558 double * curEntry =
A + j*LDA;
2559 for (
int k=0; k<RowDim; k++)
2560 xptr[k] += std::abs(*curEntry++);
2565 for (
int ii=0; ii < NumEntries; ii++) {
2566 double *
A = BlockRowValues[ii]->
A();
2567 int LDA = BlockRowValues[ii]->
LDA();
2568 int BlockIndex = BlockRowIndices[ii];
2569 int off = ColFirstPointInElementList[BlockIndex];
2570 int ColDim = ColElementSizeList[BlockIndex];
2571 double * curx = xp+off;
2572 for (
int j=0; j<ColDim; j++) {
2573 double * curEntry =
A + j*LDA;
2574 for (
int k=0; k<RowDim; k++)
2575 curx[j] += std::abs(*curEntry++);
2593 xp = (
double*) x.
Values();
2597 {
for (
int i=0; i < NumMyRows_; i++) {
2598 double scale = xp[i];
2600 if (scale==0.0) ierr = 1;
2601 else if (ierr!=1) ierr = 2;
2633 bool hasOperatorMap =
false;
2637 if( !hasOperatorMap )
2644 if( !hasOperatorMap )
2662 double * xp = (
double*)x.
Values();
2675 xp = (
double*)x_tmp->
Values();
2680 int NumEntries = *NumBlockEntriesPerRow++;
2681 int * BlockRowIndices = *Indices++;
2683 int xoff = *RowFirstPointInElementList++;
2684 int RowDim = *RowElementSizeList++;
2686 for (
int ii=0; ii < NumEntries; ii++) {
2687 double * xptr = xp+xoff;
2688 double *
A = BlockRowValues[ii]->
A();
2689 int LDA = BlockRowValues[ii]->
LDA();
2690 int BlockIndex = BlockRowIndices[ii];
2691 int ColDim = ColElementSizeList[BlockIndex];
2692 for (
int j=0; j<ColDim; j++) {
2693 double * curEntry =
A + j*LDA;
2694 for (
int k=0; k<RowDim; k++)
2695 *curEntry++ *= xptr[k];
2700 for (
int ii=0; ii < NumEntries; ii++) {
2701 double *
A = BlockRowValues[ii]->
A();
2702 int LDA = BlockRowValues[ii]->
LDA();
2703 int BlockIndex = BlockRowIndices[ii];
2704 int off = ColFirstPointInElementList[BlockIndex];
2705 int ColDim = ColElementSizeList[BlockIndex];
2706 double * curx = xp+off;
2707 for (
int j=0; j<ColDim; j++) {
2708 double * curEntry =
A + j*LDA;
2709 for (
int k=0; k<RowDim; k++)
2710 *curEntry++ *= curx[j];
2716 if (x_tmp!=0)
delete x_tmp;
2745 double * tempv =
new double[MaxRowDim_];
2751 double Local_NormInf = 0.0;
2753 int NumEntries = *NumBlockEntriesPerRow++ ;
2754 int RowDim = *ElementSize++;
2757 BlockRowValues, tempv);
2758 for (
int j=0; j < RowDim; j++) Local_NormInf =
EPETRA_MAX(Local_NormInf, tempv[j]);
2771 for (k=0; k<RowDim; k++) Y[k] = 0.0;
2773 for (i=0; i < NumEntries; i++) {
2774 double *
A = As[i]->
A();
2775 int LDA = As[i]->
LDA();
2776 int ColDim = As[i]->
N();
2777 for (j=0; j<ColDim; j++) {
2778 for (k=0; k<RowDim; k++) Y[k] += std::abs(
A[k]);
2807 double * xp = (
double*)x->
Values();
2813 xp = (
double*)x_tmp->
Values();
2822 int NumEntries = *NumBlockEntriesPerRow++;
2823 int RowDim = *ElementSize++;
2824 int * BlockRowIndices = *Indices++;
2827 BlockRowValues, ColFirstPointInElementList, xp);
2834 if (x_tmp!=0)
delete x_tmp;
2859 double local_sum = 0.0;
2862 int NumEntries = *NumBlockEntriesPerRow++;
2863 int RowDim = *ElementSize++;
2866 for(
int ii=0; ii<NumEntries; ++ii) {
2867 double*
A = BlockRowValues[ii]->
A();
2868 int LDA = BlockRowValues[ii]->
LDA();
2869 int colDim = BlockRowValues[ii]->
N();
2870 for(
int j=0; j<colDim; ++j) {
2871 for(
int k=0; k<RowDim; ++k) {
2872 local_sum +=
A[k]*
A[k];
2902 double global_sum = 0.0;
2914 int * ColFirstPointInElementList,
double * x)
const {
2917 for (i=0; i < NumEntries; i++) {
2918 double *
A = As[i]->
A();
2919 int LDA = As[i]->
LDA();
2920 int ColDim = As[i]->
N();
2921 double * curx = x + ColFirstPointInElementList[BlockRowIndices[i]];
2922 for (j=0; j<ColDim; j++) {
2923 for (k=0; k<RowDim; k++) curx[j] += std::abs(
A[k]);
2939 int * PermuteToLIDs,
2940 int *PermuteFromLIDs,
2948 int BlockRow, NumBlockEntries;
2952 int FromBlockRow, ToBlockRow;
2956 int maxNumBlockEntries =
A.MaxNumBlockEntries();
2957 BlockIndices =
new int[maxNumBlockEntries];
2960 for (i=0; i<NumSameIDs; i++) {
2964 RowDim, NumBlockEntries,
2965 BlockIndices, Entries));
2988 RowDim, Entries[j]->N());
2991 delete [] BlockIndices;
2995 if (NumPermuteIDs>0) {
2996 int maxNumBlockEntries =
A.MaxNumBlockEntries();
2997 BlockIndices =
new int[maxNumBlockEntries];
2999 for (i=0; i<NumPermuteIDs; i++) {
3000 FromBlockRow =
A.GRID64(PermuteFromLIDs[i]);
3001 ToBlockRow =
GRID64(PermuteToLIDs[i]);
3002 EPETRA_CHK_ERR(
A.ExtractGlobalBlockRowPointers(FromBlockRow, maxNumBlockEntries, RowDim, NumBlockEntries,
3003 BlockIndices, Entries));
3023 Entries[j]->LDA(), RowDim, Entries[j]->N());
3026 delete [] BlockIndices;
3049 double * DoubleExports = 0;
3050 int globalMaxNumNonzeros =
A.GlobalMaxNumNonzeros();
3051 int globalMaxNumBlockEntries =
A.GlobalMaxNumBlockEntries();
3053 int DoublePacketSize = globalMaxNumNonzeros +
3054 (((2*globalMaxNumBlockEntries+3)+(
int)
sizeof(int)-1)*(int)
sizeof(
int))/(
int)
sizeof(double);
3055 SizeOfPacket = DoublePacketSize * (int)
sizeof(
double);
3064 if (NumExportIDs<=0)
return(0);
3068 int NumBlockEntries;
3070 int RowDim, * ColDims;
3073 double * valptr, * dintptr;
3086 valptr = (
double *) Exports;
3087 dintptr = valptr + globalMaxNumNonzeros;
3088 intptr = (
int *) dintptr;
3089 bool NoSumInto =
false;
3090 for (i=0; i<NumExportIDs; i++) {
3091 FromBlockRow =
A.GRID64(ExportLIDs[i]);
3092 BlockIndices = intptr + 3;
3093 ColDims = BlockIndices + globalMaxNumBlockEntries;
3094 EPETRA_CHK_ERR(
A.BeginExtractGlobalBlockRowCopy(FromBlockRow, globalMaxNumBlockEntries, RowDim,
3095 NumBlockEntries, BlockIndices, ColDims));
3098 for (j=0; j<NumBlockEntries; j++) {
3099 int SizeOfValues = RowDim*ColDims[j];
3100 A.ExtractEntryCopy(SizeOfValues, Entries, RowDim, NoSumInto);
3101 Entries += SizeOfValues;
3104 intptr[0] = FromBlockRow;
3106 intptr[2] = NumBlockEntries;
3107 valptr += DoublePacketSize;
3108 dintptr = valptr + globalMaxNumNonzeros;
3109 intptr = (
int *) dintptr;
3130 if (NumImportIDs<=0)
return(0);
3132 if( CombineMode !=
Add
3133 && CombineMode !=
Zero
3134 && CombineMode !=
Insert )
3138 int NumBlockEntries;
3140 int RowDim, * ColDims;
3145 double * valptr, *dintptr;
3147 int globalMaxNumNonzeros =
A.GlobalMaxNumNonzeros();
3148 int globalMaxNumBlockEntries =
A.GlobalMaxNumBlockEntries();
3150 int DoublePacketSize = globalMaxNumNonzeros +
3151 (((2*globalMaxNumBlockEntries+3)+(
int)
sizeof(int)-1)*(int)
sizeof(
int))/(
int)
sizeof(double);
3163 valptr = (
double *) Imports;
3164 dintptr = valptr + globalMaxNumNonzeros;
3165 intptr = (
int *) dintptr;
3167 for (i=0; i<NumImportIDs; i++) {
3168 ToBlockRow =
GRID64(ImportLIDs[i]);
3169 assert((intptr[0])==ToBlockRow);
3171 assert((intptr[1])==RowDim);
3172 NumBlockEntries = intptr[2];
3173 BlockIndices = intptr + 3;
3174 ColDims = BlockIndices + globalMaxNumBlockEntries;
3175 if (CombineMode==
Add) {
3185 else if (CombineMode==
Insert) {
3197 for (j=0; j<NumBlockEntries; j++) {
3199 int ColDim = ColDims[j];
3201 values += (LDA*ColDim);
3204 valptr += DoublePacketSize;
3205 dintptr = valptr + globalMaxNumNonzeros;
3206 intptr = (
int *) dintptr;
3265 int * PtMyGlobalElements = 0;
3266 if (PtNumMyElements>0) PtMyGlobalElements =
new int[PtNumMyElements];
3271 for (
int i=0; i<NumMyElements; i++) {
3272 int StartID = BlockMap.
GID64(i)*MaxElementSize;
3274 for (
int j=0; j<ElementSize; j++) PtMyGlobalElements[curID++] = StartID+j;
3276 assert(curID==PtNumMyElements);
3280 if (PtNumMyElements>0)
delete [] PtMyGlobalElements;
3365 for (
int iproc=0; iproc < NumProc; iproc++) {
3373 os <<
"\nNumber of Global Rows = "; os <<
NumGlobalRows64(); os << std::endl;
3374 os <<
"Number of Global Cols = "; os <<
NumGlobalCols64(); os << std::endl;
3378 if (
LowerTriangular()) { os <<
" ** Matrix is Lower Triangular **"; os << std::endl; }
3379 if (
UpperTriangular()) { os <<
" ** Matrix is Upper Triangular **"; os << std::endl; }
3380 if (
NoDiagonal()) { os <<
" ** Matrix has no diagonal **"; os << std::endl; os << std::endl; }
3383 os <<
"\nNumber of My Block Rows = "; os <<
NumMyBlockRows(); os << std::endl;
3384 os <<
"Number of My Block Cols = "; os <<
NumMyBlockCols(); os << std::endl;
3386 os <<
"Number of My Blk Entries = "; os <<
NumMyBlockEntries(); os << std::endl;
3388 os <<
"\nNumber of My Rows = "; os <<
NumMyRows(); os << std::endl;
3389 os <<
"Number of My Cols = "; os <<
NumMyCols(); os << std::endl;
3390 os <<
"Number of My Diagonals = "; os <<
NumMyDiagonals(); os << std::endl;
3391 os <<
"Number of My Nonzeros = "; os <<
NumMyNonzeros(); os << std::endl;
3392 os <<
"My Maximum Num Entries = "; os <<
MaxNumBlockEntries(); os << std::endl; os << std::endl;
3403 {
for (
int iproc=0; iproc < NumProc; iproc++) {
3407 int * BlockIndices1 =
new int[MaxNumBlockEntries1];
3409 int RowDim1, NumBlockEntries1;
3414 os <<
" Processor ";
3416 os <<
" Block Row Index ";
3418 os <<
" Block Col Index \n";
3423 for (i=0; i<NumBlockRows1; i++) {
3424 int BlockRow1 =
GRID64(i);
3426 NumBlockEntries1, BlockIndices1,
3429 for (j = 0; j < NumBlockEntries1 ; j++) {
3431 os << MyPID ; os <<
" ";
3433 os << BlockRow1 ; os <<
" ";
3435 os << BlockIndices1[j]; os <<
" " << std::endl;
3438 if (Entries1[j] == 0) {
3439 os <<
"Block Entry == NULL"<< std::endl;
3444 RowDim1, Entries1[j]->N());
3445 os << entry_view; os <<
" ";
3450 delete [] BlockIndices1;
3464 #endif // EPETRA_NO_32BIT_GLOBAL_INDICES