00001
00006
00007
00008
00009 #ifndef TBCI_BAND_MATRIX_H
00010 #define TBCI_BAND_MATRIX_H
00011
00012
00013
00014 #include "basics.h"
00015 #include "vector.h"
00016 #include "matrix.h"
00017
00018
00019
00020 #ifndef BD_MINVAL
00021 # define BD_MINVAL 1e-16
00022 #endif
00023
00025 #ifndef BDMAT_NEW_ALLOC
00026 # define BDMAT_ONE_BLOCK
00027 #endif
00028
00029 #ifdef BDMAT_MEM_TRACE
00030 # define BDMATDBG(x) x
00031 #else
00032 # define BDMATDBG(x)
00033 #endif
00034
00035
00036 #if !defined(NO_GD) && !defined(AUTO_DECL)
00037 # include "band_matrix_gd.h"
00038 #endif
00039
00040 NAMESPACE_TBCI
00041
00042 #ifdef EXCEPT
00043
00044
00046 class BdMatrixErr : public NumErr
00047 {
00048 public:
00049 BdMatrixErr()
00050 : NumErr("Error in var conf Band_Matrix class") {}
00051 BdMatrixErr(const char* t, const long i = 0)
00052 : NumErr(t, i) {}
00053 BdMatrixErr(const BdMatrixErr& be)
00054 : NumErr(be) {}
00055 virtual ~BdMatrixErr() throw() {}
00056 };
00057 #endif
00058
00059 #ifdef PRAGMA_I
00060 # pragma interface "band_matrix.h"
00061 #endif
00062
00063
00064 template <typename T> class ILU0_BdMatrixPreconditioner;
00065 template <typename T> class DILU_BdMatrixPreconditioner;
00066
00102 template <typename T>
00103 class BdMatrix : public Matrix_Sig<T>
00104 {
00105 friend class ILU0_BdMatrixPreconditioner<T>;
00106 friend class DILU_BdMatrixPreconditioner<T>;
00107 protected:
00108
00109 unsigned int dim, maxoff;
00110 unsigned long arsz;
00111 BVector <unsigned> diagconf;
00112 BVector <T*> adiag, bdiag;
00113 T* diag;
00114
00115
00116
00117 void constructor(const unsigned int, const bool = true);
00118 T* check(const unsigned r, const unsigned c) const HOT;
00119 inline void free_diags(const unsigned);
00120 inline void do_copy(const BdMatrix<T>&);
00121
00122 private:
00123 mutable T dummy ALIGN(MIN_ALIGN);
00124 mutable int outw, outp, outf;
00125 mutable bool outset;
00126
00127 inline bool search2(unsigned int&, const unsigned int, const unsigned int) HOT;
00128
00129 public:
00130
00131 typedef T value_type;
00132 typedef T element_type;
00133 typedef T aligned_value_type TALIGN(MIN_ALIGN2);
00134
00135
00137 explicit BdMatrix(const unsigned int = 0);
00139 explicit BdMatrix(const unsigned int, const unsigned int);
00141 explicit BdMatrix(const unsigned int, const BVector<unsigned int>&);
00143 BdMatrix(const BdMatrix<T>&);
00145 BdMatrix(const T&, const unsigned int);
00146 BdMatrix(const T&, const unsigned int, const BVector<unsigned int>&);
00148 BdMatrix(const T&, const unsigned int, unsigned int);
00150 BdMatrix(const Matrix<T>&);
00151 ~BdMatrix();
00152
00153
00155 void destroy()
00156 {
00157 free_diags(diagconf.size());
00158 dim = 0; arsz = 0;
00159 adiag.resize(0); bdiag.resize(0); diagconf.resize(0);
00160 }
00161
00162
00163 static const char* mat_info()
00164 { return "BdMatrix"; }
00165
00166 #ifndef HAVE_PROMOTION_BUG
00167
00168 # ifndef HAVE_GCC295_TMPLFRNDCLS_BUG
00169 template <typename U> friend class BdMatrix;
00170 template <typename U> explicit BdMatrix(const BdMatrix<U>& bm)
00171 : diagconf(bm.diagconf), adiag(bm.dim), bdiag(bm.dim)
00172 {
00173 constructor(bm.dim);
00174 for (unsigned k=0; k<dim; ++k)
00175 diag[k] = bm.diag[k];
00176 for (unsigned i=0; i<diagconf.size(); ++i) {
00177 const unsigned di = diagconf(i);
00178 for (unsigned j=0; j<dim-di; ++j) {
00179 adiag(di)[j] = bm.adiag(di)[j];
00180 bdiag(di)[j] = bm.bdiag(di)[j];
00181 }
00182 }
00183 if (bm.outset) {
00184 outw = bm.outw; outp = bm.outp;
00185 outf = bm.outf; outset = true;
00186 }
00187 }
00188 # endif
00189 #endif
00190
00192 operator TMatrix<T>() const;
00193
00194
00196 T& operator() (const unsigned int, const unsigned int) HOT;
00198 typename tbci_traits<T>::const_refval_type
00199 operator() (const unsigned int, const unsigned int) const HOT;
00200
00202 BdMatrix<T>& setval(const T& v, const unsigned int r,
00203 const unsigned int c)
00204 { return autoinsert(v,r,c); }
00205 T& setval(const unsigned int r, const unsigned int c);
00206 typename tbci_traits<T>::const_refval_type
00207 get(const unsigned int i, const unsigned int j) const
00208 { return (*this)(i,j); }
00209
00210
00211
00212
00213
00215 BdMatrix<T>& transpose();
00216
00217 BdMatrix<T>& resize(const unsigned int);
00218 BdMatrix<T>& resize(const T&, const unsigned int);
00219 BdMatrix<T>& resize(const unsigned int, const BVector<unsigned int>&);
00220 BdMatrix<T>& resize(const T&, const unsigned int, const BVector<unsigned int>&);
00222 BdMatrix<T>& resize(const BdMatrix<T>& bdm)
00223 { resize(bdm.dim, bdm.diagconf); return *this = bdm; }
00224
00225 BdMatrix<T>& reconfig(const BVector<unsigned int>&) COLD;
00226 BdMatrix<T>& setunit(const T& = 1);
00227 BdMatrix<T>& fill(const T& = 0);
00228 BdMatrix<T>& clear();
00229 BdMatrix<T>& adddiag(const unsigned);
00230 BdMatrix<T>& removediag(const unsigned);
00231 BdMatrix<T>& autoinsert(const T&, const unsigned, const unsigned);
00232 BdMatrix<T>& expand(unsigned = 0);
00234 BdMatrix<T>& set_row(const Vector<T>& val, const unsigned, const unsigned = 0);
00236 TVector<T> get_row(const unsigned int) const;
00238 TVector<T> get_col(const unsigned int) const;
00239
00241 inline unsigned int size() const { return dim; }
00243 inline unsigned int noelem() const { return arsz; }
00245 inline unsigned int rows() const { return dim; }
00247 inline unsigned int columns() const { return dim; }
00248
00250 const BVector<unsigned int>& getcfg() const { return diagconf; }
00252 unsigned int getmaxoff() const { return maxoff; }
00253
00254 T trace() const
00255 {
00256 T r ALIGN(MIN_ALIGN) = 0;
00257 for (register unsigned i = 0; i < dim; ++i)
00258 r += diag[i];
00259 return r;
00260 }
00261
00262
00263
00264
00266 BdMatrix<T>& operator = (const BdMatrix<T>&);
00267 BdMatrix<T>& operator = (const T&);
00268 BdMatrix<T>& operator += (const BdMatrix<T>&);
00269 BdMatrix<T>& operator -= (const BdMatrix<T>&);
00270
00271 BdMatrix<T>& operator *= (const T&);
00272 BdMatrix<T>& operator /= (const T&);
00273
00274
00275 #ifdef BDMAT_TRANS_FRIENDS
00276 friend NOINST BdMatrix<T> transpose FGD (BdMatrix<T>&);
00277 #endif
00278
00279 bool operator == (const BdMatrix<T>&) const;
00280 bool operator != (const BdMatrix<T>& ma) const { return !(*this == ma); }
00281
00282 BdMatrix<T> operator + (const BdMatrix<T>&) const;
00283 BdMatrix<T> operator - (const BdMatrix<T>&) const;
00284 BdMatrix<T> operator * (const BdMatrix<T>&) const;
00285 BdMatrix<T> operator * (const T&) const;
00286
00287 #ifdef BDMAT_OP_FRIENDS
00288 friend NOINST BdMatrix<T> operator * FGD (const T&, const BdMatrix<T>&);
00289 #endif
00290
00291 BdMatrix<T> operator / (const T&) const;
00292 BdMatrix<T> operator - () const;
00293
00294
00295
00296
00297
00298
00300 TVector<T> operator * (const Vector<T>&) const HOT;
00302 TVector<T> transMult(const Vector<T>&) const HOT;
00303
00304
00305 TVector<T> dotMult(const Vector<T>&) const;
00306
00307 TMatrix<T> operator * (const Matrix<T>&) const;
00308
00310 BdMatrix<T>& mult_rows(const Vector<T>&);
00311 BdMatrix<T>& div_rows(const Vector<T>&);
00312 BdMatrix<T>& mult_row(const T&, const unsigned);
00313 BdMatrix<T>& div_row(const T&, const unsigned);
00314
00315
00316
00317 friend STD__ istream& operator >> FGD (STD__ istream&, BdMatrix<T>&);
00318 friend STD__ ostream& operator << FGD (STD__ ostream&, const BdMatrix<T>&);
00319 const BdMatrix<T>& setoutopts(int w = 5, int p = 4, char f= ' ') const
00320 { outw = w; outp = p; outf = f; outset = true; return *this; }
00321
00322
00323 friend NOINST void gaussj FGDT (const BdMatrix<T>&,
00324 const Matrix<T>&);
00325 friend NOINST int lu_decomp FGDT (BdMatrix<T>&) HOT;
00326
00327
00328 BdMatrix<T> inverse() const;
00329
00330
00331 friend void FRIEND_TBCI2__ do_bdmat_vec_mult FGDT (const unsigned, const unsigned,
00332 TVector<T> *, const BdMatrix<T> *, const Vector<T> *);
00333 friend void FRIEND_TBCI2__ do_bdmat_vec_transmult FGDT (const unsigned, const unsigned,
00334 TVector<T> *, const BdMatrix<T> *, const Vector<T> *);
00335 friend void FRIEND_TBCI2__ do_bdmat_vec_dotmult FGDT (const unsigned, const unsigned,
00336 TVector<T> *, const BdMatrix<T> *, const Vector<T> *);
00337 };
00338
00339
00341 #ifdef BDMAT_ONE_BLOCK
00342
00343 template <typename T>
00344 inline void BdMatrix<T>::free_diags(const unsigned dummy)
00345 {
00346 BDMATDBG(STD__ cerr << "BdMat free " << diag << " (" << dim << ")" << STD__ endl);
00347 if (dim)
00348 TBCIDELETE(T, diag, arsz);
00349 }
00350
00351 template <typename T>
00352 inline void BdMatrix<T>::do_copy(const BdMatrix<T>& bdm)
00353 {
00354 TBCICOPY(diag, bdm.diag, T, arsz);
00355
00356 for (unsigned i = 0; i < diagconf.size(); ++i) {
00357 const unsigned di = diagconf(i);
00358 adiag(di) = (T*)((long)diag + (long)bdm.adiag(di) - (long)bdm.diag);
00359 bdiag(di) = (T*)((long)diag + (long)bdm.bdiag(di) - (long)bdm.diag);
00360 }
00361 }
00362
00363 template <typename T>
00364 void BdMatrix<T>::constructor(const unsigned int d, const bool delout)
00365 {
00366 dim = d; maxoff = 0;
00367
00368 BCHKNR(dim > 0 && maxoff >= dim, BdMatrixErr, Wrong config Vector, maxoff);
00369 adiag.resize((T*)0, d); bdiag.resize((T*)0, d);
00370 unsigned long sz = dim; int t;
00371 const unsigned long dcs = diagconf.size();
00372 for (register unsigned j = 0; j < dcs; ++j)
00373 if ((t = ((signed)dim - (signed)diagconf(j))) > 0)
00374 sz += 2 * t;
00375 if (UNLIKELY(sz)) {
00376 diag = NEW(T, sz);
00377 if (LIKELY(diag)) {
00378 adiag(0) = diag; bdiag(0) = diag; sz = dim;
00379 for (unsigned i = 0; i < dcs; ++i) {
00380 const unsigned int di = diagconf(i);
00381 int s = (signed)dim - (signed)di;
00382 if (UNLIKELY(s <= 0 || s >= (signed)dim)) {
00383 STD__ cerr << "Wrong config. Vector supplied!" << STD__ endl;
00384 s = 0;
00385 } else {
00386 adiag(di) = diag+sz; sz += s;
00387 bdiag(di) = diag+sz; sz += s;
00388 }
00389 if (LIKELY(di > maxoff))
00390 maxoff = di;
00391 }
00392 } else {
00393 adiag.resize(0); bdiag.resize(0); dim = 0;
00394 STD__ cerr << "BdMat ctor: malloc failed to get " << sz << " bytes!" << STD__ endl;
00395 BCHKNR(!diag, BdMatrixErr, memory alloc failure, sz);
00396 sz = 0;
00397 }
00398 }
00399 else
00400 diag = (T*)0;
00401
00402 arsz = sz; dummy = (T)0;
00403 BDMATDBG(STD__ cerr << "BdMat alloc " << diag << " (" << sz << ")" << STD__ endl);
00404 if (delout)
00405 outset = false;
00406 }
00407
00408 #else
00409
00410
00411 template <typename T>
00412 inline void BdMatrix<T>::free_diags(const unsigned u)
00413 {
00414 for (int i = u-1; i >= 0; --i) {
00415
00416 register const unsigned di = diagconf(i);
00417 register T* ptr = MIN(adiag(di), bdiag(di));
00418 BDMATDBG(STD__ cerr << "BdMat free " << ptr << STD__ endl);
00419 TBCIDELETE(T, ptr, 2*(dim-di));
00420 }
00421 BDMATDBG(STD__ cerr << "BdMat free " << diag << " (" << dim << ")" << STD__ endl);
00422 if (LIKELY(dim))
00423 TBCIDELETE(T, diag, dim);
00424 }
00425
00426 template <typename T>
00427 inline void BdMatrix<T>::do_copy(const BdMatrix<T>& bdm)
00428 {
00429 TBCICOPY(diag, bdm.diag, T, dim);
00430 for (unsigned i = 0; i < diagconf.size(); ++i) {
00431 register const unsigned di = diagconf(i);
00432 TBCICOPY(adiag(di), bdm.adiag(di), T, (dim-di));
00433 TBCICOPY(bdiag(di), bdm.bdiag(di), T, (dim-di));
00434 }
00435 }
00436
00437 template <typename T>
00438 void BdMatrix<T>::constructor(const unsigned int d, const bool delout)
00439 {
00440 unsigned long sz = 0; unsigned i = 0;
00441 dim = d; maxoff = 0;
00442
00443 BCHKNR (dim > 0 && maxoff >= dim, BdMatrixErr, Wrong config Vector, maxoff);
00444
00445 adiag.resize((T*)0, d);
00446 bdiag.resize((T*)0, d);
00447
00448 if (LIKELY(!dim))
00449 diag = (T*)0;
00450 else {
00451 diag = NEW(T,dim);
00452 if (UNLIKELY(!diag))
00453 goto memallocfail;
00454 sz += dim;
00455 adiag(0) = diag; bdiag(0) = diag;
00456 }
00457
00458 while (i < diagconf.size()) {
00459 const unsigned di = diagconf(i);
00460 int s = (signed)dim - (signed)di;
00461 if (UNLIKELY(s <= 0 || s >= (signed)dim)) {
00462 BCHKNR(true, BdMatrixErr, Illegal Config BVector, di);
00463
00464 diagconf.remove(i);
00465 } else {
00466 T* ptr = NEW(T, 2*s);
00467 if (UNLIKELY(!ptr))
00468 goto memallocfail;
00469 adiag(di) = ptr;
00470 bdiag(di) = ptr+s;
00471 if (LIKELY(di > maxoff))
00472 maxoff = di;
00473 sz += 2*s; ++i;
00474 }
00475 }
00476 arsz = sz; dummy = (T)0;
00477 if (delout)
00478 outset = false;
00479 BDMATDBG(STD__ cerr << "BdMat alloc " << diag << " (" << sz << ")" << STD__ endl);
00480 return;
00481 memallocfail:
00482 free_diags(i);
00483 adiag.resize(0); bdiag.resize(0); dim = 0;
00484 STD__ cerr << "BdMat ctor: mem alloc (" << sz << ") failed!" << STD__ endl;
00485 BCHKNR(1, BdMatrixErr, mem alloc failure, sz);
00486 return;
00487 }
00488
00489 #endif
00490
00491 template <typename T>
00492 inline BdMatrix<T>::BdMatrix(const unsigned int d)
00493 : diagconf(1, 1), adiag(d), bdiag(d)
00494 {
00495 if (UNLIKELY(d <= 1))
00496 diagconf.resize(0);
00497 constructor(d); clear();
00498 }
00499
00500 template <typename T>
00501 inline BdMatrix<T>::BdMatrix(const unsigned int d, const unsigned int e)
00502 : diagconf(1, 1), adiag(d), bdiag(d)
00503 {
00504 BCHKNR(e != d, BdMatrixErr, "Non square BdMatrix shape impossible!", e);
00505 if (UNLIKELY(d <= 1))
00506 diagconf.resize(0);
00507 constructor(d); clear();
00508 }
00509
00510 template <typename T>
00511 inline BdMatrix<T>::BdMatrix(const unsigned int d, const BVector<unsigned int>& conf)
00512 : diagconf(conf), adiag(d), bdiag(d)
00513 {
00514 constructor(d); clear();
00515 }
00516
00517
00518 template <typename T>
00519 inline BdMatrix<T>::BdMatrix(const BdMatrix<T>& mat)
00520 : diagconf(mat.diagconf), adiag(mat.dim), bdiag(mat.dim)
00521 {
00522 constructor(mat.dim); do_copy(mat);
00523
00524 if (UNLIKELY(mat.outset)) {
00525 outw = mat.outw; outp = mat.outp;
00526 outf = mat.outf; outset = true;
00527 }
00528 }
00529
00530 template <typename T>
00531 inline BdMatrix<T>::BdMatrix(const T& val, const unsigned int d)
00532 : diagconf(1, 1), adiag(d), bdiag(d)
00533 {
00534 if (UNLIKELY(d <= 1))
00535 diagconf.resize(0);
00536 constructor(d);
00537 setunit(val);
00538 }
00539
00540 template <typename T>
00541 inline BdMatrix<T>::BdMatrix(const T& val, const unsigned int d, unsigned int mo)
00542 : diagconf(mo), adiag(d), bdiag(d)
00543 {
00544 if (mo >= d)
00545 mo = d - 1;
00546 diagconf.resize(mo);
00547 for (register unsigned int t = 0; t < mo; ++t)
00548 diagconf(t) = t+1;
00549
00550 constructor(d);
00551 setunit(val);
00552 }
00553
00554
00555 template <typename T>
00556 inline BdMatrix<T>::BdMatrix(const T& val, const unsigned int d,
00557 const BVector<unsigned int>& conf)
00558 : diagconf(conf), adiag(d), bdiag(d)
00559 {
00560 constructor(d);
00561 setunit(val);
00562 }
00563
00564
00565 template <typename T>
00566 BdMatrix<T>::BdMatrix(const Matrix<T>& mat)
00567 : diagconf(0, 0), adiag(mat.rows()), bdiag(mat.columns())
00568 {
00569 BCHKNR(mat.columns() != mat.rows(), BdMatrixErr, Only quadratic matrices, 0);
00570
00571
00572 unsigned int ds = 0;
00573 for (unsigned int l = 1; l < mat.columns(); ++l) {
00574 for (unsigned register j = 0;
00575 (signed)j < ((signed)(mat.rows() - l)); ++j) {
00576 if (mat(j, j+l) != T(0) || mat(j+l, j) != T(0)) {
00577 diagconf.resize(++ds);
00578 diagconf(ds - 1) = l;
00579 break;
00580 }
00581 }
00582 }
00583
00584
00585 constructor(mat.columns());
00586
00587
00588 for (register unsigned int k = 0; k < dim; ++k)
00589 diag[k] = mat(k, k);
00590
00591 for (unsigned int i = 0; i < diagconf.size(); ++i) {
00592 const unsigned int dci = diagconf(i);
00593 for (register unsigned int j = 0; j < dim - dci; ++j) {
00594 adiag(dci)[j] = mat(j, j + dci);
00595 bdiag(dci)[j] = mat(dci + j, j);
00596 }
00597 }
00598 }
00599
00600 template <typename T>
00601 inline BdMatrix<T>::~BdMatrix()
00602 {
00603 BCHKNR(dummy!=(T)0, BdMatrixErr, dummy not zero, 0);
00604 free_diags(diagconf.size());
00605 }
00606
00607
00608 template <typename T>
00609 inline bool BdMatrix<T>::search2(unsigned int& cfg, const unsigned int i,
00610 const unsigned int j)
00611 {
00612 for (register int t = 0; (unsigned)t < diagconf.size(); ++t) {
00613 if (UNLIKELY(CSTD__ abs((int)(j - i)) == (int)diagconf(t))) {
00614 cfg = t;
00615 return true;
00616 }
00617 }
00618 return false;
00619 }
00620
00621
00622 template <typename T>
00623 inline T* BdMatrix<T>::check(const unsigned i, const unsigned j) const
00624 {
00625 BCHK(i<0 || i>=dim, BdMatrixErr, Invalid row index, i, (T*)0);
00626 BCHK(j<0 || j>=dim, BdMatrixErr, Invalid col index, j, (T*)0);
00627
00628 const int df = i-j;
00629 if (LIKELY(!df))
00630 return diag+i;
00631 if (CSTD__ abs(df) > (int)maxoff)
00632 return (T*)0;
00633
00634 T* ptr;
00635 if (df > 0) {
00636 if (LIKELY(ptr = bdiag( df)))
00637 return ptr+j;
00638 else
00639 return (T*)0;
00640 } else {
00641 if (LIKELY(ptr = adiag(-df)))
00642 return ptr+i;
00643 else
00644 return (T*)0;
00645 }
00646 }
00647
00648
00649 template <typename T>
00650 inline T& BdMatrix<T>::operator() (const unsigned int r, const unsigned int c)
00651 {
00652 register T* pt = check(r, c);
00653 if (LIKELY(pt))
00654 return *pt;
00655 else {
00656 BCHK(dummy != (T)0, BdMatrixErr, dummy not zero, 0, (dummy=(T)0));
00657 return (dummy = (T)0);
00658 }
00659 }
00660
00661 template <typename T>
00662 inline typename tbci_traits<T>::const_refval_type
00663 BdMatrix<T>::operator() (const unsigned int r, const unsigned int c) const
00664 {
00665 register T* pt = check(r, c);
00666 if (LIKELY(pt))
00667 return *pt;
00668 else {
00669 BCHK(dummy != (T)0, BdMatrixErr, dummy not zero, 0, (dummy=(T)0));
00670 return (dummy = (T)0);
00671 }
00672 }
00673
00674
00675 template <typename T>
00676 T& BdMatrix<T>::setval(const unsigned int r, const unsigned int c)
00677 {
00678
00679 register T* pt = check(r, c);
00680 if (LIKELY(pt))
00681 return *pt;
00682 else {
00683 adddiag(CSTD__ abs((int)(r-c)));
00684 pt = check(r, c);
00685 BCHK(!pt, BdMatrixErr, Internal autoinsert error, 0, dummy=(T)0);
00686
00687 }
00688 return *pt;
00689 }
00690
00691
00692 #ifdef BDMAT_ONE_BLOCK
00693 template <typename T>
00694 inline BdMatrix<T>& BdMatrix<T>::setunit(const T& val)
00695 {
00696 TBCIFILL(diag, val, T, dim);
00697 if (arsz > dim)
00698 TBCIFILL((diag+dim), (T)0, T, (arsz-dim));
00699 dummy = (T)0;
00700 return *this;
00701 }
00702 #else
00703 template <typename T>
00704 inline BdMatrix<T>& BdMatrix<T>::setunit(const T& val)
00705 {
00706 TBCIFILL(diag, val, T, dim);
00707 for (unsigned i = 0; i < diagconf.size(); ++i) {
00708 const register unsigned di = diagconf(i);
00709 TBCIFILL(adiag(di), (T)0, T, (dim-di));
00710 TBCIFILL(bdiag(di), (T)0, T, (dim-di));
00711 }
00712 dummy = (T)0;
00713 return *this;
00714 }
00715 #endif
00716
00717 template <typename T>
00718 BdMatrix<T>& BdMatrix<T>::resize(const unsigned int nd)
00719 {
00720 if (nd == dim)
00721 return *this;
00722
00723 T* oldd(diag); unsigned int od = dim;
00724 #ifdef BDMAT_ONE_BLOCK
00725 unsigned int oldar = arsz;
00726 #endif
00727 BVector<T*> oldad(adiag), oldbd(bdiag);
00728 BVector<unsigned> oldconf(diagconf);
00729
00730
00731 if (od <= 1 && nd > 1)
00732 diagconf.resize(1, 1);
00733 constructor(nd, false);
00734 int min = MIN(nd, od);
00735
00736 if (min) {
00737
00738 TBCICOPY(diag, oldd, T, min);
00739 if (nd > od)
00740 TBCIFILL((diag+min), (T)0, T, (nd-dim));
00741
00742 for (unsigned int t = 0; t < diagconf.size(); ++t) {
00743 const register int s = diagconf(t);
00744 if (min - s > 0) {
00745 TBCICOPY(adiag(s), oldad(s), T, (min-s));
00746 TBCICOPY(bdiag(s), oldbd(s), T, (min-s));
00747 if (nd > od) {
00748 TBCIFILL((adiag(s)+(min-s)), (T)0, T, (nd-dim));
00749 TBCIFILL((bdiag(s)+(min-s)), (T)0, T, (nd-dim));
00750 }
00751 }
00752 }
00753 }
00754
00755 #ifndef BDMAT_ONE_BLOCK
00756 for (unsigned i = 0; i < oldconf.size(); ++i)
00757 TBCIDELETE(T, MIN(oldad(oldconf(i)), oldbd(oldconf(i))),
00758 2*(od-oldconf(i)));
00759 if (od)
00760 TBCIDELETE(T, oldd, od);
00761 #else
00762 if (oldar)
00763 TBCIDELETE(T, oldd, oldar);
00764 #endif
00765 return *this;
00766 }
00767
00768
00769 template <typename T>
00770 BdMatrix<T>& BdMatrix<T>::resize(const T& val, const unsigned int nd)
00771 {
00772 if (nd != dim) {
00773 if (dim)
00774 free_diags(diagconf.size());
00775
00776 if (dim <= 1 && nd > 1)
00777 diagconf.resize(1, 1);
00778 constructor(nd, false);
00779 }
00780 setunit(val);
00781 return *this;
00782 }
00783
00784
00785
00786 template <typename T>
00787 BdMatrix<T>& BdMatrix<T>::resize(const unsigned int nd, const BVector<unsigned int>& cfg)
00788 {
00789 if (nd == dim && cfg == diagconf)
00790 return *this;
00791 if (dim)
00792 free_diags(diagconf.size());
00793 diagconf.copy(cfg);
00794 constructor(nd, false);
00795 clear();
00796 return *this;
00797 }
00798
00799
00800 template <typename T>
00801 BdMatrix<T>& BdMatrix<T>::resize(const T& val, const unsigned int nd,
00802 const BVector<unsigned int>& cfg)
00803 {
00804 resize(nd, cfg);
00805
00806
00807 for (unsigned i = 0; i < dim; ++i)
00808 diag[i] = val;
00809 return *this;
00810 }
00811
00812
00813
00814 template <typename T>
00815 BdMatrix<T>& BdMatrix<T>::reconfig(const BVector<unsigned int>& cfg)
00816 {
00817 if (dim == 0 || cfg == diagconf)
00818 return *this;
00819 T* oldd = diag;
00820 #ifdef BDMAT_ONE_BLOCK
00821 unsigned int oldar = arsz;
00822 #else
00823 unsigned int olddim = dim;
00824 #endif
00825 BVector<T*> oldad(adiag), oldbd(bdiag);
00826 BVector<unsigned> oldconf(diagconf);
00827
00828 diagconf.resize(cfg);
00829 constructor(dim, false);
00830
00831 clear();
00832
00833 TBCICOPY(diag, oldd, T, dim);
00834
00835 for (register unsigned t = 0; t < diagconf.size(); ++t) {
00836 const unsigned int dct = diagconf(t);
00837 if (oldad(dct)) {
00838 TBCICOPY(adiag(dct), oldad(dct), T, (dim-dct));
00839 TBCICOPY(bdiag(dct), oldbd(dct), T, (dim-dct));
00840 }
00841 }
00842 #ifndef BDMAT_ONE_BLOCK
00843 for (unsigned i = 0; i < oldconf.size(); ++i)
00844 TBCIDELETE(T, MIN(oldad(oldconf(i)), oldbd(oldconf(i))),
00845 2*(olddim-oldconf(i)));
00846 if (oldd)
00847 TBCIDELETE(T, oldd, olddim);
00848 #else
00849 if (oldar)
00850 TBCIDELETE(T, oldd, oldar);
00851 #endif
00852 return *this;
00853 }
00854
00855 #ifdef BDMAT_ONE_BLOCK
00856 template <typename T>
00857 BdMatrix<T>& BdMatrix<T>::adddiag(const unsigned ndg)
00858 {
00859 const int newsz = dim - ndg;
00860 BCHK(newsz <= 0, BdMatrixErr, Diag out of range, ndg, *this);
00861
00862 if (LIKELY(diagconf.contains(ndg)))
00863 return *this;
00864
00865 T* olddiag = diag;
00866 REALLOC(diag, arsz, T, (arsz + 2 * newsz));
00867 if (UNLIKELY(!diag)) {
00868 diag = olddiag;
00869 STD__ cerr << "Allocation error" << STD__ endl;
00870 return *this;
00871 }
00872 BDMATDBG(STD__ cerr << "BdMat realloc " << diag << " ("
00873 << arsz+2*newsz << ")" << STD__ endl);
00874
00875
00876
00877
00878
00879
00880
00881
00882 long diff = (long)diag - (long)olddiag;
00883 if (LIKELY(diff)) {
00884
00885
00886
00887 adiag(0) = (T*)((long)adiag(0) + diff);
00888 bdiag(0) = (T*)((long)bdiag(0) + diff);
00889 const unsigned dcs = diagconf.size();
00890 for (unsigned i = 0; i < dcs; ++i) {
00891 const unsigned di = diagconf(i);
00892 adiag(di) = (T*)((long)adiag(di) + diff);
00893 bdiag(di) = (T*)((long)bdiag(di) + diff);
00894 }
00895
00896 }
00897
00898 BCHKNR(adiag(0) != diag || bdiag(0) != diag, BdMatrixErr, adddiag: mismatch while doing pointer arithmetic, diag - adiag(0));
00899
00900 diagconf.append(ndg);
00901 adiag(ndg) = diag+arsz; bdiag(ndg) = diag+arsz+newsz;
00902 TBCIFILL((diag+arsz), (T)0, T, (unsigned)(2*newsz));
00903
00904 if (ndg > maxoff)
00905 maxoff = ndg;
00906 arsz += 2 * newsz;
00907
00908 return *this;
00909 }
00910
00911 #else
00912
00913 template <typename T>
00914 BdMatrix<T>& BdMatrix<T>::adddiag(const unsigned ndg)
00915 {
00916 const int newsz = dim - ndg;
00917 BCHK(newsz <= 0, BdMatrixErr, Diag out of range, ndg, *this);
00918
00919 if (LIKELY(diagconf.contains(ndg)))
00920 return *this;
00921 T* ptr = NEW(T, 2*newsz);
00922 if (UNLIKELY(!ptr)) {
00923 BCHK(1, BdMatrixErr, Allocation failed in adddiag, 2*newsz*sizeof(T), *this);
00924 STD__ cerr << "BdMat adddiag allocation failed!" << STD__ endl;
00925 return *this;
00926 }
00927 adiag(ndg) = ptr; bdiag(ndg) = ptr+newsz;
00928 TBCIFILL(ptr, (T)0, T, (2*newsz));
00929 diagconf.append(ndg);
00930
00931 if (ndg > maxoff)
00932 maxoff = ndg;
00933 arsz += 2 * newsz;
00934 BDMATDBG(STD__ cerr << "BdMat adddiag " << ptr << " (" << arsz << ")" << STD__ endl);
00935 return *this;
00936 }
00937
00938 #endif
00939
00940
00941 template <typename T>
00942 BdMatrix<T>& BdMatrix<T>::removediag(const unsigned dg)
00943 {
00944 if (dg > maxoff)
00945 return *this;
00946 #ifdef BDMAT_ONE_BLOCK
00947 TBCIFILL(adiag(dg), (T)0, T, (dim-dg));
00948 TBCIFILL(bdiag(dg), (T)0, T, (dim-dg));
00949 #else
00950 if (adiag(dg)) {
00951 BDMATDBG(STD__ cerr << "BdMat rmvdiag "
00952 << MIN(adiag(dg), bdiag(dg)) << STD__ endl);
00953 TBCIDELETE(T, MIN(adiag(dg),bdiag(dg)), 2*(dim-dg));
00954 adiag(dg) = (T*)0; bdiag(dg) = (T*)0;
00955 unsigned ix; diagconf.contains(dg, &ix);
00956 diagconf.remove(ix); arsz -= 2*(dim-dg);
00957 if (maxoff == dg)
00958 maxoff = diagconf.max();
00959 }
00960 #endif
00961 return *this;
00962 }
00963
00964 template <typename T>
00965 BdMatrix<T>& BdMatrix<T>::autoinsert(const T& val, const unsigned r, const unsigned c)
00966 {
00967 register T* pt = check(r, c);
00968 if (LIKELY(pt)) {
00969 *pt = val;
00970 return *this;
00971 } else {
00972 if (LIKELY(MATH__ fabs(val) >= BD_MINVAL)) {
00973 adddiag(CSTD__ abs((int)(r-c)));
00974 pt = check(r, c);
00975 BCHK(!pt, BdMatrixErr, Internal autoinsert error, 0, *this);
00976 *pt = val;
00977 }
00978 }
00979 return *this;
00980 }
00981
00982 template <typename T>
00983 BdMatrix<T>& BdMatrix<T>::set_row(const Vector<T>& val, const unsigned r, const unsigned s)
00984 {
00985 for (unsigned i=0; i<val.size(); ++i)
00986 autoinsert(val(i), r, i+s);
00987 return *this;
00988 }
00989
00990 template <typename T>
00991 TVector<T> BdMatrix<T>::get_row(const unsigned int r) const
00992 {
00993 TVector<T> ret(0, dim);
00994 BCHK(r >= dim, BdMatrixErr, get_row idx out of bounds, r, ret);
00995 ret.set(diag[r], r);
00996 for (unsigned i = 0; i < diagconf.size(); ++i) {
00997 const register unsigned di = diagconf(i);
00998 if (LIKELY(di < dim-r))
00999 ret.set(adiag(di)[r] , r+di);
01000 if (LIKELY(r >= di))
01001 ret.set(bdiag(di)[r-di], r-di);
01002 }
01003 return ret;
01004 }
01005
01006 template <typename T>
01007 TVector<T> BdMatrix<T>::get_col(const unsigned int c) const
01008 {
01009 TVector<T> ret(0, dim);
01010 BCHK(c >= dim, BdMatrixErr, get_col idx out of bounds, c, ret);
01011 ret.set(diag[c], c);
01012 for (unsigned i = 0; i < diagconf.size(); i++) {
01013 const register unsigned di = diagconf(i);
01014 if (LIKELY(di < dim-c))
01015 ret.set(bdiag(di)[c] , c+di);
01016 if (LIKELY(c >= di))
01017 ret.set(adiag(di)[c-di], c-di);
01018 }
01019 return ret;
01020 }
01021
01022 #ifdef BDMAT_ONE_BLOCK
01023 template <typename T>
01024 inline BdMatrix<T>& BdMatrix<T>::fill(const T& val)
01025 {
01026 TBCIFILL(diag, val, T, arsz);
01027 return *this;
01028 }
01029
01030 template <typename T>
01031 inline BdMatrix<T>& BdMatrix<T>::clear()
01032 {
01033 TBCIFILL(diag, (T)0, T, arsz);
01034 return *this;
01035 }
01036
01037 #else
01038
01039 template <typename T>
01040 inline BdMatrix<T>& BdMatrix<T>::fill(const T& val)
01041 {
01042 TBCIFILL(diag, val, T, dim);
01043 for (unsigned i = 0; i < diagconf.size(); ++i) {
01044 const register unsigned di = diagconf(i);
01045 TBCIFILL(adiag(di), val, T, (dim-di));
01046 TBCIFILL(bdiag(di), val, T, (dim-di));
01047 }
01048 return *this;
01049 }
01050
01051 template <typename T>
01052 inline BdMatrix<T>& BdMatrix<T>::clear()
01053 {
01054 TBCIFILL(diag, (T)0, T, dim);
01055 for (unsigned i = 0; i < diagconf.size(); ++i) {
01056 const register unsigned di = diagconf(i);
01057 TBCIFILL(adiag(di), (T)0, T, (dim-di));
01058 TBCIFILL(bdiag(di), (T)0, T, (dim-di));
01059 }
01060 return *this;
01061 }
01062 #endif
01063
01064
01065 template <typename T>
01066 BdMatrix<T>& BdMatrix<T>::expand(unsigned m)
01067 {
01068 if (UNLIKELY(m == 0))
01069 m = maxoff;
01070 BCHK(m>dim, BdMatrixErr, Wrong expansion size, m, *this);
01071 if (LIKELY(maxoff == m && arsz == (dim + 2*dim*m - m*(m+1))))
01072 return *this;
01073 #ifdef BDMAT_ONE_BLOCK
01074 BVector<unsigned> newconf(m);
01075 for (unsigned i = 0; i < m; ++i)
01076 newconf(i) = i+1;
01077 return reconfig(newconf);
01078 #else
01079 for (unsigned i=1; i<=m; ++i)
01080 adddiag(i);
01081 return *this;
01082 #endif
01083 }
01084
01085
01086
01087 #if 0
01088 template <typename T>
01089 T BdMatrix<T>::sdet(unsigned int level, BVector<unsigned int>& arow,
01090 BVector<unsigned int>& acol)
01091 {
01092 T el;
01093 int sz = arow.size();
01094
01095
01096 if (sz == 1)
01097 return this->operator() (arow(0), acol(0));
01098 if (sz == 2) {
01099 el = (this->operator() (arow(0), acol(0)) \
01100 * this->operator() (arow(1), acol(1)) \
01101 - this->operator() (arow(0), acol(1)) \
01102 * this->operator() (arow(1), acol(0)));
01103 return el;
01104 }
01105 T tmp = 0;
01106
01107 BVector<unsigned int> ar(sz-1); BVector<unsigned int> ac(sz-1);
01108 int r = bestrow(level++);
01109 int rw = 0; int mul;
01110
01111 for (register int i = 0; i < sz; i++) {
01112 if (arow(i) == r) {
01113 mul = (i%2? -1: 1); rw--;
01114 } else
01115 ar(rw) = arow(i);
01116 rw++;
01117 }
01118 for (register int j = 0; j < sz; j++) {
01119 el = this->operator() (r, acol(j));
01120 if (MATH__ fabs(el) < BD_MINVAL)
01121 continue;
01122
01123 #if 0
01124 int cl = 0;
01125 for (register int t = 0; t < sz; t++) {
01126 if (t == j)
01127 cl--;
01128 else
01129 ac(cl) = acol(t);
01130 cl++;
01131 }
01132 #else
01133 TBCICOPY(&(ac(0)), &(acol(0)), int, j);
01134 TBCICOPY(&(ac(j)), &(acol(j+1)), int, (sz-j-1));
01135 #endif
01136
01137 tmp += (j%2? -1: 1) * sdet(level, ar, ac) * el;
01138 }
01139 return tmp * mul;
01140 }
01141
01142 template <typename T>
01143 T BdMatrix<T>::subdet(BVector<unsigned int> ri, BVector<unsigned int> ci)
01144 {
01145 if (ri.size() != ci.size()) {
01146 #ifdef EXCEPT
01147 throw (BdMatrixErr("Unequal # of lines and rows to ignore!"));
01148 #else
01149 STD__ cerr << "Unequal # of lines and rows to ignore!" << STD__ endl;
01150 return 0;
01151 #endif
01152 }
01153
01154 BVector<unsigned int> actrow(dim - ri.size());
01155 BVector<unsigned int> actcol(dim - ci.size());
01156 BVector<unsigned int> rowfill(dim);
01157 bestrow.resize(dim);
01158
01159 unsigned int rw = 0; unsigned int cl = 0;
01160 for (register int j = 0; j < dim; j++) {
01161 if (ri.contains (j))
01162 rw--;
01163 else
01164 actrow(rw) = j;
01165 if (ci.contains(j))
01166 cl--;
01167 else
01168 actcol(cl) = j;
01169 rw++; cl++;
01170 }
01171
01172
01173
01174
01175 matmem<T>* el = offdiag.setfirst(); int elemi = (el? el->row: -1);
01176
01177 for (int i = 0; i < dim; i++) {
01178 if (!(actrow.contains(i))) {
01179 rowfill(i) = dim + 1; continue;
01180 }
01181
01182 int fill = 0;
01183 if (actcol.contains(i))
01184 fill++;
01185 if (actcol.contains(i+1) && i+1 < dim)
01186 fill++;
01187 if (actcol.contains(i-1) && i-1 >= 0)
01188 fill++;
01189
01190 for (register int s = 0; s < diagconf.size(); s++) {
01191 if (actcol.contains(i+diagconf(s)) && i+diagconf(s) < dim)
01192 fill++;
01193 if (actcol.contains(i-diagconf(s)) && i-diagconf(s) >= 0)
01194 fill++;
01195 }
01196
01197 while (el && elemi == i) {
01198 if (actcol.contains(el->col))
01199 fill++;
01200 el = offdiag.setnext(); elemi = (el? el->row: -1);
01201 }
01202
01203 rowfill(i) = fill;
01204 }
01205
01206
01207 for (int k = 0; k < dim; k++) {
01208 int min = dim + 2; int minr = -1;
01209 for (int s = 0; s < dim; s++)
01210 if (rowfill(s) < min) {
01211 min = rowfill(s); minr = s;
01212 }
01213 bestrow(k) = minr;
01214 if (minr >= 0)
01215 rowfill(minr) = dim + 1;
01216 }
01217 return sdet(0, actrow, actcol);
01218 }
01219 #endif
01220
01221
01222 template <typename T>
01223 BdMatrix<T>& BdMatrix<T>::operator = (const BdMatrix<T>& mat)
01224 {
01225 BCHK(mat.dim != dim, BdMatrixErr, Assignment from wrong size Matrix, mat.dim, *this);
01226 if (diagconf != mat.diagconf) {
01227 #ifdef BOUNDCHECK
01228 STD__ cout << "old:" << mat.diagconf << "\nnew:" << diagconf << STD__ endl;
01229 STD__ cout << "Warning: Automatic reconfiguring BdMatrix in assignment!" << STD__ endl;
01230 #endif
01231 free_diags(diagconf.size());
01232 diagconf.resize(mat.diagconf.size());
01233 diagconf = mat.diagconf;
01234 constructor(dim);
01235 }
01236
01237 dummy = mat.dummy;
01238 do_copy(mat);
01239 return *this;
01240 }
01241
01242
01243
01244 template <typename T>
01245 inline BdMatrix<T>& BdMatrix<T>::operator = (const T& val)
01246 {
01247 setunit(val); return *this;
01248 }
01249
01250
01251
01252 #define SFORALL_S(op) \
01253 template <typename T> \
01254 BdMatrix<T>& BdMatrix<T>::operator op (const BdMatrix<T>& mat) \
01255 { \
01256 BCHK(mat.dim != dim, BdMatrixErr, Adding wrong size BdMatrix, mat.dim, *this); \
01257 unsigned i, j; \
01258 for (i = 0; i < dim; ++i) \
01259 diag[i] op mat.diag[i]; \
01260 for (j = 0; j < diagconf.size(); ++j) { \
01261 const register unsigned di = diagconf(j); \
01262 if (LIKELY(mat.adiag(di))) \
01263 for (i = 0; i < (dim-di); ++i) { \
01264 adiag(di)[i] op mat.adiag(di)[i]; \
01265 bdiag(di)[i] op mat.bdiag(di)[i]; \
01266 } \
01267 } \
01268 if (diagconf == mat.diagconf) \
01269 return *this; \
01270 for (j = 0; j < mat.diagconf.size(); ++j) { \
01271 const register unsigned di = mat.diagconf(j); \
01272 if (!diagconf.contains(di)) { \
01273 adddiag(di); \
01274 for (i = 0; i < (dim-di); ++i) { \
01275 adiag(di)[i] op mat.adiag(di)[i]; \
01276 bdiag(di)[i] op mat.bdiag(di)[i]; \
01277 } \
01278 } \
01279 } \
01280 return *this; \
01281 }
01282
01283 SFORALL_S(+=)
01284 SFORALL_S(-=)
01285
01286
01287 #define SFORALL_T(op) \
01288 template <typename T> \
01289 BdMatrix<T>& BdMatrix<T>::operator op (const T& mult) \
01290 { \
01291 for (register unsigned int i = 0; i < dim; ++i) \
01292 diag[i] op mult; \
01293 for (unsigned j = 0; j < diagconf.size(); ++j) { \
01294 const register unsigned di = diagconf(j); \
01295 for (unsigned k = 0; k < (dim-di); ++k) { \
01296 adiag(di)[k] op mult; \
01297 bdiag(di)[k] op mult; \
01298 } \
01299 } \
01300 return *this; \
01301 }
01302
01303 SFORALL_T(*=)
01304
01305
01306
01307
01308 template <typename T>
01309 inline BdMatrix<T>& BdMatrix<T>::operator /= (const T& div)
01310 {
01311 BCHK((div==(T)0), BdMatrixErr, "Divide BdMatrix by zero", 0, *this);
01312 register T mult ALIGN(MIN_ALIGN) = (T)1 / div;
01313 for (register unsigned int i = 0; i < dim; ++i)
01314 diag[i] *= mult;
01315 for (unsigned j = 0; j < diagconf.size(); ++j) {
01316 const register unsigned di = diagconf(j);
01317 for (unsigned k = 0; k < (dim-di); ++k) {
01318 adiag(di)[k] *= mult;
01319 bdiag(di)[k] *= mult;
01320 }
01321 }
01322 return *this;
01323 }
01324
01325 #define INTDIVEQ(T) \
01326 template <> \
01327 inline BdMatrix < T >& BdMatrix< T >::operator /= (const T & div) \
01328 { \
01329 BCHK((div==( T )0), BdMatrixErr, "Divide BdMatrix by zero", 0, *this); \
01330 for (register unsigned i = 0; i < dim; ++i) \
01331 diag[i] /= div; \
01332 for (unsigned j = 0; j < diagconf.size(); ++j) { \
01333 const register unsigned di = diagconf(j); \
01334 for (unsigned k = 0; k < (dim-di); ++k) { \
01335 adiag(di)[k] /= div; \
01336 bdiag(di)[k] /= div; \
01337 } \
01338 } \
01339 return *this; \
01340 }
01341
01342 INTDIVEQ(int)
01343 INTDIVEQ(unsigned int)
01344
01345 template <typename T>
01346 inline BdMatrix<T>& BdMatrix<T>::transpose()
01347 {
01348 #if 0 //def BDMAT_ONE_BLOCK
01349
01350 for (register unsigned int t = 0; t < diagconf.size(); ++t) {
01351 const unsigned int s = diagconf(t);
01352 T* tmp = adiag(s); adiag(s) = bdiag(s); bdiag(s) = tmp;
01353 }
01354 #else
01355 adiag.swap(bdiag);
01356 #endif
01357 return *this;
01358 }
01359
01360 INST(template <typename T> class BdMatrix friend BdMatrix<T> transpose(BdMatrix<T>&);)
01361 template <typename T>
01362 inline BdMatrix<T> transpose(BdMatrix<T>& mat)
01363 {
01364 return (BdMatrix<T>(mat)).transpose();
01365 }
01366
01367 #define STDDEF_ST(op) \
01368 template <typename T> \
01369 inline BdMatrix<T> BdMatrix<T>::operator op (const T& val) const \
01370 { \
01371 return (BdMatrix<T> (*this)) op##= val; \
01372 }
01373
01374 STDDEF_ST(/)
01375 STDDEF_ST(*)
01376
01377
01378
01379 #define STDDEF_SS(op) \
01380 template <typename T> \
01381 BdMatrix<T> BdMatrix<T>::operator op (const BdMatrix<T>& m2) const \
01382 { \
01383 return (BdMatrix<T> (*this)) op##= m2; \
01384 }
01385
01386 STDDEF_SS(+)
01387 STDDEF_SS(-)
01388
01389
01390 template <typename T>
01391 BdMatrix<T> BdMatrix<T>::operator * (const BdMatrix<T>& mat2) const
01392 {
01393 const unsigned int newoff = maxoff + mat2.maxoff;
01394
01395 BdMatrix<T> prd(0, dim, newoff);
01396 BCHK(dim != mat2.dim, BdMatrixErr, Multiplying diff size matrices, 0, prd);
01397
01398 for (int i = 0; i < (signed)dim; ++i) {
01399
01400 const int jend = MIN(dim, i+newoff+1);
01401 const int kstart = MAX(0, i-(signed)maxoff);
01402 const int kend = MIN(dim, i+maxoff+1);
01403 for (int j = MAX(0,i-(signed)newoff); j < jend; ++j) {
01404 register T el ALIGN(MIN_ALIGN) = T(0);
01405 for (register int k = kstart; k < kend; ++k)
01406 el += this->operator()(i, k) * mat2(k, j);
01407
01408 if (el != T(0))
01409 prd(i, j) = el;
01410 }
01411 }
01412 return prd;
01413 }
01414
01415 INST(template <typename T> class BdMatrix friend BdMatrix<T> operator * (const T&, const BdMatrix<T>&);)
01416 template <typename T>
01417 inline BdMatrix<T> operator * (const T& v, const BdMatrix<T>& m)
01418 { return m*v; }
01419
01420
01421 #if 0
01422 template <typename T>
01423 TMatrix<T> BdMatrix<T>::operator * (const Matrix<T>& mat) const
01424 {
01425 TMatrix<T> tm (mat.rows(), mat.columns());
01426 if (UNLIKELY(tm.rows() != dim)) {
01427 #ifdef EXCEPT
01428 throw (BdMatrixErr("Multiplying Matrix with Matrix: wrong sizes!"));
01429 #else
01430 STD__ cerr << "Multiplying Matrix with Matrix: wrong sizes!" << STD__ endl;
01431 return tm;
01432 #endif
01433 }
01434
01435 T val ALIGN(MIN_ALIGN);
01436 for (int c = 0; c < tm.columns(); c++)
01437 for (int i = 0; i < mat.rows(); i++) {
01438 val = 0;
01439 for (register int j=0; j < dim; j++)
01440 val += this->operator()(i, j) * mat(j, c);
01441 tm(i, c) = val;
01442 }
01443 return tm
01444 }
01445 #endif
01446
01447 template <typename T>
01448 BdMatrix<T> BdMatrix<T>::operator - () const
01449 {
01450 BdMatrix<T> neg(dim, diagconf);
01451
01452 for (register unsigned int j = 0; j < dim; ++j)
01453 neg.diag[j] = -diag[j];
01454
01455 for (unsigned int t = 0; t < diagconf.size(); ++t) {
01456 const unsigned int dc = diagconf(t);
01457 for (register unsigned int i = 0; i < (dim - dc); ++i) {
01458 neg.adiag(dc)[i] = -adiag(dc)[i];
01459 neg.bdiag(dc)[i] = -bdiag(dc)[i];
01460 }
01461 }
01462 return neg;
01463 }
01464
01465 #if !defined(BDMATVEC_LNWISE) && !defined(BDMATVEC_LNWISE_OPT) && !defined(BDMATVEC_DIAGWISE)
01466 # define BDMATVEC_LNWISE_OPT
01467 #endif
01468
01469
01470 #define COST_BDMATVEC_LN(d,o,m) (d*(COST_MULT+2*COST_UNIT_LOAD+COST_UNIT_STORE+COST_LOOP\
01471 +o*(COST_LOOP+2*COST_BRANCH+COST_UNIT_LOAD))+(2*d-m)*o*(3*COST_NU_LOAD+COST_ADD+COST_MULT))
01472 #define COST_BDMATVEC_LNO(d,o,m) (d*(COST_MULT+2*COST_UNIT_LOAD+COST_UNIT_STORE+COST_LOOP\
01473 +o*(COST_LOOP+COST_UNIT_LOAD))+(2*d-m)*o*(3*COST_NU_LOAD+COST_ADD+COST_MULT+COST_BRANCH/2))
01474 #define COST_BDMATVEC_DIAG(d,o,m) (d*(2*COST_UNIT_LOAD+COST_MULT+COST_UNIT_STORE+COST_LOOP)\
01475 +o*(6*COST_NU_LOAD+COST_LOOP+3*COST_UNIT_LOAD+2*COST_NU_STORE\
01476 +(2*d-m)*(3*COST_UNIT_LOAD+COST_LOOP+COST_MULT+COST_ADD+COST_UNIT_STORE)))
01477
01478 template <typename T>
01479 inline void do_bdmat_vec_mult(const unsigned start, const unsigned end,
01480 TVector<T> *res, const BdMatrix<T> *mat,
01481 const Vector<T> *vec)
01482
01483 {
01484 const unsigned int od = mat->diagconf.size();
01485 #ifdef BDMATVEC_LNWISE
01486 const unsigned int dim = mat->dim;
01487
01488 for (int i = start; i < end; ++i) {
01489
01490 register T val ALIGN(MIN_ALIGN) = mat->diag[i] * (*vec)(i);
01491
01492 for (int j = 0; j < od; ++j) {
01493 register const int s = mat->diagconf(j);
01494 if (LIKELY(s <= i))
01495 val += mat->bdiag(s)[i-s] * (*vec)(i-s);
01496 if (LIKELY(s < (signed)dim - i))
01497 val += mat->adiag(s)[i] * (*vec)(i+s);
01498 }
01499 res->set(val, i);
01500 }
01501 #elif defined(BDMATVEC_LNWISE_OPT)
01502 const unsigned int dim = mat->dim;
01503
01504
01505
01506
01507 unsigned int i;
01508 #ifdef USE_PREFETCH
01509 PREFETCH_R(&mat->diagconf.getcref(0), 3);
01510
01511
01512 for (unsigned int _j = 0; _j < od; ++_j) {
01513 const unsigned s = mat->diagconf(_j);
01514 PREFETCH_R(&mat->adiag.getcref(s), 3);
01515 PREFETCH_R(&mat->bdiag.getcref(s), 3);
01516 PREFETCH_R(mat->adiag(s), 3);
01517 PREFETCH_R(mat->bdiag(s), 3);
01518 }
01519 PREFETCH_R(mat->diag, 3);
01520 PREFETCH_R(&vec->getcref(0), 3);
01521 PREFETCH_W(&res->setval(0), 3);
01522 #endif
01523
01524 const unsigned int maxoff = mat->maxoff;
01525
01526 unsigned iend = MIN(maxoff, dim-maxoff);
01527 iend = MIN(iend, end);
01528
01529
01530
01531 for (i = start; i < iend; ++i) {
01532
01533 register T val ALIGN(MIN_ALIGN2) = mat->diag[i] * (*vec)(i);
01534 #ifdef USE_PREFETCH
01535 const unsigned i1 = i+1;
01536 PREFETCH_R(&vec->getcref(i1), 3);
01537 PREFETCH_R(mat->diag+i1, 3);
01538 #endif
01539
01540 for (unsigned int j = 0; j < od; ++j) {
01541 register const unsigned int s = mat->diagconf(j);
01542 if (s <= i) {
01543 val += mat->bdiag(s)[i-s] * (*vec)(i-s);
01544 #ifdef USE_PREFETCH
01545 PREFETCH_R(mat->bdiag(s)+i1-s,3);
01546 PREFETCH_R(&vec->getcref(i1-s), 3);
01547 #endif
01548 }
01549
01550
01551 val += mat->adiag(s)[i] * (*vec)(i+s);
01552 #ifdef USE_PREFETCH
01553 PREFETCH_R(mat->adiag(s)+i1,3);
01554 PREFETCH_R(&vec->getcref(i1+s), 3);
01555 #endif
01556 }
01557 res->set(val, i);
01558 #ifdef USE_PREFETCH
01559 PREFETCH_W(&res->setval(i1), 3);
01560 #endif
01561 }
01562 iend = MIN(end, dim - maxoff);
01563
01564 if (i < iend) {
01565
01566 for (; i < iend; ++i) {
01567
01568 register T val ALIGN(MIN_ALIGN2) = mat->diag[i] * (*vec)(i);
01569 #ifdef USE_PREFETCH
01570 const unsigned i1 = i+1;
01571 PREFETCH_R(&vec->getcref(i1), 3);
01572 PREFETCH_R(mat->diag+i1, 3);
01573 #endif
01574
01575 for (unsigned int j = 0; j < od; ++j) {
01576 register const unsigned int s = mat->diagconf(j);
01577 val += mat->bdiag(s)[i-s] * (*vec)(i-s);
01578 #ifdef USE_PREFETCH
01579 PREFETCH_R(mat->bdiag(s)+i1-s,3);
01580 PREFETCH_R(&vec->getcref(i1-s), 3);
01581 #endif
01582 val += mat->adiag(s)[i] * (*vec)(i+s);
01583 #ifdef USE_PREFETCH
01584 PREFETCH_R(mat->adiag(s)+i1,3);
01585 PREFETCH_R(&vec->getcref(i1+s), 3);
01586 #endif
01587 }
01588 res->set(val, i);
01589 #ifdef USE_PREFETCH
01590 PREFETCH_W(&res->setval(i1), 3);
01591 #endif
01592 }
01593 } else {
01594 iend = MIN(maxoff, end);
01595
01596 for (; i < iend; ++i) {
01597
01598 register T val ALIGN(MIN_ALIGN2) = mat->diag[i] * (*vec)(i);
01599 #ifdef USE_PREFETCH
01600 const unsigned i1 = i+1;
01601 PREFETCH_R(&vec->getcref(i1), 3);
01602 PREFETCH_R(mat->diag+i1, 3);
01603 #endif
01604
01605 for (unsigned int j = 0; j < od; ++j) {
01606 register const unsigned int s = mat->diagconf(j);
01607 if (s <= i) {
01608 val += mat->bdiag(s)[i-s] * (*vec)(i-s);
01609 #ifdef USE_PREFETCH
01610 PREFETCH_R(mat->bdiag(s)+i1-s,3);
01611 PREFETCH_R(&vec->getcref(i1-s), 3);
01612 #endif
01613 }
01614 if (s < (signed)dim - i) {
01615 val += mat->adiag(s)[i] * (*vec)(i+s);
01616 #ifdef USE_PREFETCH
01617 PREFETCH_R(mat->adiag(s)+i1,3);
01618 PREFETCH_R(&vec->getcref(i1+s), 3);
01619 #endif
01620 }
01621 }
01622 res->set(val, i);
01623 #ifdef USE_PREFETCH
01624 PREFETCH_W(&res->setval(i1), 3);
01625 #endif
01626 }
01627 }
01628
01629
01630
01631 for (; i < end; ++i) {
01632
01633 register T val ALIGN(MIN_ALIGN2) = mat->diag[i] * (*vec)(i);
01634 #ifdef USE_PREFETCH
01635 const unsigned i1 = i+1;
01636 PREFETCH_R(&vec->getcref(i1), 3);
01637 PREFETCH_R(mat->diag+i1, 3);
01638 #endif
01639
01640 for (unsigned int j = 0; j < od; ++j) {
01641 register const unsigned int s = mat->diagconf(j);
01642
01643 val += mat->bdiag(s)[i-s] * (*vec)(i-s);
01644 #ifdef USE_PREFETCH
01645 PREFETCH_R(mat->bdiag(s)+i1-s,3);
01646 PREFETCH_R(&vec->getcref(i1-s), 3);
01647 #endif
01648 if (s < (signed)dim - i) {
01649 val += mat->adiag(s)[i] * (*vec)(i+s);
01650 #ifdef USE_PREFETCH
01651 PREFETCH_R(mat->adiag(s)+i1,3);
01652 PREFETCH_R(&vec->getcref(i1+s), 3);
01653 #endif
01654 }
01655 }
01656 res->set(val, i);
01657 #ifdef USE_PREFETCH
01658 PREFETCH_W(&res->setval(i1), 3);
01659 #endif
01660 }
01661 #elif defined(BDMATVEC_DIAGWISE)
01662
01663
01664 do_vec_vec_mul(end-start, &res->setval(start),
01665 &mat->diag[start], &vec->getcref(start));
01666
01667 for (unsigned j = 0; j < od; j++) {
01668
01669 const int s = mat->diagconf(j);
01670 const int iend = MIN(end,mat->dim-s);
01671 const int iend2 = end-s;
01672 const int istart2 = MAX((int)(start-s),0);
01673 do_add_vec_vec_mul(iend-start, &res->setval(start),
01674 mat->adiag(s)+start, &vec->getcref(start+s));
01675 do_add_vec_vec_mul(iend2-istart2, &res->setval(istart2+s),
01676 mat->bdiag(s)+istart2, &vec->getcref(istart2));
01677 }
01678 #endif
01679 }
01680
01681 #if defined(BDMATVEC_LNWISE)
01682 # define COST_BDMATVEC(d,o,m) COST_BDMATVEC_LN(d,o,m)
01683 #elif defined(BDMATVEC_LNWISE_OPT)
01684 # define COST_BDMATVEC(d,o,m) COST_BDMATVEC_LNO(d,o,m)
01685 #elif defined(BDMATVEC_DIAGWISE)
01686 # define COST_BDMATVEC(d,o,m) COST_BDMATVEC_DIAG(d,o,m)
01687 #endif
01688
01689 #define COST_BDMATVEC_TRANS(d,o,m) COST_BDMATVEC(d,o,m)
01690 template <typename T>
01691 inline void do_bdmat_vec_transmult(const unsigned start, const unsigned end,
01692 TVector<T> *res, const BdMatrix<T> *mat,
01693 const Vector<T> *vec)
01694 {
01695 const unsigned od = mat->diagconf.size();
01696 #ifdef BDMATVEC_LNWISE
01697 const unsigned int dim = mat->dim;
01698 for (int i = start; i < (signed)end; ++i) {
01699
01700 register T val ALIGN(MIN_ALIGN) = mat->diag[i] * (*vec)(i);
01701
01702 for (unsigned int j = 0; j < od; ++j) {
01703 register const int s = mat->diagconf(j);
01704 if (LIKELY(s <= i))
01705 val += mat->adiag(s)[i-s] * (*vec)(i-s);
01706 if (LIKELY(s < (signed)dim - i))
01707 val += mat->bdiag(s)[i] * (*vec)(i+s);
01708 }
01709 res->set(val, i);
01710 }
01711 #elif defined(BDMATVEC_LNWISE_OPT)
01712 const unsigned int dim = mat->dim;
01713
01714
01715 #ifdef USE_PREFETCH
01716 PREFETCH_R(&mat->diagconf.getcref(0), 3);
01717
01718
01719 for (unsigned int _j = 0; _j < od; ++_j) {
01720 const unsigned s = mat->diagconf(_j);
01721 PREFETCH_R(&mat->adiag.getcref(s), 3);
01722 PREFETCH_R(&mat->bdiag.getcref(s), 3);
01723 PREFETCH_R(mat->adiag(s), 3);
01724 PREFETCH_R(mat->bdiag(s), 3);
01725 }
01726 PREFETCH_R(mat->diag, 3);
01727 PREFETCH_R(&vec->getcref(0), 3);
01728 PREFETCH_W(&res->setval(0), 3);
01729 #endif
01730
01731 unsigned int i;
01732 const unsigned int maxoff = mat->maxoff;
01733
01734 unsigned iend = MIN(maxoff, mat->dim-maxoff);
01735 iend = MIN(end, iend);
01736
01737
01738
01739 for (i = start; i < iend; ++i) {
01740
01741 register T val ALIGN(MIN_ALIGN) = mat->diag[i] * (*vec)(i);
01742 #ifdef USE_PREFETCH
01743 const unsigned i1 = i+1;
01744 PREFETCH_R(&vec->getcref(i1), 3);
01745 PREFETCH_R(mat->diag+i1, 3);
01746 #endif
01747
01748 for (unsigned int j = 0; j < od; ++j) {
01749 register const unsigned int s = mat->diagconf(j);
01750 if (s <= i) {
01751 val += mat->adiag(s)[i-s] * (*vec)(i-s);
01752 PREFETCH_R(mat->adiag(s)+i1-s, 3);
01753 PREFETCH_R(&vec->getcref(i1-s), 3);
01754 }
01755
01756 val += mat->bdiag(s)[i] * (*vec)(i+s);
01757 PREFETCH_R(mat->bdiag(s)+i1, 3);
01758 PREFETCH_R(&vec->getcref(i1+s), 3);
01759 }
01760 res->set(val, i);
01761 PREFETCH_W(&res->setval(i1), 3);
01762 }
01763 iend = MIN(end, dim - maxoff);
01764 if (i < iend) {
01765
01766 for (; i < iend; ++i) {
01767
01768 register T val ALIGN(MIN_ALIGN) = mat->diag[i] * (*vec)(i);
01769 #ifdef USE_PREFETCH
01770 const unsigned i1 = i+1;
01771 PREFETCH_R(&vec->getcref(i1), 3);
01772 PREFETCH_R(mat->diag+i1, 3);
01773 #endif
01774
01775 for (unsigned int j = 0; j < od; ++j) {
01776 register const unsigned int s = mat->diagconf(j);
01777 val += mat->adiag(s)[i-s] * (*vec)(i-s);
01778 PREFETCH_R(mat->adiag(s)+i1-s, 3);
01779 PREFETCH_R(&vec->getcref(i1-s), 3);
01780 val += mat->bdiag(s)[i] * (*vec)(i+s);
01781 PREFETCH_R(mat->bdiag(s)+i1, 3);
01782 PREFETCH_R(&vec->getcref(i1+s), 3);
01783 }
01784 res->set(val, i);
01785 PREFETCH_W(&res->setval(i1), 3);
01786 }
01787 } else {
01788 iend = MIN(end, maxoff);
01789
01790 for (; i < iend; ++i) {
01791
01792 register T val ALIGN(MIN_ALIGN) = mat->diag[i] * (*vec)(i);
01793 #ifdef USE_PREFETCH
01794 const unsigned i1 = i+1;
01795 PREFETCH_R(&vec->getcref(i1), 3);
01796 PREFETCH_R(mat->diag+i1, 3);
01797 #endif
01798
01799 for (unsigned int j = 0; j < od; ++j) {
01800 register const unsigned int s = mat->diagconf(j);
01801 if (s <= i) {
01802 val += mat->adiag(s)[i-s] * (*vec)(i-s);
01803 PREFETCH_R(mat->adiag(s)+i1-s, 3);
01804 PREFETCH_R(&vec->getcref(i1-s), 3);
01805 }
01806 if (s < (signed)dim - i) {
01807 val += mat->bdiag(s)[i] * (*vec)(i+s);
01808 PREFETCH_R(mat->bdiag(s)+i1, 3);
01809 PREFETCH_R(&vec->getcref(i1+s), 3);
01810 }
01811 }
01812 res->set(val, i);
01813 PREFETCH_W(&res->setval(i1), 3);
01814 }
01815 }
01816
01817 for (; i < end; ++i) {
01818
01819 register T val ALIGN(MIN_ALIGN) = mat->diag[i] * (*vec)(i);
01820 #ifdef USE_PREFETCH
01821 const unsigned i1 = i+1;
01822 PREFETCH_R(&vec->getcref(i1), 3);
01823 PREFETCH_R(mat->diag+i1, 3);
01824 #endif
01825
01826 for (unsigned int j = 0; j < od; ++j) {
01827 register const unsigned int s = mat->diagconf(j);
01828
01829 val += mat->adiag(s)[i-s] * (*vec)(i-s);
01830 PREFETCH_R(mat->adiag(s)+i1-s, 3);
01831 PREFETCH_R(&vec->getcref(i1-s), 3);
01832 if (s < (signed)dim - i) {
01833 val += mat->bdiag(s)[i] * (*vec)(i+s);
01834 PREFETCH_R(mat->bdiag(s)+i1, 3);
01835 PREFETCH_R(&vec->getcref(i1+s), 3);
01836 }
01837 }
01838 res->set(val, i);
01839 PREFETCH_W(&res->setval(i1), 3);
01840 }
01841 #elif defined(BDMATVEC_DIAGWISE)
01842
01843
01844 do_vec_vec_mul(end-start, &res->setval(start),
01845 &mat->diag[start], &vec->getcref(start));
01846
01847 for (unsigned j = 0; j < od; j++) {
01848
01849 const int s = mat->diagconf(j);
01850 const int iend = MIN(end,mat->dim-s);
01851 const int iend2 = end-s;
01852 const int istart2 = MAX((int)(start-s),0);
01853 do_add_vec_vec_mul(iend2-istart2, &res->setval(istart2+s),
01854 mat->adiag(s)+istart2, &vec->getcref(istart2));
01855 do_add_vec_vec_mul(iend-start, &res->setval(start),
01856 mat->bdiag(s)+start, &vec->getcref(start+s));
01857 }
01858 #endif
01859 }
01860
01861 #define COST_BDMATVEC_DOT(d,o,m) COST_BDMATVEC(d,o,m)
01862 template <typename T>
01863 inline void do_bdmat_vec_dotmult(const unsigned start, const unsigned end,
01864 TVector<T> *res, const BdMatrix<T> *mat,
01865 const Vector<T> *vec)
01866 {
01867 const unsigned int od = mat->diagconf.size();
01868 #ifdef BDMATVEC_LNWISE
01869 const unsigned int dim = mat->dim;
01870
01871 for (int i = start; i < end; ++i) {
01872
01873 register T val ALIGN(MIN_ALIGN) = CPLX__ conj(mat->diag[i]) * (*vec)(i);
01874
01875 for (int j = 0; j < od; ++j) {
01876 register const int s = mat->diagconf(j);
01877 if (LIKELY(s <= i))
01878 val += CPLX__ conj(mat->bdiag(s)[i-s]) * (*vec)(i-s);
01879 if (LIKELY(s < (signed)dim - i))
01880 val += CPLX__ conj(mat->adiag(s)[i]) * (*vec)(i+s);
01881 }
01882 res->set(val, i);
01883 }
01884 #elif defined(BDMATVEC_LNWISE_OPT)
01885 const unsigned int dim = mat->dim;
01886
01887
01888
01889
01890 unsigned int i;
01891 #ifdef USE_PREFETCH
01892 PREFETCH_R(&mat->diagconf.getcref(0), 3);
01893
01894
01895 for (unsigned int _j = 0; _j < od; ++_j) {
01896 const unsigned s = mat->diagconf(_j);
01897 PREFETCH_R(&mat->adiag.getcref(s), 3);
01898 PREFETCH_R(&mat->bdiag.getcref(s), 3);
01899 PREFETCH_R(mat->adiag(s), 3);
01900 PREFETCH_R(mat->bdiag(s), 3);
01901 }
01902 PREFETCH_R(mat->diag, 3);
01903 PREFETCH_R(&vec->getcref(0), 3);
01904 PREFETCH_W(&res->setval(0), 3);
01905 #endif
01906
01907 const unsigned int maxoff = mat->maxoff;
01908
01909 unsigned iend = MIN(maxoff, dim-maxoff);
01910 iend = MIN(iend, end);
01911
01912
01913
01914 for (i = start; i < iend; ++i) {
01915
01916 register T val ALIGN(MIN_ALIGN2) = CPLX__ conj(mat->diag[i]) * (*vec)(i);
01917 #ifdef USE_PREFETCH
01918 const unsigned i1 = i+1;
01919 PREFETCH_R(&vec->getcref(i1), 3);
01920 PREFETCH_R(mat->diag+i1, 3);
01921 #endif
01922
01923 for (unsigned int j = 0; j < od; ++j) {
01924 register const unsigned int s = mat->diagconf(j);
01925 if (s <= i) {
01926 val += CPLX__ conj(mat->bdiag(s)[i-s]) * (*vec)(i-s);
01927 #ifdef USE_PREFETCH
01928 PREFETCH_R(mat->bdiag(s)+i1-s,3);
01929 PREFETCH_R(&vec->getcref(i1-s), 3);
01930 #endif
01931 }
01932
01933
01934 val += CPLX__ conj(mat->adiag(s)[i]) * (*vec)(i+s);
01935 #ifdef USE_PREFETCH
01936 PREFETCH_R(mat->adiag(s)+i1,3);
01937 PREFETCH_R(&vec->getcref(i1+s), 3);
01938 #endif
01939 }
01940 res->set(val, i);
01941 #ifdef USE_PREFETCH
01942 PREFETCH_W(&res->setval(i1), 3);
01943 #endif
01944 }
01945 iend = MIN(end, dim - maxoff);
01946
01947 if (i < iend) {
01948
01949 for (; i < iend; ++i) {
01950
01951 register T val ALIGN(MIN_ALIGN2) = CPLX__ conj(mat->diag[i]) * (*vec)(i);
01952 #ifdef USE_PREFETCH
01953 const unsigned i1 = i+1;
01954 PREFETCH_R(&vec->getcref(i1), 3);
01955 PREFETCH_R(mat->diag+i1, 3);
01956 #endif
01957
01958 for (unsigned int j = 0; j < od; ++j) {
01959 register const unsigned int s = mat->diagconf(j);
01960 val += CPLX__ conj(mat->bdiag(s)[i-s]) * (*vec)(i-s);
01961 #ifdef USE_PREFETCH
01962 PREFETCH_R(mat->bdiag(s)+i1-s,3);
01963 PREFETCH_R(&vec->getcref(i1-s), 3);
01964 #endif
01965 val += CPLX__ conj(mat->adiag(s)[i]) * (*vec)(i+s);
01966 #ifdef USE_PREFETCH
01967 PREFETCH_R(mat->adiag(s)+i1,3);
01968 PREFETCH_R(&vec->getcref(i1+s), 3);
01969 #endif
01970 }
01971 res->set(val, i);
01972 #ifdef USE_PREFETCH
01973 PREFETCH_W(&res->setval(i1), 3);
01974 #endif
01975 }
01976 } else {
01977 iend = MIN(maxoff, end);
01978
01979 for (; i < iend; ++i) {
01980
01981 register T val ALIGN(MIN_ALIGN2) = CPLX__ conj(mat->diag[i]) * (*vec)(i);
01982 #ifdef USE_PREFETCH
01983 const unsigned i1 = i+1;
01984 PREFETCH_R(&vec->getcref(i1), 3);
01985 PREFETCH_R(mat->diag+i1, 3);
01986 #endif
01987
01988 for (unsigned int j = 0; j < od; ++j) {
01989 register const unsigned int s = mat->diagconf(j);
01990 if (s <= i) {
01991 val += CPLX__ conj(mat->bdiag(s)[i-s]) * (*vec)(i-s);
01992 #ifdef USE_PREFETCH
01993 PREFETCH_R(mat->bdiag(s)+i1-s,3);
01994 PREFETCH_R(&vec->getcref(i1-s), 3);
01995 #endif
01996 }
01997 if (s < (signed)dim - i) {
01998 val += CPLX__ conj(mat->adiag(s)[i]) * (*vec)(i+s);
01999 #ifdef USE_PREFETCH
02000 PREFETCH_R(mat->adiag(s)+i1,3);
02001 PREFETCH_R(&vec->getcref(i1+s), 3);
02002 #endif
02003 }
02004 }
02005 res->set(val, i);
02006 #ifdef USE_PREFETCH
02007 PREFETCH_W(&res->setval(i1), 3);
02008 #endif
02009 }
02010 }
02011
02012
02013
02014 for (; i < end; ++i) {
02015
02016 register T val ALIGN(MIN_ALIGN2) = CPLX__ conj(mat->diag[i]) * (*vec)(i);
02017 #ifdef USE_PREFETCH
02018 const unsigned i1 = i+1;
02019 PREFETCH_R(&vec->getcref(i1), 3);
02020 PREFETCH_R(mat->diag+i1, 3);
02021 #endif
02022
02023 for (unsigned int j = 0; j < od; ++j) {
02024 register const unsigned int s = mat->diagconf(j);
02025
02026 val += CPLX__ conj(mat->bdiag(s)[i-s]) * (*vec)(i-s);
02027 #ifdef USE_PREFETCH
02028 PREFETCH_R(mat->bdiag(s)+i1-s,3);
02029 PREFETCH_R(&vec->getcref(i1-s), 3);
02030 #endif
02031 if (s < (signed)dim - i) {
02032 val += CPLX__ conj(mat->adiag(s)[i]) * (*vec)(i+s);
02033 #ifdef USE_PREFETCH
02034 PREFETCH_R(mat->adiag(s)+i1,3);
02035 PREFETCH_R(&vec->getcref(i1+s), 3);
02036 #endif
02037 }
02038 }
02039 res->set(val, i);
02040 #ifdef USE_PREFETCH
02041 PREFETCH_W(&res->setval(i1), 3);
02042 #endif
02043 }
02044 #elif defined(BDMATVEC_DIAGWISE)
02045
02046
02047 do_vec_vec_cmul(end-start, &res->setval(start),
02048 &mat->diag[start], &vec->getcref(start));
02049
02050 for (unsigned j = 0; j < od; j++) {
02051
02052 const int s = mat->diagconf(j);
02053 const int iend = MIN(end,mat->dim-s);
02054 const int iend2 = end-s;
02055 const int istart2 = MAX((int)(start-s),0);
02056 do_add_vec_vec_cmul(iend-start, &res->setval(start),
02057 mat->adiag(s)+start, &vec->getcref(start+s));
02058 do_add_vec_vec_cmul(iend2-istart2, &res->setval(istart2+s),
02059 mat->bdiag(s)+istart2, &vec->getcref(istart2));
02060 }
02061 #endif
02062 }
02063
02064 #if defined(SMP) && !defined(NOSMP_BDMATVEC)
02065
02066 INST(template <typename T> class BdMatrix friend void job_bdmat_vec_mult(struct thr_ctrl*);)
02067 template <typename T>
02068 void job_bdmat_vec_mult(struct thr_ctrl* tc)
02069 {
02070 do_bdmat_vec_mult(tc->t_off, tc->t_size,
02071 (TVector<T>*)(tc->t_par[0]), (const BdMatrix<T>*)(tc->t_par[1]),
02072 (const Vector<T>*)(tc->t_par[2]));
02073 }
02074
02075 INST(template <typename T> class BdMatrix friend void job_bdmat_vec_transmult(struct thr_ctrl*);)
02076 template <typename T>
02077 void job_bdmat_vec_transmult (struct thr_ctrl* tc)
02078 {
02079 do_bdmat_vec_transmult(tc->t_off, tc->t_size,
02080 (TVector<T>*)(tc->t_par[0]), (const BdMatrix<T>*)(tc->t_par[1]),
02081 (const Vector<T>*)(tc->t_par[2]));
02082 }
02083
02084
02085
02086 # if !defined(SMP_BDMATSLICE)
02087 # define SMP_BDMATSLICE 2048
02088 # endif
02089
02090 template <typename T>
02091 TVector<T> BdMatrix<T>::operator * (const Vector<T>& vec) const
02092 {
02093 TVector<T> tv(dim);
02094 const unsigned n_thr = threads_avail(dim / SMP_BDMATSLICE);
02095 BCHK(vec.size() != dim, BdMatrixErr, Multiplication BdMatrix with Vector: wrong sizes, 0, tv);
02096 if (LIKELY(n_thr < 2))
02097 do_bdmat_vec_mult<T>(0UL, dim, &tv, this, &vec);
02098 else {
02099 PREFETCH_R(vec.vec, 3); PREFETCH_W(tv.vec, 3);
02100 const unsigned long first = slice_offset(1, n_thr, dim, tv.vec);
02101 unsigned long st, en = first;
02102 for (unsigned t = 0; t < n_thr-1; ++t) {
02103 st = en; en = slice_offset(t+2, n_thr, dim, tv.vec);
02104 thread_start_off(t, (thr_job_t)job_bdmat_vec_mult<T>,
02105 st, en, &tv, this, &vec, (void*)0);
02106 }
02107 do_bdmat_vec_mult<T>(0UL, first, &tv, this, &vec);
02108 for (unsigned s = 0; s < n_thr-1; ++s)
02109 thread_wait(s);
02110 }
02111 return tv;
02112 }
02113
02114
02115 template <typename T>
02116 TVector<T> BdMatrix<T>::transMult (const Vector<T>& vec) const
02117 {
02118 TVector<T> tv(dim);
02119 const unsigned n_thr = threads_avail(dim / SMP_BDMATSLICE);
02120 BCHK(vec.size() != dim, BdMatrixErr, Multiplication BdMatrix with Vector: wrong sizes, 0, tv);
02121 if (LIKELY(n_thr < 2 ))
02122 do_bdmat_vec_transmult<T> (0UL, dim, &tv, this, &vec);
02123 else {
02124 PREFETCH_R(vec.vec, 3); PREFETCH_W(tv.vec, 3);
02125 const unsigned long first = slice_offset(1, n_thr, dim, tv.vec);
02126 unsigned long st, en = first;
02127 for (unsigned t = 0; t < n_thr-1; ++t) {
02128 st = en; en = slice_offset(t+2, n_thr, dim, tv.vec);
02129 thread_start_off(t, (thr_job_t)job_bdmat_vec_transmult<T>,
02130 st, en, &tv, this, &vec, (void*)0);
02131 }
02132 do_bdmat_vec_transmult<T>(0UL, first, &tv, this, &vec);
02133 for (unsigned s = 0; s < n_thr-1; ++s)
02134 thread_wait(s);
02135 }
02136 return tv;
02137 }
02138
02139 #else
02140
02141 template <typename T>
02142 TVector<T> BdMatrix<T>::operator * (const Vector<T>& vec) const
02143 {
02144 TVector<T> tv(dim);
02145 BCHK(vec.size() != dim, BdMatrixErr, Multiplication BdMatrix with Vector: wrong sizes, 0, tv);
02146 do_bdmat_vec_mult<T>(0, dim, &tv, this, &vec);
02147 return tv;
02148 }
02149
02150 template <typename T>
02151 TVector<T> BdMatrix<T>::transMult (const Vector<T>& vec) const
02152 {
02153 TVector<T> tv(dim);
02154 BCHK(vec.size() != dim, BdMatrixErr, Multiplication BdMatrix with Vector: wrong sizes, 0, tv);
02155 do_bdmat_vec_transmult<T>(0, dim, &tv, this, &vec);
02156 return tv;
02157 }
02158
02159 #endif
02160
02161 template <typename T>
02162 TVector<T> BdMatrix<T>::dotMult(const Vector<T>& vec) const
02163 {
02164 TVector<T> tv(dim);
02165 BCHK(vec.size() != dim, BdMatrixErr, Dot Multiplication BdMatrix with Vector: wrong sizes, 0, tv);
02166 do_bdmat_vec_dotmult<T>(0, dim, &tv, this, &vec);
02167 return tv;
02168 }
02169
02170
02171
02172
02173
02174
02175
02176
02177
02178 template <typename T>
02179 bool BdMatrix<T>::operator == (const BdMatrix<T>& ma) const
02180 {
02181 if (LIKELY(ma.dim != dim))
02182 return false;
02183 if (LIKELY(dim == 0 || ((diag == ma.diag) && (adiag == ma.adiag) && (bdiag == ma.bdiag))))
02184 return true;
02185 if (TBCICOMP(ma.diag, diag, T, dim))
02186 return false;
02187 unsigned i;
02188 for (i = 0; i < diagconf.size(); ++i) {
02189 const unsigned di = diagconf(i);
02190 if (ma.adiag(di)) {
02191 if (TBCICOMP(ma.adiag(di), adiag(di), T, (dim-di))) return false;
02192 if (TBCICOMP(ma.bdiag(di), bdiag(di), T, (dim-di))) return false;
02193 } else {
02194 for (unsigned j = 0; j < dim-di; ++j)
02195 if (MATH__ fabs(adiag(di)[j]) >= BD_MINVAL || MATH__ fabs(bdiag(di)[j]) >= BD_MINVAL)
02196 return false;
02197 }
02198 }
02199 if (LIKELY(diagconf == ma.diagconf))
02200 return true;
02201 for (i = 0; i < ma.diagconf.size(); ++i) {
02202 const unsigned di = ma.diagconf(i);
02203 if (!diagconf.contains (di)) {
02204 for (unsigned j = 0; j < dim-di; ++j)
02205 if (UNLIKELY(MATH__ fabs(ma.adiag(di)[j]) >= BD_MINVAL || MATH__ fabs(ma.bdiag(di)[j]) >= BD_MINVAL))
02206 return false;
02207 }
02208 }
02209 return true;
02210 }
02211
02212 template <typename T>
02213 BdMatrix<T>& BdMatrix<T>::mult_rows(const Vector<T>& v)
02214 {
02215 BCHK(dim != v.size(), BdMatrixErr, multiply rowwise with wrong size, v.size(), *this);
02216
02217 for (unsigned j = 0; j < dim; ++j)
02218 diag[j] *= v(j);
02219
02220 for (unsigned od = 0; od < diagconf.size(); ++od) {
02221 const unsigned off = diagconf (od);
02222 for (unsigned i = 0; i < (dim-off); ++i)
02223 adiag(off)[i] *= v(i);
02224 for (unsigned k = 0; k < (dim-off); ++k)
02225 bdiag(off)[k] *= v(k+off);
02226 }
02227 return *this;
02228 }
02229
02230 template <typename T>
02231 BdMatrix<T>& BdMatrix<T>::div_rows(const Vector<T>& v)
02232 {
02233 BCHK (dim != v.size(), BdMatrixErr, multiply rowwise with wrong size, v.size(), *this);
02234
02235 for (unsigned j = 0; j < dim; ++j)
02236 diag[j] /= v(j);
02237
02238 for (unsigned od = 0; od < diagconf.size(); ++od) {
02239 const unsigned off = diagconf(od);
02240 for (unsigned i = 0; i < (dim-off); ++i)
02241 adiag(off)[i] /= v(i);
02242 for (unsigned k = 0; k < (dim-off); ++k)
02243 bdiag(off)[k] /= v(k+off);
02244 }
02245 return *this;
02246 }
02247
02248 template <typename T>
02249 BdMatrix<T>& BdMatrix<T>::mult_row(const T& f, const unsigned i)
02250 {
02251 BCHK (i>dim, BdMatrixErr, mult_row index out of range, i, *this);
02252 diag[i] *= f;
02253 for (unsigned od = 0; od < diagconf.size(); ++od) {
02254 const unsigned off = diagconf(od);
02255 if (i < dim-off)
02256 adiag(off)[i] *= f;
02257 if (i >= off)
02258 bdiag(off)[i-off] *= f;
02259 }
02260 return *this;
02261 }
02262
02263
02264 template <typename T>
02265 BdMatrix<T>& BdMatrix<T>::div_row(const T& d, const unsigned i)
02266 {
02267 BCHK (i>dim, BdMatrixErr, div_row index out of range, i, *this);
02268 BCHK (d == (T)0, BdMatrixErr, div_row by zero, i, *this);
02269 T f = (T)1.0/d;
02270 diag[i] *= f;
02271 for (unsigned od = 0; od < diagconf.size(); ++od) {
02272 const unsigned off = diagconf(od);
02273 if (i < dim-off)
02274 adiag(off)[i] *= f;
02275 if (i >= off)
02276 bdiag(off)[i-off] *= f;
02277 }
02278 return *this;
02279 }
02280
02281 template <typename T>
02282 STD__ ostream& operator << (STD__ ostream& ostr, const BdMatrix<T>& mat)
02283 {
02284
02285
02286
02287 for (unsigned i = 0; i < mat.dim; ++i) {
02288 for (unsigned j = 0; j < mat.dim; ++j) {
02289 if (UNLIKELY(mat.outset)) {
02290 ostr.width(mat.outw);
02291 ostr.precision(mat.outp);
02292 ostr.fill(mat.outf);
02293 }
02294 if (i == j || mat.diagconf.contains(CSTD__ abs((int)i-(int)j)))
02295 ostr << mat(i, j) << ' ';
02296 else
02297 ostr << (T)0 << ' ';
02298 }
02299
02300
02301
02302 ostr << "\n";
02303 }
02304 BCHK(mat.dummy!=(T)0, BdMatrixErr, dummy not zero, 0, ostr);
02305 return ostr;
02306 }
02307
02308 template <typename T>
02309 STD__ istream& operator >> (STD__ istream& istr, BdMatrix<T>& mat)
02310 {
02311 T tmp;
02312 for (unsigned i = 0; i < mat.dim; ++i) {
02313 for (unsigned j = 0; j < mat.dim; ++j) {
02314 istr >> tmp;
02315 if (i == j || mat.diagconf.contains(CSTD__ abs((int)i-(int)j)))
02316 mat.setval(tmp, i, j);
02317 else
02318 if (tmp != (T)0)
02319 STD__ cerr << "Element (" << i << "," << j
02320 << ") should be zero!" << STD__ endl;
02321 }
02322 }
02323 return istr;
02324 }
02325
02326 template <typename T>
02327 BdMatrix<T>::operator TMatrix<T> () const
02328 {
02329 const unsigned _DIM = dim;
02330 TMatrix<T> tm ((T)0, _DIM, _DIM);
02331
02332 for (unsigned i = 0; i < _DIM; ++i)
02333 tm.setval(diag[i], i, i);
02334
02335 for (unsigned j = 0; j < diagconf.size(); ++j) {
02336 const unsigned di = diagconf(j);
02337 for (unsigned k = 0; k < _DIM-di; ++k) {
02338 tm.setval(adiag(di)[k], k, k+di);
02339 tm.setval(bdiag(di)[k], k+di, k);
02340 }
02341 }
02342 return tm;
02343 }
02344
02345 #if 0
02346 template <typename T>
02347 BdMatrix<T>::operator Matrix<T>() const
02348 {
02349 Matrix<T> tm ((T)0, dim, dim); unsigned i;
02350
02351 for (i = 0; i < dim; ++i)
02352 tm.setval(diag[i], i, i);
02353
02354 for (unsigned j = 0; j < diagconf.size(); ++j)
02355 for (unsigned k = 0; k < dim-(i=diagconf(j)); ++k) {
02356 tm.setval(adiag(i)[k], k, k+i);
02357 tm.setval(bdiag(i)[k], k+i, k);
02358 }
02359 return tm;
02360 }
02361 #endif
02362
02363 #if 0
02364 template <typename T>
02365 BdMatrix<T> BdMatrix<T>::inverse()
02366 {
02367 BdMatrix<T> inv(dim);
02368 BVector<unsigned int> ri(1); BVector<unsigned int> ci(1);
02369 T d = det();
02370
02371 if (MATH__ fabs(d) < BD_MINVAL) {
02372 #ifdef EXCEPT
02373 throw (BdMatrixErr("Trying to invert singular Matrix!"));
02374 #else
02375 STD__ cerr << "Trying to invert singular Matrix!" << STD__ endl;
02376 return inv;
02377 #endif
02378 }
02379 d = 1/d;
02380 for (unsigned i = 0; i < dim; i++)
02381 for (unsigned j = 0; j < dim; j++) {
02382 ri(0) = i; ci(0) = j;
02383 inv.setval(subdet(ri, ci) * ((i+j)%2? -d: d), j, i);
02384 }
02385 return inv;
02386 }
02387 #endif
02388
02389 NAMESPACE_END
02390
02391 #endif