00001
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef TBCI_SOLVER_BD_LU_SOLVER_H
00021 #define TBCI_SOLVER_BD_LU_SOLVER_H
00022
00023 #include "../vector.h"
00024 #include "../band_matrix.h"
00025 #include "../matrix.h"
00026
00027 #ifdef PRAGMA_I
00028 # pragma interface "bd_lu_solver.h"
00029 #endif
00030
00031
00032 #ifndef BDMLU_MINVAL
00033 # define BDMLU_MINVAL 1e-16
00034 #endif
00035
00036
00037 #ifndef BDMLU_RES_WARN
00038 # define BDMLU_RES_WARN 1e-11
00039 #endif
00040
00041 NAMESPACE_TBCI
00042
00053 INST(template <typename T> class BdMatrix friend int lu_decomp (BdMatrix<T>&);)
00054
00055 #if 1 || defined(USE_PLAIN_VEC_KERNELS) || (defined(UNROLL_DEPTH) && UNROLL_DEPTH == 1)
00056 template <typename T>
00057 int lu_decomp (BdMatrix<T>& mat)
00058 {
00059 const unsigned matr = mat.rows();
00060
00061
00062
00063 mat.expand();
00064
00065
00066 const unsigned maxoff = mat.getmaxoff();
00067
00068
00069 for (unsigned j = 0; j < matr; ++j) {
00070 unsigned i = MAX((signed)0, (signed)j - (signed)maxoff);
00071 const unsigned imax = MIN(j,maxoff);
00072 ALIGN2(register T tmp, MIN_ALIGN2); tmp = 0;
00073 for (; i <= imax; ++i) {
00074 tmp = mat(i, j);
00075 for (register unsigned k = 0; k < i; ++k)
00076 tmp -= mat(i, k) * mat(k, j);
00077 mat.setval(tmp, i, j);
00078 }
00079 for (; i <= j; ++i) {
00080 tmp = mat(i, j);
00081 for (register unsigned k = j - maxoff; k < i; ++k)
00082 tmp -= mat(i, k) * mat(k, j);
00083 mat.setval(tmp, i, j);
00084 }
00085 BCHK(MATH__ fabs(tmp) == 0, BdMatrixErr, Diagonal zero encountered, j, -1);
00086 T invdiag = T(1.0)/tmp;
00087
00088
00089
00090 unsigned bound = MIN(matr, j + mat.getmaxoff() + 1);
00091 for (; i < bound; ++i) {
00092 tmp = mat(i, j);
00093 for (register unsigned int k = MAX((signed)0, (signed)j - (signed)maxoff); k < j; ++k)
00094 tmp -= mat(i, k) * mat(k, j);
00095
00096 # ifdef ERRCHECK
00097 if (UNLIKELY (MATH__ fabs(tmp * invdiag) > 1/BDMLU_MINVAL))
00098 STD__ cerr << "\n BdMatrixWarning: BdMatrix badly conditioned, continuing ...\n" << STD__ flush;
00099 # endif
00100 mat.setval(tmp * invdiag, i, j);
00101 }
00102 }
00103 return 0;
00104 }
00105
00106 #else
00107
00108 template <typename T>
00109 int lu_decomp (BdMatrix<T>& mat)
00110 {
00111 const unsigned matr = mat.rows();
00112
00113
00114
00115 mat.expand();
00116
00117
00118 const unsigned maxoff = mat.getmaxoff();
00119
00120
00121 for (unsigned j = 0; j < matr; ++j) {
00122 unsigned i = MAX((signed)0, (signed)j - (signed)maxoff);
00123 PREFETCH_R(&mat(i, 0), 3);
00124 PREFETCH_R(&mat(i, j), 3);
00125 const unsigned imax = MIN(j,maxoff);
00126 PREFETCH_R(&mat(0, j), 2);
00127 PREFETCH_R(&mat(1, j), 2);
00128 ALIGN2(register T tmp, MIN_ALIGN2);
00129 for (; i <= imax; ++i) {
00130 tmp = mat(i, j);
00131 PREFETCH_R(&mat(i, 0), 3);
00132 PREFETCH_R(&mat(i, 1), 3);
00133 for (register unsigned k = 0; k < i; ++k) {
00134 tmp -= mat(i, k) * mat(k, j);
00135 PREFETCH_R(&mat(i, k+2),3);
00136 PREFETCH_R(&mat(k+1, j), 2);
00137 }
00138 mat.setval(tmp, i, j);
00139 PREFETCH_W(&mat(i+1,j),3);
00140 }
00141 for (; i <= j; ++i) {
00142 tmp = mat(i, j);
00143 PREFETCH_R(&mat(i, j-maxoff), 3);
00144 PREFETCH_R(&mat(i, j-maxoff+1), 3);
00145 register unsigned k;
00146 for (k = j - maxoff; k < (signed)i-(signed)MAX(8,4*EL_PER_CL(T)); k+=4) {
00147 tmp -= mat(i, k) * mat(k, j);
00148 PREFETCH_R(&mat(i,k+4), 3);
00149 PREFETCH_R(&mat(i,k+5), 3);
00150 tmp -= mat(i, k+1) * mat(k+1, j);
00151 PREFETCH_R(&mat(i,k+6), 3);
00152 PREFETCH_R(&mat(i,k+7), 3);
00153 tmp -= mat(i, k+2) * mat(k+2, j);
00154 PREFETCH_R(&mat(k+4,j), 3);
00155 PREFETCH_R(&mat(k+5,j), 3);
00156 tmp -= mat(i, k+3) * mat(k+3, j);
00157 PREFETCH_R(&mat(k+6,j), 3);
00158 PREFETCH_R(&mat(k+7,j), 3);
00159 }
00160 for (; k < i; ++k)
00161 tmp -= mat(i, k) * mat(k, j);
00162 mat.setval(tmp, i, j);
00163 }
00164 BCHK(MATH__ fabs (tmp) == 0, BdMatrixErr, Diagonal zero encountered, j, -1);
00165 T invdiag = T(1.0)/tmp;
00166
00167
00168
00169 unsigned bound = MIN(matr, j + mat.getmaxoff() + 1);
00170 for (; i < bound; ++i) {
00171 const unsigned kstart = MAX((signed)0, (signed)j - (signed)maxoff);
00172 tmp = mat(i, j);
00173 PREFETCH_R(&mat(i, kstart), 3);
00174 PREFETCH_R(&mat(i, kstart+1), 3);
00175 register unsigned int k;
00176 for (k = kstart; (signed)k < (signed)j-(signed)MAX(8,4*EL_PER_CL(T)); k+=4) {
00177 tmp -= mat(i, k) * mat(k, j);
00178 PREFETCH_R(&mat(i,k+4), 3);
00179 PREFETCH_R(&mat(i,k+5), 3);
00180 tmp -= mat(i, k+1) * mat(k+1, j);
00181 PREFETCH_R(&mat(i,k+6), 3);
00182 PREFETCH_R(&mat(i,k+7), 3);
00183 tmp -= mat(i, k+2) * mat(k+2, j);
00184 PREFETCH_R(&mat(k+4,j), 3);
00185 PREFETCH_R(&mat(k+5,j), 3);
00186 tmp -= mat(i, k+3) * mat(k+3, j);
00187 PREFETCH_R(&mat(k+6,j), 3);
00188 PREFETCH_R(&mat(k+7,j), 3);
00189 }
00190 for (; k < j; ++k)
00191 tmp -= mat(i, k) * mat(k, j);
00192
00193 # ifdef ERRCHECK
00194 if (UNLIKELY (MATH__ fabs(tmp * invdiag) > 1/BDMLU_MINVAL))
00195 STD__ cerr << "\n BdMatrixWarning: BdMatrix badly conditioned, continuing ...\n" << STD__ flush;
00196 # endif
00197 mat.setval(tmp * invdiag, i, j);
00198 }
00199 }
00200 return 0;
00201 }
00202 #endif
00203
00204
00205 INST(template <typename T> class BdMatrix friend TVector<T> LU_fwd_subst(const BdMatrix<T>&, const Vector<T>&);)
00206 template <typename T>
00207 TVector<T> LU_fwd_subst(const BdMatrix<T>& lu, const Vector<T>& y)
00208 {
00209 const unsigned ysize = y.size();
00210 const unsigned maxoff = lu.getmaxoff();
00211 TVector<T> x(ysize);
00212 BCHK(ysize != lu.rows(), BdMatrixErr, LU fwd subst size error, ysize, x);
00213 for (unsigned i = 0; i < ysize; ++i) {
00214 ALIGN2(register T tmp, MIN_ALIGN2) = y(i);
00215 for (register unsigned j = MAX((signed)0, (signed)i - (signed)maxoff); j < i; ++j)
00216 tmp -= x.get(j) * lu(i, j);
00217 x.set(tmp, i);
00218 }
00219 return x;
00220 }
00221
00222 INST(template <typename T> class BdMatrix friend TVector<T> LU_bkw_subst(const BdMatrix<T>&, const Vector<T>&);)
00223 template <typename T>
00224 TVector<T> LU_bkw_subst(const BdMatrix<T>& lu, const Vector<T>& y)
00225 {
00226 const unsigned ysize = y.size();
00227 const unsigned maxoff = lu.getmaxoff();
00228 TVector<T> x(ysize);
00229 BCHK(ysize != lu.rows(), BdMatrixErr, LU bkw subst size error, ysize, x);
00230 for (int i = ysize - 1; (signed)i >= 0; --i) {
00231 ALIGN2(register T tmp, MIN_ALIGN2) = y(i);
00232 for (register int j = MIN(ysize - 1, i + maxoff); j > i; --j)
00233 tmp -= x.get(j) * lu(i, j);
00234 x.set(tmp / lu(i, i), i);
00235 }
00236 return x;
00237 }
00238
00244 INST(template <typename T> class BdMatrix friend TVector<T> LU_solve(const BdMatrix<T>&, const Vector<T>&);)
00245 template <typename T>
00246 inline TVector<T> LU_solve(const BdMatrix<T>& lu, const Vector<T>& b)
00247 {
00248 Vector<T> y (LU_fwd_subst(lu, b));
00249 return (LU_bkw_subst(lu, y));
00250 }
00251
00258 INST(template <typename T> class BdMatrix friend TVector<T> lu_solve(BdMatrix<T>&, const Vector<T>&);)
00259 template <typename T>
00260 TVector<T> lu_solve(BdMatrix<T>& mat, const Vector<T>& b)
00261 {
00262 #ifdef ERRCHECK
00263 BdMatrix<T> m_backup (mat);
00264 double res;
00265 #endif
00266 lu_decomp (mat);
00267 TVector<T> x (LU_solve (mat, b));
00268 #ifdef ERRCHECK
00269
00270 Vector<T> xx(b.size()); xx.copy (x);
00271 if (UNLIKELY((res = MATH__ fabs(m_backup * xx - b)/MATH__ fabs(b)) > BDMLU_RES_WARN))
00272 STD__ cerr << "bd_lu_solver: Inaccurate solution, rel. residuum= " << res << STD__ endl;
00273 #endif
00274 return x;
00275 }
00276
00277 INST(template <typename T> class BdMatrix friend TMatrix<T> LU_solve(const BdMatrix<T>&, const Matrix<T>&);)
00278 template <typename T>
00279 TMatrix<T> LU_solve(const BdMatrix<T>& lu, const Matrix<T>& b)
00280 {
00281 TMatrix<T> x(b.rows(), b.columns());
00282 Vector<T> y(b.rows()), w(b.rows());
00283 const unsigned bc = b.columns();
00284 for (unsigned int i = 0; i < bc; ++i) {
00285 w = b.get_col(i);
00286 y = LU_fwd_subst(lu, w);
00287 x.set_col(LU_bkw_subst(lu, y), i);
00288 }
00289 return x;
00290 }
00291
00292
00293 INST(template <typename T> class BdMatrix friend TMatrix<T> lu_solve(BdMatrix<T>&, const Matrix<T>&);)
00294 template <typename T>
00295 inline TMatrix<T> lu_solve(BdMatrix<T>& mat, const Matrix<T>& b)
00296 {
00297 lu_decomp(mat);
00298 return LU_solve(mat, b);
00299 }
00300
00301 #if 0
00302
00303 INST(template <typename T> class BdMatrix friend TMatrix<T> LU_solve (const BdMatrix<T>&, const BdMatrix<T>&);)
00304 template <typename T>
00305 inline TMatrix<T> LU_solve (const BdMatrix<T>& lu, const BdMatrix<T>& b)
00306 { return LU_solve (lu, Matrix<T>(b)); }
00307
00308 INST(template <typename T> class BdMatrix friend TMatrix<T> lu_solve (BdMatrix<T>&, const BdMatrix<T>&);)
00309 template <typename T>
00310 inline TMatrix<T> lu_solve (BdMatrix<T>& mat, const BdMatrix<T>& b)
00311 { lu_decomp (mat); return LU_solve (mat, b); }
00312 #endif
00313
00315 INST(template <typename T> class BdMatrix friend T LU_det(const BdMatrix<T>&);)
00316 template <typename T>
00317 T LU_det(const BdMatrix<T>& lu)
00318 {
00319 ALIGN2(register T tmp, MIN_ALIGN2) = lu(0, 0);
00320 const unsigned lr = lu.rows();
00321 for (unsigned i = 1; i < lr; ++i)
00322 tmp *= lu(i, i);
00323 return tmp;
00324 }
00325
00327 INST(template <typename T> class BdMatrix friend T lu_det(BdMatrix<T>&);)
00328 template <typename T>
00329 inline T lu_det(BdMatrix<T>& mat)
00330 {
00331 lu_decomp(mat);
00332 return LU_det(mat);
00333 }
00334
00336 INST(template <typename T> class BdMatrix friend TMatrix<T> LU_invert(const BdMatrix<T>&);)
00337 template <typename T>
00338 TMatrix<T> LU_invert(const BdMatrix<T>& lu)
00339 {
00340 TMatrix<T> inv ((T)0, lu.columns(), lu.rows());
00341 const unsigned lr = lu.rows();
00342 for (unsigned int j = 0; j < lr; ++j) {
00343 Vector<T> v ((T)0, lr);
00344 v(j) = (T)1;
00345 Vector<T> y (LU_solve(lu, v));
00346 inv.set_col(y, j);
00347 }
00348 return inv;
00349 }
00350
00351
00353 INST(template <typename T> class BdMatrix friend TMatrix<T> lu_invert(BdMatrix<T>&);)
00354 template <typename T>
00355 inline TMatrix<T> lu_invert(BdMatrix<T>& mat)
00356 {
00357 lu_decomp(mat);
00358 return LU_invert(mat);
00359 }
00360
00361 NAMESPACE_END
00362
00363 #endif