00001
00020 #ifndef TBCI_SOLVER_LU_SOLVER_H
00021 #define TBCI_SOLVER_LU_SOLVER_H
00022
00023 #include "../matrix.h"
00024 #include "../vector.h"
00025
00026 #ifdef PRAGMA_I
00027 # pragma interface "lu_solver.h"
00028 #endif
00029
00030
00031 #ifndef MLU_MINVAL
00032 # define MLU_MINVAL 5e-16
00033 #endif
00034
00035
00036 #ifndef MLU_RES_WARN
00037 # define MLU_RES_WARN 1e-11
00038 #endif
00039
00040 NAMESPACE_TBCI
00041
00051 INST(template <typename T> class Matrix friend int lu_decomp (Matrix<T>&);)
00052 #if defined(USE_PLAIN_VEC_KERNELS)
00053 template <typename T>
00054 int lu_decomp (Matrix<T>& mat)
00055 {
00056 const unsigned matr = mat.rows();
00057 int err = 0, col = -1;
00058
00059 BCHK(matr != mat.columns(), MatErr, LU only for quadratic matrices, matr, -1);
00060
00061
00062
00063
00064
00065 for (unsigned j = 0; j < matr; ++j) {
00066 unsigned i;
00067 ALIGN2(register T tmp, MIN_ALIGN2);
00068 for (i = 0; i <= j; ++i) {
00069 tmp = mat(i, j);
00070 for (register unsigned k = 0; k < i; ++k)
00071 tmp -= mat(i, k) * mat(k, j);
00072 mat.setval(tmp, i, j);
00073 }
00074 if (UNLIKELY(MATH__ fabs(tmp) < MLU_MINVAL)) {
00075 ++err;
00076 col = j;
00077 }
00078 T invdiag = T(1.0)/tmp;
00079
00080
00081
00082 for (i = j + 1; i < matr; ++i) {
00083 tmp = mat(i, j);
00084 for (register unsigned k = 0; k < j; ++k)
00085 tmp -= mat(i, k) * mat(k, j);
00086 mat.setval(tmp * invdiag, i, j);
00087 }
00088 }
00089 BCHK(err != 0, MatErr, Diagonal zero encountered, col, err);
00090 return err;
00091 }
00092 #else
00093 template <typename T>
00094 int lu_decomp (Matrix<T>& mat)
00095 {
00096 const unsigned matr = mat.rows();
00097 int err = 0, col = -1;
00098
00099 BCHK(matr != mat.columns(), MatErr, LU only for quadratic matrices, matr, -1);
00100
00101
00102
00103
00104
00105 for (unsigned j = 0; j < matr; ++j) {
00106 PREFETCH_R(&mat(0,0),3);
00107 PREFETCH_R(&mat(0,j),3);
00108 unsigned i;
00109 ALIGN2(register T tmp, MIN_ALIGN2);
00110 for (i = 0; i <= MIN(j,4); ++i) {
00111 PREFETCH_R(&mat(i,j), 3);
00112 PREFETCH_R(&mat(i+1,0), 3);
00113 tmp = mat(i, j);
00114 for (register unsigned k = 0; k < i; ++k)
00115 tmp -= mat(i, k) * mat(k, j);
00116 mat.setval (tmp, i, j);
00117 }
00118 PREFETCH_R(&mat(1,j),2);
00119 PREFETCH_R(&mat(2,j),2);
00120 PREFETCH_R(&mat(3,j),2);
00121 for (; i <= j; ++i) {
00122 PREFETCH_R(&mat(i,0), 3);
00123 PREFETCH_R(&mat(i,EL_PER_CL(T)), 3);
00124 tmp = mat(i, j);
00125 register unsigned k;
00126 for (k = 0; (signed)k < (signed)i-(signed)MAX(8,4*EL_PER_CL(T)); k+=4) {
00127 tmp -= mat(i, k) * mat(k, j);
00128 PREFETCH_R(&mat(i,k+2*EL_PER_CL(T)),3);
00129 tmp -= mat(i,k+1) * mat(k+1,j);
00130 PREFETCH_R(&mat(i,k+3*EL_PER_CL(T)),3);
00131 tmp -= mat(i,k+2) * mat(k+2,j);
00132 PREFETCH_R(&mat(k+4,j), 2);
00133 PREFETCH_R(&mat(k+5,j), 2);
00134 tmp -= mat(i,k+3) * mat(k+3,j);
00135 PREFETCH_R(&mat(k+6,j), 2);
00136 PREFETCH_R(&mat(k+7,j), 2);
00137 }
00138 for (; k < i; ++k)
00139 tmp -= mat(i,k) * mat(k,j);
00140 mat.setval (tmp, i, j);
00141 if (LIKELY(i<matr-1))
00142 PREFETCH_R(&mat(i+1,j),3);
00143 }
00144 if (UNLIKELY(MATH__ fabs(tmp) < MLU_MINVAL)) {
00145 ++err;
00146 col = j;
00147 }
00148 T invdiag = T(1.0)/tmp;
00149
00150
00151
00152 for (i = j + 1; i < matr; ++i)
00153 {
00154 PREFETCH_R(&mat(i,j), 3);
00155 PREFETCH_R(&mat(i,0), 3);
00156 PREFETCH_R(&mat(i,EL_PER_CL(T)), 3);
00157 tmp = mat(i, j);
00158 register unsigned k;
00159 for (k = 0; (signed)k < (signed)j-(signed)MAX(8,4*EL_PER_CL(T)); k+=4) {
00160 tmp -= mat(i, k) * mat(k, j);
00161 PREFETCH_R(&mat(i,k+2*EL_PER_CL(T)),3);
00162 tmp -= mat(i,k+1) * mat(k+1,j);
00163 PREFETCH_R(&mat(i,k+3*EL_PER_CL(T)),3);
00164 tmp -= mat(i,k+2) * mat(k+2,j);
00165 PREFETCH_R(&mat(k+4,j), 2);
00166 PREFETCH_R(&mat(k+5,j), 2);
00167 tmp -= mat(i,k+3) * mat(k+3,j);
00168 PREFETCH_R(&mat(k+6,j), 2);
00169 PREFETCH_R(&mat(k+7,j), 2);
00170 }
00171 for (; k < j; ++k)
00172 tmp -= mat(i,k) * mat(k,j);
00173 mat.setval (tmp * invdiag, i, j);
00174 }
00175 }
00176 BCHK(err != 0, MatErr, Diagonal zero encountered, col, err);
00177 return err;
00178 }
00179 #endif
00180
00181 INST(template <typename T> class Matrix friend TVector<T> LU_fwd_subst(const Matrix<T>&, const Vector<T>&);)
00182 template <typename T>
00183 TVector<T> LU_fwd_subst(const Matrix<T>& lu, const Vector<T>& y)
00184 {
00185 const unsigned ysize = y.size();
00186 TVector<T> x(ysize);
00187 BCHK(ysize != lu.rows(), MatErr, LU fwd subst size error, ysize, x);
00188 for (unsigned i = 0; i < ysize; ++i) {
00189 #if 0
00190 ALIGN2(register T tmp, MIN_ALIGN2) = -y(i);
00191 do_vec_mult<T> (i, &x.get(0), &lu(i,0), tmp);
00192 x.set (-tmp, i);
00193 #else
00194 ALIGN2(register T tmp, MIN_ALIGN2) = y(i);
00195 for (register unsigned j = 0; j < i; ++j)
00196 tmp -= x.get(j) * lu(i, j);
00197 x.set (tmp, i);
00198 #endif
00199 }
00200 return x;
00201 }
00202
00203 INST(template <typename T> class Matrix friend TVector<T> LU_bkw_subst(const Matrix<T>&, const Vector<T>&);)
00204 template <typename T>
00205 TVector<T> LU_bkw_subst(const Matrix<T>& lu, const Vector<T>& y)
00206 {
00207 const unsigned ysize = y.size();
00208 TVector<T> x(ysize);
00209 BCHK(ysize != lu.rows(), MatErr, LU bkw subst size error, ysize, x);
00210 for (unsigned i = ysize - 1; (signed)i >= 0; --i) {
00211 ALIGN2(register T tmp, MIN_ALIGN2) = y(i);
00212 for (register unsigned j = ysize - 1; j > i; --j)
00213 tmp -= x.get(j) * lu(i, j);
00214 x.set(tmp / lu(i, i), i);
00215 }
00216 return x;
00217 }
00218
00223 INST(template <typename T> class Matrix friend TVector<T> LU_solve(const Matrix<T>&, const Vector<T>&);)
00224 template <typename T>
00225 inline TVector<T> LU_solve(const Matrix<T>& lu, const Vector<T>& b)
00226 {
00227 Vector<T> y(LU_fwd_subst(lu, b));
00228
00229 return LU_bkw_subst(lu, y);
00230 }
00231
00238 INST(template <typename T> class Matrix friend TVector<T> lu_solve(Matrix<T>&, const Vector<T>&);)
00239 template <typename T>
00240 TVector<T> lu_solve(Matrix<T>& mat, const Vector<T>& b)
00241 {
00242 #ifdef ERRCHECK
00243 Matrix<T> m_backup (mat);
00244 double res;
00245 #endif
00246 lu_decomp(mat);
00247 TVector<T> x (LU_solve(mat, b));
00248 #ifdef ERRCHECK
00249 Vector<T> xx(b.size()); xx.copy (x);
00250 BCHK ((res = MATH__ fabs(m_backup * xx - b)/MATH__ fabs(b)) > MLU_RES_WARN, MatErr,
00251 LU solver: Inaccurate solution, (int)(res / MLU_RES_WARN), x);
00252 #endif
00253 return x;
00254 }
00255
00256 INST(template <typename T> class Matrix friend TMatrix<T> LU_solve(const Matrix<T>&, const Matrix<T>&);)
00257 template <typename T>
00258 TMatrix<T> LU_solve(const Matrix<T>& lu, const Matrix<T>& b)
00259 {
00260 TMatrix<T> x(b.rows(), b.columns());
00261 Vector<T> y(b.rows()), w(b.rows());
00262 for (unsigned i = 0; i < b.columns(); ++i) {
00263 w = b.get_col(i);
00264 y = LU_fwd_subst(lu, w);
00265 x.set_col(LU_bkw_subst(lu, y), i);
00266 }
00267 return x;
00268 }
00269
00270 INST(template <typename T> class Matrix friend TMatrix<T> lu_solve(Matrix<T>&, const Matrix<T>&);)
00271 template <typename T>
00272 inline TMatrix<T> lu_solve(Matrix<T>& mat, const Matrix<T>& b)
00273 {
00274 lu_decomp(mat);
00275 return LU_solve(mat, b);
00276 }
00277
00279 INST(template <typename T> class Matrix friend T LU_det(const Matrix<T>&);)
00280 template <typename T>
00281 T LU_det(const Matrix<T>& lu)
00282 {
00283 ALIGN2(register T tmp, MIN_ALIGN2) = lu(0, 0);
00284 BCHK(lu.rows() != lu.columns(), MatErr, LU determinant only for quadratic matrices, 0, 0);
00285 for (register unsigned i = 1; i < lu.rows(); ++i)
00286 tmp *= lu(i, i);
00287 return tmp;
00288 }
00289
00291 INST(template <typename T> class Matrix friend T lu_det(Matrix<T>&);)
00292 template <typename T>
00293 inline T lu_det(Matrix<T>& mat)
00294 {
00295 lu_decomp(mat);
00296 return LU_det(mat);
00297 }
00298
00299
00301 INST(template <typename T> class Matrix friend TMatrix<T> LU_invert(const Matrix<T>&);)
00302 template <typename T>
00303 TMatrix<T> LU_invert(const Matrix<T>& mat)
00304 {
00305 TMatrix<T> inv (T(0), mat.rows(), mat.columns());
00306 BCHK (mat.rows() != mat.columns(), MatErr, LU invert non quadr. Matrix, 0, inv);
00307 for (unsigned j = 0; j < mat.rows(); ++j) {
00308 Vector<T> v (T(0), mat.columns());
00309 v(j) = T(1);
00310 Vector<T> y (LU_solve(mat, v));
00311 inv.set_col(y, j);
00312 }
00313 return inv;
00314 }
00315
00316
00318 INST(template <typename T> class Matrix friend TMatrix<T> lu_invert(Matrix<T>&);)
00319 template <typename T>
00320 inline TMatrix<T> lu_invert(Matrix<T>& mat)
00321 {
00322 lu_decomp(mat);
00323 return LU_invert(mat);
00324 }
00325
00326 NAMESPACE_END
00327 #endif