45 #ifndef BELOS_BLOCK_GCRODR_SOLMGR_HPP
46 #define BELOS_BLOCK_GCRODR_SOLMGR_HPP
65 #ifdef BELOS_TEUCHOS_TIME_MONITOR
67 #endif // BELOS_TEUCHOS_TIME_MONITOR
126 template<
class ScalarType,
class MV,
class OP>
225 "Belos::BlockGCRODRSolMgr::setProblem: The input LinearProblem cannot be null.");
228 if (! problem->isProblemSet()) {
229 const bool success = problem->setProblem();
231 "Belos::BlockGCRODRSolMgr::setProblem: Calling the input LinearProblem's setProblem() method failed. This likely means that the "
232 "LinearProblem has a missing (null) matrix A, solution vector X, or right-hand side vector B. Please set these items in the LinearProblem and try again.");
254 problem_->setProblem();
292 void initializeStateStorage();
310 int getHarmonicVecsKryl (
int m,
const SDM& HH, SDM& PP);
315 int getHarmonicVecsAugKryl (
int keff,
322 void sort (std::vector<MagnitudeType>& dlist,
int n, std::vector<int>& iperm);
342 ortho_factory_type orthoFactory_;
367 static const bool adaptiveBlockSize_default_;
368 static const std::string recycleMethod_default_;
371 MagnitudeType convTol_, orthoKappa_, achievedTol_;
372 int blockSize_, maxRestarts_, maxIters_, numIters_;
373 int verbosity_, outputStyle_, outputFreq_;
374 bool adaptiveBlockSize_;
375 std::string orthoType_, recycleMethod_;
376 std::string impResScale_, expResScale_;
384 int numBlocks_, recycledBlocks_;
406 std::vector<ScalarType> tau_;
407 std::vector<ScalarType> work_;
409 std::vector<int> ipiv_;
421 bool builtRecycleSpace_;
427 template<
class ScalarType,
class MV,
class OP>
428 const bool BlockGCRODRSolMgr<ScalarType,MV,OP>::adaptiveBlockSize_default_ =
true;
430 template<
class ScalarType,
class MV,
class OP>
431 const std::string BlockGCRODRSolMgr<ScalarType,MV,OP>::recycleMethod_default_ =
"harmvecs";
437 template<
class ScalarType,
class MV,
class OP>
443 template<
class ScalarType,
class MV,
class OP>
452 "Belos::BlockGCRODR constructor: The solver manager's constructor needs "
453 "the linear problem argument 'problem' to be nonnull.");
463 template<
class ScalarType,
class MV,
class OP>
465 adaptiveBlockSize_ = adaptiveBlockSize_default_;
466 recycleMethod_ = recycleMethod_default_;
468 loaDetected_ =
false;
469 builtRecycleSpace_ =
false;
540 template<
class ScalarType,
class MV,
class OP>
542 std::ostringstream oss;
543 oss <<
"Belos::BlockGCRODRSolMgr<" << SCT::name() <<
", ...>";
545 oss <<
"Ortho Type='"<<orthoType_ ;
546 oss <<
", Num Blocks=" <<numBlocks_;
547 oss <<
", Block Size=" <<blockSize_;
548 oss <<
", Num Recycle Blocks=" << recycledBlocks_;
549 oss <<
", Max Restarts=" << maxRestarts_;
554 template<
class ScalarType,
class MV,
class OP>
558 using Teuchos::parameterList;
560 using Teuchos::rcpFromRef;
562 if (defaultParams_.is_null()) {
563 RCP<ParameterList> pl = parameterList ();
565 const MagnitudeType convTol = SMT::squareroot (SCT::magnitude (SCT::eps()));
566 const int maxRestarts = 1000;
567 const int maxIters = 5000;
568 const int blockSize = 2;
569 const int numBlocks = 100;
570 const int numRecycledBlocks = 25;
573 const int outputFreq = 1;
574 RCP<std::ostream> outputStream = rcpFromRef (std::cout);
575 const std::string impResScale (
"Norm of Preconditioned Initial Residual");
576 const std::string expResScale (
"Norm of Initial Residual");
577 const std::string timerLabel (
"Belos");
578 const std::string orthoType (
"DGKS");
579 RCP<const ParameterList> orthoParams = orthoFactory_.getDefaultParameters (orthoType);
586 const MagnitudeType orthoKappa = -SMT::one();
589 pl->set (
"Convergence Tolerance", convTol,
590 "The tolerance that the solver needs to achieve in order for "
591 "the linear system(s) to be declared converged. The meaning "
592 "of this tolerance depends on the convergence test details.");
593 pl->set(
"Maximum Restarts", maxRestarts,
594 "The maximum number of restart cycles (not counting the first) "
595 "allowed for each set of right-hand sides solved.");
596 pl->set(
"Maximum Iterations", maxIters,
597 "The maximum number of iterations allowed for each set of "
598 "right-hand sides solved.");
599 pl->set(
"Block Size", blockSize,
600 "How many linear systems to solve at once.");
601 pl->set(
"Num Blocks", numBlocks,
602 "The maximum number of blocks allowed in the Krylov subspace "
603 "for each set of right-hand sides solved. This includes the "
604 "initial block. Total Krylov basis storage, not counting the "
605 "recycled subspace, is \"Num Blocks\" times \"Block Size\".");
606 pl->set(
"Num Recycled Blocks", numRecycledBlocks,
607 "The maximum number of vectors in the recycled subspace." );
608 pl->set(
"Verbosity", verbosity,
609 "What type(s) of solver information should be written "
610 "to the output stream.");
611 pl->set(
"Output Style", outputStyle,
612 "What style is used for the solver information to write "
613 "to the output stream.");
614 pl->set(
"Output Frequency", outputFreq,
615 "How often convergence information should be written "
616 "to the output stream.");
617 pl->set(
"Output Stream", outputStream,
618 "A reference-counted pointer to the output stream where all "
619 "solver output is sent.");
620 pl->set(
"Implicit Residual Scaling", impResScale,
621 "The type of scaling used in the implicit residual convergence test.");
622 pl->set(
"Explicit Residual Scaling", expResScale,
623 "The type of scaling used in the explicit residual convergence test.");
624 pl->set(
"Timer Label", timerLabel,
625 "The string to use as a prefix for the timer labels.");
626 pl->set(
"Orthogonalization", orthoType,
627 "The orthogonalization method to use. Valid options: " +
628 orthoFactory_.validNamesString());
629 pl->set (
"Orthogonalization Parameters", *orthoParams,
630 "Sublist of parameters specific to the orthogonalization method to use.");
631 pl->set(
"Orthogonalization Constant", orthoKappa,
632 "When using DGKS orthogonalization: the \"depTol\" constant, used "
633 "to determine whether another step of classical Gram-Schmidt is "
634 "necessary. Otherwise ignored. Nonpositive values are ignored.");
637 return defaultParams_;
640 template<
class ScalarType,
class MV,
class OP>
644 using Teuchos::isParameterType;
645 using Teuchos::getParameter;
648 using Teuchos::parameterList;
651 using Teuchos::rcp_dynamic_cast;
652 using Teuchos::rcpFromRef;
658 RCP<const ParameterList> defaultParams = getValidParameters();
665 params_ = parameterList (*defaultParams);
693 const int maxRestarts = params_->
get<
int> (
"Maximum Restarts");
695 "Belos::BlockGCRODRSolMgr: The \"Maximum Restarts\" parameter "
696 "must be nonnegative, but you specified a negative value of "
697 << maxRestarts <<
".");
699 const int maxIters = params_->get<
int> (
"Maximum Iterations");
701 "Belos::BlockGCRODRSolMgr: The \"Maximum Iterations\" parameter "
702 "must be positive, but you specified a nonpositive value of "
705 const int numBlocks = params_->get<
int> (
"Num Blocks");
707 "Belos::BlockGCRODRSolMgr: The \"Num Blocks\" parameter must be "
708 "positive, but you specified a nonpositive value of " << numBlocks
711 const int blockSize = params_->get<
int> (
"Block Size");
713 "Belos::BlockGCRODRSolMgr: The \"Block Size\" parameter must be "
714 "positive, but you specified a nonpositive value of " << blockSize
717 const int recycledBlocks = params_->get<
int> (
"Num Recycled Blocks");
719 "Belos::BlockGCRODRSolMgr: The \"Num Recycled Blocks\" parameter must "
720 "be positive, but you specified a nonpositive value of "
721 << recycledBlocks <<
".");
723 std::invalid_argument,
"Belos::BlockGCRODRSolMgr: The \"Num Recycled "
724 "Blocks\" parameter must be less than the \"Num Blocks\" parameter, "
725 "but you specified \"Num Recycled Blocks\" = " << recycledBlocks
726 <<
" and \"Num Blocks\" = " << numBlocks <<
".");
730 maxRestarts_ = maxRestarts;
731 maxIters_ = maxIters;
732 numBlocks_ = numBlocks;
733 blockSize_ = blockSize;
734 recycledBlocks_ = recycledBlocks;
741 std::string tempLabel = params_->get<std::string> (
"Timer Label");
742 const bool labelChanged = (tempLabel != label_);
744 #ifdef BELOS_TEUCHOS_TIME_MONITOR
745 std::string solveLabel = label_ +
": BlockGCRODRSolMgr total solve time";
746 if (timerSolve_.is_null()) {
749 }
else if (labelChanged) {
758 #endif // BELOS_TEUCHOS_TIME_MONITOR
764 if (params_->isParameter (
"Verbosity")) {
765 if (isParameterType<int> (*params_,
"Verbosity")) {
766 verbosity_ = params_->get<
int> (
"Verbosity");
769 verbosity_ = (int) getParameter<MsgType> (*params_,
"Verbosity");
776 if (params_->isParameter (
"Output Style")) {
777 if (isParameterType<int> (*params_,
"Output Style")) {
778 outputStyle_ = params_->get<
int> (
"Output Style");
781 outputStyle_ = (int) getParameter<OutputType> (*params_,
"Output Style");
809 if (params_->isParameter (
"Output Stream")) {
811 outputStream_ = getParameter<RCP<std::ostream> > (*params_,
"Output Stream");
813 catch (InvalidParameter&) {
814 outputStream_ = rcpFromRef (std::cout);
821 if (outputStream_.is_null()) {
826 outputFreq_ = params_->get<
int> (
"Output Frequency");
829 if (! outputTest_.is_null()) {
830 outputTest_->setOutputFrequency (outputFreq_);
838 if (printer_.is_null()) {
841 printer_->setVerbosity (verbosity_);
842 printer_->setOStream (outputStream_);
853 if (params_->isParameter (
"Orthogonalization")) {
854 const std::string& tempOrthoType =
855 params_->get<std::string> (
"Orthogonalization");
857 if (! orthoFactory_.isValidName (tempOrthoType)) {
858 std::ostringstream os;
859 os <<
"Belos::BlockGCRODRSolMgr: Invalid orthogonalization name \""
860 << tempOrthoType <<
"\". The following are valid options "
861 <<
"for the \"Orthogonalization\" name parameter: ";
862 orthoFactory_.printValidNames (os);
865 if (tempOrthoType != orthoType_) {
867 orthoType_ = tempOrthoType;
884 RCP<ParameterList> orthoParams = sublist (params,
"Orthogonalization Parameters",
true);
886 "Failed to get orthogonalization parameters. "
887 "Please report this bug to the Belos developers.");
913 ortho_ = orthoFactory_.makeMatOrthoManager (orthoType_,
null, printer_,
914 label_, orthoParams);
926 if (params_->isParameter (
"Orthogonalization Constant")) {
927 const MagnitudeType orthoKappa =
928 params_->get<MagnitudeType> (
"Orthogonalization Constant");
929 if (orthoKappa > 0) {
930 orthoKappa_ = orthoKappa;
933 if (orthoType_ ==
"DGKS" && ! ortho_.is_null()) {
939 rcp_dynamic_cast<ortho_man_type>(ortho_)->
setDepTol (orthoKappa_);
949 convTol_ = params_->get<MagnitudeType> (
"Convergence Tolerance");
950 if (! impConvTest_.is_null()) {
953 if (! expConvTest_.is_null()) {
954 expConvTest_->setTolerance (convTol_);
958 if (params_->isParameter (
"Implicit Residual Scaling")) {
959 std::string tempImpResScale =
960 getParameter<std::string> (*params_,
"Implicit Residual Scaling");
963 if (impResScale_ != tempImpResScale) {
965 impResScale_ = tempImpResScale;
967 if (! impConvTest_.is_null()) {
980 if (params_->isParameter(
"Explicit Residual Scaling")) {
981 std::string tempExpResScale =
982 getParameter<std::string> (*params_,
"Explicit Residual Scaling");
985 if (expResScale_ != tempExpResScale) {
987 expResScale_ = tempExpResScale;
989 if (! expConvTest_.is_null()) {
1007 if (maxIterTest_.is_null()) {
1010 maxIterTest_->setMaxIters (maxIters_);
1015 if (impConvTest_.is_null()) {
1016 impConvTest_ =
rcp (
new StatusTestResNorm_t (convTol_));
1022 if (expConvTest_.is_null()) {
1023 expConvTest_ =
rcp (
new StatusTestResNorm_t (convTol_));
1024 expConvTest_->defineResForm (StatusTestResNorm_t::Explicit,
Belos::TwoNorm);
1030 if (convTest_.is_null()) {
1031 convTest_ =
rcp (
new StatusTestCombo_t (StatusTestCombo_t::SEQ,
1039 sTest_ =
rcp (
new StatusTestCombo_t (StatusTestCombo_t::OR,
1040 maxIterTest_, convTest_));
1044 outputTest_ = stoFactory.
create (printer_, sTest_, outputFreq_,
1048 std::string solverDesc =
"Block GCRODR ";
1049 outputTest_->setSolverDesc (solverDesc);
1056 template<
class ScalarType,
class MV,
class OP>
1067 int KrylSpaDim = (numBlocks_ - 1) * blockSize_;
1070 int augSpaDim = KrylSpaDim + recycledBlocks_ + 1;
1073 int augSpaImgDim = KrylSpaDim + blockSize_ + recycledBlocks_+1;
1097 if (U_ == Teuchos::null) {
1098 U_ = MVT::Clone( *rhsMV, recycledBlocks_+1 );
1102 if (MVT::GetNumberVecs(*U_) < recycledBlocks_+1) {
1104 U_ = MVT::Clone( *tmp, recycledBlocks_+1 );
1109 if (C_ == Teuchos::null) {
1110 C_ = MVT::Clone( *rhsMV, recycledBlocks_+1 );
1114 if (MVT::GetNumberVecs(*C_) < recycledBlocks_+1) {
1116 C_ = MVT::Clone( *tmp, recycledBlocks_+1 );
1121 if (U1_ == Teuchos::null) {
1122 U1_ = MVT::Clone( *rhsMV, recycledBlocks_+1 );
1126 if (MVT::GetNumberVecs(*U1_) < recycledBlocks_+1) {
1128 U1_ = MVT::Clone( *tmp, recycledBlocks_+1 );
1133 if (C1_ == Teuchos::null) {
1134 C1_ = MVT::Clone( *rhsMV, recycledBlocks_+1 );
1138 if (MVT::GetNumberVecs(*U1_) < recycledBlocks_+1) {
1140 C1_ = MVT::Clone( *tmp, recycledBlocks_+1 );
1145 if (R_ == Teuchos::null){
1146 R_ = MVT::Clone( *rhsMV, blockSize_ );
1152 if (G_ == Teuchos::null){
1153 G_ =
Teuchos::rcp(
new SDM( augSpaImgDim, augSpaDim ) );
1156 if ( (G_->numRows() != augSpaImgDim) || (G_->numCols() != augSpaDim) )
1158 G_->reshape( augSpaImgDim, augSpaDim );
1160 G_->putScalar(zero);
1164 if (H_ == Teuchos::null){
1165 H_ =
Teuchos::rcp (
new SDM (
Teuchos::View, *G_, KrylSpaDim + blockSize_, KrylSpaDim, recycledBlocks_+1 ,recycledBlocks_+1 ) );
1169 if (F_ == Teuchos::null){
1170 F_ =
Teuchos::rcp(
new SDM( recycledBlocks_+1, recycledBlocks_+1 ) );
1173 if ( (F_->numRows() != recycledBlocks_+1) || (F_->numCols() != recycledBlocks_+1) ){
1174 F_->reshape( recycledBlocks_+1, recycledBlocks_+1 );
1177 F_->putScalar(zero);
1180 if (PP_ == Teuchos::null){
1181 PP_ =
Teuchos::rcp(
new SDM( augSpaImgDim, recycledBlocks_+1 ) );
1184 if ( (PP_->numRows() != augSpaImgDim) || (PP_->numCols() != recycledBlocks_+1) ){
1185 PP_->reshape( augSpaImgDim, recycledBlocks_+1 );
1190 if (HP_ == Teuchos::null)
1191 HP_ =
Teuchos::rcp(
new SDM( augSpaImgDim, augSpaDim ) );
1193 if ( (HP_->numRows() != augSpaImgDim) || (HP_->numCols() != augSpaDim) ){
1194 HP_->reshape( augSpaImgDim, augSpaDim );
1199 tau_.resize(recycledBlocks_+1);
1202 work_.resize(recycledBlocks_+1);
1205 ipiv_.resize(recycledBlocks_+1);
1209 template<
class ScalarType,
class MV,
class OP>
1210 void BlockGCRODRSolMgr<ScalarType,MV,OP>::buildRecycleSpaceKryl(
int& keff,
Teuchos::RCP<BlockGmresIter<ScalarType,MV,OP> > block_gmres_iter){
1215 int p = block_gmres_iter->getState().curDim;
1216 std::vector<int> index(keff);
1221 if(recycledBlocks_ >= p + blockSize_){
1225 for (
int ii=0; ii < p; ++ii) index[ii] = ii;
1227 MVT::SetBlock(*V_, index, *Utmp);
1233 if(recycleMethod_ ==
"harmvecs"){
1234 keff = getHarmonicVecsKryl(p, HH, *PPtmp);
1235 printer_->stream(
Debug) <<
"keff = " << keff << std::endl;
1241 for (
int ii=0; ii<keff; ++ii) index[ii] = ii;
1246 for (
int ii=0; ii < p; ++ii) index[ii] = ii;
1251 MVT::MvTimesMatAddMv( one, *Vtmp, *PPtmp, zero, *U1tmp );
1259 lapack.GEQRF(HPtmp.numRows(),HPtmp.numCols(),HPtmp.values(),HPtmp.stride(),&tau_[0],&work_[0],lwork,&info);
1260 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, BlockGCRODRSolMgrLAPACKFailure,
"Belos::BlockGCRODRSolMgr::solve(): LAPACK _GEQRF failed to compute a workspace size.");
1264 work_.resize(lwork);
1265 lapack.GEQRF(HPtmp.numRows(),HPtmp.numCols(),HPtmp.values(),HPtmp.stride(),&tau_[0],&work_[0],lwork,&info);
1266 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, BlockGCRODRSolMgrLAPACKFailure,
"Belos::BlockGCRODRSolMgr::solve(): LAPACK _GEQRF failed to compute a QR factorization.");
1271 for(
int ii=0;ii<keff;ii++) {
for(
int jj=ii;jj<keff;jj++) Rtmp(ii,jj) = HPtmp(ii,jj); }
1273 lapack.UNGQR(HPtmp.numRows(),HPtmp.numCols(),HPtmp.numCols(),HPtmp.values(),HPtmp.stride(),&tau_[0],&work_[0],lwork,&info);
1274 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, BlockGCRODRSolMgrLAPACKFailure,
"Belos::BlockGCRODRSolMgr::solve(): LAPACK _UNGQR failed to construct the Q factor.");
1278 index.resize( p+blockSize_ );
1279 for (
int ii=0; ii < (p+blockSize_); ++ii) { index[ii] = ii; }
1280 Vtmp = MVT::CloneView( *V_, index );
1281 MVT::MvTimesMatAddMv( one, *Vtmp, HPtmp, zero, *Ctmp );
1288 ipiv_.resize(Rtmp.numRows());
1289 lapack.GETRF(Rtmp.numRows(),Rtmp.numCols(),Rtmp.values(),Rtmp.stride(),&ipiv_[0],&info);
1290 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, BlockGCRODRSolMgrLAPACKFailure,
"Belos::GCRODRSolMgr::solve(): LAPACK _GETRF failed to compute an LU factorization.");
1292 lwork = Rtmp.numRows();
1293 work_.resize(lwork);
1294 lapack.GETRI(Rtmp.numRows(),Rtmp.values(),Rtmp.stride(),&ipiv_[0],&work_[0],lwork,&info);
1297 MVT::MvTimesMatAddMv( one, *U1tmp, Rtmp, zero, *Utmp );
1303 template<
class ScalarType,
class MV,
class OP>
1304 void BlockGCRODRSolMgr<ScalarType,MV,OP>::buildRecycleSpaceAugKryl(
Teuchos::RCP<BlockGCRODRIter<ScalarType,MV,OP> > block_gcrodr_iter){
1308 std::vector<MagnitudeType> d(keff);
1309 std::vector<ScalarType> dscalar(keff);
1310 std::vector<int> index(numBlocks_+1);
1313 BlockGCRODRIterState<ScalarType,MV> oldState = block_gcrodr_iter->getState();
1314 int p = oldState.curDim;
1319 if(recycledBlocks_ >= keff + p){
1322 for (
int ii=0; ii < p; ++ii) { index[ii] = keff+ii; }
1324 for (
int ii=0; ii < p; ++ii) { index[ii] =ii; }
1325 MVT::SetBlock(*V_, index, *Utmp);
1332 for (
int ii=0; ii<keff; ++ii) { index[ii] = ii; }
1335 dscalar.resize(keff);
1336 MVT::MvNorm( *Utmp, d );
1337 for (
int i=0; i<keff; ++i) {
1339 dscalar[i] = (ScalarType)d[i];
1341 MVT::MvScale( *Utmp, dscalar );
1349 for (
int i=0; i<keff; ++i)
1350 (*Gtmp)(i,i) = d[i];
1357 SDM PPtmp(
Teuchos::View, *PP_, p+keff-blockSize_, recycledBlocks_+1 );
1358 keff_new = getHarmonicVecsAugKryl( keff, p-blockSize_, *Gtmp, oldState.V, PPtmp );
1367 index.resize( keff );
1368 for (
int ii=0; ii<keff; ++ii) { index[ii] = ii; }
1370 index.resize( keff_new );
1371 for (
int ii=0; ii<keff_new; ++ii) { index[ii] = ii; }
1372 U1tmp = MVT::CloneViewNonConst( *U1_, index );
1374 MVT::MvTimesMatAddMv( one, *Utmp, PPtmp, zero, *U1tmp );
1379 index.resize(p-blockSize_);
1380 for (
int ii=0; ii < p-blockSize_; ii++) { index[ii] = ii; }
1382 SDM PPtmp(
Teuchos::View, *PP_, p-blockSize_, keff_new, keff );
1383 MVT::MvTimesMatAddMv( one, *Vtmp, PPtmp, one, *U1tmp );
1389 SDM PPtmp(
Teuchos::View, *PP_, p-blockSize_+keff, keff_new );
1394 int info = 0, lwork = -1;
1395 tau_.resize(keff_new);
1396 lapack.GEQRF(HPtmp.numRows(),HPtmp.numCols(),HPtmp.values(),HPtmp.stride(),&tau_[0],&work_[0],lwork,&info);
1397 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,BlockGCRODRSolMgrLAPACKFailure,
"Belos::BlockGCRODRSolMgr::solve(): LAPACK _GEQRF failed to compute a workspace size.");
1400 work_.resize(lwork);
1401 lapack.GEQRF(HPtmp.numRows(),HPtmp.numCols(),HPtmp.values(),HPtmp.stride(),&tau_[0],&work_[0],lwork,&info);
1402 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,BlockGCRODRSolMgrLAPACKFailure,
"Belos::BlockGCRODRSolMgr::solve(): LAPACK _GEQRF failed to compute a QR factorization.");
1407 for(
int i=0;i<keff_new;i++) {
for(
int j=i;j<keff_new;j++) Ftmp(i,j) = HPtmp(i,j); }
1409 lapack.UNGQR(HPtmp.numRows(),HPtmp.numCols(),HPtmp.numCols(),HPtmp.values(),HPtmp.stride(),&tau_[0],&work_[0],lwork,&info);
1410 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,BlockGCRODRSolMgrLAPACKFailure,
"Belos::BlockGCRODRSolMgr::solve(): LAPACK _UNGQR failed to construct the Q factor.");
1420 for (
int i=0; i < keff; i++) { index[i] = i; }
1422 index.resize(keff_new);
1423 for (
int i=0; i < keff_new; i++) { index[i] = i; }
1424 C1tmp = MVT::CloneViewNonConst( *C1_, index );
1426 MVT::MvTimesMatAddMv( one, *Ctmp, PPtmp, zero, *C1tmp );
1431 for (
int i=0; i < p; ++i) { index[i] = i; }
1434 MVT::MvTimesMatAddMv( one, *Vtmp, PPtmp, one, *C1tmp );
1443 ipiv_.resize(Ftmp.numRows());
1444 lapack.GETRF(Ftmp.numRows(),Ftmp.numCols(),Ftmp.values(),Ftmp.stride(),&ipiv_[0],&info);
1445 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,BlockGCRODRSolMgrLAPACKFailure,
"Belos::BlockGCRODRSolMgr::solve(): LAPACK _GETRF failed to compute an LU factorization.");
1448 lwork = Ftmp.numRows();
1449 work_.resize(lwork);
1450 lapack.GETRI(Ftmp.numRows(),Ftmp.values(),Ftmp.stride(),&ipiv_[0],&work_[0],lwork,&info);
1451 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, BlockGCRODRSolMgrLAPACKFailure,
"Belos::BlockGCRODRSolMgr::solve(): LAPACK _GETRI failed to compute an LU factorization.");
1454 index.resize(keff_new);
1455 for (
int i=0; i < keff_new; i++) { index[i] = i; }
1457 MVT::MvTimesMatAddMv( one, *U1tmp, Ftmp, zero, *Utmp );
1462 if (keff != keff_new) {
1464 block_gcrodr_iter->setSize( keff, numBlocks_ );
1466 SDM b1(
Teuchos::View, *G_, recycledBlocks_+2, 1, 0, recycledBlocks_ );
1472 template<
class ScalarType,
class MV,
class OP>
1473 int BlockGCRODRSolMgr<ScalarType,MV,OP>::getHarmonicVecsAugKryl(
int keff,
int m,
const SDM& GG,
const Teuchos::RCP<const MV>& VV, SDM& PP){
1475 int m2 = GG.numCols();
1476 bool xtraVec =
false;
1479 std::vector<int> index;
1482 std::vector<MagnitudeType> wr(m2), wi(m2);
1485 std::vector<MagnitudeType> w(m2);
1488 SDM vr(m2,m2,
false);
1491 std::vector<int> iperm(m2);
1494 builtRecycleSpace_ =
true;
1504 SDM A_tmp( keff+m+blockSize_, keff+m );
1509 for (
int i=0; i<keff; ++i) { index[i] = i; }
1513 MVT::MvTransMv( one, *Ctmp, *Utmp, A11 );
1517 index.resize(m+blockSize_);
1518 for (i=0; i < m+blockSize_; i++) { index[i] = i; }
1520 MVT::MvTransMv( one, *Vp, *Utmp, A21 );
1523 for( i=keff; i<keff+m; i++)
1527 SDM A( m2, A_tmp.numCols() );
1536 char balanc=
'P', jobvl=
'N', jobvr=
'V', sense=
'N';
1537 int ld = A.numRows();
1539 int ldvl = ld, ldvr = ld;
1540 int info = 0,ilo = 0,ihi = 0;
1541 MagnitudeType abnrm = 0.0, bbnrm = 0.0;
1543 std::vector<ScalarType> beta(ld);
1544 std::vector<ScalarType> work(lwork);
1545 std::vector<MagnitudeType> rwork(lwork);
1546 std::vector<MagnitudeType> lscale(ld), rscale(ld);
1547 std::vector<MagnitudeType> rconde(ld), rcondv(ld);
1548 std::vector<int> iwork(ld+6);
1553 lapack.GGEVX(balanc, jobvl, jobvr, sense, ld, A.values(), ld, B.values(), ld, &wr[0], &wi[0],
1554 &beta[0], vl, ldvl, vr.values(), ldvr, &ilo, &ihi, &lscale[0], &rscale[0],
1555 &abnrm, &bbnrm, &rconde[0], &rcondv[0], &work[0], lwork, &rwork[0],
1556 &iwork[0], bwork, &info);
1557 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, BlockGCRODRSolMgrLAPACKFailure,
"Belos::BlockGCRODRSolMgr::solve(): LAPACK GGEVX failed to compute eigensolutions.");
1561 for( i=0; i<ld; i++ )
1564 this->sort(w,ld,iperm);
1569 for( i=0; i<recycledBlocks_; i++ )
1570 for( j=0; j<ld; j++ )
1571 PP(j,i) = vr(j,iperm[ld-recycledBlocks_+i]);
1573 if(scalarTypeIsComplex==
false) {
1576 if (wi[iperm[ld-recycledBlocks_]] != 0.0) {
1578 for ( i=ld-recycledBlocks_; i<ld; i++ )
1579 if (wi[iperm[i]] != 0.0) countImag++;
1581 if (countImag % 2) xtraVec =
true;
1585 if (wi[iperm[ld-recycledBlocks_]] > 0.0) {
1586 for( j=0; j<ld; j++ )
1587 PP(j,recycledBlocks_) = vr(j,iperm[ld-recycledBlocks_]+1);
1590 for( j=0; j<ld; j++ )
1591 PP(j,recycledBlocks_) = vr(j,iperm[ld-recycledBlocks_]-1);
1599 return recycledBlocks_+1;
1601 return recycledBlocks_;
1605 template<
class ScalarType,
class MV,
class OP>
1606 int BlockGCRODRSolMgr<ScalarType,MV,OP>::getHarmonicVecsKryl(
int m,
const SDM& HH, SDM& PP){
1607 bool xtraVec =
false;
1612 std::vector<MagnitudeType> wr(m), wi(m);
1618 std::vector<MagnitudeType> w(m);
1621 std::vector<int> iperm(m);
1625 std::vector<ScalarType> work(lwork);
1626 std::vector<MagnitudeType> rwork(lwork);
1632 builtRecycleSpace_ =
true;
1639 for(
int i=0; i<=blockSize_-1; i++) (*harmRitzMatrix)[blockSize_-1-i][harmRitzMatrix->numRows()-1-i] = 1;
1642 lapack.GESV(m, blockSize_, HHt.values(), HHt.stride(), &iperm[0], harmRitzMatrix->values(), harmRitzMatrix->stride(), &info);
1644 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, BlockGCRODRSolMgrLAPACKFailure,
"Belos::BlockGCRODRSolMgr::solve(): LAPACK GESV failed to compute a solution.");
1655 Htemp =
Teuchos::rcp(
new SDM(H_lbl_t.numRows(), H_lbl_t.numCols()));
1657 H_lbl_t.assign(*Htemp);
1659 Htemp =
Teuchos::rcp(
new SDM(harmRitzMatrix -> numRows(), harmRitzMatrix -> numCols()));
1661 harmRitzMatrix -> assign(*Htemp);
1664 int harmColIndex, HHColIndex;
1666 for(
int i = 0; i<blockSize_; i++){
1667 harmColIndex = harmRitzMatrix -> numCols() - i -1;
1669 for(
int j=0; j<m; j++) (*Htemp)[HHColIndex][j] += (*harmRitzMatrix)[harmColIndex][j];
1671 harmRitzMatrix = Htemp;
1681 lapack.GEEV(
'N',
'V', m, harmRitzMatrix -> values(), harmRitzMatrix -> stride(), &wr[0], &wi[0],
1682 vl, ldvl, vr.values(), vr.stride(), &work[0], lwork, &rwork[0], &info);
1684 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, BlockGCRODRSolMgrLAPACKFailure,
"Belos::BlockGCRODRSolMgr::solve(): LAPACK GEEV failed to compute eigensolutions.");
1687 for(
int i=0; i<m; ++i ) w[i] = Teuchos::ScalarTraits<MagnitudeType>::squareroot( wr[i]*wr[i] + wi[i]*wi[i] );
1689 this->sort(w, m, iperm);
1694 for(
int i=0; i<recycledBlocks_; ++i )
1695 for(
int j=0; j<m; j++ )
1696 PP(j,i) = vr(j,iperm[i]);
1698 if(scalarTypeIsComplex==
false) {
1701 if (wi[iperm[recycledBlocks_-1]] != 0.0) {
1703 for (
int i=0; i<recycledBlocks_; ++i )
1704 if (wi[iperm[i]] != 0.0) countImag++;
1706 if (countImag % 2) xtraVec =
true;
1710 if (wi[iperm[recycledBlocks_-1]] > 0.0) {
1711 for(
int j=0; j<m; ++j )
1712 PP(j,recycledBlocks_) = vr(j,iperm[recycledBlocks_-1]+1);
1715 for(
int j=0; j<m; ++j )
1716 PP(j,recycledBlocks_) = vr(j,iperm[recycledBlocks_-1]-1);
1724 printer_->stream(
Debug) <<
"Recycled " << recycledBlocks_+1 <<
" vectors" << std::endl;
1725 return recycledBlocks_+1;
1728 printer_->stream(
Debug) <<
"Recycled " << recycledBlocks_ <<
" vectors" << std::endl;
1729 return recycledBlocks_;
1734 template<
class ScalarType,
class MV,
class OP>
1735 void BlockGCRODRSolMgr<ScalarType,MV,OP>::sort(std::vector<MagnitudeType>& dlist,
int n, std::vector<int>& iperm) {
1736 int l, r, j, i, flag;
1738 MagnitudeType dRR, dK;
1763 if (dlist[j] > dlist[j - 1]) j = j + 1;
1764 if (dlist[j - 1] > dK) {
1765 dlist[i - 1] = dlist[j - 1];
1766 iperm[i - 1] = iperm[j - 1];
1779 dlist[r] = dlist[0];
1780 iperm[r] = iperm[0];
1794 template<
class ScalarType,
class MV,
class OP>
1798 using Teuchos::rcp_const_cast;
1804 std::vector<int> index(numBlocks_+1);
1820 int numRHS2Solve = MVT::GetNumberVecs( *(problem_->getRHS()) );
1821 int numCurrRHS = ( numRHS2Solve < blockSize_) ? numRHS2Solve : blockSize_;
1825 std::vector<int> currIdx;
1827 if ( adaptiveBlockSize_ ) {
1828 blockSize_ = numCurrRHS;
1829 currIdx.resize( numCurrRHS );
1830 for (
int i=0; i<numCurrRHS; ++i)
1831 currIdx[i] = startPtr+i;
1834 currIdx.resize( blockSize_ );
1835 for (
int i=0; i<numCurrRHS; ++i)
1836 currIdx[i] = startPtr+i;
1837 for (
int i=numCurrRHS; i<blockSize_; ++i)
1842 problem_->setLSIndex( currIdx );
1848 loaDetected_ =
false;
1851 bool isConverged =
true;
1854 initializeStateStorage();
1859 while (numRHS2Solve > 0){
1869 printer_->stream(
Debug) <<
" Now solving RHS index " << currIdx[0] <<
" using recycled subspace of dimension " << keff << std::endl << std::endl;
1873 for (
int ii=0; ii<keff; ++ii) { index[ii] = ii; }
1874 RCP<const MV> Utmp = MVT::CloneView( *U_, index );
1875 RCP<MV> Ctmp = MVT::CloneViewNonConst( *C_, index );
1876 problem_->apply( *Utmp, *Ctmp );
1878 RCP<MV> U1tmp = MVT::CloneViewNonConst( *U1_, index );
1883 int rank = ortho_->normalize(*Ctmp,
rcp(&Ftmp,
false));
1896 work_.resize(lwork);
1901 MVT::MvTimesMatAddMv( one, *Utmp, Ftmp, zero, *U1tmp );
1906 for (
int ii=0; ii<keff; ++ii) { index[ii] = ii; }
1907 Ctmp = MVT::CloneViewNonConst( *C_, index );
1908 Utmp = MVT::CloneView( *U_, index );
1911 SDM Ctr(keff,blockSize_);
1912 problem_->computeCurrPrecResVec( &*R_ );
1913 MVT::MvTransMv( one, *Ctmp, *R_, Ctr );
1916 RCP<MV> update = MVT::Clone( *problem_->getCurrLHSVec(), blockSize_ );
1917 MVT::MvInit( *update, 0.0 );
1918 MVT::MvTimesMatAddMv( one, *Utmp, Ctr, one, *update );
1919 problem_->updateSolution( update,
true );
1922 MVT::MvTimesMatAddMv( -one, *Ctmp, Ctr, one, *R_ );
1930 if (V_ == Teuchos::null) {
1933 V_ = MVT::Clone( *rhsMV, (numBlocks_+1)*blockSize_ );
1937 if (MVT::GetNumberVecs(*V_) < (numBlocks_+1)*blockSize_ ) {
1939 V_ = MVT::Clone( *tmp, (numBlocks_+1)*blockSize_ );
1944 printer_->stream(
Debug) <<
" No recycled subspace available for RHS index " << std::endl << std::endl;
1949 primeList.
set(
"Num Blocks",numBlocks_-1);
1950 primeList.
set(
"Block Size",blockSize_);
1951 primeList.
set(
"Recycled Blocks",0);
1952 primeList.
set(
"Keep Hessenberg",
true);
1953 primeList.
set(
"Initialize Hessenberg",
true);
1955 ptrdiff_t dim = MVT::GetGlobalLength( *(problem_->getRHS()) );
1956 if (blockSize_*static_cast<ptrdiff_t>(numBlocks_) > dim) {
1957 ptrdiff_t tmpNumBlocks = 0;
1958 if (blockSize_ == 1)
1959 tmpNumBlocks = dim / blockSize_;
1961 tmpNumBlocks = ( dim - blockSize_) / blockSize_;
1962 printer_->stream(
Warnings) <<
"Belos::BlockGmresSolMgr::solve(): Warning! Requested Krylov subspace dimension is larger than operator dimension!"
1963 << std::endl <<
"The maximum number of blocks allowed for the Krylov subspace will be adjusted to " << tmpNumBlocks << std::endl;
1964 primeList.
set(
"Num Blocks",Teuchos::as<int>(tmpNumBlocks));
1968 primeList.
set(
"Num Blocks",numBlocks_-1);
1975 block_gmres_iter->setSize( blockSize_, numBlocks_-1 );
1979 if (currIdx[blockSize_-1] == -1) {
1980 V_0 = MVT::Clone( *(problem_->getInitPrecResVec()), blockSize_ );
1981 problem_->computeCurrPrecResVec( &*V_0 );
1984 V_0 = MVT::CloneCopy( *(problem_->getInitPrecResVec()), currIdx );
1991 int rank = ortho_->normalize( *V_0, z_0 );
2000 block_gmres_iter->initializeGmres(newstate);
2002 bool primeConverged =
false;
2005 printer_->stream(
Debug) <<
" Preparing to Iterate!!!!" << std::endl << std::endl;
2006 block_gmres_iter->iterate();
2011 if ( convTest_->getStatus() ==
Passed ) {
2012 printer_->stream(
Debug) <<
"We converged during the prime the pump stage" << std::endl << std::endl;
2013 primeConverged = !(expConvTest_->getLOADetected());
2014 if ( expConvTest_->getLOADetected() ) {
2016 loaDetected_ =
true;
2017 printer_->stream(
Warnings) <<
"Belos::BlockGmresSolMgr::solve(): Warning! Solver has experienced a loss of accuracy!" << std::endl;
2023 else if( maxIterTest_->getStatus() ==
Passed ) {
2025 primeConverged =
false;
2031 printer_->stream(
Debug) <<
" We did not converge on priming cycle of Block GMRES. Therefore we recycle and restart. " << std::endl << std::endl;
2036 if (blockSize_ != 1) {
2037 printer_->stream(
Errors) <<
"Error! Caught std::exception in BlockGmresIter::iterate() at iteration "
2038 << block_gmres_iter->getNumIters() << std::endl
2039 << e.what() << std::endl;
2040 if (convTest_->getStatus() !=
Passed)
2041 primeConverged =
false;
2045 block_gmres_iter->updateLSQR( block_gmres_iter->getCurSubspaceDim() );
2047 sTest_->checkStatus( &*block_gmres_iter );
2048 if (convTest_->getStatus() !=
Passed)
2049 isConverged =
false;
2052 catch (
const std::exception &e) {
2053 printer_->stream(
Errors) <<
"Error! Caught std::exception in BlockGmresIter::iterate() at iteration "
2054 << block_gmres_iter->getNumIters() << std::endl
2055 << e.what() << std::endl;
2063 RCP<MV> update = block_gmres_iter->getCurrentUpdate();
2064 problem_->updateSolution( update,
true );
2068 problem_->computeCurrPrecResVec( &*R_ );
2071 newstate = block_gmres_iter->getState();
2074 H_->assign(*(newstate.
H));
2083 V_ = rcp_const_cast<MV>(newstate.
V);
2086 buildRecycleSpaceKryl(keff, block_gmres_iter);
2087 printer_->stream(
Debug) <<
"Generated recycled subspace using RHS index " << currIdx[0] <<
" of dimension " << keff << std::endl << std::endl;
2091 if (primeConverged) {
2112 problem_->setCurrLS();
2115 startPtr += numCurrRHS;
2116 numRHS2Solve -= numCurrRHS;
2117 if ( numRHS2Solve > 0 ) {
2118 numCurrRHS = ( numRHS2Solve < blockSize_) ? numRHS2Solve : blockSize_;
2119 if ( adaptiveBlockSize_ ) {
2120 blockSize_ = numCurrRHS;
2121 currIdx.resize( numCurrRHS );
2122 for (
int i=0; i<numCurrRHS; ++i) currIdx[i] = startPtr+i;
2125 currIdx.resize( blockSize_ );
2126 for (
int i=0; i<numCurrRHS; ++i) currIdx[i] = startPtr+i;
2127 for (
int i=numCurrRHS; i<blockSize_; ++i) currIdx[i] = -1;
2130 problem_->setLSIndex( currIdx );
2133 currIdx.resize( numRHS2Solve );
2144 blockgcrodrList.
set(
"Num Blocks",numBlocks_);
2145 blockgcrodrList.
set(
"Block Size",blockSize_);
2146 blockgcrodrList.
set(
"Recycled Blocks",keff);
2152 index.resize( blockSize_ );
2153 for(
int ii = 0; ii < blockSize_; ii++) index[ii] = ii;
2157 MVT::Assign(*R_,*V0);
2160 for(
int i=0; i < keff; i++){ index[i] = i;};
2162 H_ =
rcp(
new SDM(
Teuchos::View, *G_, (numBlocks_-1)*blockSize_ + blockSize_, (numBlocks_-1)*blockSize_, keff ,keff ));
2166 newstate.
U = MVT::CloneViewNonConst(*U_, index);
2167 newstate.
C = MVT::CloneViewNonConst(*C_, index);
2169 newstate.
curDim = blockSize_;
2170 block_gcrodr_iter -> initialize(newstate);
2172 int numRestarts = 0;
2176 block_gcrodr_iter -> iterate();
2181 if( convTest_->getStatus() ==
Passed ) {
2189 else if(maxIterTest_->getStatus() ==
Passed ){
2191 isConverged =
false;
2198 else if (block_gcrodr_iter->getCurSubspaceDim() == block_gcrodr_iter->getMaxSubspaceDim()){
2204 problem_->updateSolution(update,
true);
2205 buildRecycleSpaceAugKryl(block_gcrodr_iter);
2207 printer_->stream(
Debug) <<
" Generated new recycled subspace using RHS index " << currIdx[0] <<
" of dimension " << keff << std::endl << std::endl;
2209 if(numRestarts >= maxRestarts_) {
2210 isConverged =
false;
2216 printer_ -> stream(
Debug) <<
" Performing restart number " << numRestarts <<
" of " << maxRestarts_ << std::endl << std::endl;
2219 problem_->computeCurrPrecResVec( &*R_ );
2220 index.resize( blockSize_ );
2221 for (
int ii=0; ii<blockSize_; ++ii) index[ii] = ii;
2223 MVT::SetBlock(*R_,index,*V0);
2227 index.resize( numBlocks_*blockSize_ );
2228 for (
int ii=0; ii<(numBlocks_*blockSize_); ++ii) index[ii] = ii;
2229 restartState.
V = MVT::CloneViewNonConst( *V_, index );
2230 index.resize( keff );
2231 for (
int ii=0; ii<keff; ++ii) index[ii] = ii;
2232 restartState.
U = MVT::CloneViewNonConst( *U_, index );
2233 restartState.
C = MVT::CloneViewNonConst( *C_, index );
2235 H_ =
rcp(
new SDM(
Teuchos::View, *G_, numBlocks_*blockSize_, (numBlocks_-1)*blockSize_, keff ,keff ));
2236 restartState.
B = B_;
2237 restartState.
H = H_;
2238 restartState.
curDim = blockSize_;
2239 block_gcrodr_iter->initialize(restartState);
2248 TEUCHOS_TEST_FOR_EXCEPTION(
true,std::logic_error,
"Belos::BlockGCRODRSolMgr::solve(): Invalid return from BlockGCRODRIter::iterate().");
2254 block_gcrodr_iter->updateLSQR( block_gcrodr_iter->getCurSubspaceDim() );
2256 sTest_->checkStatus( &*block_gcrodr_iter );
2257 if (convTest_->getStatus() !=
Passed) isConverged =
false;
2260 catch(
const std::exception &e){
2261 printer_->stream(
Errors) <<
"Error! Caught exception in BlockGCRODRIter::iterate() at iteration "
2262 << block_gcrodr_iter->getNumIters() << std::endl
2263 << e.what() << std::endl;
2271 problem_->updateSolution( update,
true );
2290 problem_->setCurrLS();
2293 startPtr += numCurrRHS;
2294 numRHS2Solve -= numCurrRHS;
2295 if ( numRHS2Solve > 0 ) {
2296 numCurrRHS = ( numRHS2Solve < blockSize_) ? numRHS2Solve : blockSize_;
2297 if ( adaptiveBlockSize_ ) {
2298 blockSize_ = numCurrRHS;
2299 currIdx.resize( numCurrRHS );
2300 for (
int i=0; i<numCurrRHS; ++i) currIdx[i] = startPtr+i;
2303 currIdx.resize( blockSize_ );
2304 for (
int i=0; i<numCurrRHS; ++i) currIdx[i] = startPtr+i;
2305 for (
int i=numCurrRHS; i<blockSize_; ++i) currIdx[i] = -1;
2308 problem_->setLSIndex( currIdx );
2311 currIdx.resize( numRHS2Solve );
2315 if (!builtRecycleSpace_) {
2316 buildRecycleSpaceAugKryl(block_gcrodr_iter);
2317 printer_->stream(
Debug) <<
" Generated new recycled subspace using RHS index " << currIdx[0] <<
" of dimension " << keff << std::endl << std::endl;
2325 #ifdef BELOS_TEUCHOS_TIME_MONITOR
2331 numIters_ = maxIterTest_->getNumIters();
2334 const std::vector<MagnitudeType>* pTestValues = NULL;
2335 pTestValues = impConvTest_->getTestValue();
2337 "Belos::BlockGCRODRSolMgr::solve(): The implicit convergence test's "
2338 "getTestValue() method returned NULL. Please report this bug to the "
2339 "Belos developers.");
2341 "Belos::BlockGCRODRSolMgr::solve(): The implicit convergence test's "
2342 "getTestValue() method returned a vector of length zero. Please report "
2343 "this bug to the Belos developers.");
2347 achievedTol_ = *std::max_element (pTestValues->begin(), pTestValues->end());