42 #ifndef BELOS_BLOCK_FGMRES_ITER_HPP
43 #define BELOS_BLOCK_FGMRES_ITER_HPP
82 template<
class ScalarType,
class MV,
class OP>
226 if (!initialized_)
return 0;
260 void setSize(
int blockSize,
int numBlocks);
307 bool stateStorageInitialized_;
332 template<
class ScalarType,
class MV,
class OP>
346 stateStorageInitialized_(false),
352 ! params.
isParameter (
"Num Blocks"), std::invalid_argument,
353 "Belos::BlockFGmresIter::constructor: mandatory parameter 'Num Blocks' is not specified.");
354 const int nb = params.
get<
int> (
"Num Blocks");
357 const int bs = params.
get (
"Block Size", 1);
363 template <
class ScalarType,
class MV,
class OP>
369 TEUCHOS_TEST_FOR_EXCEPTION(numBlocks <= 0 || blockSize <= 0, std::invalid_argument,
"Belos::BlockFGmresIter::setSize was passed a non-positive argument.");
370 if (blockSize == blockSize_ && numBlocks == numBlocks_) {
375 if (blockSize!=blockSize_ || numBlocks!=numBlocks_)
376 stateStorageInitialized_ =
false;
378 blockSize_ = blockSize;
379 numBlocks_ = numBlocks;
381 initialized_ =
false;
391 template <
class ScalarType,
class MV,
class OP>
398 if (! stateStorageInitialized_) {
400 RCP<const MV> lhsMV = lp_->getLHS();
401 RCP<const MV> rhsMV = lp_->getRHS();
402 if (lhsMV == Teuchos::null && rhsMV == Teuchos::null) {
403 stateStorageInitialized_ =
false;
410 int newsd = blockSize_*(numBlocks_+1);
422 blockSize_ * static_cast<ptrdiff_t> (numBlocks_) > MVT::GetGlobalLength (*rhsMV),
423 std::invalid_argument,
"Belos::BlockFGmresIter::setStateSize(): "
424 "Cannot generate a Krylov basis with dimension larger the operator!");
427 if (V_ == Teuchos::null) {
429 RCP<const MV> tmp = (rhsMV != Teuchos::null) ? rhsMV : lhsMV;
431 tmp == Teuchos::null, std::invalid_argument,
432 "Belos::BlockFGmresIter::setStateSize(): "
433 "linear problem does not specify multivectors to clone from.");
434 V_ = MVT::Clone (*tmp, newsd);
438 if (MVT::GetNumberVecs (*V_) < newsd) {
439 RCP<const MV> tmp = V_;
440 V_ = MVT::Clone (*tmp, newsd);
444 if (Z_ == Teuchos::null) {
446 RCP<const MV> tmp = (rhsMV != Teuchos::null) ? rhsMV : lhsMV;
448 tmp == Teuchos::null, std::invalid_argument,
449 "Belos::BlockFGmresIter::setStateSize(): "
450 "linear problem does not specify multivectors to clone from.");
451 Z_ = MVT::Clone (*tmp, newsd);
455 if (MVT::GetNumberVecs (*Z_) < newsd) {
456 RCP<const MV> tmp = Z_;
457 Z_ = MVT::Clone (*tmp, newsd);
462 if (H_ == Teuchos::null) {
463 H_ =
rcp (
new SDM (newsd, newsd-blockSize_));
466 H_->shapeUninitialized (newsd, newsd - blockSize_);
472 if (z_ == Teuchos::null) {
473 z_ =
rcp (
new SDM (newsd, blockSize_));
476 z_->shapeUninitialized (newsd, blockSize_);
480 stateStorageInitialized_ =
true;
486 template <
class ScalarType,
class MV,
class OP>
496 return currentUpdate;
503 currentUpdate = MVT::Clone (*Z_, blockSize_);
511 H_->values (), H_->stride (), y.values (), y.stride ());
514 std::vector<int> index (curDim_);
515 for (
int i = 0; i < curDim_; ++i) {
519 MVT::MvTimesMatAddMv (one, *Zjp1, y, zero, *currentUpdate);
521 return currentUpdate;
525 template <
class ScalarType,
class MV,
class OP>
531 if (norms != NULL && (
int)norms->size() < blockSize_) {
532 norms->resize (blockSize_);
537 for (
int j = 0; j < blockSize_; ++j) {
538 (*norms)[j] = blas.
NRM2 (blockSize_, &(*z_)(curDim_, j), 1);
543 return Teuchos::null;
547 template <
class ScalarType,
class MV,
class OP>
556 const ScalarType ZERO = STS::zero ();
557 const ScalarType ONE = STS::one ();
560 if (! stateStorageInitialized_) {
565 ! stateStorageInitialized_, std::invalid_argument,
566 "Belos::BlockFGmresIter::initialize(): Cannot initialize state storage!");
571 const char errstr[] =
"Belos::BlockFGmresIter::initialize(): The given "
572 "multivectors must have a consistent length and width.";
579 MVT::GetGlobalLength(*newstate.
V) != MVT::GetGlobalLength(*V_),
580 std::invalid_argument, errstr );
582 MVT::GetNumberVecs(*newstate.
V) < blockSize_,
583 std::invalid_argument, errstr );
585 newstate.
curDim > blockSize_*(numBlocks_+1),
586 std::invalid_argument, errstr );
588 curDim_ = newstate.
curDim;
589 const int lclDim = MVT::GetNumberVecs(*newstate.
V);
594 std::invalid_argument, errstr);
597 if (newstate.
V != V_) {
599 if (curDim_ == 0 && lclDim > blockSize_) {
600 std::ostream& warn = om_->stream (
Warnings);
601 warn <<
"Belos::BlockFGmresIter::initialize(): the solver was "
602 <<
"initialized with a kernel of " << lclDim << endl
603 <<
"The block size however is only " << blockSize_ << endl
604 <<
"The last " << lclDim - blockSize_
605 <<
" vectors will be discarded." << endl;
607 std::vector<int> nevind (curDim_ + blockSize_);
608 for (
int i = 0; i < curDim_ + blockSize_; ++i) {
611 RCP<const MV> newV = MVT::CloneView (*newstate.
V, nevind);
612 RCP<MV> lclV = MVT::CloneViewNonConst (*V_, nevind);
613 MVT::MvAddMv (ONE, *newV, ZERO, *newV, *lclV);
616 lclV = Teuchos::null;
620 if (newstate.
z != z_) {
622 SDM newZ (
Teuchos::View, *newstate.
z, curDim_ + blockSize_, blockSize_);
624 lclz =
rcp (
new SDM (
Teuchos::View, *z_, curDim_ + blockSize_, blockSize_));
626 lclz = Teuchos::null;
631 newstate.
V == Teuchos::null,std::invalid_argument,
632 "Belos::BlockFGmresIter::initialize(): BlockFGmresStateIterState does not have initial kernel V_0.");
635 newstate.
z == Teuchos::null,std::invalid_argument,
636 "Belos::BlockFGmresIter::initialize(): BlockFGmresStateIterState does not have initial norms z_0.");
644 template <
class ScalarType,
class MV,
class OP>
655 if (initialized_ ==
false) {
660 const int searchDim = blockSize_ * numBlocks_;
664 while (stest_->checkStatus (
this) !=
Passed && curDim_+blockSize_ <= searchDim) {
668 const int lclDim = curDim_ + blockSize_;
671 std::vector<int> curind (blockSize_);
672 for (
int i = 0; i < blockSize_; ++i) {
673 curind[i] = lclDim + i;
675 RCP<MV> Vnext = MVT::CloneViewNonConst (*V_, curind);
679 for (
int i = 0; i < blockSize_; ++i) {
680 curind[i] = curDim_ + i;
682 RCP<const MV> Vprev = MVT::CloneView (*V_, curind);
683 RCP<MV> Znext = MVT::CloneViewNonConst (*Z_, curind);
686 lp_->applyRightPrec (*Vprev, *Znext);
690 lp_->applyOp (*Znext, *Vnext);
695 std::vector<int> prevind (lclDim);
696 for (
int i = 0; i < lclDim; ++i) {
699 Vprev = MVT::CloneView (*V_, prevind);
700 Array<RCP<const MV> > AVprev (1, Vprev);
703 RCP<SDM> subH =
rcp (
new SDM (
View, *H_, lclDim, blockSize_, 0, curDim_));
704 Array<RCP<SDM> > AsubH;
708 RCP<SDM> subR =
rcp (
new SDM (
View, *H_, blockSize_, blockSize_, lclDim, curDim_));
709 const int rank = ortho_->projectAndNormalize (*Vnext, AsubH, subR, AVprev);
712 "Belos::BlockFGmresIter::iterate(): After orthogonalization, the new "
713 "basis block does not have full rank. It contains " << blockSize_
714 <<
" vector" << (blockSize_ != 1 ?
"s" :
"")
715 <<
", but its rank is " << rank <<
".");
727 curDim_ += blockSize_;
732 template<
class ScalarType,
class MV,
class OP>
738 const ScalarType zero = STS::zero ();
739 const ScalarType two = (STS::one () + STS::one());
740 ScalarType sigma, mu, vscale, maxelem;
747 int curDim = curDim_;
748 if (dim >= curDim_ && dim < getMaxSubspaceDim ()) {
757 if (blockSize_ == 1) {
759 for (
int i = 0; i < curDim; ++i) {
761 blas.
ROT (1, &(*H_)(i, curDim), 1, &(*H_)(i+1, curDim), 1, &cs[i], &sn[i]);
764 blas.
ROTG (&(*H_)(curDim, curDim), &(*H_)(curDim+1, curDim), &cs[curDim], &sn[curDim]);
765 (*H_)(curDim+1, curDim) = zero;
768 blas.
ROT (1, &(*z_)(curDim,0), 1, &(*z_)(curDim+1,0), 1, &cs[curDim], &sn[curDim]);
772 for (
int j = 0; j < blockSize_; ++j) {
774 for (
int i = 0; i < curDim + j; ++i) {
775 sigma = blas.
DOT (blockSize_, &(*H_)(i+1,i), 1, &(*H_)(i+1,curDim+j), 1);
776 sigma += (*H_)(i,curDim+j);
778 blas.
AXPY (blockSize_, ScalarType(-sigma), &(*H_)(i+1,i), 1, &(*H_)(i+1,curDim+j), 1);
779 (*H_)(i,curDim+j) -= sigma;
783 const int maxidx = blas.
IAMAX (blockSize_+1, &(*H_)(curDim+j,curDim+j), 1);
784 maxelem = (*H_)(curDim + j + maxidx - 1, curDim + j);
785 for (
int i = 0; i < blockSize_ + 1; ++i) {
786 (*H_)(curDim+j+i,curDim+j) /= maxelem;
788 sigma = blas.
DOT (blockSize_, &(*H_)(curDim + j + 1, curDim + j), 1,
789 &(*H_)(curDim + j + 1, curDim + j), 1);
791 beta[curDim + j] = zero;
793 mu = STS::squareroot ((*H_)(curDim+j,curDim+j)*(*H_)(curDim+j,curDim+j)+sigma);
794 if (STS::real ((*H_)(curDim + j, curDim + j)) < STM::zero ()) {
795 vscale = (*H_)(curDim+j,curDim+j) - mu;
797 vscale = -sigma / ((*H_)(curDim+j, curDim+j) + mu);
799 beta[curDim+j] = two * vscale * vscale / (sigma + vscale*vscale);
800 (*H_)(curDim+j, curDim+j) = maxelem*mu;
801 for (
int i = 0; i < blockSize_; ++i) {
802 (*H_)(curDim+j+1+i,curDim+j) /= vscale;
807 for (
int i = 0; i < blockSize_; ++i) {
808 sigma = blas.
DOT (blockSize_, &(*H_)(curDim+j+1,curDim+j),
809 1, &(*z_)(curDim+j+1,i), 1);
810 sigma += (*z_)(curDim+j,i);
811 sigma *= beta[curDim+j];
812 blas.
AXPY (blockSize_, ScalarType(-sigma), &(*H_)(curDim+j+1,curDim+j),
813 1, &(*z_)(curDim+j+1,i), 1);
814 (*z_)(curDim+j,i) -= sigma;
820 if (dim >= curDim_ && dim < getMaxSubspaceDim ()) {
821 curDim_ = dim + blockSize_;