42 #ifndef BELOS_BLOCK_GMRES_ITER_HPP
43 #define BELOS_BLOCK_GMRES_ITER_HPP
82 template<
class ScalarType,
class MV,
class OP>
259 void setSize(
int blockSize,
int numBlocks);
339 template<
class ScalarType,
class MV,
class OP>
352 stateStorageInitialized_(false),
353 keepHessenberg_(false),
354 initHessenberg_(false),
366 "Belos::BlockGmresIter::constructor: mandatory parameter 'Num Blocks' is not specified.");
367 int nb = Teuchos::getParameter<int>(params,
"Num Blocks");
370 int bs = params.
get(
"Block Size", 1);
376 template <
class ScalarType,
class MV,
class OP>
382 TEUCHOS_TEST_FOR_EXCEPTION(numBlocks <= 0 || blockSize <= 0, std::invalid_argument,
"Belos::BlockGmresIter::setSize was passed a non-positive argument.");
383 if (blockSize == blockSize_ && numBlocks == numBlocks_) {
388 if (blockSize!=blockSize_ || numBlocks!=numBlocks_)
389 stateStorageInitialized_ =
false;
391 blockSize_ = blockSize;
392 numBlocks_ = numBlocks;
394 initialized_ =
false;
404 template <
class ScalarType,
class MV,
class OP>
407 if (!stateStorageInitialized_) {
412 if (lhsMV == Teuchos::null && rhsMV == Teuchos::null) {
413 stateStorageInitialized_ =
false;
421 int newsd = blockSize_*(numBlocks_+1);
428 beta.resize( newsd );
432 TEUCHOS_TEST_FOR_EXCEPTION(blockSize_*static_cast<ptrdiff_t>(numBlocks_) > MVT::GetGlobalLength(*rhsMV),std::invalid_argument,
433 "Belos::BlockGmresIter::setStateSize(): Cannot generate a Krylov basis with dimension larger the operator!");
436 if (V_ == Teuchos::null) {
440 "Belos::BlockGmresIter::setStateSize(): linear problem does not specify multivectors to clone from.");
441 V_ = MVT::Clone( *tmp, newsd );
445 if (MVT::GetNumberVecs(*V_) < newsd) {
447 V_ = MVT::Clone( *tmp, newsd );
452 if (R_ == Teuchos::null) {
455 if (initHessenberg_) {
456 R_->shape( newsd, newsd-blockSize_ );
459 if (R_->numRows() < newsd || R_->numCols() < newsd-blockSize_) {
460 R_->shapeUninitialized( newsd, newsd-blockSize_ );
465 if (keepHessenberg_) {
466 if (H_ == Teuchos::null) {
469 if (initHessenberg_) {
470 H_->shape( newsd, newsd-blockSize_ );
473 if (H_->numRows() < newsd || H_->numCols() < newsd-blockSize_) {
474 H_->shapeUninitialized( newsd, newsd-blockSize_ );
484 if (z_ == Teuchos::null) {
487 if (z_-> numRows() < newsd || z_->numCols() < blockSize_) {
488 z_->shapeUninitialized( newsd, blockSize_ );
492 stateStorageInitialized_ =
true;
499 template <
class ScalarType,
class MV,
class OP>
508 return currentUpdate;
513 currentUpdate = MVT::Clone( *V_, blockSize_ );
527 std::vector<int> index(curDim_);
528 for (
int i=0; i<curDim_; i++ ) {
532 MVT::MvTimesMatAddMv( one, *Vjp1, y, zero, *currentUpdate );
534 return currentUpdate;
541 template <
class ScalarType,
class MV,
class OP>
547 if ( norms && (
int)norms->size() < blockSize_ )
548 norms->resize( blockSize_ );
552 for (
int j=0; j<blockSize_; j++) {
553 (*norms)[j] = blas.
NRM2( blockSize_, &(*z_)(curDim_, j), 1);
556 return Teuchos::null;
563 template <
class ScalarType,
class MV,
class OP>
567 if (!stateStorageInitialized_)
571 "Belos::BlockGmresIter::initialize(): Cannot initialize state storage!");
579 std::string errstr(
"Belos::BlockGmresIter::initialize(): Specified multivectors must have a consistent length and width.");
581 if (newstate.
V != Teuchos::null && newstate.
z != Teuchos::null) {
586 std::invalid_argument, errstr );
588 std::invalid_argument, errstr );
590 std::invalid_argument, errstr );
592 curDim_ = newstate.
curDim;
593 int lclDim = MVT::GetNumberVecs(*newstate.
V);
600 if (newstate.
V != V_) {
602 if (curDim_ == 0 && lclDim > blockSize_) {
603 om_->stream(
Warnings) <<
"Belos::BlockGmresIter::initialize(): the solver was initialized with a kernel of " << lclDim << std::endl
604 <<
"The block size however is only " << blockSize_ << std::endl
605 <<
"The last " << lclDim - blockSize_ <<
" vectors will be discarded." << std::endl;
607 std::vector<int> nevind(curDim_+blockSize_);
608 for (
int i=0; i<curDim_+blockSize_; i++) nevind[i] = i;
611 MVT::MvAddMv( one, *newV, zero, *newV, *lclV );
614 lclV = Teuchos::null;
618 if (newstate.
z != z_) {
626 lclZ = Teuchos::null;
633 "Belos::BlockGmresIter::initialize(): BlockGmresStateIterState does not have initial kernel V_0.");
636 "Belos::BlockGmresIter::initialize(): BlockGmresStateIterState does not have initial norms z_0.");
658 template <
class ScalarType,
class MV,
class OP>
664 if (initialized_ ==
false) {
669 int searchDim = blockSize_*numBlocks_;
676 while (stest_->checkStatus(
this) !=
Passed && curDim_+blockSize_ <= searchDim) {
681 int lclDim = curDim_ + blockSize_;
684 std::vector<int> curind(blockSize_);
685 for (
int i=0; i<blockSize_; i++) { curind[i] = lclDim + i; }
690 for (
int i=0; i<blockSize_; i++) { curind[i] = curDim_ + i; }
694 lp_->apply(*Vprev,*Vnext);
695 Vprev = Teuchos::null;
699 std::vector<int> prevind(lclDim);
700 for (
int i=0; i<lclDim; i++) { prevind[i] = i; }
701 Vprev = MVT::CloneView(*V_,prevind);
714 (
Teuchos::View,*H_,blockSize_,blockSize_,lclDim,curDim_ ) );
716 int rank = ortho_->projectAndNormalize(*Vnext,AsubH,subH2,AVprev);
720 if (keepHessenberg_) {
730 (
Teuchos::View,*R_,blockSize_,blockSize_,lclDim,curDim_ ) );
731 subR2->assign(*subH2);
735 "Belos::BlockGmresIter::iterate(): couldn't generate basis of full rank.");
745 Vnext = Teuchos::null;
746 curDim_ += blockSize_;
769 template<
class ScalarType,
class MV,
class OP>
773 ScalarType sigma, mu, vscale, maxelem;
779 int curDim = curDim_;
780 if (dim >= curDim_ && dim < getMaxSubspaceDim()) {
789 if (blockSize_ == 1) {
793 for (i=0; i<curDim; i++) {
797 blas.
ROT( 1, &(*R_)(i,curDim), 1, &(*R_)(i+1, curDim), 1, &cs[i], &sn[i] );
802 blas.
ROTG( &(*R_)(curDim,curDim), &(*R_)(curDim+1,curDim), &cs[curDim], &sn[curDim] );
803 (*R_)(curDim+1,curDim) = zero;
807 blas.
ROT( 1, &(*z_)(curDim,0), 1, &(*z_)(curDim+1,0), 1, &cs[curDim], &sn[curDim] );
813 for (j=0; j<blockSize_; j++) {
817 for (i=0; i<curDim+j; i++) {
818 sigma = blas.
DOT( blockSize_, &(*R_)(i+1,i), 1, &(*R_)(i+1,curDim+j), 1);
819 sigma += (*R_)(i,curDim+j);
821 blas.
AXPY(blockSize_, ScalarType(-sigma), &(*R_)(i+1,i), 1, &(*R_)(i+1,curDim+j), 1);
822 (*R_)(i,curDim+j) -= sigma;
827 maxidx = blas.
IAMAX( blockSize_+1, &(*R_)(curDim+j,curDim+j), 1 );
828 maxelem = (*R_)(curDim+j+maxidx-1,curDim+j);
829 for (i=0; i<blockSize_+1; i++)
830 (*R_)(curDim+j+i,curDim+j) /= maxelem;
831 sigma = blas.
DOT( blockSize_, &(*R_)(curDim+j+1,curDim+j), 1,
832 &(*R_)(curDim+j+1,curDim+j), 1 );
834 beta[curDim + j] = zero;
839 vscale = (*R_)(curDim+j,curDim+j) - mu;
841 vscale = -sigma / ((*R_)(curDim+j,curDim+j) + mu);
843 beta[curDim+j] = Teuchos::as<ScalarType>(2)*one*vscale*vscale/(sigma + vscale*vscale);
844 (*R_)(curDim+j,curDim+j) = maxelem*mu;
845 for (i=0; i<blockSize_; i++)
846 (*R_)(curDim+j+1+i,curDim+j) /= vscale;
851 for (i=0; i<blockSize_; i++) {
852 sigma = blas.
DOT( blockSize_, &(*R_)(curDim+j+1,curDim+j),
853 1, &(*z_)(curDim+j+1,i), 1);
854 sigma += (*z_)(curDim+j,i);
855 sigma *= beta[curDim+j];
856 blas.
AXPY(blockSize_, ScalarType(-sigma), &(*R_)(curDim+j+1,curDim+j),
857 1, &(*z_)(curDim+j+1,i), 1);
858 (*z_)(curDim+j,i) -= sigma;
864 if (dim >= curDim_ && dim < getMaxSubspaceDim()) {
865 curDim_ = dim + blockSize_;