52 #ifndef SACADO_ELRCACHEFAD_GENERALFAD_HPP
53 #define SACADO_ELRCACHEFAD_GENERALFAD_HPP
62 namespace ELRCacheFad {
70 template <
typename T,
typename Storage>
105 Storage(sz, x, zero_out) {}
129 template <
typename S>
135 const int sz = x.size();
138 this->
val() = x.val();
143 if (x.hasFastAccess())
144 for(
int i=0; i<sz; ++i)
147 for(
int i=0; i<sz; ++i)
152 if (x.hasFastAccess()) {
154 FastLocalAccumOp< Expr<S> > op(x);
157 for(op.i=0; op.i<sz; ++op.i) {
173 for(op.i=0; op.i<sz; ++op.i) {
201 void diff(
const int ith,
const int n) {
202 if (this->size() !=
n)
223 template <
typename S>
227 if (x.size() != this->size())
return false;
228 bool eq = IE::eval(x.val(), this->
val());
229 for (
int i=0; i<this->size(); i++)
230 eq = eq && IE::eval(x.dx(i), this->
dx(i));
259 if (is_const && this->size()!=0)
271 template <
typename S>
275 if (this->size()) this->resize(0);
284 Storage::operator=(x);
289 template <
typename S>
294 const int xsz = x.size();
296 if (xsz != this->size())
297 this->resizeAndZero(xsz);
299 const int sz = this->size();
308 if (Expr<S>::is_linear) {
309 if (x.hasFastAccess())
310 for(
int i=0; i<sz; ++i)
313 for(
int i=0; i<sz; ++i)
318 if (x.hasFastAccess()) {
320 FastLocalAccumOp< Expr<S> > op(x);
323 for(op.i=0; op.i<sz; ++op.i) {
336 SlowLocalAccumOp< Expr<S> > op(x);
339 for(op.i=0; op.i<sz; ++op.i) {
355 this->
val() = x.val();
368 template <
typename S>
376 template <
typename S>
384 template <
typename S>
387 const int sz = this->size();
389 for (
int i=0; i<sz; ++i)
395 template <
typename S>
398 const int sz = this->size();
400 for (
int i=0; i<sz; ++i)
408 const int xsz = x.size(), sz = this->size();
410 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
411 if ((xsz != sz) && (xsz != 0) && (sz != 0))
412 throw "Fad Error: Attempt to assign with incompatible sizes";
417 for (
int i=0; i<sz; ++i)
421 this->resizeAndZero(xsz);
422 for (
int i=0; i<xsz; ++i)
427 this->
val() += x.val();
435 const int xsz = x.size(), sz = this->size();
437 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
438 if ((xsz != sz) && (xsz != 0) && (sz != 0))
439 throw "Fad Error: Attempt to assign with incompatible sizes";
444 for(
int i=0; i<sz; ++i)
448 this->resizeAndZero(xsz);
449 for(
int i=0; i<xsz; ++i)
454 this->
val() -= x.val();
463 const int xsz = x.size(), sz = this->size();
467 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
468 if ((xsz != sz) && (xsz != 0) && (sz != 0))
469 throw "Fad Error: Attempt to assign with incompatible sizes";
474 for(
int i=0; i<sz; ++i)
478 this->resizeAndZero(xsz);
479 for(
int i=0; i<xsz; ++i)
485 for (
int i=0; i<sz; ++i)
498 const int xsz = x.size(), sz = this->size();
502 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
503 if ((xsz != sz) && (xsz != 0) && (sz != 0))
504 throw "Fad Error: Attempt to assign with incompatible sizes";
509 for(
int i=0; i<sz; ++i)
511 ( this->
fastAccessDx(i)*xval - v*x.fastAccessDx(i) )/ (xval*xval);
514 this->resizeAndZero(xsz);
515 for(
int i=0; i<xsz; ++i)
516 this->
fastAccessDx(i) = - v*x.fastAccessDx(i) / (xval*xval);
521 for (
int i=0; i<sz; ++i)
532 template <
typename S>
537 const int xsz = x.size(), sz = this->size();
539 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
540 if ((xsz != sz) && (xsz != 0) && (sz != 0))
541 throw "Fad Error: Attempt to assign with incompatible sizes";
544 if (Expr<S>::is_linear) {
547 if (x.hasFastAccess())
548 for (
int i=0; i<sz; ++i)
551 for (
int i=0; i<sz; ++i)
555 this->resizeAndZero(xsz);
556 if (x.hasFastAccess())
557 for (
int i=0; i<xsz; ++i)
560 for (
int i=0; i<xsz; ++i)
570 this->resizeAndZero(xsz);
572 if (x.hasFastAccess()) {
574 FastLocalAccumOp< Expr<S> > op(x);
577 for(op.i=0; op.i<xsz; ++op.i) {
590 SlowLocalAccumOp< Expr<S> > op(x);
593 for(op.i=0; op.i<xsz; ++op.i) {
609 this->
val() += x.val();
615 template <
typename S>
620 const int xsz = x.size(), sz = this->size();
622 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
623 if ((xsz != sz) && (xsz != 0) && (sz != 0))
624 throw "Fad Error: Attempt to assign with incompatible sizes";
627 if (Expr<S>::is_linear) {
630 if (x.hasFastAccess())
631 for(
int i=0; i<sz; ++i)
634 for (
int i=0; i<sz; ++i)
638 this->resizeAndZero(xsz);
639 if (x.hasFastAccess())
640 for(
int i=0; i<xsz; ++i)
643 for (
int i=0; i<xsz; ++i)
653 this->resizeAndZero(xsz);
655 if (x.hasFastAccess()) {
657 FastLocalAccumOp< Expr<S> > op(x);
660 for(op.i=0; op.i<xsz; ++op.i) {
673 SlowLocalAccumOp< Expr<S> > op(x);
676 for(op.i=0; op.i<xsz; ++op.i) {
692 this->
val() -= x.val();
698 template <
typename S>
703 const int xsz = x.size(), sz = this->size();
707 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
708 if ((xsz != sz) && (xsz != 0) && (sz != 0))
709 throw "Fad Error: Attempt to assign with incompatible sizes";
712 if (Expr<S>::is_linear) {
715 if (x.hasFastAccess())
716 for(
int i=0; i<sz; ++i)
719 for (
int i=0; i<sz; ++i)
723 this->resizeAndZero(xsz);
724 if (x.hasFastAccess())
725 for(
int i=0; i<xsz; ++i)
728 for (
int i=0; i<xsz; ++i)
734 for (
int i=0; i<sz; ++i)
745 if (x.hasFastAccess()) {
747 FastLocalAccumOp< Expr<S> > op(x);
750 for(op.i=0; op.i<xsz; ++op.i) {
764 SlowLocalAccumOp< Expr<S> > op(x);
767 for(op.i=0; op.i<xsz; ++op.i) {
784 this->resizeAndZero(xsz);
786 if (x.hasFastAccess()) {
788 FastLocalAccumOp< Expr<S> > op(x);
791 for(op.i=0; op.i<xsz; ++op.i) {
804 SlowLocalAccumOp< Expr<S> > op(x);
807 for(op.i=0; op.i<xsz; ++op.i) {
826 for (
int i=0; i<sz; ++i)
840 template <
typename S>
845 const int xsz = x.size(), sz = this->size();
849 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
850 if ((xsz != sz) && (xsz != 0) && (sz != 0))
851 throw "Fad Error: Attempt to assign with incompatible sizes";
854 if (Expr<S>::is_linear) {
857 if (x.hasFastAccess())
858 for(
int i=0; i<sz; ++i)
861 for (
int i=0; i<sz; ++i)
865 this->resizeAndZero(xsz);
866 if (x.hasFastAccess())
867 for(
int i=0; i<xsz; ++i)
868 this->
fastAccessDx(i) = - v*x.fastAccessDx(i) / (xval*xval);
870 for (
int i=0; i<xsz; ++i)
876 for (
int i=0; i<sz; ++i)
889 if (x.hasFastAccess()) {
891 FastLocalAccumOp< Expr<S> > op(x);
894 for(op.i=0; op.i<xsz; ++op.i) {
908 SlowLocalAccumOp< Expr<S> > op(x);
911 for(op.i=0; op.i<xsz; ++op.i) {
928 this->resizeAndZero(xsz);
930 if (x.hasFastAccess()) {
932 FastLocalAccumOp< Expr<S> > op(x);
935 for(op.i=0; op.i<xsz; ++op.i) {
948 SlowLocalAccumOp< Expr<S> > op(x);
951 for(op.i=0; op.i<xsz; ++op.i) {
970 for (
int i=0; i<sz; ++i)
994 template <
typename ExprT>
995 struct FastLocalAccumOp {
996 typedef typename ExprT::value_type
value_type;
997 static const int N = ExprT::num_args;
1003 FastLocalAccumOp(
const ExprT& x_) : x(x_) {
1006 template <
typename ArgT>
1008 void operator () (ArgT arg)
const {
1009 const int Arg = ArgT::value;
1010 t += partials[Arg] * x.template getTangent<Arg>(i);
1014 template <
typename ExprT>
1018 FastLocalAccumOp<ExprT>(x_) {}
1019 template <
typename ArgT>
1022 const int Arg = ArgT::value;
1023 if (this->x.template isActive<Arg>())
1024 this->t += this->partials[Arg] * this->x.template getTangent<Arg>(this->i);
1031 template <
typename T,
typename Storage>
1034 os << x.val() <<
" [";
1036 for (
int i=0; i< x.size(); i++) {
1037 os <<
" " << x.dx(i);
1048 #endif // SACADO_ELRCACHEFAD_GENERALFAD_HPP