52 #ifndef SACADO_ELRFAD_GENERALFAD_HPP
53 #define SACADO_ELRFAD_GENERALFAD_HPP
// Template header with type parameters T and Storage. The declaration it
// introduces is not part of this excerpt (the embedded line numbers jump
// from 72 to 107).
72 template <
typename T,
typename Storage>
// Passes sz, x and zero_out through to Storage and has an empty body.
// NOTE(review): presumably a constructor's initializer list -- the
// signature is not visible here, confirm against the full header.
107 Storage(sz, x, zero_out) {}
// Template over S. The visible lines read from an object x: its size is
// cached in sz, its derivative entries are visited in loops, and finally
// x.val() is stored into this->val(). Loop bodies are not part of this
// excerpt.
131 template <
typename S>
135 const int sz = x.size();
// Two plain index loops over [0, sz); the first is guarded by
// x.hasFastAccess(). NOTE(review): the second is presumably the else
// branch -- the `else` itself is not visible.
139 if (x.hasFastAccess())
140 for(
int i=0; i<sz; ++i)
143 for(
int i=0; i<sz; ++i)
// Fast-access path: a FastLocalAccumOp over Expr<S> is built from x and
// its own member op.i is used as the loop index across [0, sz).
148 if (x.hasFastAccess()) {
150 FastLocalAccumOp< Expr<S> > op(x);
153 for(op.i=0; op.i<sz; ++op.i) {
// A second op.i loop over the same range. NOTE(review): likely the
// non-fast-access path; the construction of this op is not visible.
169 for(op.i=0; op.i<sz; ++op.i) {
// Stores x.val() into this->val().
186 this->
val() = x.val();
// diff(ith, n): takes two const int arguments and returns nothing.
// The only visible statement tests whether this->size() differs from n;
// what happens on a mismatch, and how ith is used, is not part of this
// excerpt.
201 void diff(
const int ith,
const int n) {
202 if (this->size() !=
n)
// Template over S: comparison of this object against x.
// NOTE(review): the function name and the definition of IE are not visible
// in this excerpt.
223 template <
typename S>
// Different sizes mean "not equal" immediately.
227 if (x.size() != this->size())
return false;
// Start from the comparison of the two values ...
228 bool eq = IE::eval(x.val(), this->
val());
// ... then AND in the comparison of every dx(i) pair. Because of `&&`,
// IE::eval is no longer called once eq has become false, although the loop
// itself still runs to the end.
229 for (
int i=0; i<this->size(); i++)
230 eq = eq && IE::eval(x.dx(i), this->
dx(i));
// True only when is_const is set and this object has a nonzero size.
// The guarded statement is not part of this excerpt.
259 if (is_const && this->size()!=0)
// Template over S. The visible statement shrinks this object to size 0, but
// only if its current size is nonzero.
// NOTE(review): presumably part of an assignment from a plain value -- the
// signature is not visible here.
271 template <
typename S>
275 if (this->size()) this->resize(0);
// Delegates the assignment from x to Storage::operator=.
284 Storage::operator=(x);
// Template over S: stores the contents of x into this object (the visible
// end result is this->val() = x.val()). Loop bodies are not part of this
// excerpt.
289 template <
typename S>
// Match x's size first: when the sizes differ, resizeAndZero(xsz) is called.
292 const int xsz = x.size();
293 if (xsz != this->size())
294 this->resizeAndZero(xsz);
// sz is read after the possible resize above.
296 const int sz = this->size();
// Expr<S>::is_linear selects plain index loops over [0, sz), again split on
// x.hasFastAccess().
305 if (Expr<S>::is_linear) {
306 if (x.hasFastAccess())
307 for(
int i=0; i<sz; ++i)
310 for(
int i=0; i<sz; ++i)
// NOTE(review): presumably the non-linear branch. Here a FastLocalAccumOp
// or a SlowLocalAccumOp is built from x and its member op.i serves as the
// loop index over [0, sz).
315 if (x.hasFastAccess()) {
317 FastLocalAccumOp< Expr<S> > op(x);
320 for(op.i=0; op.i<sz; ++op.i) {
333 SlowLocalAccumOp< Expr<S> > op(x);
336 for(op.i=0; op.i<sz; ++op.i) {
// Stores x.val() into this->val().
351 this->
val() = x.val();
// Two template headers over S. The declarations they introduce are not part
// of this excerpt.
364 template <
typename S>
372 template <
typename S>
// Template over S. Caches this->size() in sz and loops i over [0, sz); the
// loop body is not part of this excerpt.
// NOTE(review): presumably a compound update by a plain value -- confirm
// against the full header.
380 template <
typename S>
383 const int sz = this->size();
385 for (
int i=0; i<sz; ++i)
// Template over S. Caches this->size() in sz and loops i over [0, sz); the
// loop body is not part of this excerpt.
// NOTE(review): presumably a compound update by a plain value -- confirm
// against the full header.
391 template <
typename S>
394 const int sz = this->size();
396 for (
int i=0; i<sz; ++i)
// Compound addition of x into this object (visible end result:
// this->val() += x.val()). xsz and sz hold x.size() and this->size().
404 const int xsz = x.size(), sz = this->size();
// Debug-only guard, compiled when SACADO_DEBUG is defined and __CUDA_ARCH__
// is not: the two sizes must agree unless one of them is 0. The thrown
// object is a string literal, so a handler has to catch const char*.
// NOTE(review): the message says "assign" although this is an update.
406 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
407 if ((xsz != sz) && (xsz != 0) && (sz != 0))
408 throw "Fad Error: Attempt to assign with incompatible sizes";
// Loop over this object's sz entries (body not part of this excerpt).
413 for (
int i=0; i<sz; ++i)
// Alternative path: resize to x's size via resizeAndZero, then loop over
// the xsz entries. NOTE(review): the conditions selecting between the two
// paths are not visible here.
417 this->resizeAndZero(xsz);
418 for (
int i=0; i<xsz; ++i)
// Adds x.val() to this->val().
423 this->
val() += x.val();
// Compound subtraction of x from this object (visible end result:
// this->val() -= x.val()). xsz and sz hold x.size() and this->size().
431 const int xsz = x.size(), sz = this->size();
// Debug-only guard, compiled when SACADO_DEBUG is defined and __CUDA_ARCH__
// is not: the two sizes must agree unless one of them is 0. The thrown
// object is a string literal, so a handler has to catch const char*.
433 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
434 if ((xsz != sz) && (xsz != 0) && (sz != 0))
435 throw "Fad Error: Attempt to assign with incompatible sizes";
// Loop over this object's sz entries (body not part of this excerpt).
440 for(
int i=0; i<sz; ++i)
// Alternative path: resize to x's size via resizeAndZero, then loop over
// the xsz entries. NOTE(review): the selecting conditions are not visible.
444 this->resizeAndZero(xsz);
445 for(
int i=0; i<xsz; ++i)
// Subtracts x.val() from this->val().
450 this->
val() -= x.val();
// Compound update of this object from x; xsz and sz hold x.size() and
// this->size(). NOTE(review): by its position between the -= and /=
// fragments this is presumably the multiplication update -- no line in this
// excerpt shows the operator or the value update.
459 const int xsz = x.size(), sz = this->size();
// Debug-only guard, compiled when SACADO_DEBUG is defined and __CUDA_ARCH__
// is not: the two sizes must agree unless one of them is 0. The thrown
// object is a string literal, so a handler has to catch const char*.
463 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
464 if ((xsz != sz) && (xsz != 0) && (sz != 0))
465 throw "Fad Error: Attempt to assign with incompatible sizes";
// Loop over this object's sz entries (body not part of this excerpt).
470 for(
int i=0; i<sz; ++i)
// Alternative path: resize to x's size via resizeAndZero, then loop over
// the xsz entries.
474 this->resizeAndZero(xsz);
475 for(
int i=0; i<xsz; ++i)
// A further loop over the sz entries. NOTE(review): the condition that
// selects it is not visible.
481 for (
int i=0; i<sz; ++i)
// Compound division-style update of this object from x; xsz and sz hold
// x.size() and this->size(). NOTE(review): v and xval are defined on lines
// not shown here -- presumably this->val() and x.val(); confirm.
494 const int xsz = x.size(), sz = this->size();
// Debug-only guard, compiled when SACADO_DEBUG is defined and __CUDA_ARCH__
// is not: the two sizes must agree unless one of them is 0. The thrown
// object is a string literal, so a handler has to catch const char*.
498 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
499 if ((xsz != sz) && (xsz != 0) && (sz != 0))
500 throw "Fad Error: Attempt to assign with incompatible sizes";
// Per-entry expression (dx_i * xval - v * x.dx_i) / xval^2. The left-hand
// side of this statement is on a line not shown here.
505 for(
int i=0; i<sz; ++i)
507 ( this->
fastAccessDx(i)*xval - v*x.fastAccessDx(i) )/ (xval*xval);
// Alternative path: after resizeAndZero(xsz), each entry is set to
// -v * x.dx_i / xval^2, i.e. the expression above without the term that
// involves this object's own entries.
510 this->resizeAndZero(xsz);
511 for(
int i=0; i<xsz; ++i)
512 this->
fastAccessDx(i) = - v*x.fastAccessDx(i) / (xval*xval);
// A further loop over the sz entries. NOTE(review): the condition that
// selects it is not visible.
517 for (
int i=0; i<sz; ++i)
// Template over S: compound addition of x into this object (visible end
// result: this->val() += x.val()). Loop bodies are not part of this
// excerpt.
528 template <
typename S>
531 const int xsz = x.size(), sz = this->size();
// Debug-only guard, compiled when SACADO_DEBUG is defined and __CUDA_ARCH__
// is not: the two sizes must agree unless one of them is 0. The thrown
// object is a string literal, so a handler has to catch const char*.
533 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
534 if ((xsz != sz) && (xsz != 0) && (sz != 0))
535 throw "Fad Error: Attempt to assign with incompatible sizes";
// Expr<S>::is_linear selects plain index loops, split on x.hasFastAccess().
538 if (Expr<S>::is_linear) {
// Loops over this object's existing sz entries.
541 if (x.hasFastAccess())
542 for (
int i=0; i<sz; ++i)
545 for (
int i=0; i<sz; ++i)
// Alternative path: resize to x's size via resizeAndZero, then loop over
// the xsz entries.
549 this->resizeAndZero(xsz);
550 if (x.hasFastAccess())
551 for (
int i=0; i<xsz; ++i)
554 for (
int i=0; i<xsz; ++i)
// NOTE(review): presumably the non-linear branch. After resizeAndZero(xsz)
// a FastLocalAccumOp or SlowLocalAccumOp built from x is driven with op.i
// as the loop index over [0, xsz).
564 this->resizeAndZero(xsz);
566 if (x.hasFastAccess()) {
568 FastLocalAccumOp< Expr<S> > op(x);
571 for(op.i=0; op.i<xsz; ++op.i) {
584 SlowLocalAccumOp< Expr<S> > op(x);
587 for(op.i=0; op.i<xsz; ++op.i) {
// Adds x.val() to this->val().
604 this->
val() += x.val();
// Template over S: compound subtraction of x from this object (visible end
// result: this->val() -= x.val()). Loop bodies are not part of this
// excerpt.
610 template <
typename S>
613 const int xsz = x.size(), sz = this->size();
// Debug-only guard, compiled when SACADO_DEBUG is defined and __CUDA_ARCH__
// is not: the two sizes must agree unless one of them is 0. The thrown
// object is a string literal, so a handler has to catch const char*.
615 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
616 if ((xsz != sz) && (xsz != 0) && (sz != 0))
617 throw "Fad Error: Attempt to assign with incompatible sizes";
// Expr<S>::is_linear selects plain index loops, split on x.hasFastAccess().
620 if (Expr<S>::is_linear) {
// Loops over this object's existing sz entries.
623 if (x.hasFastAccess())
624 for(
int i=0; i<sz; ++i)
627 for (
int i=0; i<sz; ++i)
// Alternative path: resize to x's size via resizeAndZero, then loop over
// the xsz entries.
631 this->resizeAndZero(xsz);
632 if (x.hasFastAccess())
633 for(
int i=0; i<xsz; ++i)
636 for (
int i=0; i<xsz; ++i)
// NOTE(review): presumably the non-linear branch. After resizeAndZero(xsz)
// a FastLocalAccumOp or SlowLocalAccumOp built from x is driven with op.i
// as the loop index over [0, xsz).
646 this->resizeAndZero(xsz);
648 if (x.hasFastAccess()) {
650 FastLocalAccumOp< Expr<S> > op(x);
653 for(op.i=0; op.i<xsz; ++op.i) {
666 SlowLocalAccumOp< Expr<S> > op(x);
669 for(op.i=0; op.i<xsz; ++op.i) {
// Subtracts x.val() from this->val().
684 this->
val() -= x.val();
// Template over S: compound update of this object from x.
// NOTE(review): by its position between the -= and /= fragments this is
// presumably the multiplication update -- no line in this excerpt shows the
// operator or the value update. Loop bodies are not shown either.
690 template <
typename S>
693 const int xsz = x.size(), sz = this->size();
// Debug-only guard, compiled when SACADO_DEBUG is defined and __CUDA_ARCH__
// is not: the two sizes must agree unless one of them is 0. The thrown
// object is a string literal, so a handler has to catch const char*.
697 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
698 if ((xsz != sz) && (xsz != 0) && (sz != 0))
699 throw "Fad Error: Attempt to assign with incompatible sizes";
// Expr<S>::is_linear selects plain index loops, split on x.hasFastAccess().
702 if (Expr<S>::is_linear) {
// Loops over this object's existing sz entries.
705 if (x.hasFastAccess())
706 for(
int i=0; i<sz; ++i)
709 for (
int i=0; i<sz; ++i)
// Alternative path: resize to x's size via resizeAndZero, then loop over
// the xsz entries.
713 this->resizeAndZero(xsz);
714 if (x.hasFastAccess())
715 for(
int i=0; i<xsz; ++i)
718 for (
int i=0; i<xsz; ++i)
// A further loop over the sz entries. NOTE(review): the condition that
// selects it is not visible.
724 for (
int i=0; i<sz; ++i)
// NOTE(review): presumably the non-linear branch. A FastLocalAccumOp or
// SlowLocalAccumOp built from x is driven with op.i as the loop index over
// [0, xsz), here without a preceding resize.
735 if (x.hasFastAccess()) {
737 FastLocalAccumOp< Expr<S> > op(x);
740 for(op.i=0; op.i<xsz; ++op.i) {
754 SlowLocalAccumOp< Expr<S> > op(x);
757 for(op.i=0; op.i<xsz; ++op.i) {
// The same accumulator loops again, this time after resizeAndZero(xsz).
774 this->resizeAndZero(xsz);
776 if (x.hasFastAccess()) {
778 FastLocalAccumOp< Expr<S> > op(x);
781 for(op.i=0; op.i<xsz; ++op.i) {
794 SlowLocalAccumOp< Expr<S> > op(x);
797 for(op.i=0; op.i<xsz; ++op.i) {
// Final visible loop over the sz entries (body and guard not shown).
816 for (
int i=0; i<sz; ++i)
// Template over S: compound division-style update of this object from x.
// NOTE(review): v and xval are defined on lines not shown here --
// presumably this->val() and x.val(); confirm. Most loop bodies are not
// part of this excerpt.
830 template <
typename S>
833 const int xsz = x.size(), sz = this->size();
// Debug-only guard, compiled when SACADO_DEBUG is defined and __CUDA_ARCH__
// is not: the two sizes must agree unless one of them is 0. The thrown
// object is a string literal, so a handler has to catch const char*.
837 #if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
838 if ((xsz != sz) && (xsz != 0) && (sz != 0))
839 throw "Fad Error: Attempt to assign with incompatible sizes";
// Expr<S>::is_linear selects plain index loops, split on x.hasFastAccess().
842 if (Expr<S>::is_linear) {
// Loops over this object's existing sz entries.
845 if (x.hasFastAccess())
846 for(
int i=0; i<sz; ++i)
849 for (
int i=0; i<sz; ++i)
// Alternative path: after resizeAndZero(xsz), the fast-access loop sets
// each entry to -v * x.dx_i / xval^2.
853 this->resizeAndZero(xsz);
854 if (x.hasFastAccess())
855 for(
int i=0; i<xsz; ++i)
856 this->
fastAccessDx(i) = - v*x.fastAccessDx(i) / (xval*xval);
858 for (
int i=0; i<xsz; ++i)
// A further loop over the sz entries. NOTE(review): the condition that
// selects it is not visible.
864 for (
int i=0; i<sz; ++i)
// NOTE(review): presumably the non-linear branch. A FastLocalAccumOp or
// SlowLocalAccumOp built from x is driven with op.i as the loop index over
// [0, xsz), here without a preceding resize.
877 if (x.hasFastAccess()) {
879 FastLocalAccumOp< Expr<S> > op(x);
882 for(op.i=0; op.i<xsz; ++op.i) {
896 SlowLocalAccumOp< Expr<S> > op(x);
899 for(op.i=0; op.i<xsz; ++op.i) {
// The same accumulator loops again, this time after resizeAndZero(xsz).
916 this->resizeAndZero(xsz);
918 if (x.hasFastAccess()) {
920 FastLocalAccumOp< Expr<S> > op(x);
923 for(op.i=0; op.i<xsz; ++op.i) {
936 SlowLocalAccumOp< Expr<S> > op(x);
939 for(op.i=0; op.i<xsz; ++op.i) {
// Final visible loop over the sz entries (body and guard not shown).
958 for (
int i=0; i<sz; ++i)
// Function object templated on an expression type ExprT. Its call operator
// adds one argument's contribution to the member t. The declarations of the
// members x, t, partials and i are not part of this excerpt.
982 template <
typename ExprT>
983 struct FastLocalAccumOp {
// Value type and argument count are taken from the expression type.
984 typedef typename ExprT::value_type
value_type;
985 static const int N = ExprT::num_args;
// Keeps the expression x_ in member x; the constructor body is not shown.
991 FastLocalAccumOp(
const ExprT& x_) : x(x_) {
// Invoked with a tag type ArgT whose static ::value is the argument index.
// Adds partials[Arg] times the Arg-th tangent of x at index i to t.
// NOTE(review): the operator is const yet updates t, so t is presumably
// declared mutable -- confirm against the full header.
994 template <
typename ArgT>
996 void operator () (ArgT arg)
const {
997 const int Arg = ArgT::value;
998 t += partials[Arg] * x.template getTangent<Arg>(i);
// Template over ExprT. NOTE(review): presumably SlowLocalAccumOp, a variant
// of FastLocalAccumOp -- the struct header is not part of this excerpt.
1002 template <
typename ExprT>
// Construction is forwarded to FastLocalAccumOp<ExprT> with x_; the body is
// empty.
1006 FastLocalAccumOp<ExprT>(x_) {}
// Same accumulation as FastLocalAccumOp's call operator, except that an
// argument contributes only when x.isActive<Arg>() returns true. Members
// are reached through this-> throughout.
1007 template <
typename ArgT>
1010 const int Arg = ArgT::value;
1011 if (this->x.template isActive<Arg>())
1012 this->t += this->partials[Arg] * this->x.template getTangent<Arg>(this->i);
// Template over T and Storage: writes x to the stream os.
// NOTE(review): presumably operator<< -- the signature, the closing text
// after the loop and the return statement are not part of this excerpt.
1019 template <
typename T,
typename Storage>
// The value first, followed by " [".
1022 os << x.val() <<
" [";
// Then every dx(i), each preceded by a single space.
1024 for (
int i=0; i< x.size(); i++) {
1025 os <<
" " << x.dx(i);
1036 #endif // SACADO_ELRFAD_GENERALFAD_HPP