1 #ifndef STK_UTIL_PARALLEL_MPI_hpp
2 #define STK_UTIL_PARALLEL_MPI_hpp
4 #include <stk_util/stk_config.h>
5 #if defined( STK_HAS_MPI )
23 MPI_Datatype * datatype)
25 std::complex<T> *complex_in =
static_cast<std::complex<T> *
>(invec);
26 std::complex<T> *complex_inout =
static_cast<std::complex<T> *
>(inoutvec);
28 for (
int i = 0; i < *len; ++i)
29 complex_inout[i] += complex_in[i];
72 static MPI_Op s_mpi_real_complex_sum;
73 static bool initialized =
false;
78 MPI_Op_create(mpi_real_complex_sum<T>,
true, &s_mpi_real_complex_sum);
80 return s_mpi_real_complex_sum;
104 template <
typename T>
112 Loc(
const T &value,
int loc)
129 TempLoc(
double value,
double other,
int loc)
149 template <
typename T>
155 static MPI_Datatype type() {
161 struct Datatype<signed char>
163 static MPI_Datatype type() {
169 struct Datatype<unsigned char>
171 static MPI_Datatype type() {
179 static MPI_Datatype type() {
185 struct Datatype<unsigned int>
187 static MPI_Datatype type() {
193 struct Datatype<short>
195 static MPI_Datatype type() {
201 struct Datatype<unsigned short>
203 static MPI_Datatype type() {
204 return MPI_UNSIGNED_SHORT;
209 struct Datatype<long>
211 static MPI_Datatype type() {
217 struct Datatype<unsigned long>
219 static MPI_Datatype type() {
220 return MPI_UNSIGNED_LONG;
245 struct Datatype<float>
247 static MPI_Datatype type() {
253 struct Datatype<double>
255 static MPI_Datatype type() {
262 struct Datatype<std::complex<float> >
264 static MPI_Datatype type() {
270 struct Datatype<std::complex<double> >
272 static MPI_Datatype type() {
279 struct Datatype<Loc<int> >
281 static MPI_Datatype type() {
287 struct Datatype<Loc<short> >
289 static MPI_Datatype type() {
290 return MPI_SHORT_INT;
295 struct Datatype<Loc<long> >
297 static MPI_Datatype type() {
303 struct Datatype<Loc<unsigned long> >
305 static MPI_Datatype type() {
321 struct Datatype<Loc<float> >
323 static MPI_Datatype type() {
324 return MPI_FLOAT_INT;
329 struct Datatype<Loc<double> >
331 static MPI_Datatype type() {
332 return MPI_DOUBLE_INT;
337 struct Datatype<TempLoc>
339 static MPI_Datatype type() {
362 AllReduce(MPI_Comm mpi_comm, MPI_Op op, T *src_dest,
size_t size)
364 std::vector<T> source(src_dest, src_dest + size);
366 if (MPI_Allreduce(&source[0], &src_dest[0], (
int) size,
Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
367 throw std::runtime_error(
"MPI_Allreduce failed");
387 AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &dest)
389 std::vector<T> source(dest);
391 if (MPI_Allreduce(&source[0], &dest[0], (
int) dest.size(),
Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
392 throw std::runtime_error(
"MPI_Allreduce failed");
415 AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest)
417 if (source.size() != dest.size())
418 throw std::runtime_error(
"sierra::MPI::AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest) vector lengths not equal");
420 if (MPI_Allreduce(&source[0], &dest[0], (
int) dest.size(),
Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
421 throw std::runtime_error(
"MPI_Allreduce failed");
427 AllGather(MPI_Comm mpi_comm, std::vector<T> &source, std::vector<T> &dest)
430 MPI_Comm_size(mpi_comm,&nproc);
431 if (source.size()*nproc != dest.size())
432 throw std::runtime_error(
"sierra::MPI::AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest) vector lengths not equal");
434 if (MPI_Allgather(&source[0], (
int)source.size(), Datatype<T>::type(),
435 &dest[0], (int)source.size(), Datatype<T>::type(),
436 mpi_comm) != MPI_SUCCESS ){
437 throw std::runtime_error(
"MPI_Allreduce failed");
450 template <
typename T>
453 enum {alignment = (
sizeof(T) >
sizeof(
double) ?
sizeof(double) :
sizeof(T))};
454 enum {mask = alignment - 1};
456 char * c = reinterpret_cast<char *>(p);
457 size_t front_misalign = (c - (
char *)0) & mask;
458 if (front_misalign > 0) {
459 size_t correction = alignment - front_misalign;
460 T *q = reinterpret_cast<T *>((c - (
char *)0) + correction);
464 return reinterpret_cast<T *>(p);
503 virtual void size(
void *&inbuf)
const = 0;
516 virtual void copyin(
void *&inbuf)
const = 0;
529 virtual void copyout(
void *&outbuf)
const = 0;
547 virtual void op(
void *&inbuf,
void *&outbuf)
const = 0;
563 template<
class Op,
class LocalIt,
class GlobalIt = LocalIt>
566 typedef typename std::iterator_traits<LocalIt>::value_type value_type;
567 typedef typename std::iterator_traits<LocalIt>::difference_type difference_type;
569 Reduce(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end)
570 : m_localBegin(local_begin),
571 m_localEnd(local_end),
572 m_globalBegin(global_begin),
573 m_globalEnd(global_end),
574 m_length(local_end - local_begin)
576 if (global_end - global_begin != m_length)
577 throw std::runtime_error(
"sierra::MPI::Reduce::Reduce(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) local and global lengths not equal");
583 virtual void size(
void *&inbuf)
const {
584 value_type *t = align_cast<value_type>(inbuf);
589 virtual void copyin(
void *&inbuf)
const {
590 value_type *t = align_cast<value_type>(inbuf);
591 for (LocalIt it = m_localBegin; it != m_localEnd; ++it)
597 value_type *t = align_cast<value_type>(outbuf);
598 for (GlobalIt it = m_globalBegin; it != m_globalEnd; ++it)
603 virtual void op(
void *&inbuf,
void *&outbuf)
const {
604 value_type *tin = align_cast<value_type>(inbuf);
605 value_type *
tout = align_cast<value_type>(outbuf);
607 for (
size_t i = m_length; i; --i)
613 LocalIt m_localBegin;
615 GlobalIt m_globalBegin;
616 GlobalIt m_globalEnd;
617 difference_type m_length;
628 typedef std::vector<ReduceInterface *> ReduceVector;
638 void copyin(
void *
const buffer_in)
const;
640 void copyout(
void *
const buffer_out)
const;
642 void op(
void *
const buffer_in,
void *
const buffer_out)
const;
644 static void void_op(
void * inv,
void * outv,
int *n, MPI_Datatype *datatype);
647 ReduceVector m_reduceVector;
666 template <
typename T>
667 inline Sum(T * dest,
const T *source) {
678 template <
typename T>
679 inline Prod(T * dest,
const T *source) {
690 template <
typename T>
691 inline Min(T * dest,
const T *source) {
692 *dest = std::min(*dest, *source);
702 template <
typename T>
703 inline Max(T * dest,
const T *source) {
704 *dest = std::max(*dest, *source);
714 template <
typename T>
716 if (source->m_value < dest->m_value) {
717 dest->m_value = source->m_value;
718 dest->m_loc = source->m_loc;
720 else if (source->m_value == dest->m_value)
721 dest->m_loc = std::min(dest->m_loc, source->m_loc);
731 template <
typename T>
733 if (source->m_value > dest->m_value) {
734 dest->m_value = source->m_value;
735 dest->m_loc = source->m_loc;
737 else if (source->m_value == dest->m_value)
738 dest->m_loc = std::min(dest->m_loc, source->m_loc);
745 inline MaxTempLoc(TempLoc * dest,
const TempLoc *source) {
746 if (source->m_value > dest->m_value) {
747 dest->m_value = source->m_value;
748 dest->m_other = source->m_other;
749 dest->m_loc = source->m_loc;
751 else if (source->m_value == dest->m_value) {
752 if (dest->m_loc > source->m_loc) {
753 dest->m_other = source->m_other;
754 dest->m_loc = source->m_loc;
762 inline MinTempLoc(TempLoc * dest,
const TempLoc *source) {
763 if (source->m_value < dest->m_value) {
764 dest->m_value = source->m_value;
765 dest->m_other = source->m_other;
766 dest->m_loc = source->m_loc;
768 else if (source->m_value == dest->m_value) {
769 if (dest->m_loc > source->m_loc) {
770 dest->m_other = source->m_other;
771 dest->m_loc = source->m_loc;
912 template<
class LocalIt,
class GlobalIt>
930 template<
class LocalIt,
class GlobalIt>
948 template<
typename T,
class LocalIt,
class GlobalIt>
966 template<
typename T,
class LocalIt,
class GlobalIt>
981 template<
class T,
class U>
985 std::vector<T> source;
987 std::back_insert_iterator<std::vector<T> > source_inserter(source);
988 collector.gather(op, source_inserter);
990 int size = source.size();
998 MPI_Comm_size(mpi_comm, &num_proc);
999 MPI_Comm_rank(mpi_comm, &my_proc);
1002 std::vector<int> local_array_len(num_proc, 0);
1003 local_array_len[my_proc] == size;
1004 std::vector<int> global_array_len(num_proc, 0);
1006 MPI_Allreduce(&local_array_len[0], &global_array_len[0], num_proc, MPI_INT, MPI_SUM, mpi_comm);
1008 for(
unsigned i = 0; i < num_proc; ++i) {
1009 if(global_array_len[i] != size) {
1010 throw std::runtime_error(
"Slib_MPI.h::AllReduceCollected, not all processors have the same length array");
1015 if (source.empty())
return;
1016 std::vector<T> dest(size);
1018 if (MPI_Allreduce(&source[0], &dest[0], size,
Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
1019 throw std::runtime_error(
"MPI_Allreduce failed");
1021 typename std::vector<T>::iterator dest_getter = dest.begin();
1022 collector.scatter(op, dest_getter);
1032 #endif // if defined( STK_HAS_MPI )
1033 #endif // STK_UTIL_PARALLEL_MPI_hpp