Chris@16
|
1 // Copyright (C) 2005-2006 Douglas Gregor <doug.gregor@gmail.com>.
|
Chris@16
|
2 // Copyright (C) 2004 The Trustees of Indiana University
|
Chris@16
|
3
|
Chris@16
|
4 // Use, modification and distribution is subject to the Boost Software
|
Chris@16
|
5 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
Chris@16
|
6 // http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
7
|
Chris@16
|
8 // Authors: Douglas Gregor
|
Chris@16
|
9 // Andrew Lumsdaine
|
Chris@16
|
10
|
Chris@16
|
11 // Message Passing Interface 1.1 -- Section 4.9.1. Scan
|
Chris@16
|
12 #ifndef BOOST_MPI_SCAN_HPP
|
Chris@16
|
13 #define BOOST_MPI_SCAN_HPP
|
Chris@16
|
14
|
Chris@16
|
15 #include <boost/mpi/exception.hpp>
|
Chris@16
|
16 #include <boost/mpi/datatype.hpp>
|
Chris@16
|
17
|
Chris@16
|
18 // For (de-)serializing sends and receives
|
Chris@16
|
19 #include <boost/mpi/packed_oarchive.hpp>
|
Chris@16
|
20 #include <boost/mpi/packed_iarchive.hpp>
|
Chris@16
|
21
|
Chris@16
|
22 // For packed_[io]archive sends and receives
|
Chris@16
|
23 #include <boost/mpi/detail/point_to_point.hpp>
|
Chris@16
|
24
|
Chris@16
|
25 #include <boost/mpi/communicator.hpp>
|
Chris@16
|
26 #include <boost/mpi/environment.hpp>
|
Chris@16
|
27 #include <boost/mpi/detail/computation_tree.hpp>
|
Chris@16
|
28 #include <boost/mpi/operations.hpp>
|
Chris@16
|
29 #include <algorithm>
|
Chris@16
|
30 #include <exception>
|
Chris@16
|
31 #include <boost/assert.hpp>
|
Chris@16
|
32
|
Chris@16
|
33 namespace boost { namespace mpi {
|
Chris@16
|
34
|
Chris@16
|
35
|
Chris@16
|
36 /************************************************************************
|
Chris@16
|
37 * Implementation details *
|
Chris@16
|
38 ************************************************************************/
|
Chris@16
|
39 namespace detail {
|
Chris@16
|
40 /**********************************************************************
|
Chris@16
|
41 * Simple prefix reduction with MPI_Scan *
|
Chris@16
|
42 **********************************************************************/
|
Chris@16
|
43
|
Chris@16
|
44 // We are performing prefix reduction for a type that has an
|
Chris@16
|
45 // associated MPI datatype and operation, so we'll use MPI_Scan
|
Chris@16
|
46 // directly.
|
Chris@16
|
47 template<typename T, typename Op>
|
Chris@16
|
48 void
|
Chris@16
|
49 scan_impl(const communicator& comm, const T* in_values, int n, T* out_values,
|
Chris@16
|
50 Op op, mpl::true_ /*is_mpi_op*/, mpl::true_ /*is_mpi_datatype*/)
|
Chris@16
|
51 {
|
Chris@16
|
52 BOOST_MPI_CHECK_RESULT(MPI_Scan,
|
Chris@16
|
53 (const_cast<T*>(in_values), out_values, n,
|
Chris@16
|
54 boost::mpi::get_mpi_datatype<T>(*in_values),
|
Chris@16
|
55 (is_mpi_op<Op, T>::op()), comm));
|
Chris@16
|
56 }
|
Chris@16
|
57
|
Chris@16
|
58 /**********************************************************************
|
Chris@16
|
59 * User-defined prefix reduction with MPI_Scan *
|
Chris@16
|
60 **********************************************************************/
|
Chris@16
|
61
|
Chris@16
|
62 // We are performing prefix reduction for a type that has an
|
Chris@16
|
63 // associated MPI datatype but with a custom operation. We'll use
|
Chris@16
|
64 // MPI_Scan directly, but we'll need to create an MPI_Op manually.
|
Chris@16
|
65 template<typename T, typename Op>
|
Chris@16
|
66 void
|
Chris@16
|
67 scan_impl(const communicator& comm, const T* in_values, int n, T* out_values,
|
Chris@16
|
68 Op op, mpl::false_ /*is_mpi_op*/, mpl::true_ /*is_mpi_datatype*/)
|
Chris@16
|
69 {
|
Chris@16
|
70 user_op<Op, T> mpi_op(op);
|
Chris@16
|
71 BOOST_MPI_CHECK_RESULT(MPI_Scan,
|
Chris@16
|
72 (const_cast<T*>(in_values), out_values, n,
|
Chris@16
|
73 boost::mpi::get_mpi_datatype<T>(*in_values),
|
Chris@16
|
74 mpi_op.get_mpi_op(), comm));
|
Chris@16
|
75 }
|
Chris@16
|
76
|
Chris@16
|
77 /**********************************************************************
|
Chris@16
|
78 * User-defined, tree-based reduction for non-MPI data types *
|
Chris@16
|
79 **********************************************************************/
|
Chris@16
|
80
|
Chris@16
|
81 template<typename T, typename Op>
|
Chris@16
|
82 void
|
Chris@16
|
83 upper_lower_scan(const communicator& comm, const T* in_values, int n,
|
Chris@16
|
84 T* out_values, Op& op, int lower, int upper)
|
Chris@16
|
85 {
|
Chris@16
|
86 int tag = environment::collectives_tag();
|
Chris@16
|
87 int rank = comm.rank();
|
Chris@16
|
88
|
Chris@16
|
89 if (lower + 1 == upper) {
|
Chris@16
|
90 std::copy(in_values, in_values + n, out_values);
|
Chris@16
|
91 } else {
|
Chris@16
|
92 int middle = (lower + upper) / 2;
|
Chris@16
|
93
|
Chris@16
|
94 if (rank < middle) {
|
Chris@16
|
95 // Lower half
|
Chris@16
|
96 upper_lower_scan(comm, in_values, n, out_values, op, lower, middle);
|
Chris@16
|
97
|
Chris@16
|
98 // If we're the last process in the lower half, send our values
|
Chris@16
|
99 // to everyone in the upper half.
|
Chris@16
|
100 if (rank == middle - 1) {
|
Chris@16
|
101 packed_oarchive oa(comm);
|
Chris@16
|
102 for (int i = 0; i < n; ++i)
|
Chris@16
|
103 oa << out_values[i];
|
Chris@16
|
104
|
Chris@16
|
105 for (int p = middle; p < upper; ++p)
|
Chris@16
|
106 comm.send(p, tag, oa);
|
Chris@16
|
107 }
|
Chris@16
|
108 } else {
|
Chris@16
|
109 // Upper half
|
Chris@16
|
110 upper_lower_scan(comm, in_values, n, out_values, op, middle, upper);
|
Chris@16
|
111
|
Chris@16
|
112 // Receive value from the last process in the lower half.
|
Chris@16
|
113 packed_iarchive ia(comm);
|
Chris@16
|
114 comm.recv(middle - 1, tag, ia);
|
Chris@16
|
115
|
Chris@16
|
116 // Combine value that came from the left with our value
|
Chris@16
|
117 T left_value;
|
Chris@16
|
118 for (int i = 0; i < n; ++i)
|
Chris@16
|
119 {
|
Chris@16
|
120 ia >> left_value;
|
Chris@16
|
121 out_values[i] = op(left_value, out_values[i]);
|
Chris@16
|
122 }
|
Chris@16
|
123 }
|
Chris@16
|
124 }
|
Chris@16
|
125 }
|
Chris@16
|
126
|
Chris@16
|
127 // We are performing prefix reduction for a type that has no
|
Chris@16
|
128 // associated MPI datatype and operation, so we'll use a simple
|
Chris@16
|
129 // upper/lower algorithm.
|
Chris@16
|
130 template<typename T, typename Op>
|
Chris@16
|
131 inline void
|
Chris@16
|
132 scan_impl(const communicator& comm, const T* in_values, int n, T* out_values,
|
Chris@16
|
133 Op op, mpl::false_ /*is_mpi_op*/, mpl::false_/*is_mpi_datatype*/)
|
Chris@16
|
134 {
|
Chris@16
|
135 upper_lower_scan(comm, in_values, n, out_values, op, 0, comm.size());
|
Chris@16
|
136 }
|
Chris@16
|
137 } // end namespace detail
|
Chris@16
|
138
|
Chris@16
|
139
|
Chris@16
|
140 template<typename T, typename Op>
|
Chris@16
|
141 inline void
|
Chris@16
|
142 scan(const communicator& comm, const T& in_value, T& out_value, Op op)
|
Chris@16
|
143 {
|
Chris@16
|
144 detail::scan_impl(comm, &in_value, 1, &out_value, op,
|
Chris@16
|
145 is_mpi_op<Op, T>(), is_mpi_datatype<T>());
|
Chris@16
|
146 }
|
Chris@16
|
147
|
Chris@16
|
148 template<typename T, typename Op>
|
Chris@16
|
149 inline void
|
Chris@16
|
150 scan(const communicator& comm, const T* in_values, int n, T* out_values, Op op)
|
Chris@16
|
151 {
|
Chris@16
|
152 detail::scan_impl(comm, in_values, n, out_values, op,
|
Chris@16
|
153 is_mpi_op<Op, T>(), is_mpi_datatype<T>());
|
Chris@16
|
154 }
|
Chris@16
|
155
|
Chris@16
|
156 template<typename T, typename Op>
|
Chris@16
|
157 inline T
|
Chris@16
|
158 scan(const communicator& comm, const T& in_value, Op op)
|
Chris@16
|
159 {
|
Chris@16
|
160 T out_value;
|
Chris@16
|
161 detail::scan_impl(comm, &in_value, 1, &out_value, op,
|
Chris@16
|
162 is_mpi_op<Op, T>(), is_mpi_datatype<T>());
|
Chris@16
|
163 return out_value;
|
Chris@16
|
164 }
|
Chris@16
|
165
|
Chris@16
|
166 } } // end namespace boost::mpi
|
Chris@16
|
167
|
Chris@16
|
168 #endif // BOOST_MPI_SCAN_HPP
|