Chris@16
|
1 // Copyright (C) 2005-2006 Douglas Gregor <doug.gregor@gmail.com>.
|
Chris@16
|
2 // Copyright (C) 2004 The Trustees of Indiana University
|
Chris@16
|
3
|
Chris@16
|
4 // Use, modification and distribution is subject to the Boost Software
|
Chris@16
|
5 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
Chris@16
|
6 // http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
7
|
Chris@16
|
8 // Authors: Douglas Gregor
|
Chris@16
|
9 // Andrew Lumsdaine
|
Chris@16
|
10
|
Chris@16
|
11 // Message Passing Interface 1.1 -- Section 4.9.1. Reduce
|
Chris@16
|
12 #ifndef BOOST_MPI_REDUCE_HPP
|
Chris@16
|
13 #define BOOST_MPI_REDUCE_HPP
|
Chris@16
|
14
|
Chris@16
|
15 #include <boost/mpi/exception.hpp>
|
Chris@16
|
16 #include <boost/mpi/datatype.hpp>
|
Chris@16
|
17
|
Chris@16
|
18 // For (de-)serializing sends and receives
|
Chris@16
|
19 #include <boost/mpi/packed_oarchive.hpp>
|
Chris@16
|
20 #include <boost/mpi/packed_iarchive.hpp>
|
Chris@16
|
21
|
Chris@16
|
22 // For packed_[io]archive sends and receives
|
Chris@16
|
23 #include <boost/mpi/detail/point_to_point.hpp>
|
Chris@16
|
24
|
Chris@16
|
25 #include <boost/mpi/communicator.hpp>
|
Chris@16
|
26 #include <boost/mpi/environment.hpp>
|
Chris@16
|
27 #include <boost/mpi/detail/computation_tree.hpp>
|
Chris@16
|
28 #include <boost/mpi/operations.hpp>
|
Chris@16
|
29 #include <algorithm>
|
Chris@16
|
30 #include <exception>
|
Chris@16
|
31 #include <boost/assert.hpp>
|
Chris@16
|
32 #include <boost/scoped_array.hpp>
|
Chris@16
|
33
|
Chris@16
|
34 namespace boost { namespace mpi {
|
Chris@16
|
35
|
Chris@16
|
36
|
Chris@16
|
37 /************************************************************************
|
Chris@16
|
38 * Implementation details *
|
Chris@16
|
39 ************************************************************************/
|
Chris@16
|
40 namespace detail {
|
Chris@16
|
41 /**********************************************************************
|
Chris@16
|
42 * Simple reduction with MPI_Reduce *
|
Chris@16
|
43 **********************************************************************/
|
Chris@16
|
44 // We are reducing at the root for a type that has an associated MPI
|
Chris@16
|
45 // datatype and operation, so we'll use MPI_Reduce directly.
|
Chris@16
|
46 template<typename T, typename Op>
|
Chris@16
|
47 void
|
Chris@16
|
48 reduce_impl(const communicator& comm, const T* in_values, int n,
|
Chris@16
|
49 T* out_values, Op op, int root, mpl::true_ /*is_mpi_op*/,
|
Chris@16
|
50 mpl::true_/*is_mpi_datatype*/)
|
Chris@16
|
51 {
|
Chris@16
|
52 BOOST_MPI_CHECK_RESULT(MPI_Reduce,
|
Chris@16
|
53 (const_cast<T*>(in_values), out_values, n,
|
Chris@16
|
54 boost::mpi::get_mpi_datatype<T>(*in_values),
|
Chris@16
|
55 (is_mpi_op<Op, T>::op()), root, comm));
|
Chris@16
|
56 }
|
Chris@16
|
57
|
Chris@16
|
58 // We are reducing to the root for a type that has an associated MPI
|
Chris@16
|
59 // datatype and operation, so we'll use MPI_Reduce directly.
|
Chris@16
|
60 template<typename T, typename Op>
|
Chris@16
|
61 void
|
Chris@16
|
62 reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
|
Chris@16
|
63 int root, mpl::true_ /*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/)
|
Chris@16
|
64 {
|
Chris@16
|
65 BOOST_MPI_CHECK_RESULT(MPI_Reduce,
|
Chris@16
|
66 (const_cast<T*>(in_values), 0, n,
|
Chris@16
|
67 boost::mpi::get_mpi_datatype<T>(*in_values),
|
Chris@16
|
68 (is_mpi_op<Op, T>::op()), root, comm));
|
Chris@16
|
69 }
|
Chris@16
|
70
|
Chris@16
|
71 /**********************************************************************
|
Chris@16
|
72 * User-defined reduction with MPI_Reduce *
|
Chris@16
|
73 **********************************************************************/
|
Chris@16
|
74
|
Chris@16
|
75 // We are reducing at the root for a type that has an associated MPI
|
Chris@16
|
76 // datatype but with a custom operation. We'll use MPI_Reduce
|
Chris@16
|
77 // directly, but we'll need to create an MPI_Op manually.
|
Chris@16
|
78 template<typename T, typename Op>
|
Chris@16
|
79 void
|
Chris@16
|
80 reduce_impl(const communicator& comm, const T* in_values, int n,
|
Chris@16
|
81 T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/,
|
Chris@16
|
82 mpl::true_/*is_mpi_datatype*/)
|
Chris@16
|
83 {
|
Chris@16
|
84 user_op<Op, T> mpi_op(op);
|
Chris@16
|
85 BOOST_MPI_CHECK_RESULT(MPI_Reduce,
|
Chris@16
|
86 (const_cast<T*>(in_values), out_values, n,
|
Chris@16
|
87 boost::mpi::get_mpi_datatype<T>(*in_values),
|
Chris@16
|
88 mpi_op.get_mpi_op(), root, comm));
|
Chris@16
|
89 }
|
Chris@16
|
90
|
Chris@16
|
91 // We are reducing to the root for a type that has an associated MPI
|
Chris@16
|
92 // datatype but with a custom operation. We'll use MPI_Reduce
|
Chris@16
|
93 // directly, but we'll need to create an MPI_Op manually.
|
Chris@16
|
94 template<typename T, typename Op>
|
Chris@16
|
95 void
|
Chris@16
|
96 reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
|
Chris@16
|
97 int root, mpl::false_/*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/)
|
Chris@16
|
98 {
|
Chris@16
|
99 user_op<Op, T> mpi_op(op);
|
Chris@16
|
100 BOOST_MPI_CHECK_RESULT(MPI_Reduce,
|
Chris@16
|
101 (const_cast<T*>(in_values), 0, n,
|
Chris@16
|
102 boost::mpi::get_mpi_datatype<T>(*in_values),
|
Chris@16
|
103 mpi_op.get_mpi_op(), root, comm));
|
Chris@16
|
104 }
|
Chris@16
|
105
|
Chris@16
|
106 /**********************************************************************
|
Chris@16
|
107 * User-defined, tree-based reduction for non-MPI data types *
|
Chris@16
|
108 **********************************************************************/
|
Chris@16
|
109
|
Chris@16
|
110 // Commutative reduction
|
Chris@16
|
111 template<typename T, typename Op>
|
Chris@16
|
112 void
|
Chris@16
|
113 tree_reduce_impl(const communicator& comm, const T* in_values, int n,
|
Chris@16
|
114 T* out_values, Op op, int root,
|
Chris@16
|
115 mpl::true_ /*is_commutative*/)
|
Chris@16
|
116 {
|
Chris@16
|
117 std::copy(in_values, in_values + n, out_values);
|
Chris@16
|
118
|
Chris@16
|
119 int size = comm.size();
|
Chris@16
|
120 int rank = comm.rank();
|
Chris@16
|
121
|
Chris@16
|
122 // The computation tree we will use.
|
Chris@16
|
123 detail::computation_tree tree(rank, size, root);
|
Chris@16
|
124
|
Chris@16
|
125 int tag = environment::collectives_tag();
|
Chris@16
|
126
|
Chris@16
|
127 MPI_Status status;
|
Chris@16
|
128 int children = 0;
|
Chris@16
|
129 for (int child = tree.child_begin();
|
Chris@16
|
130 children < tree.branching_factor() && child != root;
|
Chris@16
|
131 ++children, child = (child + 1) % size) {
|
Chris@16
|
132 // Receive archive
|
Chris@16
|
133 packed_iarchive ia(comm);
|
Chris@16
|
134 detail::packed_archive_recv(comm, child, tag, ia, status);
|
Chris@16
|
135
|
Chris@16
|
136 T incoming;
|
Chris@16
|
137 for (int i = 0; i < n; ++i) {
|
Chris@16
|
138 ia >> incoming;
|
Chris@16
|
139 out_values[i] = op(out_values[i], incoming);
|
Chris@16
|
140 }
|
Chris@16
|
141 }
|
Chris@16
|
142
|
Chris@16
|
143 // For non-roots, send the result to the parent.
|
Chris@16
|
144 if (tree.parent() != rank) {
|
Chris@16
|
145 packed_oarchive oa(comm);
|
Chris@16
|
146 for (int i = 0; i < n; ++i)
|
Chris@16
|
147 oa << out_values[i];
|
Chris@16
|
148 detail::packed_archive_send(comm, tree.parent(), tag, oa);
|
Chris@16
|
149 }
|
Chris@16
|
150 }
|
Chris@16
|
151
|
Chris@16
|
152 // Commutative reduction from a non-root.
|
Chris@16
|
153 template<typename T, typename Op>
|
Chris@16
|
154 void
|
Chris@16
|
155 tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
|
Chris@16
|
156 int root, mpl::true_ /*is_commutative*/)
|
Chris@16
|
157 {
|
Chris@16
|
158 scoped_array<T> results(new T[n]);
|
Chris@16
|
159 detail::tree_reduce_impl(comm, in_values, n, results.get(), op, root,
|
Chris@16
|
160 mpl::true_());
|
Chris@16
|
161 }
|
Chris@16
|
162
|
Chris@16
|
163 // Non-commutative reduction
|
Chris@16
|
164 template<typename T, typename Op>
|
Chris@16
|
165 void
|
Chris@16
|
166 tree_reduce_impl(const communicator& comm, const T* in_values, int n,
|
Chris@16
|
167 T* out_values, Op op, int root,
|
Chris@16
|
168 mpl::false_ /*is_commutative*/)
|
Chris@16
|
169 {
|
Chris@16
|
170 int tag = environment::collectives_tag();
|
Chris@16
|
171
|
Chris@16
|
172 int left_child = root / 2;
|
Chris@16
|
173 int right_child = (root + comm.size()) / 2;
|
Chris@16
|
174
|
Chris@16
|
175 MPI_Status status;
|
Chris@16
|
176 if (left_child != root) {
|
Chris@16
|
177 // Receive value from the left child and merge it with the value
|
Chris@16
|
178 // we had incoming.
|
Chris@16
|
179 packed_iarchive ia(comm);
|
Chris@16
|
180 detail::packed_archive_recv(comm, left_child, tag, ia, status);
|
Chris@16
|
181 T incoming;
|
Chris@16
|
182 for (int i = 0; i < n; ++i) {
|
Chris@16
|
183 ia >> incoming;
|
Chris@16
|
184 out_values[i] = op(incoming, in_values[i]);
|
Chris@16
|
185 }
|
Chris@16
|
186 } else {
|
Chris@16
|
187 // There was no left value, so copy our incoming value.
|
Chris@16
|
188 std::copy(in_values, in_values + n, out_values);
|
Chris@16
|
189 }
|
Chris@16
|
190
|
Chris@16
|
191 if (right_child != root) {
|
Chris@16
|
192 // Receive value from the right child and merge it with the
|
Chris@16
|
193 // value we had incoming.
|
Chris@16
|
194 packed_iarchive ia(comm);
|
Chris@16
|
195 detail::packed_archive_recv(comm, right_child, tag, ia, status);
|
Chris@16
|
196 T incoming;
|
Chris@16
|
197 for (int i = 0; i < n; ++i) {
|
Chris@16
|
198 ia >> incoming;
|
Chris@16
|
199 out_values[i] = op(out_values[i], incoming);
|
Chris@16
|
200 }
|
Chris@16
|
201 }
|
Chris@16
|
202 }
|
Chris@16
|
203
|
Chris@16
|
204 // Non-commutative reduction from a non-root.
|
Chris@16
|
205 template<typename T, typename Op>
|
Chris@16
|
206 void
|
Chris@16
|
207 tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
|
Chris@16
|
208 int root, mpl::false_ /*is_commutative*/)
|
Chris@16
|
209 {
|
Chris@16
|
210 int size = comm.size();
|
Chris@16
|
211 int rank = comm.rank();
|
Chris@16
|
212
|
Chris@16
|
213 int tag = environment::collectives_tag();
|
Chris@16
|
214
|
Chris@16
|
215 // Determine our parents and children in the commutative binary
|
Chris@16
|
216 // computation tree.
|
Chris@16
|
217 int grandparent = root;
|
Chris@16
|
218 int parent = root;
|
Chris@16
|
219 int left_bound = 0;
|
Chris@16
|
220 int right_bound = size;
|
Chris@16
|
221 int left_child, right_child;
|
Chris@16
|
222 do {
|
Chris@16
|
223 left_child = (left_bound + parent) / 2;
|
Chris@16
|
224 right_child = (parent + right_bound) / 2;
|
Chris@16
|
225
|
Chris@16
|
226 if (rank < parent) {
|
Chris@16
|
227 // Go left.
|
Chris@16
|
228 grandparent = parent;
|
Chris@16
|
229 right_bound = parent;
|
Chris@16
|
230 parent = left_child;
|
Chris@16
|
231 } else if (rank > parent) {
|
Chris@16
|
232 // Go right.
|
Chris@16
|
233 grandparent = parent;
|
Chris@16
|
234 left_bound = parent + 1;
|
Chris@16
|
235 parent = right_child;
|
Chris@16
|
236 } else {
|
Chris@16
|
237 // We've found the parent
|
Chris@16
|
238 break;
|
Chris@16
|
239 }
|
Chris@16
|
240 } while (true);
|
Chris@16
|
241
|
Chris@16
|
242 // Our parent is the grandparent of our children. This is a slight
|
Chris@16
|
243 // abuse of notation, but it makes the send-to-parent below make
|
Chris@16
|
244 // more sense.
|
Chris@16
|
245 parent = grandparent;
|
Chris@16
|
246
|
Chris@16
|
247 MPI_Status status;
|
Chris@16
|
248 scoped_array<T> out_values(new T[n]);
|
Chris@16
|
249 if (left_child != rank) {
|
Chris@16
|
250 // Receive value from the left child and merge it with the value
|
Chris@16
|
251 // we had incoming.
|
Chris@16
|
252 packed_iarchive ia(comm);
|
Chris@16
|
253 detail::packed_archive_recv(comm, left_child, tag, ia, status);
|
Chris@16
|
254 T incoming;
|
Chris@16
|
255 for (int i = 0; i < n; ++i) {
|
Chris@16
|
256 ia >> incoming;
|
Chris@16
|
257 out_values[i] = op(incoming, in_values[i]);
|
Chris@16
|
258 }
|
Chris@16
|
259 } else {
|
Chris@16
|
260 // There was no left value, so copy our incoming value.
|
Chris@16
|
261 std::copy(in_values, in_values + n, out_values.get());
|
Chris@16
|
262 }
|
Chris@16
|
263
|
Chris@16
|
264 if (right_child != rank) {
|
Chris@16
|
265 // Receive value from the right child and merge it with the
|
Chris@16
|
266 // value we had incoming.
|
Chris@16
|
267 packed_iarchive ia(comm);
|
Chris@16
|
268 detail::packed_archive_recv(comm, right_child, tag, ia, status);
|
Chris@16
|
269 T incoming;
|
Chris@16
|
270 for (int i = 0; i < n; ++i) {
|
Chris@16
|
271 ia >> incoming;
|
Chris@16
|
272 out_values[i] = op(out_values[i], incoming);
|
Chris@16
|
273 }
|
Chris@16
|
274 }
|
Chris@16
|
275
|
Chris@16
|
276 // Send the combined value to our parent.
|
Chris@16
|
277 packed_oarchive oa(comm);
|
Chris@16
|
278 for (int i = 0; i < n; ++i)
|
Chris@16
|
279 oa << out_values[i];
|
Chris@16
|
280 detail::packed_archive_send(comm, parent, tag, oa);
|
Chris@16
|
281 }
|
Chris@16
|
282
|
Chris@16
|
283 // We are reducing at the root for a type that has no associated MPI
|
Chris@16
|
284 // datatype and operation, so we'll use a simple tree-based
|
Chris@16
|
285 // algorithm.
|
Chris@16
|
286 template<typename T, typename Op>
|
Chris@16
|
287 void
|
Chris@16
|
288 reduce_impl(const communicator& comm, const T* in_values, int n,
|
Chris@16
|
289 T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/,
|
Chris@16
|
290 mpl::false_ /*is_mpi_datatype*/)
|
Chris@16
|
291 {
|
Chris@16
|
292 detail::tree_reduce_impl(comm, in_values, n, out_values, op, root,
|
Chris@16
|
293 is_commutative<Op, T>());
|
Chris@16
|
294 }
|
Chris@16
|
295
|
Chris@16
|
296 // We are reducing to the root for a type that has no associated MPI
|
Chris@16
|
297 // datatype and operation, so we'll use a simple tree-based
|
Chris@16
|
298 // algorithm.
|
Chris@16
|
299 template<typename T, typename Op>
|
Chris@16
|
300 void
|
Chris@16
|
301 reduce_impl(const communicator& comm, const T* in_values, int n, Op op,
|
Chris@16
|
302 int root, mpl::false_ /*is_mpi_op*/,
|
Chris@16
|
303 mpl::false_ /*is_mpi_datatype*/)
|
Chris@16
|
304 {
|
Chris@16
|
305 detail::tree_reduce_impl(comm, in_values, n, op, root,
|
Chris@16
|
306 is_commutative<Op, T>());
|
Chris@16
|
307 }
|
Chris@16
|
308 } // end namespace detail
|
Chris@16
|
309
|
Chris@16
|
310 template<typename T, typename Op>
|
Chris@16
|
311 void
|
Chris@16
|
312 reduce(const communicator& comm, const T* in_values, int n, T* out_values,
|
Chris@16
|
313 Op op, int root)
|
Chris@16
|
314 {
|
Chris@16
|
315 if (comm.rank() == root)
|
Chris@16
|
316 detail::reduce_impl(comm, in_values, n, out_values, op, root,
|
Chris@16
|
317 is_mpi_op<Op, T>(), is_mpi_datatype<T>());
|
Chris@16
|
318 else
|
Chris@16
|
319 detail::reduce_impl(comm, in_values, n, op, root,
|
Chris@16
|
320 is_mpi_op<Op, T>(), is_mpi_datatype<T>());
|
Chris@16
|
321 }
|
Chris@16
|
322
|
Chris@16
|
323 template<typename T, typename Op>
|
Chris@16
|
324 void
|
Chris@16
|
325 reduce(const communicator& comm, const T* in_values, int n, Op op, int root)
|
Chris@16
|
326 {
|
Chris@16
|
327 BOOST_ASSERT(comm.rank() != root);
|
Chris@16
|
328
|
Chris@16
|
329 detail::reduce_impl(comm, in_values, n, op, root,
|
Chris@16
|
330 is_mpi_op<Op, T>(), is_mpi_datatype<T>());
|
Chris@16
|
331 }
|
Chris@16
|
332
|
Chris@16
|
333 template<typename T, typename Op>
|
Chris@16
|
334 void
|
Chris@16
|
335 reduce(const communicator & comm, std::vector<T> const & in_values, Op op,
|
Chris@16
|
336 int root)
|
Chris@16
|
337 {
|
Chris@16
|
338 reduce(comm, &in_values.front(), in_values.size(), op, root);
|
Chris@16
|
339 }
|
Chris@16
|
340
|
Chris@16
|
341 template<typename T, typename Op>
|
Chris@16
|
342 void
|
Chris@16
|
343 reduce(const communicator & comm, std::vector<T> const & in_values,
|
Chris@16
|
344 std::vector<T> & out_values, Op op, int root)
|
Chris@16
|
345 {
|
Chris@16
|
346 out_values.resize(in_values.size());
|
Chris@16
|
347 reduce(comm, &in_values.front(), in_values.size(), &out_values.front(), op,
|
Chris@16
|
348 root);
|
Chris@16
|
349 }
|
Chris@16
|
350
|
Chris@16
|
351
|
Chris@16
|
352 template<typename T, typename Op>
|
Chris@16
|
353 void
|
Chris@16
|
354 reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
|
Chris@16
|
355 int root)
|
Chris@16
|
356 {
|
Chris@16
|
357 if (comm.rank() == root)
|
Chris@16
|
358 detail::reduce_impl(comm, &in_value, 1, &out_value, op, root,
|
Chris@16
|
359 is_mpi_op<Op, T>(), is_mpi_datatype<T>());
|
Chris@16
|
360 else
|
Chris@16
|
361 detail::reduce_impl(comm, &in_value, 1, op, root,
|
Chris@16
|
362 is_mpi_op<Op, T>(), is_mpi_datatype<T>());
|
Chris@16
|
363 }
|
Chris@16
|
364
|
Chris@16
|
365 template<typename T, typename Op>
|
Chris@16
|
366 void reduce(const communicator& comm, const T& in_value, Op op, int root)
|
Chris@16
|
367 {
|
Chris@16
|
368 BOOST_ASSERT(comm.rank() != root);
|
Chris@16
|
369
|
Chris@16
|
370 detail::reduce_impl(comm, &in_value, 1, op, root,
|
Chris@16
|
371 is_mpi_op<Op, T>(), is_mpi_datatype<T>());
|
Chris@16
|
372 }
|
Chris@16
|
373
|
Chris@16
|
374 } } // end namespace boost::mpi
|
Chris@16
|
375
|
Chris@16
|
376 #endif // BOOST_MPI_REDUCE_HPP
|