Mercurial > hg > vamp-build-and-test
comparison DEPENDENCIES/generic/include/boost/mpi/collectives/reduce.hpp @ 16:2665513ce2d3
Add boost headers
author | Chris Cannam |
---|---|
date | Tue, 05 Aug 2014 11:11:38 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
15:663ca0da4350 | 16:2665513ce2d3 |
---|---|
1 // Copyright (C) 2005-2006 Douglas Gregor <doug.gregor@gmail.com>. | |
2 // Copyright (C) 2004 The Trustees of Indiana University | |
3 | |
4 // Use, modification and distribution is subject to the Boost Software | |
5 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at | |
6 // http://www.boost.org/LICENSE_1_0.txt) | |
7 | |
8 // Authors: Douglas Gregor | |
9 // Andrew Lumsdaine | |
10 | |
11 // Message Passing Interface 1.1 -- Section 4.9.1. Reduce | |
12 #ifndef BOOST_MPI_REDUCE_HPP | |
13 #define BOOST_MPI_REDUCE_HPP | |
14 | |
15 #include <boost/mpi/exception.hpp> | |
16 #include <boost/mpi/datatype.hpp> | |
17 | |
18 // For (de-)serializing sends and receives | |
19 #include <boost/mpi/packed_oarchive.hpp> | |
20 #include <boost/mpi/packed_iarchive.hpp> | |
21 | |
22 // For packed_[io]archive sends and receives | |
23 #include <boost/mpi/detail/point_to_point.hpp> | |
24 | |
25 #include <boost/mpi/communicator.hpp> | |
26 #include <boost/mpi/environment.hpp> | |
27 #include <boost/mpi/detail/computation_tree.hpp> | |
28 #include <boost/mpi/operations.hpp> | |
29 #include <algorithm> | |
30 #include <exception> | |
31 #include <boost/assert.hpp> | |
32 #include <boost/scoped_array.hpp> | |
33 | |
34 namespace boost { namespace mpi { | |
35 | |
36 | |
37 /************************************************************************ | |
38 * Implementation details * | |
39 ************************************************************************/ | |
40 namespace detail { | |
41 /********************************************************************** | |
42 * Simple reduction with MPI_Reduce * | |
43 **********************************************************************/ | |
44 // We are reducing at the root for a type that has an associated MPI | |
45 // datatype and operation, so we'll use MPI_Reduce directly. | |
46 template<typename T, typename Op> | |
47 void | |
48 reduce_impl(const communicator& comm, const T* in_values, int n, | |
49 T* out_values, Op op, int root, mpl::true_ /*is_mpi_op*/, | |
50 mpl::true_/*is_mpi_datatype*/) | |
51 { | |
52 BOOST_MPI_CHECK_RESULT(MPI_Reduce, | |
53 (const_cast<T*>(in_values), out_values, n, | |
54 boost::mpi::get_mpi_datatype<T>(*in_values), | |
55 (is_mpi_op<Op, T>::op()), root, comm)); | |
56 } | |
57 | |
58 // We are reducing to the root for a type that has an associated MPI | |
59 // datatype and operation, so we'll use MPI_Reduce directly. | |
60 template<typename T, typename Op> | |
61 void | |
62 reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | |
63 int root, mpl::true_ /*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/) | |
64 { | |
65 BOOST_MPI_CHECK_RESULT(MPI_Reduce, | |
66 (const_cast<T*>(in_values), 0, n, | |
67 boost::mpi::get_mpi_datatype<T>(*in_values), | |
68 (is_mpi_op<Op, T>::op()), root, comm)); | |
69 } | |
70 | |
71 /********************************************************************** | |
72 * User-defined reduction with MPI_Reduce * | |
73 **********************************************************************/ | |
74 | |
75 // We are reducing at the root for a type that has an associated MPI | |
76 // datatype but with a custom operation. We'll use MPI_Reduce | |
77 // directly, but we'll need to create an MPI_Op manually. | |
78 template<typename T, typename Op> | |
79 void | |
80 reduce_impl(const communicator& comm, const T* in_values, int n, | |
81 T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/, | |
82 mpl::true_/*is_mpi_datatype*/) | |
83 { | |
84 user_op<Op, T> mpi_op(op); | |
85 BOOST_MPI_CHECK_RESULT(MPI_Reduce, | |
86 (const_cast<T*>(in_values), out_values, n, | |
87 boost::mpi::get_mpi_datatype<T>(*in_values), | |
88 mpi_op.get_mpi_op(), root, comm)); | |
89 } | |
90 | |
91 // We are reducing to the root for a type that has an associated MPI | |
92 // datatype but with a custom operation. We'll use MPI_Reduce | |
93 // directly, but we'll need to create an MPI_Op manually. | |
94 template<typename T, typename Op> | |
95 void | |
96 reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | |
97 int root, mpl::false_/*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/) | |
98 { | |
99 user_op<Op, T> mpi_op(op); | |
100 BOOST_MPI_CHECK_RESULT(MPI_Reduce, | |
101 (const_cast<T*>(in_values), 0, n, | |
102 boost::mpi::get_mpi_datatype<T>(*in_values), | |
103 mpi_op.get_mpi_op(), root, comm)); | |
104 } | |
105 | |
106 /********************************************************************** | |
107 * User-defined, tree-based reduction for non-MPI data types * | |
108 **********************************************************************/ | |
109 | |
110 // Commutative reduction | |
111 template<typename T, typename Op> | |
112 void | |
113 tree_reduce_impl(const communicator& comm, const T* in_values, int n, | |
114 T* out_values, Op op, int root, | |
115 mpl::true_ /*is_commutative*/) | |
116 { | |
117 std::copy(in_values, in_values + n, out_values); | |
118 | |
119 int size = comm.size(); | |
120 int rank = comm.rank(); | |
121 | |
122 // The computation tree we will use. | |
123 detail::computation_tree tree(rank, size, root); | |
124 | |
125 int tag = environment::collectives_tag(); | |
126 | |
127 MPI_Status status; | |
128 int children = 0; | |
129 for (int child = tree.child_begin(); | |
130 children < tree.branching_factor() && child != root; | |
131 ++children, child = (child + 1) % size) { | |
132 // Receive archive | |
133 packed_iarchive ia(comm); | |
134 detail::packed_archive_recv(comm, child, tag, ia, status); | |
135 | |
136 T incoming; | |
137 for (int i = 0; i < n; ++i) { | |
138 ia >> incoming; | |
139 out_values[i] = op(out_values[i], incoming); | |
140 } | |
141 } | |
142 | |
143 // For non-roots, send the result to the parent. | |
144 if (tree.parent() != rank) { | |
145 packed_oarchive oa(comm); | |
146 for (int i = 0; i < n; ++i) | |
147 oa << out_values[i]; | |
148 detail::packed_archive_send(comm, tree.parent(), tag, oa); | |
149 } | |
150 } | |
151 | |
152 // Commutative reduction from a non-root. | |
153 template<typename T, typename Op> | |
154 void | |
155 tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | |
156 int root, mpl::true_ /*is_commutative*/) | |
157 { | |
158 scoped_array<T> results(new T[n]); | |
159 detail::tree_reduce_impl(comm, in_values, n, results.get(), op, root, | |
160 mpl::true_()); | |
161 } | |
162 | |
163 // Non-commutative reduction | |
164 template<typename T, typename Op> | |
165 void | |
166 tree_reduce_impl(const communicator& comm, const T* in_values, int n, | |
167 T* out_values, Op op, int root, | |
168 mpl::false_ /*is_commutative*/) | |
169 { | |
170 int tag = environment::collectives_tag(); | |
171 | |
172 int left_child = root / 2; | |
173 int right_child = (root + comm.size()) / 2; | |
174 | |
175 MPI_Status status; | |
176 if (left_child != root) { | |
177 // Receive value from the left child and merge it with the value | |
178 // we had incoming. | |
179 packed_iarchive ia(comm); | |
180 detail::packed_archive_recv(comm, left_child, tag, ia, status); | |
181 T incoming; | |
182 for (int i = 0; i < n; ++i) { | |
183 ia >> incoming; | |
184 out_values[i] = op(incoming, in_values[i]); | |
185 } | |
186 } else { | |
187 // There was no left value, so copy our incoming value. | |
188 std::copy(in_values, in_values + n, out_values); | |
189 } | |
190 | |
191 if (right_child != root) { | |
192 // Receive value from the right child and merge it with the | |
193 // value we had incoming. | |
194 packed_iarchive ia(comm); | |
195 detail::packed_archive_recv(comm, right_child, tag, ia, status); | |
196 T incoming; | |
197 for (int i = 0; i < n; ++i) { | |
198 ia >> incoming; | |
199 out_values[i] = op(out_values[i], incoming); | |
200 } | |
201 } | |
202 } | |
203 | |
204 // Non-commutative reduction from a non-root. | |
205 template<typename T, typename Op> | |
206 void | |
207 tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | |
208 int root, mpl::false_ /*is_commutative*/) | |
209 { | |
210 int size = comm.size(); | |
211 int rank = comm.rank(); | |
212 | |
213 int tag = environment::collectives_tag(); | |
214 | |
215 // Determine our parents and children in the commutative binary | |
216 // computation tree. | |
217 int grandparent = root; | |
218 int parent = root; | |
219 int left_bound = 0; | |
220 int right_bound = size; | |
221 int left_child, right_child; | |
222 do { | |
223 left_child = (left_bound + parent) / 2; | |
224 right_child = (parent + right_bound) / 2; | |
225 | |
226 if (rank < parent) { | |
227 // Go left. | |
228 grandparent = parent; | |
229 right_bound = parent; | |
230 parent = left_child; | |
231 } else if (rank > parent) { | |
232 // Go right. | |
233 grandparent = parent; | |
234 left_bound = parent + 1; | |
235 parent = right_child; | |
236 } else { | |
237 // We've found the parent | |
238 break; | |
239 } | |
240 } while (true); | |
241 | |
242 // Our parent is the grandparent of our children. This is a slight | |
243 // abuse of notation, but it makes the send-to-parent below make | |
244 // more sense. | |
245 parent = grandparent; | |
246 | |
247 MPI_Status status; | |
248 scoped_array<T> out_values(new T[n]); | |
249 if (left_child != rank) { | |
250 // Receive value from the left child and merge it with the value | |
251 // we had incoming. | |
252 packed_iarchive ia(comm); | |
253 detail::packed_archive_recv(comm, left_child, tag, ia, status); | |
254 T incoming; | |
255 for (int i = 0; i < n; ++i) { | |
256 ia >> incoming; | |
257 out_values[i] = op(incoming, in_values[i]); | |
258 } | |
259 } else { | |
260 // There was no left value, so copy our incoming value. | |
261 std::copy(in_values, in_values + n, out_values.get()); | |
262 } | |
263 | |
264 if (right_child != rank) { | |
265 // Receive value from the right child and merge it with the | |
266 // value we had incoming. | |
267 packed_iarchive ia(comm); | |
268 detail::packed_archive_recv(comm, right_child, tag, ia, status); | |
269 T incoming; | |
270 for (int i = 0; i < n; ++i) { | |
271 ia >> incoming; | |
272 out_values[i] = op(out_values[i], incoming); | |
273 } | |
274 } | |
275 | |
276 // Send the combined value to our parent. | |
277 packed_oarchive oa(comm); | |
278 for (int i = 0; i < n; ++i) | |
279 oa << out_values[i]; | |
280 detail::packed_archive_send(comm, parent, tag, oa); | |
281 } | |
282 | |
283 // We are reducing at the root for a type that has no associated MPI | |
284 // datatype and operation, so we'll use a simple tree-based | |
285 // algorithm. | |
286 template<typename T, typename Op> | |
287 void | |
288 reduce_impl(const communicator& comm, const T* in_values, int n, | |
289 T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/, | |
290 mpl::false_ /*is_mpi_datatype*/) | |
291 { | |
292 detail::tree_reduce_impl(comm, in_values, n, out_values, op, root, | |
293 is_commutative<Op, T>()); | |
294 } | |
295 | |
296 // We are reducing to the root for a type that has no associated MPI | |
297 // datatype and operation, so we'll use a simple tree-based | |
298 // algorithm. | |
299 template<typename T, typename Op> | |
300 void | |
301 reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | |
302 int root, mpl::false_ /*is_mpi_op*/, | |
303 mpl::false_ /*is_mpi_datatype*/) | |
304 { | |
305 detail::tree_reduce_impl(comm, in_values, n, op, root, | |
306 is_commutative<Op, T>()); | |
307 } | |
308 } // end namespace detail | |
309 | |
310 template<typename T, typename Op> | |
311 void | |
312 reduce(const communicator& comm, const T* in_values, int n, T* out_values, | |
313 Op op, int root) | |
314 { | |
315 if (comm.rank() == root) | |
316 detail::reduce_impl(comm, in_values, n, out_values, op, root, | |
317 is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | |
318 else | |
319 detail::reduce_impl(comm, in_values, n, op, root, | |
320 is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | |
321 } | |
322 | |
323 template<typename T, typename Op> | |
324 void | |
325 reduce(const communicator& comm, const T* in_values, int n, Op op, int root) | |
326 { | |
327 BOOST_ASSERT(comm.rank() != root); | |
328 | |
329 detail::reduce_impl(comm, in_values, n, op, root, | |
330 is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | |
331 } | |
332 | |
333 template<typename T, typename Op> | |
334 void | |
335 reduce(const communicator & comm, std::vector<T> const & in_values, Op op, | |
336 int root) | |
337 { | |
338 reduce(comm, &in_values.front(), in_values.size(), op, root); | |
339 } | |
340 | |
341 template<typename T, typename Op> | |
342 void | |
343 reduce(const communicator & comm, std::vector<T> const & in_values, | |
344 std::vector<T> & out_values, Op op, int root) | |
345 { | |
346 out_values.resize(in_values.size()); | |
347 reduce(comm, &in_values.front(), in_values.size(), &out_values.front(), op, | |
348 root); | |
349 } | |
350 | |
351 | |
352 template<typename T, typename Op> | |
353 void | |
354 reduce(const communicator& comm, const T& in_value, T& out_value, Op op, | |
355 int root) | |
356 { | |
357 if (comm.rank() == root) | |
358 detail::reduce_impl(comm, &in_value, 1, &out_value, op, root, | |
359 is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | |
360 else | |
361 detail::reduce_impl(comm, &in_value, 1, op, root, | |
362 is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | |
363 } | |
364 | |
365 template<typename T, typename Op> | |
366 void reduce(const communicator& comm, const T& in_value, Op op, int root) | |
367 { | |
368 BOOST_ASSERT(comm.rank() != root); | |
369 | |
370 detail::reduce_impl(comm, &in_value, 1, op, root, | |
371 is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | |
372 } | |
373 | |
374 } } // end namespace boost::mpi | |
375 | |
376 #endif // BOOST_MPI_REDUCE_HPP |