Chris@16
|
1 // Copyright (C) 2005-2006 Douglas Gregor <doug.gregor -at- gmail.com>.
|
Chris@16
|
2
|
Chris@16
|
3 // Use, modification and distribution is subject to the Boost Software
|
Chris@16
|
4 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
Chris@16
|
5 // http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
6
|
Chris@16
|
7 // Message Passing Interface 1.1 -- Section 4. MPI Collectives
|
Chris@16
|
8
|
Chris@16
|
9 /** @file collectives.hpp
|
Chris@16
|
10 *
|
Chris@16
|
11 * This header contains MPI collective operations, which implement
|
Chris@16
|
12 * various parallel algorithms that require the coordination of all
|
Chris@16
|
13 * processes within a communicator. The header @c collectives_fwd.hpp
|
Chris@16
|
14 * provides forward declarations for each of these operations. To
|
Chris@16
|
15 * include only specific collective algorithms, use the headers @c
|
Chris@16
|
16 * boost/mpi/collectives/algorithm_name.hpp.
|
Chris@16
|
17 */
|
Chris@16
|
18 #ifndef BOOST_MPI_COLLECTIVES_HPP
|
Chris@16
|
19 #define BOOST_MPI_COLLECTIVES_HPP
|
Chris@16
|
20
|
Chris@16
|
21 #include <boost/mpi/communicator.hpp>
|
Chris@16
|
22 #include <boost/mpi/inplace.hpp>
|
Chris@16
|
23 #include <vector>
|
Chris@16
|
24
|
Chris@16
|
25 namespace boost { namespace mpi {
|
Chris@16
|
26 /**
|
Chris@16
|
27 * @brief Gather the values stored at every process into vectors of
|
Chris@16
|
28 * values from each process.
|
Chris@16
|
29 *
|
Chris@16
|
30 * @c all_gather is a collective algorithm that collects the values
|
Chris@16
|
31 * stored at each process into a vector of values indexed by the
|
Chris@16
|
32 * process number they came from. The type @c T of the values may be
|
Chris@16
|
33 * any type that is serializable or has an associated MPI data type.
|
Chris@16
|
34 *
|
Chris@16
|
35 * When the type @c T has an associated MPI data type, this routine
|
Chris@16
|
36 * invokes @c MPI_Allgather to gather the values.
|
Chris@16
|
37 *
|
Chris@16
|
38 * @param comm The communicator over which the all-gather will
|
Chris@16
|
39 * occur.
|
Chris@16
|
40 *
|
Chris@16
|
41 * @param in_value The value to be transmitted by each process. To
|
Chris@16
|
42 * gather an array of values, @c in_values points to the @c n local
|
Chris@16
|
43 * values to be transmitted.
|
Chris@16
|
44 *
|
Chris@16
|
45 * @param out_values A vector or pointer to storage that will be
|
Chris@16
|
46 * populated with the values from each process, indexed by the
|
Chris@16
|
47 * process ID number. If it is a vector, the vector will be resized
|
Chris@16
|
48 * accordingly.
|
Chris@16
|
49 */
|
Chris@16
|
50 template<typename T>
|
Chris@16
|
51 void
|
Chris@16
|
52 all_gather(const communicator& comm, const T& in_value,
|
Chris@16
|
53 std::vector<T>& out_values);
|
Chris@16
|
54
|
Chris@16
|
55 /**
|
Chris@16
|
56 * \overload
|
Chris@16
|
57 */
|
Chris@16
|
58 template<typename T>
|
Chris@16
|
59 void
|
Chris@16
|
60 all_gather(const communicator& comm, const T& in_value, T* out_values);
|
Chris@16
|
61
|
Chris@16
|
62 /**
|
Chris@16
|
63 * \overload
|
Chris@16
|
64 */
|
Chris@16
|
65 template<typename T>
|
Chris@16
|
66 void
|
Chris@16
|
67 all_gather(const communicator& comm, const T* in_values, int n,
|
Chris@16
|
68 std::vector<T>& out_values);
|
Chris@16
|
69
|
Chris@16
|
70 /**
|
Chris@16
|
71 * \overload
|
Chris@16
|
72 */
|
Chris@16
|
73 template<typename T>
|
Chris@16
|
74 void
|
Chris@16
|
75 all_gather(const communicator& comm, const T* in_values, int n, T* out_values);
|
Chris@16
|
76
|
Chris@16
|
77 /**
|
Chris@16
|
78 * @brief Combine the values stored by each process into a single
|
Chris@16
|
79 * value available to all processes.
|
Chris@16
|
80 *
|
Chris@16
|
81 * @c all_reduce is a collective algorithm that combines the values
|
Chris@16
|
82 * stored by each process into a single value available to all
|
Chris@16
|
83 * processes. The values are combined in a user-defined way,
|
Chris@16
|
84 * specified via a function object. The type @c T of the values may
|
Chris@16
|
85 * be any type that is serializable or has an associated MPI data
|
Chris@16
|
86 * type. One can think of this operation as a @c all_gather, followed
|
Chris@16
|
87 * by an @c std::accumulate() over the gathered values and using the
|
Chris@16
|
88 * operation @c op.
|
Chris@16
|
89 *
|
Chris@16
|
90 * When the type @c T has an associated MPI data type, this routine
|
Chris@16
|
91 * invokes @c MPI_Allreduce to perform the reduction. If possible,
|
Chris@16
|
92 * built-in MPI operations will be used; otherwise, @c all_reduce()
|
Chris@16
|
93 * will create a custom MPI_Op for the call to MPI_Allreduce.
|
Chris@16
|
94 *
|
Chris@16
|
95 * @param comm The communicator over which the reduction will
|
Chris@16
|
96 * occur.
|
Chris@16
|
97 * @param value The local value to be combined with the local
|
Chris@16
|
98 * values of every other process. For reducing arrays, @c value
|
Chris@16
|
99 * is a pointer to the local values to be reduced and @c n is the
|
Chris@16
|
100 * number of values to reduce. See @c reduce for more information.
|
Chris@16
|
101 *
|
Chris@16
|
102 * If wrapped in a @c inplace_t object, combine the usage of both
|
Chris@16
|
103 * input and @c out_value and the local value will be overwritten
|
Chris@16
|
104 * (a convenience function @c inplace is provided for the wrapping).
|
Chris@16
|
105 *
|
Chris@16
|
106 * @param out_value Will receive the result of the reduction
|
Chris@16
|
107 * operation. If this parameter is omitted, the outgoing value will
|
Chris@16
|
108 * instead be returned.
|
Chris@16
|
109 *
|
Chris@16
|
110 * @param op The binary operation that combines two values of type
|
Chris@16
|
111 * @c T and returns a third value of type @c T. For types @c T that have
|
Chris@16
|
112 * associated MPI data types, @c op will either be translated into
|
Chris@16
|
113 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
|
Chris@16
|
114 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
|
Chris@16
|
115 * operations.hpp header for more details on this mapping. For any
|
Chris@16
|
116 * non-built-in operation, commutativity will be determined by the
|
Chris@16
|
117 * @c is_commutative trait (also in @c operations.hpp): users are
|
Chris@16
|
118 * encouraged to mark commutative operations as such, because it
|
Chris@16
|
119 * gives the implementation additional latitude to optimize the
|
Chris@16
|
120 * reduction operation.
|
Chris@16
|
121 *
|
Chris@16
|
122 * @param n Indicates the size of the buffers of array type.
|
Chris@16
|
123 * @returns If no @p out_value parameter is supplied, returns the
|
Chris@16
|
124 * result of the reduction operation.
|
Chris@16
|
125 */
|
Chris@16
|
126 template<typename T, typename Op>
|
Chris@16
|
127 void
|
Chris@16
|
128 all_reduce(const communicator& comm, const T* value, int n, T* out_value,
|
Chris@16
|
129 Op op);
|
Chris@16
|
130 /**
|
Chris@16
|
131 * \overload
|
Chris@16
|
132 */
|
Chris@16
|
133 template<typename T, typename Op>
|
Chris@16
|
134 void
|
Chris@16
|
135 all_reduce(const communicator& comm, const T& value, T& out_value, Op op);
|
Chris@16
|
136 /**
|
Chris@16
|
137 * \overload
|
Chris@16
|
138 */
|
Chris@16
|
139 template<typename T, typename Op>
|
Chris@16
|
140 T all_reduce(const communicator& comm, const T& value, Op op);
|
Chris@16
|
141
|
Chris@16
|
142 /**
|
Chris@16
|
143 * \overload
|
Chris@16
|
144 */
|
Chris@16
|
145 template<typename T, typename Op>
|
Chris@16
|
146 void
|
Chris@16
|
147 all_reduce(const communicator& comm, inplace_t<T*> value, int n,
|
Chris@16
|
148 Op op);
|
Chris@16
|
149 /**
|
Chris@16
|
150 * \overload
|
Chris@16
|
151 */
|
Chris@16
|
152 template<typename T, typename Op>
|
Chris@16
|
153 void
|
Chris@16
|
154 all_reduce(const communicator& comm, inplace_t<T> value, Op op);
|
Chris@16
|
155
|
Chris@16
|
156 /**
|
Chris@16
|
157 * @brief Send data from every process to every other process.
|
Chris@16
|
158 *
|
Chris@16
|
159 * @c all_to_all is a collective algorithm that transmits @c p values
|
Chris@16
|
160 * from every process to every other process. On process i, the jth value
|
Chris@16
|
161 * of the @p in_values vector is sent to process j and placed in the
|
Chris@16
|
162 * ith position of the @p out_values vector in process @p j. The type
|
Chris@16
|
163 * @c T of the values may be any type that is serializable or has an
|
Chris@16
|
164 * associated MPI data type. If @c n is provided, then arrays of @p n
|
Chris@16
|
165 * values will be transferred from one process to another.
|
Chris@16
|
166 *
|
Chris@16
|
167 * When the type @c T has an associated MPI data type, this routine
|
Chris@16
|
168 * invokes @c MPI_Alltoall to scatter the values.
|
Chris@16
|
169 *
|
Chris@16
|
170 * @param comm The communicator over which the all-to-all
|
Chris@16
|
171 * communication will occur.
|
Chris@16
|
172 *
|
Chris@16
|
173 * @param in_values A vector or pointer to storage that contains
|
Chris@16
|
174 * the values to send to each process, indexed by the process ID
|
Chris@16
|
175 * number.
|
Chris@16
|
176 *
|
Chris@16
|
177 * @param out_values A vector or pointer to storage that will be
|
Chris@16
|
178 * updated to contain the values received from other processes. The
|
Chris@16
|
179 * jth value in @p out_values will come from the process with rank j.
|
Chris@16
|
180 */
|
Chris@16
|
181 template<typename T>
|
Chris@16
|
182 void
|
Chris@16
|
183 all_to_all(const communicator& comm, const std::vector<T>& in_values,
|
Chris@16
|
184 std::vector<T>& out_values);
|
Chris@16
|
185
|
Chris@16
|
186 /**
|
Chris@16
|
187 * \overload
|
Chris@16
|
188 */
|
Chris@16
|
189 template<typename T>
|
Chris@16
|
190 void all_to_all(const communicator& comm, const T* in_values, T* out_values);
|
Chris@16
|
191
|
Chris@16
|
192 /**
|
Chris@16
|
193 * \overload
|
Chris@16
|
194 */
|
Chris@16
|
195 template<typename T>
|
Chris@16
|
196 void
|
Chris@16
|
197 all_to_all(const communicator& comm, const std::vector<T>& in_values, int n,
|
Chris@16
|
198 std::vector<T>& out_values);
|
Chris@16
|
199
|
Chris@16
|
200 /**
|
Chris@16
|
201 * \overload
|
Chris@16
|
202 */
|
Chris@16
|
203 template<typename T>
|
Chris@16
|
204 void
|
Chris@16
|
205 all_to_all(const communicator& comm, const T* in_values, int n, T* out_values);
|
Chris@16
|
206
|
Chris@16
|
207 /**
|
Chris@16
|
208 * @brief Broadcast a value from a root process to all other
|
Chris@16
|
209 * processes.
|
Chris@16
|
210 *
|
Chris@16
|
211 * @c broadcast is a collective algorithm that transfers a value from
|
Chris@16
|
212 * an arbitrary @p root process to every other process that is part of
|
Chris@16
|
213 * the given communicator. The @c broadcast algorithm can transmit any
|
Chris@16
|
214 * Serializable value, values that have associated MPI data types,
|
Chris@16
|
215 * packed archives, skeletons, and the content of skeletons; see the
|
Chris@16
|
216 * @c send primitive for communicators for a complete list. The type
|
Chris@16
|
217 * @c T shall be the same for all processes that are a part of the
|
Chris@16
|
218 * communicator @p comm, unless packed archives are being transferred:
|
Chris@16
|
219 * with packed archives, the root sends a @c packed_oarchive or @c
|
Chris@16
|
220 * packed_skeleton_oarchive whereas the other processes receive a
|
Chris@16
|
221 * @c packed_iarchive or @c packed_skeleton_iarchive, respectively.
|
Chris@16
|
222 *
|
Chris@16
|
223 * When the type @c T has an associated MPI data type, this routine
|
Chris@16
|
224 * invokes @c MPI_Bcast to perform the broadcast.
|
Chris@16
|
225 *
|
Chris@16
|
226 * @param comm The communicator over which the broadcast will
|
Chris@16
|
227 * occur.
|
Chris@16
|
228 *
|
Chris@16
|
229 * @param value The value (or values, if @p n is provided) to be
|
Chris@16
|
230 * transmitted (if the rank of @p comm is equal to @p root) or
|
Chris@16
|
231 * received (if the rank of @p comm is not equal to @p root). When
|
Chris@16
|
232 * the @p value is a @c skeleton_proxy, only the skeleton of the
|
Chris@16
|
233 * object will be broadcast. In this case, the @p root will build a
|
Chris@16
|
234 * skeleton from the object held in the proxy and all of the
|
Chris@16
|
235 * non-roots will reshape the objects held in their proxies based on
|
Chris@16
|
236 * the skeleton sent from the root.
|
Chris@16
|
237 *
|
Chris@16
|
238 * @param n When supplied, the number of values that the pointer @p
|
Chris@16
|
239 * values points to, for broadcasting an array of values. The value
|
Chris@16
|
240 * of @p n must be the same for all processes in @p comm.
|
Chris@16
|
241 *
|
Chris@16
|
242 * @param root The rank/process ID of the process that will be
|
Chris@16
|
243 * transmitting the value.
|
Chris@16
|
244 */
|
Chris@16
|
245 template<typename T>
|
Chris@16
|
246 void broadcast(const communicator& comm, T& value, int root);
|
Chris@16
|
247
|
Chris@16
|
248 /**
|
Chris@16
|
249 * \overload
|
Chris@16
|
250 */
|
Chris@16
|
251 template<typename T>
|
Chris@16
|
252 void broadcast(const communicator& comm, T* values, int n, int root);
|
Chris@16
|
253
|
Chris@16
|
254 /**
|
Chris@16
|
255 * \overload
|
Chris@16
|
256 */
|
Chris@16
|
257 template<typename T>
|
Chris@16
|
258 void broadcast(const communicator& comm, skeleton_proxy<T>& value, int root);
|
Chris@16
|
259
|
Chris@16
|
260 /**
|
Chris@16
|
261 * \overload
|
Chris@16
|
262 */
|
Chris@16
|
263 template<typename T>
|
Chris@16
|
264 void
|
Chris@16
|
265 broadcast(const communicator& comm, const skeleton_proxy<T>& value, int root);
|
Chris@16
|
266
|
Chris@16
|
267 /**
|
Chris@16
|
268 * @brief Gather the values stored at every process into a vector at
|
Chris@16
|
269 * the root process.
|
Chris@16
|
270 *
|
Chris@16
|
271 * @c gather is a collective algorithm that collects the values
|
Chris@16
|
272 * stored at each process into a vector of values at the @p root
|
Chris@16
|
273 * process. This vector is indexed by the process number that the
|
Chris@16
|
274 * value came from. The type @c T of the values may be any type that
|
Chris@16
|
275 * is serializable or has an associated MPI data type.
|
Chris@16
|
276 *
|
Chris@16
|
277 * When the type @c T has an associated MPI data type, this routine
|
Chris@16
|
278 * invokes @c MPI_Gather to gather the values.
|
Chris@16
|
279 *
|
Chris@16
|
280 * @param comm The communicator over which the gather will occur.
|
Chris@16
|
281 *
|
Chris@16
|
282 * @param in_value The value to be transmitted by each process. For
|
Chris@16
|
283 * gathering arrays of values, @c in_values points to storage for
|
Chris@16
|
284 * the @c n local values.
|
Chris@16
|
285 *
|
Chris@16
|
286 * @param out_values A vector or pointer to storage that will be
|
Chris@16
|
287 * populated with the values from each process, indexed by the
|
Chris@16
|
288 * process ID number. If it is a vector, it will be resized
|
Chris@16
|
289 * accordingly. For non-root processes, this parameter may be
|
Chris@16
|
290 * omitted. If it is still provided, however, it will be unchanged.
|
Chris@16
|
291 *
|
Chris@16
|
292 * @param root The process ID number that will collect the
|
Chris@16
|
293 * values. This value must be the same on all processes.
|
Chris@16
|
294 */
|
Chris@16
|
295 template<typename T>
|
Chris@16
|
296 void
|
Chris@16
|
297 gather(const communicator& comm, const T& in_value, std::vector<T>& out_values,
|
Chris@16
|
298 int root);
|
Chris@16
|
299
|
Chris@16
|
300 /**
|
Chris@16
|
301 * \overload
|
Chris@16
|
302 */
|
Chris@16
|
303 template<typename T>
|
Chris@16
|
304 void
|
Chris@16
|
305 gather(const communicator& comm, const T& in_value, T* out_values, int root);
|
Chris@16
|
306
|
Chris@16
|
307 /**
|
Chris@16
|
308 * \overload
|
Chris@16
|
309 */
|
Chris@16
|
310 template<typename T>
|
Chris@16
|
311 void gather(const communicator& comm, const T& in_value, int root);
|
Chris@16
|
312
|
Chris@16
|
313 /**
|
Chris@16
|
314 * \overload
|
Chris@16
|
315 */
|
Chris@16
|
316 template<typename T>
|
Chris@16
|
317 void
|
Chris@16
|
318 gather(const communicator& comm, const T* in_values, int n,
|
Chris@16
|
319 std::vector<T>& out_values, int root);
|
Chris@16
|
320
|
Chris@16
|
321 /**
|
Chris@16
|
322 * \overload
|
Chris@16
|
323 */
|
Chris@16
|
324 template<typename T>
|
Chris@16
|
325 void
|
Chris@16
|
326 gather(const communicator& comm, const T* in_values, int n, T* out_values,
|
Chris@16
|
327 int root);
|
Chris@16
|
328
|
Chris@16
|
329 /**
|
Chris@16
|
330 * \overload
|
Chris@16
|
331 */
|
Chris@16
|
332 template<typename T>
|
Chris@16
|
333 void gather(const communicator& comm, const T* in_values, int n, int root);
|
Chris@16
|
334
|
Chris@16
|
335 /**
|
Chris@16
|
336 * @brief Scatter the values stored at the root to all processes
|
Chris@16
|
337 * within the communicator.
|
Chris@16
|
338 *
|
Chris@16
|
339 * @c scatter is a collective algorithm that scatters the values
|
Chris@16
|
340 * stored in the @p root process (inside a vector) to all of the
|
Chris@16
|
341 * processes in the communicator. The vector @p in_values (only
|
Chris@16
|
342 * significant at the @p root) is indexed by the process number to
|
Chris@16
|
343 * which the corresponding value will be sent. The type @c T of the
|
Chris@16
|
344 * values may be any type that is serializable or has an associated
|
Chris@16
|
345 * MPI data type.
|
Chris@16
|
346 *
|
Chris@16
|
347 * When the type @c T has an associated MPI data type, this routine
|
Chris@16
|
348 * invokes @c MPI_Scatter to scatter the values.
|
Chris@16
|
349 *
|
Chris@16
|
350 * @param comm The communicator over which the scatter will occur.
|
Chris@16
|
351 *
|
Chris@16
|
352 * @param in_values A vector or pointer to storage that will contain
|
Chris@16
|
353 * the values to send to each process, indexed by the process rank.
|
Chris@16
|
354 * For non-root processes, this parameter may be omitted. If it is
|
Chris@16
|
355 * still provided, however, it will be unchanged.
|
Chris@16
|
356 *
|
Chris@16
|
357 * @param out_value The value received by each process. When
|
Chris@16
|
358 * scattering an array of values, @p out_values points to the @p n
|
Chris@16
|
359 * values that will be received by each process.
|
Chris@16
|
360 *
|
Chris@16
|
361 * @param root The process ID number that will scatter the
|
Chris@16
|
362 * values. This value must be the same on all processes.
|
Chris@16
|
363 */
|
Chris@16
|
364 template<typename T>
|
Chris@16
|
365 void
|
Chris@16
|
366 scatter(const communicator& comm, const std::vector<T>& in_values, T& out_value,
|
Chris@16
|
367 int root);
|
Chris@16
|
368
|
Chris@16
|
369 /**
|
Chris@16
|
370 * \overload
|
Chris@16
|
371 */
|
Chris@16
|
372 template<typename T>
|
Chris@16
|
373 void
|
Chris@16
|
374 scatter(const communicator& comm, const T* in_values, T& out_value, int root);
|
Chris@16
|
375
|
Chris@16
|
376 /**
|
Chris@16
|
377 * \overload
|
Chris@16
|
378 */
|
Chris@16
|
379 template<typename T>
|
Chris@16
|
380 void scatter(const communicator& comm, T& out_value, int root);
|
Chris@16
|
381
|
Chris@16
|
382 /**
|
Chris@16
|
383 * \overload
|
Chris@16
|
384 */
|
Chris@16
|
385 template<typename T>
|
Chris@16
|
386 void
|
Chris@16
|
387 scatter(const communicator& comm, const std::vector<T>& in_values,
|
Chris@16
|
388 T* out_values, int n, int root);
|
Chris@16
|
389
|
Chris@16
|
390 /**
|
Chris@16
|
391 * \overload
|
Chris@16
|
392 */
|
Chris@16
|
393 template<typename T>
|
Chris@16
|
394 void
|
Chris@16
|
395 scatter(const communicator& comm, const T* in_values, T* out_values, int n,
|
Chris@16
|
396 int root);
|
Chris@16
|
397
|
Chris@16
|
398 /**
|
Chris@16
|
399 * \overload
|
Chris@16
|
400 */
|
Chris@16
|
401 template<typename T>
|
Chris@16
|
402 void scatter(const communicator& comm, T* out_values, int n, int root);
|
Chris@16
|
403
|
Chris@16
|
404 /**
|
Chris@16
|
405 * @brief Combine the values stored by each process into a single
|
Chris@16
|
406 * value at the root.
|
Chris@16
|
407 *
|
Chris@16
|
408 * @c reduce is a collective algorithm that combines the values
|
Chris@16
|
409 * stored by each process into a single value at the @c root. The
|
Chris@16
|
410 * values can be combined arbitrarily, specified via a function
|
Chris@16
|
411 * object. The type @c T of the values may be any type that is
|
Chris@16
|
412 * serializable or has an associated MPI data type. One can think of
|
Chris@16
|
413 * this operation as a @c gather to the @p root, followed by an @c
|
Chris@16
|
414 * std::accumulate() over the gathered values and using the operation
|
Chris@16
|
415 * @c op.
|
Chris@16
|
416 *
|
Chris@16
|
417 * When the type @c T has an associated MPI data type, this routine
|
Chris@16
|
418 * invokes @c MPI_Reduce to perform the reduction. If possible,
|
Chris@16
|
419 * built-in MPI operations will be used; otherwise, @c reduce() will
|
Chris@16
|
420 * create a custom MPI_Op for the call to MPI_Reduce.
|
Chris@16
|
421 *
|
Chris@16
|
422 * @param comm The communicator over which the reduction will
|
Chris@16
|
423 * occur.
|
Chris@16
|
424 *
|
Chris@16
|
425 * @param in_value The local value to be combined with the local
|
Chris@16
|
426 * values of every other process. For reducing arrays, @c in_values
|
Chris@16
|
427 * contains a pointer to the local values. In this case, @c n is
|
Chris@16
|
428 * the number of values that will be reduced. Reduction occurs
|
Chris@16
|
429 * independently for each of the @p n values referenced by @p
|
Chris@16
|
430 * in_values, e.g., calling reduce on an array of @p n values is
|
Chris@16
|
431 * like calling @c reduce @p n separate times, one for each
|
Chris@16
|
432 * location in @p in_values and @p out_values.
|
Chris@16
|
433 *
|
Chris@16
|
434 * @param out_value Will receive the result of the reduction
|
Chris@16
|
435 * operation, but only for the @p root process. Non-root processes
|
Chris@16
|
436 * may omit this parameter; if they choose to supply the parameter,
|
Chris@16
|
437 * it will be unchanged. For reducing arrays, @c out_values
|
Chris@16
|
438 * contains a pointer to the storage for the output values.
|
Chris@16
|
439 *
|
Chris@16
|
440 * @param op The binary operation that combines two values of type
|
Chris@16
|
441 * @c T into a third value of type @c T. For types @c T that have
|
Chris@16
|
442 * associated MPI data types, @c op will either be translated into
|
Chris@16
|
443 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
|
Chris@16
|
444 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
|
Chris@16
|
445 * operations.hpp header for more details on this mapping. For any
|
Chris@16
|
446 * non-built-in operation, commutativity will be determined by the
|
Chris@16
|
447 * @c is_commutative trait (also in @c operations.hpp): users are
|
Chris@16
|
448 * encouraged to mark commutative operations as such, because it
|
Chris@16
|
449 * gives the implementation additional latitude to optimize the
|
Chris@16
|
450 * reduction operation.
|
Chris@16
|
451 *
|
Chris@16
|
452 * @param root The process ID number that will receive the final,
|
Chris@16
|
453 * combined value. This value must be the same on all processes.
|
Chris@16
|
454 */
|
Chris@16
|
455 template<typename T, typename Op>
|
Chris@16
|
456 void
|
Chris@16
|
457 reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
|
Chris@16
|
458 int root);
|
Chris@16
|
459
|
Chris@16
|
460 /**
|
Chris@16
|
461 * \overload
|
Chris@16
|
462 */
|
Chris@16
|
463 template<typename T, typename Op>
|
Chris@16
|
464 void reduce(const communicator& comm, const T& in_value, Op op, int root);
|
Chris@16
|
465
|
Chris@16
|
466 /**
|
Chris@16
|
467 * \overload
|
Chris@16
|
468 */
|
Chris@16
|
469 template<typename T, typename Op>
|
Chris@16
|
470 void
|
Chris@16
|
471 reduce(const communicator& comm, const T* in_values, int n, T* out_values,
|
Chris@16
|
472 Op op, int root);
|
Chris@16
|
473
|
Chris@16
|
474 /**
|
Chris@16
|
475 * \overload
|
Chris@16
|
476 */
|
Chris@16
|
477 template<typename T, typename Op>
|
Chris@16
|
478 void
|
Chris@16
|
479 reduce(const communicator& comm, const T* in_values, int n, Op op, int root);
|
Chris@16
|
480
|
Chris@16
|
481 /**
|
Chris@16
|
482 * @brief Compute a prefix reduction of values from all processes in
|
Chris@16
|
483 * the communicator.
|
Chris@16
|
484 *
|
Chris@16
|
485 * @c scan is a collective algorithm that combines the values stored
|
Chris@16
|
486 * by each process with the values of all processes with a smaller
|
Chris@16
|
487 * rank. The values can be arbitrarily combined, specified via a
|
Chris@16
|
488 * function object @p op. The type @c T of the values may be any type
|
Chris@16
|
489 * that is serializable or has an associated MPI data type. One can
|
Chris@16
|
490 * think of this operation as a @c gather to some process, followed
|
Chris@16
|
491 * by an @c std::partial_sum() over the gathered values using the
|
Chris@16
|
492 * operation @c op. The ith process returns the ith value emitted by
|
Chris@16
|
493 * @c std::partial_sum().
|
Chris@16
|
494 *
|
Chris@16
|
495 * When the type @c T has an associated MPI data type, this routine
|
Chris@16
|
496 * invokes @c MPI_Scan to perform the reduction. If possible,
|
Chris@16
|
497 * built-in MPI operations will be used; otherwise, @c scan() will
|
Chris@16
|
498 * create a custom @c MPI_Op for the call to MPI_Scan.
|
Chris@16
|
499 *
|
Chris@16
|
500 * @param comm The communicator over which the prefix reduction
|
Chris@16
|
501 * will occur.
|
Chris@16
|
502 *
|
Chris@16
|
503 * @param in_value The local value to be combined with the local
|
Chris@16
|
504 * values of other processes. For the array variant, the @c
|
Chris@16
|
505 * in_values parameter points to the @c n local values that will be
|
Chris@16
|
506 * combined.
|
Chris@16
|
507 *
|
Chris@16
|
508 * @param out_value If provided, the ith process will receive the
|
Chris@16
|
509 * value @c op(in_value[0], op(in_value[1], op(..., in_value[i])
|
Chris@16
|
510 * ... )). For the array variant, @c out_values contains a pointer
|
Chris@16
|
511 * to storage for the @c n output values. The prefix reduction
|
Chris@16
|
512 * occurs independently for each of the @p n values referenced by
|
Chris@16
|
513 * @p in_values, e.g., calling scan on an array of @p n values is
|
Chris@16
|
514 * like calling @c scan @p n separate times, one for each location
|
Chris@16
|
515 * in @p in_values and @p out_values.
|
Chris@16
|
516 *
|
Chris@16
|
517 * @param op The binary operation that combines two values of type
|
Chris@16
|
518 * @c T into a third value of type @c T. For types @c T that have
|
Chris@16
|
519 * associated MPI data types, @c op will either be translated into
|
Chris@16
|
520 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
|
Chris@16
|
521 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
|
Chris@16
|
522 * operations.hpp header for more details on this mapping. For any
|
Chris@16
|
523 * non-built-in operation, commutativity will be determined by the
|
Chris@16
|
524 * @c is_commutative trait (also in @c operations.hpp).
|
Chris@16
|
525 *
|
Chris@16
|
526 * @returns If no @p out_value parameter is provided, returns the
|
Chris@16
|
527 * result of prefix reduction.
|
Chris@16
|
528 */
|
Chris@16
|
529 template<typename T, typename Op>
|
Chris@16
|
530 void
|
Chris@16
|
531 scan(const communicator& comm, const T& in_value, T& out_value, Op op);
|
Chris@16
|
532
|
Chris@16
|
533 /**
|
Chris@16
|
534 * \overload
|
Chris@16
|
535 */
|
Chris@16
|
536 template<typename T, typename Op>
|
Chris@16
|
537 T
|
Chris@16
|
538 scan(const communicator& comm, const T& in_value, Op op);
|
Chris@16
|
539
|
Chris@16
|
540 /**
|
Chris@16
|
541 * \overload
|
Chris@16
|
542 */
|
Chris@16
|
543 template<typename T, typename Op>
|
Chris@16
|
544 void
|
Chris@16
|
545 scan(const communicator& comm, const T* in_values, int n, T* out_values, Op op);
|
Chris@16
|
546
|
Chris@16
|
547 } } // end namespace boost::mpi
|
Chris@16
|
548 #endif // BOOST_MPI_COLLECTIVES_HPP
|
Chris@16
|
549
|
Chris@16
|
550 #ifndef BOOST_MPI_COLLECTIVES_FORWARD_ONLY
|
Chris@16
|
551 // Include implementations of each of the collectives
|
Chris@16
|
552 # include <boost/mpi/collectives/all_gather.hpp>
|
Chris@16
|
553 # include <boost/mpi/collectives/all_reduce.hpp>
|
Chris@16
|
554 # include <boost/mpi/collectives/all_to_all.hpp>
|
Chris@16
|
555 # include <boost/mpi/collectives/broadcast.hpp>
|
Chris@16
|
556 # include <boost/mpi/collectives/gather.hpp>
|
Chris@16
|
557 # include <boost/mpi/collectives/scatter.hpp>
|
Chris@16
|
558 # include <boost/mpi/collectives/reduce.hpp>
|
Chris@16
|
559 # include <boost/mpi/collectives/scan.hpp>
|
Chris@16
|
560 #endif
|
Chris@16
|
561
|