DEPENDENCIES/generic/include/boost/mpi/collectives.hpp @ changeset 16:2665513ce2d3

Add boost headers

author: Chris Cannam
date:   Tue, 05 Aug 2014 11:11:38 +0100
// Copyright (C) 2005-2006 Douglas Gregor <doug.gregor -at- gmail.com>.

// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// Message Passing Interface 1.1 -- Section 4. MPI Collectives

/** @file collectives.hpp
 *
 * This header contains MPI collective operations, which implement
 * various parallel algorithms that require the coordination of all
 * processes within a communicator. The header @c collectives_fwd.hpp
 * provides forward declarations for each of these operations. To
 * include only specific collective algorithms, use the headers @c
 * boost/mpi/collectives/algorithm_name.hpp.
 */
#ifndef BOOST_MPI_COLLECTIVES_HPP
#define BOOST_MPI_COLLECTIVES_HPP

#include <boost/mpi/communicator.hpp>
#include <boost/mpi/inplace.hpp>
#include <vector>

namespace boost { namespace mpi {
/**
 * @brief Gather the values stored at every process into vectors of
 * values from each process.
 *
 * @c all_gather is a collective algorithm that collects the values
 * stored at each process into a vector of values indexed by the
 * process number they came from. The type @c T of the values may be
 * any type that is serializable or has an associated MPI data type.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Allgather to gather the values.
 *
 * @param comm The communicator over which the all-gather will
 * occur.
 *
 * @param in_value The value to be transmitted by each process. To
 * gather an array of values, @c in_values points to the @c n local
 * values to be transmitted.
 *
 * @param out_values A vector or pointer to storage that will be
 * populated with the values from each process, indexed by the
 * process ID number. If it is a vector, the vector will be resized
 * accordingly.
 */
template<typename T>
void
all_gather(const communicator& comm, const T& in_value,
           std::vector<T>& out_values);

/**
 * \overload
 */
template<typename T>
void
all_gather(const communicator& comm, const T& in_value, T* out_values);

/**
 * \overload
 */
template<typename T>
void
all_gather(const communicator& comm, const T* in_values, int n,
           std::vector<T>& out_values);

/**
 * \overload
 */
template<typename T>
void
all_gather(const communicator& comm, const T* in_values, int n, T* out_values);

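// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original header). Assuming an
// initialized boost::mpi::environment and a communicator named "world", each
// process contributes its rank and every process receives the full list:
//
//   std::vector<int> ranks;
//   all_gather(world, world.rank(), ranks);
//   // On every process, ranks[i] == i for 0 <= i < world.size().
// ---------------------------------------------------------------------------
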
/**
 * @brief Combine the values stored by each process into a single
 * value available to all processes.
 *
 * @c all_reduce is a collective algorithm that combines the values
 * stored by each process into a single value available to all
 * processes. The values are combined in a user-defined way,
 * specified via a function object. The type @c T of the values may
 * be any type that is serializable or has an associated MPI data
 * type. One can think of this operation as an @c all_gather, followed
 * by an @c std::accumulate() over the gathered values using the
 * operation @c op.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Allreduce to perform the reduction. If possible,
 * built-in MPI operations will be used; otherwise, @c all_reduce()
 * will create a custom MPI_Op for the call to MPI_Allreduce.
 *
 * @param comm The communicator over which the reduction will
 * occur.
 * @param value The local value to be combined with the local
 * values of every other process. For reducing arrays, @c in_values
 * is a pointer to the local values to be reduced and @c n is the
 * number of values to reduce. See @c reduce for more information.
 *
 * If wrapped in an @c inplace_t object, the parameter serves as both
 * input and output, and the local value will be overwritten in place
 * (a convenience function @c inplace is provided for the wrapping).
 *
 * @param out_value Will receive the result of the reduction
 * operation. If this parameter is omitted, the outgoing value will
 * instead be returned.
 *
 * @param op The binary operation that combines two values of type
 * @c T and returns a third value of type @c T. For types @c T that
 * have associated MPI data types, @c op will either be translated into
 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
 * operations.hpp header for more details on this mapping. For any
 * non-built-in operation, commutativity will be determined by the
 * @c is_commutative trait (also in @c operations.hpp): users are
 * encouraged to mark commutative operations as such, because it
 * gives the implementation additional latitude to optimize the
 * reduction operation.
 *
 * @param n Indicates the size of the buffers for the array variants.
 * @returns If no @p out_value parameter is supplied, returns the
 * result of the reduction operation.
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, const T* value, int n, T* out_value,
           Op op);
/**
 * \overload
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, const T& value, T& out_value, Op op);
/**
 * \overload
 */
template<typename T, typename Op>
T all_reduce(const communicator& comm, const T& value, Op op);

/**
 * \overload
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, inplace_t<T*> value, int n,
           Op op);
/**
 * \overload
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, inplace_t<T> value, Op op);

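// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original header). Assuming a
// communicator named "world", this sums one int per process and makes the
// total available everywhere (std::plus<int> from <functional> maps to
// MPI_SUM); compute_local_count() is a hypothetical helper:
//
//   int local = compute_local_count();
//   int total = all_reduce(world, local, std::plus<int>());
//   // Every process now holds the same value of "total".
// ---------------------------------------------------------------------------
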
/**
 * @brief Send data from every process to every other process.
 *
 * @c all_to_all is a collective algorithm that transmits @c p values
 * from every process to every other process. On process @c i, the jth
 * value of the @p in_values vector is sent to process @c j and placed
 * in the ith position of the @p out_values vector on process @c j. The
 * type @c T of the values may be any type that is serializable or has
 * an associated MPI data type. If @c n is provided, then arrays of @p n
 * values will be transferred from one process to another.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Alltoall to scatter the values.
 *
 * @param comm The communicator over which the all-to-all
 * communication will occur.
 *
 * @param in_values A vector or pointer to storage that contains
 * the values to send to each process, indexed by the process ID
 * number.
 *
 * @param out_values A vector or pointer to storage that will be
 * updated to contain the values received from other processes. The
 * jth value in @p out_values will come from the process with rank j.
 */
template<typename T>
void
all_to_all(const communicator& comm, const std::vector<T>& in_values,
           std::vector<T>& out_values);

/**
 * \overload
 */
template<typename T>
void all_to_all(const communicator& comm, const T* in_values, T* out_values);

/**
 * \overload
 */
template<typename T>
void
all_to_all(const communicator& comm, const std::vector<T>& in_values, int n,
           std::vector<T>& out_values);

/**
 * \overload
 */
template<typename T>
void
all_to_all(const communicator& comm, const T* in_values, int n, T* out_values);

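// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original header). Assuming a
// communicator named "world", each process prepares one value per destination
// rank and receives one value from each source rank:
//
//   std::vector<int> outgoing(world.size()), incoming;
//   for (int dest = 0; dest < world.size(); ++dest)
//     outgoing[dest] = world.rank() * 100 + dest;   // value meant for "dest"
//   all_to_all(world, outgoing, incoming);
//   // On every process, incoming[src] == src * 100 + world.rank().
// ---------------------------------------------------------------------------
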
/**
 * @brief Broadcast a value from a root process to all other
 * processes.
 *
 * @c broadcast is a collective algorithm that transfers a value from
 * an arbitrary @p root process to every other process that is part of
 * the given communicator. The @c broadcast algorithm can transmit any
 * Serializable value, values that have associated MPI data types,
 * packed archives, skeletons, and the content of skeletons; see the
 * @c send primitive for communicators for a complete list. The type
 * @c T shall be the same for all processes that are a part of the
 * communicator @p comm, unless packed archives are being transferred:
 * with packed archives, the root sends a @c packed_oarchive or @c
 * packed_skeleton_oarchive whereas the other processes receive a
 * @c packed_iarchive or @c packed_skeleton_iarchive, respectively.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Bcast to perform the broadcast.
 *
 * @param comm The communicator over which the broadcast will
 * occur.
 *
 * @param value The value (or values, if @p n is provided) to be
 * transmitted (if the rank of @p comm is equal to @p root) or
 * received (if the rank of @p comm is not equal to @p root). When
 * the @p value is a @c skeleton_proxy, only the skeleton of the
 * object will be broadcast. In this case, the @p root will build a
 * skeleton from the object held in the proxy and all of the
 * non-roots will reshape the objects held in their proxies based on
 * the skeleton sent from the root.
 *
 * @param n When supplied, the number of values that the pointer @p
 * values points to, for broadcasting an array of values. The value
 * of @p n must be the same for all processes in @p comm.
 *
 * @param root The rank/process ID of the process that will be
 * transmitting the value.
 */
template<typename T>
void broadcast(const communicator& comm, T& value, int root);

/**
 * \overload
 */
template<typename T>
void broadcast(const communicator& comm, T* values, int n, int root);

/**
 * \overload
 */
template<typename T>
void broadcast(const communicator& comm, skeleton_proxy<T>& value, int root);

/**
 * \overload
 */
template<typename T>
void
broadcast(const communicator& comm, const skeleton_proxy<T>& value, int root);

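// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original header). Assuming a
// communicator named "world", rank 0 fills in a std::string (a serializable
// type) and every other rank receives a copy of it:
//
//   std::string config;
//   if (world.rank() == 0)
//     config = "alpha=1,beta=2";
//   broadcast(world, config, 0);
//   // After the call, "config" holds the same text on every process.
// ---------------------------------------------------------------------------
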
/**
 * @brief Gather the values stored at every process into a vector at
 * the root process.
 *
 * @c gather is a collective algorithm that collects the values
 * stored at each process into a vector of values at the @p root
 * process. This vector is indexed by the process number that the
 * value came from. The type @c T of the values may be any type that
 * is serializable or has an associated MPI data type.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Gather to gather the values.
 *
 * @param comm The communicator over which the gather will occur.
 *
 * @param in_value The value to be transmitted by each process. For
 * gathering arrays of values, @c in_values points to the @c n local
 * values to be transmitted.
 *
 * @param out_values A vector or pointer to storage that will be
 * populated with the values from each process, indexed by the
 * process ID number. If it is a vector, it will be resized
 * accordingly. For non-root processes, this parameter may be
 * omitted. If it is still provided, however, it will be unchanged.
 *
 * @param root The process ID number that will collect the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
gather(const communicator& comm, const T& in_value, std::vector<T>& out_values,
       int root);

/**
 * \overload
 */
template<typename T>
void
gather(const communicator& comm, const T& in_value, T* out_values, int root);

/**
 * \overload
 */
template<typename T>
void gather(const communicator& comm, const T& in_value, int root);

/**
 * \overload
 */
template<typename T>
void
gather(const communicator& comm, const T* in_values, int n,
       std::vector<T>& out_values, int root);

/**
 * \overload
 */
template<typename T>
void
gather(const communicator& comm, const T* in_values, int n, T* out_values,
       int root);

/**
 * \overload
 */
template<typename T>
void gather(const communicator& comm, const T* in_values, int n, int root);

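// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original header). Assuming a
// communicator named "world", every process sends its rank; only rank 0
// receives the collected vector, so the other ranks use the overload that
// takes no output argument:
//
//   if (world.rank() == 0) {
//     std::vector<int> ranks;
//     gather(world, world.rank(), ranks, 0);
//     // ranks[i] == i; only rank 0 sees the gathered data.
//   } else {
//     gather(world, world.rank(), 0);
//   }
// ---------------------------------------------------------------------------
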
/**
 * @brief Scatter the values stored at the root to all processes
 * within the communicator.
 *
 * @c scatter is a collective algorithm that scatters the values
 * stored in the @p root process (inside a vector) to all of the
 * processes in the communicator. The vector @p in_values (only
 * significant at the @p root) is indexed by the process number to
 * which the corresponding value will be sent. The type @c T of the
 * values may be any type that is serializable or has an associated
 * MPI data type.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Scatter to scatter the values.
 *
 * @param comm The communicator over which the scatter will occur.
 *
 * @param in_values A vector or pointer to storage that will contain
 * the values to send to each process, indexed by the process rank.
 * For non-root processes, this parameter may be omitted. If it is
 * still provided, however, it will be unchanged.
 *
 * @param out_value The value received by each process. When
 * scattering an array of values, @p out_values points to the @p n
 * values that will be received by each process.
 *
 * @param root The process ID number that will scatter the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
scatter(const communicator& comm, const std::vector<T>& in_values, T& out_value,
        int root);

/**
 * \overload
 */
template<typename T>
void
scatter(const communicator& comm, const T* in_values, T& out_value, int root);

/**
 * \overload
 */
template<typename T>
void scatter(const communicator& comm, T& out_value, int root);

/**
 * \overload
 */
template<typename T>
void
scatter(const communicator& comm, const std::vector<T>& in_values,
        T* out_values, int n, int root);

/**
 * \overload
 */
template<typename T>
void
scatter(const communicator& comm, const T* in_values, T* out_values, int n,
        int root);

/**
 * \overload
 */
template<typename T>
void scatter(const communicator& comm, T* out_values, int n, int root);

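// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original header). Assuming a
// communicator named "world", rank 0 prepares one value per destination rank
// and each process (including the root) receives exactly one of them:
//
//   int my_value = 0;
//   if (world.rank() == 0) {
//     std::vector<int> payload(world.size());
//     for (int dest = 0; dest < world.size(); ++dest)
//       payload[dest] = dest * 10;
//     scatter(world, payload, my_value, 0);
//   } else {
//     scatter(world, my_value, 0);
//   }
//   // Every process now holds my_value == world.rank() * 10.
// ---------------------------------------------------------------------------
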
/**
 * @brief Combine the values stored by each process into a single
 * value at the root.
 *
 * @c reduce is a collective algorithm that combines the values
 * stored by each process into a single value at the @c root. The
 * values can be combined arbitrarily, specified via a function
 * object. The type @c T of the values may be any type that is
 * serializable or has an associated MPI data type. One can think of
 * this operation as a @c gather to the @p root, followed by an @c
 * std::accumulate() over the gathered values using the operation
 * @c op.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Reduce to perform the reduction. If possible,
 * built-in MPI operations will be used; otherwise, @c reduce() will
 * create a custom MPI_Op for the call to MPI_Reduce.
 *
 * @param comm The communicator over which the reduction will
 * occur.
 *
 * @param in_value The local value to be combined with the local
 * values of every other process. For reducing arrays, @c in_values
 * contains a pointer to the local values. In this case, @c n is
 * the number of values that will be reduced. Reduction occurs
 * independently for each of the @p n values referenced by @p
 * in_values, e.g., calling reduce on an array of @p n values is
 * like calling @c reduce @p n separate times, one for each
 * location in @p in_values and @p out_values.
 *
 * @param out_value Will receive the result of the reduction
 * operation, but only for the @p root process. Non-root processes
 * may omit this parameter; if they choose to supply the parameter,
 * it will be unchanged. For reducing arrays, @c out_values
 * contains a pointer to the storage for the output values.
 *
 * @param op The binary operation that combines two values of type
 * @c T into a third value of type @c T. For types @c T that have
 * associated MPI data types, @c op will either be translated into
 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
 * operations.hpp header for more details on this mapping. For any
 * non-built-in operation, commutativity will be determined by the
 * @c is_commutative trait (also in @c operations.hpp): users are
 * encouraged to mark commutative operations as such, because it
 * gives the implementation additional latitude to optimize the
 * reduction operation.
 *
 * @param root The process ID number that will receive the final,
 * combined value. This value must be the same on all processes.
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
       int root);

/**
 * \overload
 */
template<typename T, typename Op>
void reduce(const communicator& comm, const T& in_value, Op op, int root);

/**
 * \overload
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T* in_values, int n, T* out_values,
       Op op, int root);

/**
 * \overload
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T* in_values, int n, Op op, int root);

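// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original header). Assuming a
// communicator named "world", this sums one int per process into a total that
// only rank 0 receives; the other ranks call the overload without an output
// argument (std::plus<int> is from <functional>):
//
//   int local = world.rank() + 1;
//   if (world.rank() == 0) {
//     int total = 0;
//     reduce(world, local, total, std::plus<int>(), 0);
//     // total == 1 + 2 + ... + world.size(), visible on rank 0 only.
//   } else {
//     reduce(world, local, std::plus<int>(), 0);
//   }
// ---------------------------------------------------------------------------
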
/**
 * @brief Compute a prefix reduction of values from all processes in
 * the communicator.
 *
 * @c scan is a collective algorithm that combines the values stored
 * by each process with the values of all processes with a smaller
 * rank. The values can be combined arbitrarily, specified via a
 * function object @p op. The type @c T of the values may be any type
 * that is serializable or has an associated MPI data type. One can
 * think of this operation as a @c gather to some process, followed
 * by an @c std::partial_sum() over the gathered values using the
 * operation @c op. The ith process returns the ith value emitted by
 * @c std::partial_sum().
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Scan to perform the reduction. If possible,
 * built-in MPI operations will be used; otherwise, @c scan() will
 * create a custom @c MPI_Op for the call to MPI_Scan.
 *
 * @param comm The communicator over which the prefix reduction
 * will occur.
 *
 * @param in_value The local value to be combined with the local
 * values of other processes. For the array variant, the @c
 * in_values parameter points to the @c n local values that will be
 * combined.
 *
 * @param out_value If provided, the ith process will receive the
 * value @c op(in_value[0], op(in_value[1], op(..., in_value[i])
 * ... )). For the array variant, @c out_values contains a pointer
 * to storage for the @c n output values. The prefix reduction
 * occurs independently for each of the @p n values referenced by
 * @p in_values, e.g., calling scan on an array of @p n values is
 * like calling @c scan @p n separate times, one for each location
 * in @p in_values and @p out_values.
 *
 * @param op The binary operation that combines two values of type
 * @c T into a third value of type @c T. For types @c T that have
 * associated MPI data types, @c op will either be translated into
 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
 * operations.hpp header for more details on this mapping. For any
 * non-built-in operation, commutativity will be determined by the
 * @c is_commutative trait (also in @c operations.hpp).
 *
 * @returns If no @p out_value parameter is provided, returns the
 * result of prefix reduction.
 */
template<typename T, typename Op>
void
scan(const communicator& comm, const T& in_value, T& out_value, Op op);

/**
 * \overload
 */
template<typename T, typename Op>
T
scan(const communicator& comm, const T& in_value, Op op);

/**
 * \overload
 */
template<typename T, typename Op>
void
scan(const communicator& comm, const T* in_values, int n, T* out_values, Op op);

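// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original header). Assuming a
// communicator named "world", each process contributes the value 1 and
// receives the inclusive prefix sum over ranks 0..rank:
//
//   int position = scan(world, 1, std::plus<int>());
//   // position == world.rank() + 1 on every process.
// ---------------------------------------------------------------------------
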
} } // end namespace boost::mpi
#endif // BOOST_MPI_COLLECTIVES_HPP

#ifndef BOOST_MPI_COLLECTIVES_FORWARD_ONLY
// Include implementations of each of the collectives
# include <boost/mpi/collectives/all_gather.hpp>
# include <boost/mpi/collectives/all_reduce.hpp>
# include <boost/mpi/collectives/all_to_all.hpp>
# include <boost/mpi/collectives/broadcast.hpp>
# include <boost/mpi/collectives/gather.hpp>
# include <boost/mpi/collectives/scatter.hpp>
# include <boost/mpi/collectives/reduce.hpp>
# include <boost/mpi/collectives/scan.hpp>
#endif