Chris@16: // Copyright (C) 2005-2006 Douglas Gregor . Chris@16: Chris@16: // Use, modification and distribution is subject to the Boost Software Chris@16: // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at Chris@16: // http://www.boost.org/LICENSE_1_0.txt) Chris@16: Chris@16: // Message Passing Interface 1.1 -- Section 4. MPI Collectives Chris@16: Chris@16: /** @file collectives.hpp Chris@16: * Chris@16: * This header contains MPI collective operations, which implement Chris@16: * various parallel algorithms that require the coordination of all Chris@16: * processes within a communicator. The header @c collectives_fwd.hpp Chris@16: * provides forward declarations for each of these operations. To Chris@16: * include only specific collective algorithms, use the headers @c Chris@16: * boost/mpi/collectives/algorithm_name.hpp. Chris@16: */ Chris@16: #ifndef BOOST_MPI_COLLECTIVES_HPP Chris@16: #define BOOST_MPI_COLLECTIVES_HPP Chris@16: Chris@16: #include Chris@16: #include Chris@16: #include Chris@16: Chris@16: namespace boost { namespace mpi { Chris@16: /** Chris@16: * @brief Gather the values stored at every process into vectors of Chris@16: * values from each process. Chris@16: * Chris@16: * @c all_gather is a collective algorithm that collects the values Chris@16: * stored at each process into a vector of values indexed by the Chris@16: * process number they came from. The type @c T of the values may be Chris@16: * any type that is serializable or has an associated MPI data type. Chris@16: * Chris@16: * When the type @c T has an associated MPI data type, this routine Chris@16: * invokes @c MPI_Allgather to gather the values. Chris@16: * Chris@16: * @param comm The communicator over which the all-gather will Chris@16: * occur. Chris@16: * Chris@16: * @param in_value The value to be transmitted by each process. To Chris@16: * gather an array of values, @c in_values points to the @c n local Chris@16: * values to be transmitted. 
Chris@16: * Chris@16: * @param out_values A vector or pointer to storage that will be Chris@16: * populated with the values from each process, indexed by the Chris@16: * process ID number. If it is a vector, the vector will be resized Chris@16: * accordingly. Chris@16: */ Chris@16: template Chris@16: void Chris@16: all_gather(const communicator& comm, const T& in_value, Chris@16: std::vector& out_values); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: all_gather(const communicator& comm, const T& in_value, T* out_values); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: all_gather(const communicator& comm, const T* in_values, int n, Chris@16: std::vector& out_values); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: all_gather(const communicator& comm, const T* in_values, int n, T* out_values); Chris@16: Chris@16: /** Chris@16: * @brief Combine the values stored by each process into a single Chris@16: * value available to all processes. Chris@16: * Chris@16: * @c all_reduce is a collective algorithm that combines the values Chris@16: * stored by each process into a single value available to all Chris@16: * processes. The values are combined in a user-defined way, Chris@16: * specified via a function object. The type @c T of the values may Chris@16: * be any type that is serializable or has an associated MPI data Chris@16: * type. One can think of this operation as an @c all_gather, followed Chris@16: * by an @c std::accumulate() over the gathered values and using the Chris@16: * operation @c op. Chris@16: * Chris@16: * When the type @c T has an associated MPI data type, this routine Chris@16: * invokes @c MPI_Allreduce to perform the reduction. If possible, Chris@16: * built-in MPI operations will be used; otherwise, @c all_reduce() Chris@16: * will create a custom MPI_Op for the call to MPI_Allreduce. 
Chris@16: * Chris@16: * @param comm The communicator over which the reduction will Chris@16: * occur. Chris@16: * @param value The local value to be combined with the local Chris@16: * values of every other process. For reducing arrays, @c value Chris@16: * is a pointer to the local values to be reduced and @c n is the Chris@16: * number of values to reduce. See @c reduce for more information. Chris@16: * Chris@16: * If wrapped in an @c inplace_t object, it combines the roles of both Chris@16: * the input and @c out_value, and the local value will be overwritten Chris@16: * (a convenience function @c inplace is provided for the wrapping). Chris@16: * Chris@16: * @param out_value Will receive the result of the reduction Chris@16: * operation. If this parameter is omitted, the outgoing value will Chris@16: * instead be returned. Chris@16: * Chris@16: * @param op The binary operation that combines two values of type Chris@16: * @c T and returns a third value of type @c T. For types @c T that have Chris@16: * associated MPI data types, @c op will either be translated into Chris@16: * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped Chris@16: * directly to a built-in MPI operation. See @c is_mpi_op in the @c Chris@16: * operations.hpp header for more details on this mapping. For any Chris@16: * non-built-in operation, commutativity will be determined by the Chris@16: * @c is_commutative trait (also in @c operations.hpp): users are Chris@16: * encouraged to mark commutative operations as such, because it Chris@16: * gives the implementation additional latitude to optimize the Chris@16: * reduction operation. Chris@16: * Chris@16: * @param n Indicates the size of the buffers of array type. Chris@16: * @returns If no @p out_value parameter is supplied, returns the Chris@16: * result of the reduction operation. 
Chris@16: */ Chris@16: template Chris@16: void Chris@16: all_reduce(const communicator& comm, const T* value, int n, T* out_value, Chris@16: Op op); Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: all_reduce(const communicator& comm, const T& value, T& out_value, Op op); Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: T all_reduce(const communicator& comm, const T& value, Op op); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: all_reduce(const communicator& comm, inplace_t value, int n, Chris@16: Op op); Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: all_reduce(const communicator& comm, inplace_t value, Op op); Chris@16: Chris@16: /** Chris@16: * @brief Send data from every process to every other process. Chris@16: * Chris@16: * @c all_to_all is a collective algorithm that transmits @c p values Chris@16: * from every process to every other process. On process i, jth value Chris@16: * of the @p in_values vector is sent to process j and placed in the Chris@16: * ith position of the @p out_values vector in process @p j. The type Chris@16: * @c T of the values may be any type that is serializable or has an Chris@16: * associated MPI data type. If @c n is provided, then arrays of @p n Chris@16: * values will be transferred from one process to another. Chris@16: * Chris@16: * When the type @c T has an associated MPI data type, this routine Chris@16: * invokes @c MPI_Alltoall to scatter the values. Chris@16: * Chris@16: * @param comm The communicator over which the all-to-all Chris@16: * communication will occur. Chris@16: * Chris@16: * @param in_values A vector or pointer to storage that contains Chris@16: * the values to send to each process, indexed by the process ID Chris@16: * number. 
Chris@16: * Chris@16: * @param out_values A vector or pointer to storage that will be Chris@16: * updated to contain the values received from other processes. The Chris@16: * jth value in @p out_values will come from the process with rank j. Chris@16: */ Chris@16: template Chris@16: void Chris@16: all_to_all(const communicator& comm, const std::vector& in_values, Chris@16: std::vector& out_values); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void all_to_all(const communicator& comm, const T* in_values, T* out_values); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: all_to_all(const communicator& comm, const std::vector& in_values, int n, Chris@16: std::vector& out_values); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: all_to_all(const communicator& comm, const T* in_values, int n, T* out_values); Chris@16: Chris@16: /** Chris@16: * @brief Broadcast a value from a root process to all other Chris@16: * processes. Chris@16: * Chris@16: * @c broadcast is a collective algorithm that transfers a value from Chris@16: * an arbitrary @p root process to every other process that is part of Chris@16: * the given communicator. The @c broadcast algorithm can transmit any Chris@16: * Serializable value, values that have associated MPI data types, Chris@16: * packed archives, skeletons, and the content of skeletons; see the Chris@16: * @c send primitive for communicators for a complete list. The type Chris@16: * @c T shall be the same for all processes that are a part of the Chris@16: * communicator @p comm, unless packed archives are being transferred: Chris@16: * with packed archives, the root sends a @c packed_oarchive or @c Chris@16: * packed_skeleton_oarchive whereas the other processes receive a Chris@16: * @c packed_iarchive or @c packed_skeleton_iarchive, respectively. 
Chris@16: * Chris@16: * When the type @c T has an associated MPI data type, this routine Chris@16: * invokes @c MPI_Bcast to perform the broadcast. Chris@16: * Chris@16: * @param comm The communicator over which the broadcast will Chris@16: * occur. Chris@16: * Chris@16: * @param value The value (or values, if @p n is provided) to be Chris@16: * transmitted (if the rank of @p comm is equal to @p root) or Chris@16: * received (if the rank of @p comm is not equal to @p root). When Chris@16: * the @p value is a @c skeleton_proxy, only the skeleton of the Chris@16: * object will be broadcast. In this case, the @p root will build a Chris@16: * skeleton from the object held in the proxy and all of the Chris@16: * non-roots will reshape the objects held in their proxies based on Chris@16: * the skeleton sent from the root. Chris@16: * Chris@16: * @param n When supplied, the number of values that the pointer @p Chris@16: * values points to, for broadcasting an array of values. The value Chris@16: * of @p n must be the same for all processes in @p comm. Chris@16: * Chris@16: * @param root The rank/process ID of the process that will be Chris@16: * transmitting the value. Chris@16: */ Chris@16: template Chris@16: void broadcast(const communicator& comm, T& value, int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void broadcast(const communicator& comm, T* values, int n, int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void broadcast(const communicator& comm, skeleton_proxy& value, int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: broadcast(const communicator& comm, const skeleton_proxy& value, int root); Chris@16: Chris@16: /** Chris@16: * @brief Gather the values stored at every process into a vector at Chris@16: * the root process. 
Chris@16: * Chris@16: * @c gather is a collective algorithm that collects the values Chris@16: * stored at each process into a vector of values at the @p root Chris@16: * process. This vector is indexed by the process number that the Chris@16: * value came from. The type @c T of the values may be any type that Chris@16: * is serializable or has an associated MPI data type. Chris@16: * Chris@16: * When the type @c T has an associated MPI data type, this routine Chris@16: * invokes @c MPI_Gather to gather the values. Chris@16: * Chris@16: * @param comm The communicator over which the gather will occur. Chris@16: * Chris@16: * @param in_value The value to be transmitted by each process. For Chris@16: * gathering arrays of values, @c in_values points to the @c n local Chris@16: * values to be transmitted by each process. Chris@16: * Chris@16: * @param out_values A vector or pointer to storage that will be Chris@16: * populated with the values from each process, indexed by the Chris@16: * process ID number. If it is a vector, it will be resized Chris@16: * accordingly. For non-root processes, this parameter may be Chris@16: * omitted. If it is still provided, however, it will be unchanged. Chris@16: * Chris@16: * @param root The process ID number that will collect the Chris@16: * values. This value must be the same on all processes. 
Chris@16: */ Chris@16: template Chris@16: void Chris@16: gather(const communicator& comm, const T& in_value, std::vector& out_values, Chris@16: int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: gather(const communicator& comm, const T& in_value, T* out_values, int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void gather(const communicator& comm, const T& in_value, int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: gather(const communicator& comm, const T* in_values, int n, Chris@16: std::vector& out_values, int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: gather(const communicator& comm, const T* in_values, int n, T* out_values, Chris@16: int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void gather(const communicator& comm, const T* in_values, int n, int root); Chris@16: Chris@16: /** Chris@16: * @brief Scatter the values stored at the root to all processes Chris@16: * within the communicator. Chris@16: * Chris@16: * @c scatter is a collective algorithm that scatters the values Chris@16: * stored in the @p root process (inside a vector) to all of the Chris@16: * processes in the communicator. The vector @p in_values (only Chris@16: * significant at the @p root) is indexed by the process number to Chris@16: * which the corresponding value will be sent. The type @c T of the Chris@16: * values may be any type that is serializable or has an associated Chris@16: * MPI data type. Chris@16: * Chris@16: * When the type @c T has an associated MPI data type, this routine Chris@16: * invokes @c MPI_Scatter to scatter the values. Chris@16: * Chris@16: * @param comm The communicator over which the scatter will occur. 
Chris@16: * Chris@16: * @param in_values A vector or pointer to storage that will contain Chris@16: * the values to send to each process, indexed by the process rank. Chris@16: * For non-root processes, this parameter may be omitted. If it is Chris@16: * still provided, however, it will be unchanged. Chris@16: * Chris@16: * @param out_value The value received by each process. When Chris@16: * scattering an array of values, @p out_values points to the @p n Chris@16: * values that will be received by each process. Chris@16: * Chris@16: * @param root The process ID number that will scatter the Chris@16: * values. This value must be the same on all processes. Chris@16: */ Chris@16: template Chris@16: void Chris@16: scatter(const communicator& comm, const std::vector& in_values, T& out_value, Chris@16: int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: scatter(const communicator& comm, const T* in_values, T& out_value, int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void scatter(const communicator& comm, T& out_value, int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: scatter(const communicator& comm, const std::vector& in_values, Chris@16: T* out_values, int n, int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: scatter(const communicator& comm, const T* in_values, T* out_values, int n, Chris@16: int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void scatter(const communicator& comm, T* out_values, int n, int root); Chris@16: Chris@16: /** Chris@16: * @brief Combine the values stored by each process into a single Chris@16: * value at the root. 
Chris@16: * Chris@16: * @c reduce is a collective algorithm that combines the values Chris@16: * stored by each process into a single value at the @c root. The Chris@16: * values can be combined arbitrarily, specified via a function Chris@16: * object. The type @c T of the values may be any type that is Chris@16: * serializable or has an associated MPI data type. One can think of Chris@16: * this operation as a @c gather to the @p root, followed by an @c Chris@16: * std::accumulate() over the gathered values and using the operation Chris@16: * @c op. Chris@16: * Chris@16: * When the type @c T has an associated MPI data type, this routine Chris@16: * invokes @c MPI_Reduce to perform the reduction. If possible, Chris@16: * built-in MPI operations will be used; otherwise, @c reduce() will Chris@16: * create a custom MPI_Op for the call to MPI_Reduce. Chris@16: * Chris@16: * @param comm The communicator over which the reduction will Chris@16: * occur. Chris@16: * Chris@16: * @param in_value The local value to be combined with the local Chris@16: * values of every other process. For reducing arrays, @c in_values Chris@16: * contains a pointer to the local values. In this case, @c n is Chris@16: * the number of values that will be reduced. Reduction occurs Chris@16: * independently for each of the @p n values referenced by @p Chris@16: * in_values, e.g., calling reduce on an array of @p n values is Chris@16: * like calling @c reduce @p n separate times, one for each Chris@16: * location in @p in_values and @p out_values. Chris@16: * Chris@16: * @param out_value Will receive the result of the reduction Chris@16: * operation, but only for the @p root process. Non-root processes Chris@16: * may omit this parameter; if they choose to supply the parameter, Chris@16: * it will be unchanged. For reducing arrays, @c out_values Chris@16: * contains a pointer to the storage for the output values. 
Chris@16: * Chris@16: * @param op The binary operation that combines two values of type Chris@16: * @c T into a third value of type @c T. For types @c T that have Chris@16: * associated MPI data types, @c op will either be translated into Chris@16: * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped Chris@16: * directly to a built-in MPI operation. See @c is_mpi_op in the @c Chris@16: * operations.hpp header for more details on this mapping. For any Chris@16: * non-built-in operation, commutativity will be determined by the Chris@16: * @c is_commutative trait (also in @c operations.hpp): users are Chris@16: * encouraged to mark commutative operations as such, because it Chris@16: * gives the implementation additional latitude to optimize the Chris@16: * reduction operation. Chris@16: * Chris@16: * @param root The process ID number that will receive the final, Chris@16: * combined value. This value must be the same on all processes. Chris@16: */ Chris@16: template Chris@16: void Chris@16: reduce(const communicator& comm, const T& in_value, T& out_value, Op op, Chris@16: int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void reduce(const communicator& comm, const T& in_value, Op op, int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: reduce(const communicator& comm, const T* in_values, int n, T* out_values, Chris@16: Op op, int root); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: reduce(const communicator& comm, const T* in_values, int n, Op op, int root); Chris@16: Chris@16: /** Chris@16: * @brief Compute a prefix reduction of values from all processes in Chris@16: * the communicator. Chris@16: * Chris@16: * @c scan is a collective algorithm that combines the values stored Chris@16: * by each process with the values of all processes with a smaller Chris@16: * rank. 
The values can be arbitrarily combined, specified via a Chris@16: * function object @p op. The type @c T of the values may be any type Chris@16: * that is serializable or has an associated MPI data type. One can Chris@16: * think of this operation as a @c gather to some process, followed Chris@16: * by an @c std::partial_sum() over the gathered values using the Chris@16: * operation @c op. The ith process returns the ith value emitted by Chris@16: * @c std::partial_sum(). Chris@16: * Chris@16: * When the type @c T has an associated MPI data type, this routine Chris@16: * invokes @c MPI_Scan to perform the reduction. If possible, Chris@16: * built-in MPI operations will be used; otherwise, @c scan() will Chris@16: * create a custom @c MPI_Op for the call to MPI_Scan. Chris@16: * Chris@16: * @param comm The communicator over which the prefix reduction Chris@16: * will occur. Chris@16: * Chris@16: * @param in_value The local value to be combined with the local Chris@16: * values of other processes. For the array variant, the @c Chris@16: * in_values parameter points to the @c n local values that will be Chris@16: * combined. Chris@16: * Chris@16: * @param out_value If provided, the ith process will receive the Chris@16: * value @c op(in_value[0], op(in_value[1], op(..., in_value[i]) Chris@16: * ... )). For the array variant, @c out_values contains a pointer Chris@16: * to storage for the @c n output values. The prefix reduction Chris@16: * occurs independently for each of the @p n values referenced by Chris@16: * @p in_values, e.g., calling scan on an array of @p n values is Chris@16: * like calling @c scan @p n separate times, one for each location Chris@16: * in @p in_values and @p out_values. Chris@16: * Chris@16: * @param op The binary operation that combines two values of type Chris@16: * @c T into a third value of type @c T. 
For types @c T that have Chris@16: * associated MPI data types, @c op will either be translated into Chris@16: * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped Chris@16: * directly to a built-in MPI operation. See @c is_mpi_op in the @c Chris@16: * operations.hpp header for more details on this mapping. For any Chris@16: * non-built-in operation, commutativity will be determined by the Chris@16: * @c is_commutative trait (also in @c operations.hpp). Chris@16: * Chris@16: * @returns If no @p out_value parameter is provided, returns the Chris@16: * result of prefix reduction. Chris@16: */ Chris@16: template Chris@16: void Chris@16: scan(const communicator& comm, const T& in_value, T& out_value, Op op); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: T Chris@16: scan(const communicator& comm, const T& in_value, Op op); Chris@16: Chris@16: /** Chris@16: * \overload Chris@16: */ Chris@16: template Chris@16: void Chris@16: scan(const communicator& comm, const T* in_values, int n, T* out_values, Op op); Chris@16: Chris@16: } } // end namespace boost::mpi Chris@16: #endif // BOOST_MPI_COLLECTIVES_HPP Chris@16: Chris@16: #ifndef BOOST_MPI_COLLECTIVES_FORWARD_ONLY Chris@16: // Include implementations of each of the collectives Chris@16: # include Chris@16: # include Chris@16: # include Chris@16: # include Chris@16: # include Chris@16: # include Chris@16: # include Chris@16: # include Chris@16: #endif Chris@16: