// Copyright (C) 2005-2006 Douglas Gregor.
// Copyright (C) 2004 The Trustees of Indiana University

// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

//   Authors: Douglas Gregor
//            Andrew Lumsdaine

// Message Passing Interface 1.1 -- Section 4.9.1. Reduce
#ifndef BOOST_MPI_REDUCE_HPP
#define BOOST_MPI_REDUCE_HPP

#include <boost/mpi/exception.hpp>
#include <boost/mpi/datatype.hpp>

// For (de-)serializing sends and receives
#include <boost/mpi/packed_oarchive.hpp>
#include <boost/mpi/packed_iarchive.hpp>

// For packed_[io]archive sends and receives
#include <boost/mpi/detail/point_to_point.hpp>

#include <boost/mpi/communicator.hpp>
#include <boost/mpi/environment.hpp>
#include <boost/mpi/detail/computation_tree.hpp>
#include <boost/mpi/operations.hpp>
#include <algorithm>
#include <exception>
#include <boost/assert.hpp>
#include <boost/scoped_array.hpp>

namespace boost { namespace mpi {


/************************************************************************
 * Implementation details                                               *
 ************************************************************************/
namespace detail {
/**********************************************************************
 * Simple reduction with MPI_Reduce                                   *
 **********************************************************************/
// We are reducing at the root for a type that has an associated MPI
// datatype and operation, so we'll use MPI_Reduce directly.
Chris@16: template Chris@16: void Chris@16: reduce_impl(const communicator& comm, const T* in_values, int n, Chris@16: T* out_values, Op op, int root, mpl::true_ /*is_mpi_op*/, Chris@16: mpl::true_/*is_mpi_datatype*/) Chris@16: { Chris@16: BOOST_MPI_CHECK_RESULT(MPI_Reduce, Chris@16: (const_cast(in_values), out_values, n, Chris@16: boost::mpi::get_mpi_datatype(*in_values), Chris@16: (is_mpi_op::op()), root, comm)); Chris@16: } Chris@16: Chris@16: // We are reducing to the root for a type that has an associated MPI Chris@16: // datatype and operation, so we'll use MPI_Reduce directly. Chris@16: template Chris@16: void Chris@16: reduce_impl(const communicator& comm, const T* in_values, int n, Op op, Chris@16: int root, mpl::true_ /*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/) Chris@16: { Chris@16: BOOST_MPI_CHECK_RESULT(MPI_Reduce, Chris@16: (const_cast(in_values), 0, n, Chris@16: boost::mpi::get_mpi_datatype(*in_values), Chris@16: (is_mpi_op::op()), root, comm)); Chris@16: } Chris@16: Chris@16: /********************************************************************** Chris@16: * User-defined reduction with MPI_Reduce * Chris@16: **********************************************************************/ Chris@16: Chris@16: // We are reducing at the root for a type that has an associated MPI Chris@16: // datatype but with a custom operation. We'll use MPI_Reduce Chris@16: // directly, but we'll need to create an MPI_Op manually. 
Chris@16: template Chris@16: void Chris@16: reduce_impl(const communicator& comm, const T* in_values, int n, Chris@16: T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/, Chris@16: mpl::true_/*is_mpi_datatype*/) Chris@16: { Chris@16: user_op mpi_op(op); Chris@16: BOOST_MPI_CHECK_RESULT(MPI_Reduce, Chris@16: (const_cast(in_values), out_values, n, Chris@16: boost::mpi::get_mpi_datatype(*in_values), Chris@16: mpi_op.get_mpi_op(), root, comm)); Chris@16: } Chris@16: Chris@16: // We are reducing to the root for a type that has an associated MPI Chris@16: // datatype but with a custom operation. We'll use MPI_Reduce Chris@16: // directly, but we'll need to create an MPI_Op manually. Chris@16: template Chris@16: void Chris@16: reduce_impl(const communicator& comm, const T* in_values, int n, Op op, Chris@16: int root, mpl::false_/*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/) Chris@16: { Chris@16: user_op mpi_op(op); Chris@16: BOOST_MPI_CHECK_RESULT(MPI_Reduce, Chris@16: (const_cast(in_values), 0, n, Chris@16: boost::mpi::get_mpi_datatype(*in_values), Chris@16: mpi_op.get_mpi_op(), root, comm)); Chris@16: } Chris@16: Chris@16: /********************************************************************** Chris@16: * User-defined, tree-based reduction for non-MPI data types * Chris@16: **********************************************************************/ Chris@16: Chris@16: // Commutative reduction Chris@16: template Chris@16: void Chris@16: tree_reduce_impl(const communicator& comm, const T* in_values, int n, Chris@16: T* out_values, Op op, int root, Chris@16: mpl::true_ /*is_commutative*/) Chris@16: { Chris@16: std::copy(in_values, in_values + n, out_values); Chris@16: Chris@16: int size = comm.size(); Chris@16: int rank = comm.rank(); Chris@16: Chris@16: // The computation tree we will use. 
Chris@16: detail::computation_tree tree(rank, size, root); Chris@16: Chris@16: int tag = environment::collectives_tag(); Chris@16: Chris@16: MPI_Status status; Chris@16: int children = 0; Chris@16: for (int child = tree.child_begin(); Chris@16: children < tree.branching_factor() && child != root; Chris@16: ++children, child = (child + 1) % size) { Chris@16: // Receive archive Chris@16: packed_iarchive ia(comm); Chris@16: detail::packed_archive_recv(comm, child, tag, ia, status); Chris@16: Chris@16: T incoming; Chris@16: for (int i = 0; i < n; ++i) { Chris@16: ia >> incoming; Chris@16: out_values[i] = op(out_values[i], incoming); Chris@16: } Chris@16: } Chris@16: Chris@16: // For non-roots, send the result to the parent. Chris@16: if (tree.parent() != rank) { Chris@16: packed_oarchive oa(comm); Chris@16: for (int i = 0; i < n; ++i) Chris@16: oa << out_values[i]; Chris@16: detail::packed_archive_send(comm, tree.parent(), tag, oa); Chris@16: } Chris@16: } Chris@16: Chris@16: // Commutative reduction from a non-root. Chris@16: template Chris@16: void Chris@16: tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op, Chris@16: int root, mpl::true_ /*is_commutative*/) Chris@16: { Chris@16: scoped_array results(new T[n]); Chris@16: detail::tree_reduce_impl(comm, in_values, n, results.get(), op, root, Chris@16: mpl::true_()); Chris@16: } Chris@16: Chris@16: // Non-commutative reduction Chris@16: template Chris@16: void Chris@16: tree_reduce_impl(const communicator& comm, const T* in_values, int n, Chris@16: T* out_values, Op op, int root, Chris@16: mpl::false_ /*is_commutative*/) Chris@16: { Chris@16: int tag = environment::collectives_tag(); Chris@16: Chris@16: int left_child = root / 2; Chris@16: int right_child = (root + comm.size()) / 2; Chris@16: Chris@16: MPI_Status status; Chris@16: if (left_child != root) { Chris@16: // Receive value from the left child and merge it with the value Chris@16: // we had incoming. 
Chris@16: packed_iarchive ia(comm); Chris@16: detail::packed_archive_recv(comm, left_child, tag, ia, status); Chris@16: T incoming; Chris@16: for (int i = 0; i < n; ++i) { Chris@16: ia >> incoming; Chris@16: out_values[i] = op(incoming, in_values[i]); Chris@16: } Chris@16: } else { Chris@16: // There was no left value, so copy our incoming value. Chris@16: std::copy(in_values, in_values + n, out_values); Chris@16: } Chris@16: Chris@16: if (right_child != root) { Chris@16: // Receive value from the right child and merge it with the Chris@16: // value we had incoming. Chris@16: packed_iarchive ia(comm); Chris@16: detail::packed_archive_recv(comm, right_child, tag, ia, status); Chris@16: T incoming; Chris@16: for (int i = 0; i < n; ++i) { Chris@16: ia >> incoming; Chris@16: out_values[i] = op(out_values[i], incoming); Chris@16: } Chris@16: } Chris@16: } Chris@16: Chris@16: // Non-commutative reduction from a non-root. Chris@16: template Chris@16: void Chris@16: tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op, Chris@16: int root, mpl::false_ /*is_commutative*/) Chris@16: { Chris@16: int size = comm.size(); Chris@16: int rank = comm.rank(); Chris@16: Chris@16: int tag = environment::collectives_tag(); Chris@16: Chris@16: // Determine our parents and children in the commutative binary Chris@16: // computation tree. Chris@16: int grandparent = root; Chris@16: int parent = root; Chris@16: int left_bound = 0; Chris@16: int right_bound = size; Chris@16: int left_child, right_child; Chris@16: do { Chris@16: left_child = (left_bound + parent) / 2; Chris@16: right_child = (parent + right_bound) / 2; Chris@16: Chris@16: if (rank < parent) { Chris@16: // Go left. Chris@16: grandparent = parent; Chris@16: right_bound = parent; Chris@16: parent = left_child; Chris@16: } else if (rank > parent) { Chris@16: // Go right. 
Chris@16: grandparent = parent; Chris@16: left_bound = parent + 1; Chris@16: parent = right_child; Chris@16: } else { Chris@16: // We've found the parent Chris@16: break; Chris@16: } Chris@16: } while (true); Chris@16: Chris@16: // Our parent is the grandparent of our children. This is a slight Chris@16: // abuse of notation, but it makes the send-to-parent below make Chris@16: // more sense. Chris@16: parent = grandparent; Chris@16: Chris@16: MPI_Status status; Chris@16: scoped_array out_values(new T[n]); Chris@16: if (left_child != rank) { Chris@16: // Receive value from the left child and merge it with the value Chris@16: // we had incoming. Chris@16: packed_iarchive ia(comm); Chris@16: detail::packed_archive_recv(comm, left_child, tag, ia, status); Chris@16: T incoming; Chris@16: for (int i = 0; i < n; ++i) { Chris@16: ia >> incoming; Chris@16: out_values[i] = op(incoming, in_values[i]); Chris@16: } Chris@16: } else { Chris@16: // There was no left value, so copy our incoming value. Chris@16: std::copy(in_values, in_values + n, out_values.get()); Chris@16: } Chris@16: Chris@16: if (right_child != rank) { Chris@16: // Receive value from the right child and merge it with the Chris@16: // value we had incoming. Chris@16: packed_iarchive ia(comm); Chris@16: detail::packed_archive_recv(comm, right_child, tag, ia, status); Chris@16: T incoming; Chris@16: for (int i = 0; i < n; ++i) { Chris@16: ia >> incoming; Chris@16: out_values[i] = op(out_values[i], incoming); Chris@16: } Chris@16: } Chris@16: Chris@16: // Send the combined value to our parent. Chris@16: packed_oarchive oa(comm); Chris@16: for (int i = 0; i < n; ++i) Chris@16: oa << out_values[i]; Chris@16: detail::packed_archive_send(comm, parent, tag, oa); Chris@16: } Chris@16: Chris@16: // We are reducing at the root for a type that has no associated MPI Chris@16: // datatype and operation, so we'll use a simple tree-based Chris@16: // algorithm. 
Chris@16: template Chris@16: void Chris@16: reduce_impl(const communicator& comm, const T* in_values, int n, Chris@16: T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/, Chris@16: mpl::false_ /*is_mpi_datatype*/) Chris@16: { Chris@16: detail::tree_reduce_impl(comm, in_values, n, out_values, op, root, Chris@16: is_commutative()); Chris@16: } Chris@16: Chris@16: // We are reducing to the root for a type that has no associated MPI Chris@16: // datatype and operation, so we'll use a simple tree-based Chris@16: // algorithm. Chris@16: template Chris@16: void Chris@16: reduce_impl(const communicator& comm, const T* in_values, int n, Op op, Chris@16: int root, mpl::false_ /*is_mpi_op*/, Chris@16: mpl::false_ /*is_mpi_datatype*/) Chris@16: { Chris@16: detail::tree_reduce_impl(comm, in_values, n, op, root, Chris@16: is_commutative()); Chris@16: } Chris@16: } // end namespace detail Chris@16: Chris@16: template Chris@16: void Chris@16: reduce(const communicator& comm, const T* in_values, int n, T* out_values, Chris@16: Op op, int root) Chris@16: { Chris@16: if (comm.rank() == root) Chris@16: detail::reduce_impl(comm, in_values, n, out_values, op, root, Chris@16: is_mpi_op(), is_mpi_datatype()); Chris@16: else Chris@16: detail::reduce_impl(comm, in_values, n, op, root, Chris@16: is_mpi_op(), is_mpi_datatype()); Chris@16: } Chris@16: Chris@16: template Chris@16: void Chris@16: reduce(const communicator& comm, const T* in_values, int n, Op op, int root) Chris@16: { Chris@16: BOOST_ASSERT(comm.rank() != root); Chris@16: Chris@16: detail::reduce_impl(comm, in_values, n, op, root, Chris@16: is_mpi_op(), is_mpi_datatype()); Chris@16: } Chris@16: Chris@16: template Chris@16: void Chris@16: reduce(const communicator & comm, std::vector const & in_values, Op op, Chris@16: int root) Chris@16: { Chris@16: reduce(comm, &in_values.front(), in_values.size(), op, root); Chris@16: } Chris@16: Chris@16: template Chris@16: void Chris@16: reduce(const communicator & comm, std::vector 
const & in_values, Chris@16: std::vector & out_values, Op op, int root) Chris@16: { Chris@16: out_values.resize(in_values.size()); Chris@16: reduce(comm, &in_values.front(), in_values.size(), &out_values.front(), op, Chris@16: root); Chris@16: } Chris@16: Chris@16: Chris@16: template Chris@16: void Chris@16: reduce(const communicator& comm, const T& in_value, T& out_value, Op op, Chris@16: int root) Chris@16: { Chris@16: if (comm.rank() == root) Chris@16: detail::reduce_impl(comm, &in_value, 1, &out_value, op, root, Chris@16: is_mpi_op(), is_mpi_datatype()); Chris@16: else Chris@16: detail::reduce_impl(comm, &in_value, 1, op, root, Chris@16: is_mpi_op(), is_mpi_datatype()); Chris@16: } Chris@16: Chris@16: template Chris@16: void reduce(const communicator& comm, const T& in_value, Op op, int root) Chris@16: { Chris@16: BOOST_ASSERT(comm.rank() != root); Chris@16: Chris@16: detail::reduce_impl(comm, &in_value, 1, op, root, Chris@16: is_mpi_op(), is_mpi_datatype()); Chris@16: } Chris@16: Chris@16: } } // end namespace boost::mpi Chris@16: Chris@16: #endif // BOOST_MPI_REDUCE_HPP