Chris@16
|
1 // Copyright (C) 2004-2006 The Trustees of Indiana University.
|
Chris@16
|
2
|
Chris@16
|
3 // Use, modification and distribution is subject to the Boost Software
|
Chris@16
|
4 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
Chris@16
|
5 // http://www.boost.org/LICENSE_1_0.txt)
|
Chris@16
|
6
|
Chris@16
|
7 // Authors: Brian Barrett
|
Chris@16
|
8 // Douglas Gregor
|
Chris@16
|
9 // Andrew Lumsdaine
|
Chris@16
|
10 #ifndef BOOST_GRAPH_PARALLEL_CC_PS_HPP
|
Chris@16
|
11 #define BOOST_GRAPH_PARALLEL_CC_PS_HPP
|
Chris@16
|
12
|
Chris@16
|
13 #ifndef BOOST_GRAPH_USE_MPI
|
Chris@16
|
14 #error "Parallel BGL files should not be included unless <boost/graph/use_mpi.hpp> has been included"
|
Chris@16
|
15 #endif
|
Chris@16
|
16
|
Chris@16
|
17 #include <boost/assert.hpp>
|
Chris@16
|
18 #include <boost/property_map/property_map.hpp>
|
Chris@16
|
19 #include <boost/graph/parallel/algorithm.hpp>
|
Chris@16
|
20 #include <boost/pending/indirect_cmp.hpp>
|
Chris@16
|
21 #include <boost/graph/graph_traits.hpp>
|
Chris@16
|
22 #include <boost/graph/overloading.hpp>
|
Chris@16
|
23 #include <boost/graph/distributed/concepts.hpp>
|
Chris@16
|
24 #include <boost/graph/parallel/properties.hpp>
|
Chris@16
|
25 #include <boost/graph/parallel/process_group.hpp>
|
Chris@16
|
26 #include <boost/optional.hpp>
|
Chris@16
|
27 #include <algorithm>
|
Chris@16
|
28 #include <vector>
|
Chris@16
|
29 #include <queue>
|
Chris@16
|
30 #include <limits>
|
Chris@16
|
31 #include <map>
|
Chris@16
|
32 #include <boost/graph/parallel/container_traits.hpp>
|
Chris@16
|
33 #include <boost/graph/iteration_macros.hpp>
|
Chris@16
|
34
|
Chris@16
|
35
|
Chris@16
|
36 // Connected components algorithm based on a parallel search.
|
Chris@16
|
37 //
|
Chris@16
|
38 // Every N nodes starts a parallel search from the first vertex in
|
Chris@16
|
39 // their local vertex list during the first superstep (the other nodes
|
Chris@16
|
40 // remain idle during the first superstep to reduce the number of
|
Chris@16
|
41 // conflicts in numbering the components). At each superstep, all new
|
Chris@16
|
42 // component mappings from remote nodes are handled. If there is no
|
Chris@16
|
43 // work from remote updates, a new vertex is removed from the local
|
Chris@16
|
44 // list and added to the work queue.
|
Chris@16
|
45 //
|
Chris@16
|
46 // Components are allocated from the component_value_allocator object,
|
Chris@16
|
47 // which ensures that a given component number is unique in the
|
Chris@16
|
48 // system, currently by using the rank and number of processes to
|
Chris@16
|
49 // stride allocations.
|
Chris@16
|
50 //
|
Chris@16
|
51 // When two components are discovered to actually be the same
|
Chris@16
|
52 // component, a mapping is created in the collisions object. The
|
Chris@16
|
53 // lower component number is prefered in the resolution, so component
|
Chris@16
|
54 // numbering resolution is consistent. After the search has exhausted
|
Chris@16
|
55 // all vertices in the graph, the mapping is shared with all
|
Chris@16
|
56 // processes, and they independently resolve the comonent mapping (so
|
Chris@16
|
57 // O((N * NP) + (V * NP)) work, in O(N + V) time, where N is the
|
Chris@16
|
58 // number of mappings and V is the number of local vertices). This
|
Chris@16
|
59 // phase can likely be significantly sped up if a clever algorithm for
|
Chris@16
|
60 // the reduction can be found.
|
Chris@16
|
61 namespace boost { namespace graph { namespace distributed {
|
Chris@16
|
62 namespace cc_ps_detail {
|
Chris@16
|
63 // Local object for allocating component numbers. There are two
|
Chris@16
|
64 // places this happens in the code, and I was getting sick of them
|
Chris@16
|
65 // getting out of sync. Components are not tightly packed in
|
Chris@16
|
66 // numbering, but are numbered to ensure each rank has its own
|
Chris@16
|
67 // independent sets of numberings.
|
Chris@16
|
68 template<typename component_value_type>
|
Chris@16
|
69 class component_value_allocator {
|
Chris@16
|
70 public:
|
Chris@16
|
71 component_value_allocator(int num, int size) :
|
Chris@16
|
72 last(0), num(num), size(size)
|
Chris@16
|
73 {
|
Chris@16
|
74 }
|
Chris@16
|
75
|
Chris@16
|
76 component_value_type allocate(void)
|
Chris@16
|
77 {
|
Chris@16
|
78 component_value_type ret = num + (last * size);
|
Chris@16
|
79 last++;
|
Chris@16
|
80 return ret;
|
Chris@16
|
81 }
|
Chris@16
|
82
|
Chris@16
|
83 private:
|
Chris@16
|
84 component_value_type last;
|
Chris@16
|
85 int num;
|
Chris@16
|
86 int size;
|
Chris@16
|
87 };
|
Chris@16
|
88
|
Chris@16
|
89
|
Chris@16
|
90 // Map of the "collisions" between component names in the global
|
Chris@16
|
91 // component mapping. TO make cleanup easier, component numbers
|
Chris@16
|
92 // are added, pointing to themselves, when a new component is
|
Chris@16
|
93 // found. In order to make the results deterministic, the lower
|
Chris@16
|
94 // component number is always taken. The resolver will drill
|
Chris@16
|
95 // through the map until it finds a component entry that points to
|
Chris@16
|
96 // itself as the next value, allowing some cleanup to happen at
|
Chris@16
|
97 // update() time. Attempts are also made to update the mapping
|
Chris@16
|
98 // when new entries are created.
|
Chris@16
|
99 //
|
Chris@16
|
100 // Note that there's an assumption that the entire mapping is
|
Chris@16
|
101 // shared during the end of the algorithm, but before component
|
Chris@16
|
102 // name resolution.
|
Chris@16
|
103 template<typename component_value_type>
|
Chris@16
|
104 class collision_map {
|
Chris@16
|
105 public:
|
Chris@16
|
106 collision_map() : num_unique(0)
|
Chris@16
|
107 {
|
Chris@16
|
108 }
|
Chris@16
|
109
|
Chris@16
|
110 // add new component mapping first time component is used. Own
|
Chris@16
|
111 // function only so that we can sanity check there isn't already
|
Chris@16
|
112 // a mapping for that component number (which would be bad)
|
Chris@16
|
113 void add(const component_value_type &a)
|
Chris@16
|
114 {
|
Chris@16
|
115 BOOST_ASSERT(collisions.count(a) == 0);
|
Chris@16
|
116 collisions[a] = a;
|
Chris@16
|
117 }
|
Chris@16
|
118
|
Chris@16
|
119 // add a mapping between component values saying they're the
|
Chris@16
|
120 // same component
|
Chris@16
|
121 void add(const component_value_type &a, const component_value_type &b)
|
Chris@16
|
122 {
|
Chris@16
|
123 component_value_type high, low, tmp;
|
Chris@16
|
124 if (a > b) {
|
Chris@16
|
125 high = a;
|
Chris@16
|
126 low = b;
|
Chris@16
|
127 } else {
|
Chris@16
|
128 high = b;
|
Chris@16
|
129 low = a;
|
Chris@16
|
130 }
|
Chris@16
|
131
|
Chris@16
|
132 if (collisions.count(high) != 0 && collisions[high] != low) {
|
Chris@16
|
133 tmp = collisions[high];
|
Chris@16
|
134 if (tmp > low) {
|
Chris@16
|
135 collisions[tmp] = low;
|
Chris@16
|
136 collisions[high] = low;
|
Chris@16
|
137 } else {
|
Chris@16
|
138 collisions[low] = tmp;
|
Chris@16
|
139 collisions[high] = tmp;
|
Chris@16
|
140 }
|
Chris@16
|
141 } else {
|
Chris@16
|
142 collisions[high] = low;
|
Chris@16
|
143 }
|
Chris@16
|
144
|
Chris@16
|
145 }
|
Chris@16
|
146
|
Chris@16
|
147 // get the "real" component number for the given component.
|
Chris@16
|
148 // Used to resolve mapping at end of run.
|
Chris@16
|
149 component_value_type update(component_value_type a)
|
Chris@16
|
150 {
|
Chris@16
|
151 BOOST_ASSERT(num_unique > 0);
|
Chris@16
|
152 BOOST_ASSERT(collisions.count(a) != 0);
|
Chris@16
|
153 return collisions[a];
|
Chris@16
|
154 }
|
Chris@16
|
155
|
Chris@16
|
156 // collapse the collisions tree, so that update is a one lookup
|
Chris@16
|
157 // operation. Count unique components at the same time.
|
Chris@16
|
158 void uniqify(void)
|
Chris@16
|
159 {
|
Chris@16
|
160 typename std::map<component_value_type, component_value_type>::iterator i, end;
|
Chris@16
|
161
|
Chris@16
|
162 end = collisions.end();
|
Chris@16
|
163 for (i = collisions.begin() ; i != end ; ++i) {
|
Chris@16
|
164 if (i->first == i->second) {
|
Chris@16
|
165 num_unique++;
|
Chris@16
|
166 } else {
|
Chris@16
|
167 i->second = collisions[i->second];
|
Chris@16
|
168 }
|
Chris@16
|
169 }
|
Chris@16
|
170 }
|
Chris@16
|
171
|
Chris@16
|
172 // get the number of component entries that have an associated
|
Chris@16
|
173 // component number of themselves, which are the real components
|
Chris@16
|
174 // used in the final mapping. This is the number of unique
|
Chris@16
|
175 // components in the graph.
|
Chris@16
|
176 int unique(void)
|
Chris@16
|
177 {
|
Chris@16
|
178 BOOST_ASSERT(num_unique > 0);
|
Chris@16
|
179 return num_unique;
|
Chris@16
|
180 }
|
Chris@16
|
181
|
Chris@16
|
182 // "serialize" into a vector for communication.
|
Chris@16
|
183 std::vector<component_value_type> serialize(void)
|
Chris@16
|
184 {
|
Chris@16
|
185 std::vector<component_value_type> ret;
|
Chris@16
|
186 typename std::map<component_value_type, component_value_type>::iterator i, end;
|
Chris@16
|
187
|
Chris@16
|
188 end = collisions.end();
|
Chris@16
|
189 for (i = collisions.begin() ; i != end ; ++i) {
|
Chris@16
|
190 ret.push_back(i->first);
|
Chris@16
|
191 ret.push_back(i->second);
|
Chris@16
|
192 }
|
Chris@16
|
193
|
Chris@16
|
194 return ret;
|
Chris@16
|
195 }
|
Chris@16
|
196
|
Chris@16
|
197 private:
|
Chris@16
|
198 std::map<component_value_type, component_value_type> collisions;
|
Chris@16
|
199 int num_unique;
|
Chris@16
|
200 };
|
Chris@16
|
201
|
Chris@16
|
202
|
Chris@16
|
203 // resolver to handle remote updates. The resolver will add
|
Chris@16
|
204 // entries into the collisions map if required, and if it is the
|
Chris@16
|
205 // first time the vertex has been touched, it will add the vertex
|
Chris@16
|
206 // to the remote queue. Note that local updates are handled
|
Chris@16
|
207 // differently, in the main loop (below).
|
Chris@16
|
208
|
Chris@16
|
209 // BWB - FIX ME - don't need graph anymore - can pull from key value of Component Map.
|
Chris@16
|
210 template<typename ComponentMap, typename work_queue>
|
Chris@16
|
211 struct update_reducer {
|
Chris@16
|
212 BOOST_STATIC_CONSTANT(bool, non_default_resolver = false);
|
Chris@16
|
213
|
Chris@16
|
214 typedef typename property_traits<ComponentMap>::value_type component_value_type;
|
Chris@16
|
215 typedef typename property_traits<ComponentMap>::key_type vertex_descriptor;
|
Chris@16
|
216
|
Chris@16
|
217 update_reducer(work_queue *q,
|
Chris@16
|
218 cc_ps_detail::collision_map<component_value_type> *collisions,
|
Chris@16
|
219 processor_id_type pg_id) :
|
Chris@16
|
220 q(q), collisions(collisions), pg_id(pg_id)
|
Chris@16
|
221 {
|
Chris@16
|
222 }
|
Chris@16
|
223
|
Chris@16
|
224 // ghost cell initialization routine. This should never be
|
Chris@16
|
225 // called in this imlementation.
|
Chris@16
|
226 template<typename K>
|
Chris@16
|
227 component_value_type operator()(const K&) const
|
Chris@16
|
228 {
|
Chris@16
|
229 return component_value_type(0);
|
Chris@16
|
230 }
|
Chris@16
|
231
|
Chris@16
|
232 // resolver for remote updates. I'm not entirely sure why, but
|
Chris@16
|
233 // I decided to not change the value of the vertex if it's
|
Chris@16
|
234 // already non-infinite. It doesn't matter in the end, as we'll
|
Chris@16
|
235 // touch every vertex in the cleanup phase anyway. If the
|
Chris@16
|
236 // component is currently infinite, set to the new component
|
Chris@16
|
237 // number and add the vertex to the work queue. If it's not
|
Chris@16
|
238 // infinite, we've touched it already so don't add it to the
|
Chris@16
|
239 // work queue. Do add a collision entry so that we know the two
|
Chris@16
|
240 // components are the same.
|
Chris@16
|
241 component_value_type operator()(const vertex_descriptor &v,
|
Chris@16
|
242 const component_value_type& current,
|
Chris@16
|
243 const component_value_type& update) const
|
Chris@16
|
244 {
|
Chris@16
|
245 const component_value_type max = (std::numeric_limits<component_value_type>::max)();
|
Chris@16
|
246 component_value_type ret = current;
|
Chris@16
|
247
|
Chris@16
|
248 if (max == current) {
|
Chris@16
|
249 q->push(v);
|
Chris@16
|
250 ret = update;
|
Chris@16
|
251 } else if (current != update) {
|
Chris@16
|
252 collisions->add(current, update);
|
Chris@16
|
253 }
|
Chris@16
|
254
|
Chris@16
|
255 return ret;
|
Chris@16
|
256 }
|
Chris@16
|
257
|
Chris@16
|
258 // So for whatever reason, the property map can in theory call
|
Chris@16
|
259 // the resolver with a local descriptor in addition to the
|
Chris@16
|
260 // standard global descriptor. As far as I can tell, this code
|
Chris@16
|
261 // path is never taken in this implementation, but I need to
|
Chris@16
|
262 // have this code here to make it compile. We just make a
|
Chris@16
|
263 // global descriptor and call the "real" operator().
|
Chris@16
|
264 template<typename K>
|
Chris@16
|
265 component_value_type operator()(const K& v,
|
Chris@16
|
266 const component_value_type& current,
|
Chris@16
|
267 const component_value_type& update) const
|
Chris@16
|
268 {
|
Chris@16
|
269 return (*this)(vertex_descriptor(pg_id, v), current, update);
|
Chris@16
|
270 }
|
Chris@16
|
271
|
Chris@16
|
272 private:
|
Chris@16
|
273 work_queue *q;
|
Chris@16
|
274 collision_map<component_value_type> *collisions;
|
Chris@16
|
275 boost::processor_id_type pg_id;
|
Chris@16
|
276 };
|
Chris@16
|
277
|
Chris@16
|
278 } // namespace cc_ps_detail
|
Chris@16
|
279
|
Chris@16
|
280
|
Chris@16
|
281 template<typename Graph, typename ComponentMap>
|
Chris@16
|
282 typename property_traits<ComponentMap>::value_type
|
Chris@16
|
283 connected_components_ps(const Graph& g, ComponentMap c)
|
Chris@16
|
284 {
|
Chris@16
|
285 using boost::graph::parallel::process_group;
|
Chris@16
|
286
|
Chris@16
|
287 typedef typename property_traits<ComponentMap>::value_type component_value_type;
|
Chris@16
|
288 typedef typename graph_traits<Graph>::vertex_iterator vertex_iterator;
|
Chris@16
|
289 typedef typename graph_traits<Graph>::vertex_descriptor vertex_descriptor;
|
Chris@16
|
290 typedef typename boost::graph::parallel::process_group_type<Graph>
|
Chris@16
|
291 ::type process_group_type;
|
Chris@16
|
292 typedef typename process_group_type::process_id_type process_id_type;
|
Chris@16
|
293 typedef std::queue<vertex_descriptor> work_queue;
|
Chris@16
|
294
|
Chris@16
|
295 static const component_value_type max_component =
|
Chris@16
|
296 (std::numeric_limits<component_value_type>::max)();
|
Chris@16
|
297 typename property_map<Graph, vertex_owner_t>::const_type
|
Chris@16
|
298 owner = get(vertex_owner, g);
|
Chris@16
|
299
|
Chris@16
|
300 // standard who am i? stuff
|
Chris@16
|
301 process_group_type pg = process_group(g);
|
Chris@16
|
302 process_id_type id = process_id(pg);
|
Chris@16
|
303
|
Chris@16
|
304 // Initialize every vertex to have infinite component number
|
Chris@16
|
305 BGL_FORALL_VERTICES_T(v, g, Graph) put(c, v, max_component);
|
Chris@16
|
306
|
Chris@16
|
307 vertex_iterator current, end;
|
Chris@16
|
308 boost::tie(current, end) = vertices(g);
|
Chris@16
|
309
|
Chris@16
|
310 cc_ps_detail::component_value_allocator<component_value_type> cva(process_id(pg), num_processes(pg));
|
Chris@16
|
311 cc_ps_detail::collision_map<component_value_type> collisions;
|
Chris@16
|
312 work_queue q; // this is intentionally a local data structure
|
Chris@16
|
313 c.set_reduce(cc_ps_detail::update_reducer<ComponentMap, work_queue>(&q, &collisions, id));
|
Chris@16
|
314
|
Chris@16
|
315 // add starting work
|
Chris@16
|
316 while (true) {
|
Chris@16
|
317 bool useful_found = false;
|
Chris@16
|
318 component_value_type val = cva.allocate();
|
Chris@16
|
319 put(c, *current, val);
|
Chris@16
|
320 collisions.add(val);
|
Chris@16
|
321 q.push(*current);
|
Chris@16
|
322 if (0 != out_degree(*current, g)) useful_found = true;
|
Chris@16
|
323 ++current;
|
Chris@16
|
324 if (useful_found) break;
|
Chris@16
|
325 }
|
Chris@16
|
326
|
Chris@16
|
327 // Run the loop until everyone in the system is done
|
Chris@16
|
328 bool global_done = false;
|
Chris@16
|
329 while (!global_done) {
|
Chris@16
|
330
|
Chris@16
|
331 // drain queue of work for this superstep
|
Chris@16
|
332 while (!q.empty()) {
|
Chris@16
|
333 vertex_descriptor v = q.front();
|
Chris@16
|
334 q.pop();
|
Chris@16
|
335 // iterate through outedges of the vertex currently being
|
Chris@16
|
336 // examined, setting their component to our component. There
|
Chris@16
|
337 // is no way to end up in the queue without having a component
|
Chris@16
|
338 // number already.
|
Chris@16
|
339
|
Chris@16
|
340 BGL_FORALL_ADJ_T(v, peer, g, Graph) {
|
Chris@16
|
341 component_value_type my_component = get(c, v);
|
Chris@16
|
342
|
Chris@16
|
343 // update other vertex with our component information.
|
Chris@16
|
344 // Resolver will handle remote collisions as well as whether
|
Chris@16
|
345 // to put the vertex on the work queue or not. We have to
|
Chris@16
|
346 // handle local collisions and work queue management
|
Chris@16
|
347 if (id == get(owner, peer)) {
|
Chris@16
|
348 if (max_component == get(c, peer)) {
|
Chris@16
|
349 put(c, peer, my_component);
|
Chris@16
|
350 q.push(peer);
|
Chris@16
|
351 } else if (my_component != get(c, peer)) {
|
Chris@16
|
352 collisions.add(my_component, get(c, peer));
|
Chris@16
|
353 }
|
Chris@16
|
354 } else {
|
Chris@16
|
355 put(c, peer, my_component);
|
Chris@16
|
356 }
|
Chris@16
|
357 }
|
Chris@16
|
358 }
|
Chris@16
|
359
|
Chris@16
|
360 // synchronize / start a new superstep.
|
Chris@16
|
361 synchronize(pg);
|
Chris@16
|
362 global_done = all_reduce(pg, (q.empty() && (current == end)), boost::parallel::minimum<bool>());
|
Chris@16
|
363
|
Chris@16
|
364 // If the queue is currently empty, add something to do to start
|
Chris@16
|
365 // the current superstep (supersteps start at the sync, not at
|
Chris@16
|
366 // the top of the while loop as one might expect). Down at the
|
Chris@16
|
367 // bottom of the while loop so that not everyone starts the
|
Chris@16
|
368 // algorithm with something to do, to try to reduce component
|
Chris@16
|
369 // name conflicts
|
Chris@16
|
370 if (q.empty()) {
|
Chris@16
|
371 bool useful_found = false;
|
Chris@16
|
372 for ( ; current != end && !useful_found ; ++current) {
|
Chris@16
|
373 if (max_component == get(c, *current)) {
|
Chris@16
|
374 component_value_type val = cva.allocate();
|
Chris@16
|
375 put(c, *current, val);
|
Chris@16
|
376 collisions.add(val);
|
Chris@16
|
377 q.push(*current);
|
Chris@16
|
378 if (0 != out_degree(*current, g)) useful_found = true;
|
Chris@16
|
379 }
|
Chris@16
|
380 }
|
Chris@16
|
381 }
|
Chris@16
|
382 }
|
Chris@16
|
383
|
Chris@16
|
384 // share component mappings
|
Chris@16
|
385 std::vector<component_value_type> global;
|
Chris@16
|
386 std::vector<component_value_type> mine = collisions.serialize();
|
Chris@16
|
387 all_gather(pg, mine.begin(), mine.end(), global);
|
Chris@16
|
388 for (size_t i = 0 ; i < global.size() ; i += 2) {
|
Chris@16
|
389 collisions.add(global[i], global[i + 1]);
|
Chris@16
|
390 }
|
Chris@16
|
391 collisions.uniqify();
|
Chris@16
|
392
|
Chris@16
|
393 // update the component mappings
|
Chris@16
|
394 BGL_FORALL_VERTICES_T(v, g, Graph) {
|
Chris@16
|
395 put(c, v, collisions.update(get(c, v)));
|
Chris@16
|
396 }
|
Chris@16
|
397
|
Chris@16
|
398 return collisions.unique();
|
Chris@16
|
399 }
|
Chris@16
|
400
|
Chris@16
|
401 } // end namespace distributed
|
Chris@16
|
402
|
Chris@16
|
403 } // end namespace graph
|
Chris@16
|
404
|
Chris@16
|
405 } // end namespace boost
|
Chris@16
|
406
|
Chris@16
|
407 #endif // BOOST_GRAPH_PARALLEL_CC_HPP
|