cannam@167
|
1 /*
|
cannam@167
|
2 * Copyright (c) 2003, 2007-14 Matteo Frigo
|
cannam@167
|
3 * Copyright (c) 1999-2003, 2007-8 Massachusetts Institute of Technology
|
cannam@167
|
4 *
|
cannam@167
|
5 * This program is free software; you can redistribute it and/or modify
|
cannam@167
|
6 * it under the terms of the GNU General Public License as published by
|
cannam@167
|
7 * the Free Software Foundation; either version 2 of the License, or
|
cannam@167
|
8 * (at your option) any later version.
|
cannam@167
|
9 *
|
cannam@167
|
10 * This program is distributed in the hope that it will be useful,
|
cannam@167
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
cannam@167
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
cannam@167
|
13 * GNU General Public License for more details.
|
cannam@167
|
14 *
|
cannam@167
|
15 * You should have received a copy of the GNU General Public License
|
cannam@167
|
16 * along with this program; if not, write to the Free Software
|
cannam@167
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
cannam@167
|
18 *
|
cannam@167
|
19 */
|
cannam@167
|
20
|
cannam@167
|
21 /**********************************************************************/
|
cannam@167
|
22 /* This is a modified and combined version of the sched.c and
|
cannam@167
|
23 test_sched.c files shipped with FFTW 2, written to implement and
|
cannam@167
|
24 test various all-to-all communications scheduling patterns.
|
cannam@167
|
25
|
cannam@167
|
26 It is not used in FFTW 3, but I keep it around in case we ever want
|
cannam@167
|
27 to play with this again or to change algorithms. In particular, I
|
cannam@167
|
28 used it to implement and test the fill1_comm_sched routine in
|
cannam@167
|
29 transpose-pairwise.c, which allows us to create a schedule for one
|
cannam@167
|
30 process at a time and is much more compact than the FFTW 2 code.
|
cannam@167
|
31
|
cannam@167
|
32 Note that the scheduling algorithm is somewhat modified from that
|
cannam@167
|
33 of FFTW 2. Originally, I thought that one "stall" in the schedule
|
cannam@167
|
34 was unavoidable for odd numbers of processes, since this is the
|
cannam@167
|
35 case for the soccer-timetabling problem. However, because of the
|
cannam@167
|
36 self-communication step, we can use the self-communication to fill
|
cannam@167
|
37 in the stalls. (Thanks to Ralf Wildenhues for pointing this out.)
|
cannam@167
|
38 This greatly simplifies the process re-sorting algorithm. */
|
cannam@167
|
39
|
cannam@167
|
40 /**********************************************************************/
|
cannam@167
|
41
|
cannam@167
|
42 #include <stdio.h>
|
cannam@167
|
43 #include <stdlib.h>
|
cannam@167
|
44
|
cannam@167
|
45 /* This file contains routines to compute communications schedules for
|
cannam@167
|
46 all-to-all communications (complete exchanges) that are performed
|
cannam@167
|
47 in-place. (That is, the block that processor x sends to processor
|
cannam@167
|
48 y gets replaced on processor x by a block received from processor y.)
|
cannam@167
|
49
|
cannam@167
|
50 A schedule, int **sched, is a two-dimensional array where
|
cannam@167
|
51 sched[pe][i] is the processor that pe expects to exchange a message
|
cannam@167
|
52 with on the i-th step of the exchange. sched[pe][i] == -1 for the
|
cannam@167
|
53 i after the last exchange scheduled on pe.
|
cannam@167
|
54
|
cannam@167
|
55 Here, processors (pe's, for processing elements), are numbered from
|
cannam@167
|
56 0 to npes-1.
|
cannam@167
|
57
|
cannam@167
|
58 There are a couple of constraints that a schedule should satisfy
|
cannam@167
|
59 (besides the obvious one that every processor has to communicate
|
cannam@167
|
60 with every other processor exactly once).
|
cannam@167
|
61
|
cannam@167
|
62 * First, and most importantly, there must be no deadlocks.
|
cannam@167
|
63
|
cannam@167
|
64 * Second, we would like to overlap communications as much as possible,
|
cannam@167
|
65 so that all exchanges occur in parallel. It turns out that perfect
|
cannam@167
|
66 overlap is possible for all number of processes (npes).
|
cannam@167
|
67
|
cannam@167
|
68 It turns out that this scheduling problem is actually well-studied,
|
cannam@167
|
69 and good solutions are known. The problem is known as a
|
cannam@167
|
70 "time-tabling" problem, and is specifically the problem of
|
cannam@167
|
71 scheduling a sports competition (where n teams must compete exactly
|
cannam@167
|
72 once with every other team). The problem is discussed and
|
cannam@167
|
73 algorithms are presented in:
|
cannam@167
|
74
|
cannam@167
|
75 [1] J. A. M. Schreuder, "Constructing Timetables for Sport
|
cannam@167
|
76 Competitions," Mathematical Programming Study 13, pp. 58-67 (1980).
|
cannam@167
|
77
|
cannam@167
|
78 [2] A. Schaerf, "Scheduling Sport Tournaments using Constraint
|
cannam@167
|
79 Logic Programming," Proc. of 12th Europ. Conf. on
|
cannam@167
|
80 Artif. Intell. (ECAI-96), pp. 634-639 (Budapest 1996).
|
cannam@167
|
81 http://hermes.dis.uniromal.it/~aschaerf/publications.html
|
cannam@167
|
82
|
cannam@167
|
83 (These people actually impose a lot of additional constraints that
|
cannam@167
|
84 we don't care about, so they are solving harder problems. [1] gives
|
cannam@167
|
85 a simple enough algorithm for our purposes, though.)
|
cannam@167
|
86
|
cannam@167
|
87 In the timetabling problem, N teams can all play one another in N-1
|
cannam@167
|
88 steps if N is even, and N steps if N is odd. Here, however,
|
cannam@167
|
89 there is a "self-communication" step (a team must also "play itself")
|
cannam@167
|
90 and so we can always make an optimal N-step schedule regardless of N.
|
cannam@167
|
91
|
cannam@167
|
92 However, we have to do more: for a particular processor, the
|
cannam@167
|
93 communications schedule must be sorted in ascending or descending
|
cannam@167
|
94 order of processor index. (This is necessary so that the data
|
cannam@167
|
95 coming in for the transpose does not overwrite data that will be
|
cannam@167
|
96 sent later; for that processor the incoming and outgoing blocks are
|
cannam@167
|
97 of different non-zero sizes.) Fortunately, because the schedule
|
cannam@167
|
98 is stall free, each parallel step of the schedule is independent
|
cannam@167
|
99 of every other step, and we can reorder the steps arbitrarily
|
cannam@167
|
100 to achieve any desired order on a particular process.
|
cannam@167
|
101 */
|
cannam@167
|
102
|
cannam@167
|
103 void free_comm_schedule(int **sched, int npes)
|
cannam@167
|
104 {
|
cannam@167
|
105 if (sched) {
|
cannam@167
|
106 int i;
|
cannam@167
|
107
|
cannam@167
|
108 for (i = 0; i < npes; ++i)
|
cannam@167
|
109 free(sched[i]);
|
cannam@167
|
110 free(sched);
|
cannam@167
|
111 }
|
cannam@167
|
112 }
|
cannam@167
|
113
|
cannam@167
|
114 void empty_comm_schedule(int **sched, int npes)
|
cannam@167
|
115 {
|
cannam@167
|
116 int i;
|
cannam@167
|
117 for (i = 0; i < npes; ++i)
|
cannam@167
|
118 sched[i][0] = -1;
|
cannam@167
|
119 }
|
cannam@167
|
120
|
cannam@167
|
121 extern void fill_comm_schedule(int **sched, int npes);
|
cannam@167
|
122
|
cannam@167
|
123 /* Create a new communications schedule for a given number of processors.
|
cannam@167
|
124 The schedule is initialized to a deadlock-free, maximum overlap
|
cannam@167
|
125 schedule. Returns NULL on an error (may print a message to
|
cannam@167
|
126 stderr if there is a program bug detected). */
|
cannam@167
|
127 int **make_comm_schedule(int npes)
|
cannam@167
|
128 {
|
cannam@167
|
129 int **sched;
|
cannam@167
|
130 int i;
|
cannam@167
|
131
|
cannam@167
|
132 sched = (int **) malloc(sizeof(int *) * npes);
|
cannam@167
|
133 if (!sched)
|
cannam@167
|
134 return NULL;
|
cannam@167
|
135
|
cannam@167
|
136 for (i = 0; i < npes; ++i)
|
cannam@167
|
137 sched[i] = NULL;
|
cannam@167
|
138
|
cannam@167
|
139 for (i = 0; i < npes; ++i) {
|
cannam@167
|
140 sched[i] = (int *) malloc(sizeof(int) * 10 * (npes + 1));
|
cannam@167
|
141 if (!sched[i]) {
|
cannam@167
|
142 free_comm_schedule(sched,npes);
|
cannam@167
|
143 return NULL;
|
cannam@167
|
144 }
|
cannam@167
|
145 }
|
cannam@167
|
146
|
cannam@167
|
147 empty_comm_schedule(sched,npes);
|
cannam@167
|
148 fill_comm_schedule(sched,npes);
|
cannam@167
|
149
|
cannam@167
|
150 if (!check_comm_schedule(sched,npes)) {
|
cannam@167
|
151 free_comm_schedule(sched,npes);
|
cannam@167
|
152 return NULL;
|
cannam@167
|
153 }
|
cannam@167
|
154
|
cannam@167
|
155 return sched;
|
cannam@167
|
156 }
|
cannam@167
|
157
|
cannam@167
|
158 static void add_dest_to_comm_schedule(int **sched, int pe, int dest)
|
cannam@167
|
159 {
|
cannam@167
|
160 int i;
|
cannam@167
|
161
|
cannam@167
|
162 for (i = 0; sched[pe][i] != -1; ++i)
|
cannam@167
|
163 ;
|
cannam@167
|
164
|
cannam@167
|
165 sched[pe][i] = dest;
|
cannam@167
|
166 sched[pe][i+1] = -1;
|
cannam@167
|
167 }
|
cannam@167
|
168
|
cannam@167
|
169 static void add_pair_to_comm_schedule(int **sched, int pe1, int pe2)
|
cannam@167
|
170 {
|
cannam@167
|
171 add_dest_to_comm_schedule(sched, pe1, pe2);
|
cannam@167
|
172 if (pe1 != pe2)
|
cannam@167
|
173 add_dest_to_comm_schedule(sched, pe2, pe1);
|
cannam@167
|
174 }
|
cannam@167
|
175
|
cannam@167
|
176 /* Simplification of algorithm presented in [1] (we have fewer
|
cannam@167
|
177 constraints). Produces a perfect schedule (npes steps). */
|
cannam@167
|
178
|
cannam@167
|
179 void fill_comm_schedule(int **sched, int npes)
|
cannam@167
|
180 {
|
cannam@167
|
181 int pe, i, n;
|
cannam@167
|
182
|
cannam@167
|
183 if (npes % 2 == 0) {
|
cannam@167
|
184 n = npes;
|
cannam@167
|
185 for (pe = 0; pe < npes; ++pe)
|
cannam@167
|
186 add_pair_to_comm_schedule(sched,pe,pe);
|
cannam@167
|
187 }
|
cannam@167
|
188 else
|
cannam@167
|
189 n = npes + 1;
|
cannam@167
|
190
|
cannam@167
|
191 for (pe = 0; pe < n - 1; ++pe) {
|
cannam@167
|
192 add_pair_to_comm_schedule(sched, pe, npes % 2 == 0 ? npes - 1 : pe);
|
cannam@167
|
193
|
cannam@167
|
194 for (i = 1; i < n/2; ++i) {
|
cannam@167
|
195 int pe_a, pe_b;
|
cannam@167
|
196
|
cannam@167
|
197 pe_a = pe - i;
|
cannam@167
|
198 if (pe_a < 0)
|
cannam@167
|
199 pe_a += n - 1;
|
cannam@167
|
200
|
cannam@167
|
201 pe_b = (pe + i) % (n - 1);
|
cannam@167
|
202
|
cannam@167
|
203 add_pair_to_comm_schedule(sched,pe_a,pe_b);
|
cannam@167
|
204 }
|
cannam@167
|
205 }
|
cannam@167
|
206 }
|
cannam@167
|
207
|
cannam@167
|
208 /* given an array sched[npes], fills it with the communications
|
cannam@167
|
209 schedule for process pe. */
|
cannam@167
|
210 void fill1_comm_sched(int *sched, int which_pe, int npes)
|
cannam@167
|
211 {
|
cannam@167
|
212 int pe, i, n, s = 0;
|
cannam@167
|
213 if (npes % 2 == 0) {
|
cannam@167
|
214 n = npes;
|
cannam@167
|
215 sched[s++] = which_pe;
|
cannam@167
|
216 }
|
cannam@167
|
217 else
|
cannam@167
|
218 n = npes + 1;
|
cannam@167
|
219 for (pe = 0; pe < n - 1; ++pe) {
|
cannam@167
|
220 if (npes % 2 == 0) {
|
cannam@167
|
221 if (pe == which_pe) sched[s++] = npes - 1;
|
cannam@167
|
222 else if (npes - 1 == which_pe) sched[s++] = pe;
|
cannam@167
|
223 }
|
cannam@167
|
224 else if (pe == which_pe) sched[s++] = pe;
|
cannam@167
|
225
|
cannam@167
|
226 if (pe != which_pe && which_pe < n - 1) {
|
cannam@167
|
227 i = (pe - which_pe + (n - 1)) % (n - 1);
|
cannam@167
|
228 if (i < n/2)
|
cannam@167
|
229 sched[s++] = (pe + i) % (n - 1);
|
cannam@167
|
230
|
cannam@167
|
231 i = (which_pe - pe + (n - 1)) % (n - 1);
|
cannam@167
|
232 if (i < n/2)
|
cannam@167
|
233 sched[s++] = (pe - i + (n - 1)) % (n - 1);
|
cannam@167
|
234 }
|
cannam@167
|
235 }
|
cannam@167
|
236 if (s != npes) {
|
cannam@167
|
237 fprintf(stderr, "bug in fill1_com_schedule (%d, %d/%d)\n",
|
cannam@167
|
238 s, which_pe, npes);
|
cannam@167
|
239 exit(EXIT_FAILURE);
|
cannam@167
|
240 }
|
cannam@167
|
241 }
|
cannam@167
|
242
|
cannam@167
|
243 /* sort the communication schedule sched for npes so that the schedule
|
cannam@167
|
244 on process sortpe is ascending or descending (!ascending). */
|
cannam@167
|
245 static void sort1_comm_sched(int *sched, int npes, int sortpe, int ascending)
|
cannam@167
|
246 {
|
cannam@167
|
247 int *sortsched, i;
|
cannam@167
|
248 sortsched = (int *) malloc(npes * sizeof(int) * 2);
|
cannam@167
|
249 fill1_comm_sched(sortsched, sortpe, npes);
|
cannam@167
|
250 if (ascending)
|
cannam@167
|
251 for (i = 0; i < npes; ++i)
|
cannam@167
|
252 sortsched[npes + sortsched[i]] = sched[i];
|
cannam@167
|
253 else
|
cannam@167
|
254 for (i = 0; i < npes; ++i)
|
cannam@167
|
255 sortsched[2*npes - 1 - sortsched[i]] = sched[i];
|
cannam@167
|
256 for (i = 0; i < npes; ++i)
|
cannam@167
|
257 sched[i] = sortsched[npes + i];
|
cannam@167
|
258 free(sortsched);
|
cannam@167
|
259 }
|
cannam@167
|
260
|
cannam@167
|
261 /* Below, we have various checks in case of bugs: */
|
cannam@167
|
262
|
cannam@167
|
263 /* check for deadlocks by simulating the schedule and looking for
|
cannam@167
|
264 cycles in the dependency list; returns 0 if there are deadlocks
|
cannam@167
|
265 (or other errors) */
|
cannam@167
|
266 static int check_schedule_deadlock(int **sched, int npes)
|
cannam@167
|
267 {
|
cannam@167
|
268 int *step, *depend, *visited, pe, pe2, period, done = 0;
|
cannam@167
|
269 int counter = 0;
|
cannam@167
|
270
|
cannam@167
|
271 /* step[pe] is the step in the schedule that a given pe is on */
|
cannam@167
|
272 step = (int *) malloc(sizeof(int) * npes);
|
cannam@167
|
273
|
cannam@167
|
274 /* depend[pe] is the pe' that pe is currently waiting for a message
|
cannam@167
|
275 from (-1 if none) */
|
cannam@167
|
276 depend = (int *) malloc(sizeof(int) * npes);
|
cannam@167
|
277
|
cannam@167
|
278 /* visited[pe] tells whether we have visited the current pe already
|
cannam@167
|
279 when we are looking for cycles. */
|
cannam@167
|
280 visited = (int *) malloc(sizeof(int) * npes);
|
cannam@167
|
281
|
cannam@167
|
282 if (!step || !depend || !visited) {
|
cannam@167
|
283 free(step); free(depend); free(visited);
|
cannam@167
|
284 return 0;
|
cannam@167
|
285 }
|
cannam@167
|
286
|
cannam@167
|
287 for (pe = 0; pe < npes; ++pe)
|
cannam@167
|
288 step[pe] = 0;
|
cannam@167
|
289
|
cannam@167
|
290 while (!done) {
|
cannam@167
|
291 ++counter;
|
cannam@167
|
292
|
cannam@167
|
293 for (pe = 0; pe < npes; ++pe)
|
cannam@167
|
294 depend[pe] = sched[pe][step[pe]];
|
cannam@167
|
295
|
cannam@167
|
296 /* now look for cycles in the dependencies with period > 2: */
|
cannam@167
|
297 for (pe = 0; pe < npes; ++pe)
|
cannam@167
|
298 if (depend[pe] != -1) {
|
cannam@167
|
299 for (pe2 = 0; pe2 < npes; ++pe2)
|
cannam@167
|
300 visited[pe2] = 0;
|
cannam@167
|
301
|
cannam@167
|
302 period = 0;
|
cannam@167
|
303 pe2 = pe;
|
cannam@167
|
304 do {
|
cannam@167
|
305 visited[pe2] = period + 1;
|
cannam@167
|
306 pe2 = depend[pe2];
|
cannam@167
|
307 period++;
|
cannam@167
|
308 } while (pe2 != -1 && !visited[pe2]);
|
cannam@167
|
309
|
cannam@167
|
310 if (pe2 == -1) {
|
cannam@167
|
311 fprintf(stderr,
|
cannam@167
|
312 "BUG: unterminated cycle in schedule!\n");
|
cannam@167
|
313 free(step); free(depend);
|
cannam@167
|
314 free(visited);
|
cannam@167
|
315 return 0;
|
cannam@167
|
316 }
|
cannam@167
|
317 if (period - (visited[pe2] - 1) > 2) {
|
cannam@167
|
318 fprintf(stderr,"BUG: deadlock in schedule!\n");
|
cannam@167
|
319 free(step); free(depend);
|
cannam@167
|
320 free(visited);
|
cannam@167
|
321 return 0;
|
cannam@167
|
322 }
|
cannam@167
|
323
|
cannam@167
|
324 if (pe2 == pe)
|
cannam@167
|
325 step[pe]++;
|
cannam@167
|
326 }
|
cannam@167
|
327
|
cannam@167
|
328 done = 1;
|
cannam@167
|
329 for (pe = 0; pe < npes; ++pe)
|
cannam@167
|
330 if (sched[pe][step[pe]] != -1) {
|
cannam@167
|
331 done = 0;
|
cannam@167
|
332 break;
|
cannam@167
|
333 }
|
cannam@167
|
334 }
|
cannam@167
|
335
|
cannam@167
|
336 free(step); free(depend); free(visited);
|
cannam@167
|
337 return (counter > 0 ? counter : 1);
|
cannam@167
|
338 }
|
cannam@167
|
339
|
cannam@167
|
340 /* sanity checks; prints message and returns 0 on failure.
|
cannam@167
|
341 undocumented feature: the return value on success is actually the
|
cannam@167
|
342 number of steps required for the schedule to complete, counting
|
cannam@167
|
343 stalls. */
|
cannam@167
|
344 int check_comm_schedule(int **sched, int npes)
|
cannam@167
|
345 {
|
cannam@167
|
346 int pe, i, comm_pe;
|
cannam@167
|
347
|
cannam@167
|
348 for (pe = 0; pe < npes; ++pe) {
|
cannam@167
|
349 for (comm_pe = 0; comm_pe < npes; ++comm_pe) {
|
cannam@167
|
350 for (i = 0; sched[pe][i] != -1 && sched[pe][i] != comm_pe; ++i)
|
cannam@167
|
351 ;
|
cannam@167
|
352 if (sched[pe][i] == -1) {
|
cannam@167
|
353 fprintf(stderr,"BUG: schedule never sends message from "
|
cannam@167
|
354 "%d to %d.\n",pe,comm_pe);
|
cannam@167
|
355 return 0; /* never send message to comm_pe */
|
cannam@167
|
356 }
|
cannam@167
|
357 }
|
cannam@167
|
358 for (i = 0; sched[pe][i] != -1; ++i)
|
cannam@167
|
359 ;
|
cannam@167
|
360 if (i != npes) {
|
cannam@167
|
361 fprintf(stderr,"BUG: schedule sends too many messages from "
|
cannam@167
|
362 "%d\n",pe);
|
cannam@167
|
363 return 0;
|
cannam@167
|
364 }
|
cannam@167
|
365 }
|
cannam@167
|
366 return check_schedule_deadlock(sched,npes);
|
cannam@167
|
367 }
|
cannam@167
|
368
|
cannam@167
|
369 /* invert the order of all the schedules; this has no effect on
|
cannam@167
|
370 its required properties. */
|
cannam@167
|
371 void invert_comm_schedule(int **sched, int npes)
|
cannam@167
|
372 {
|
cannam@167
|
373 int pe, i;
|
cannam@167
|
374
|
cannam@167
|
375 for (pe = 0; pe < npes; ++pe)
|
cannam@167
|
376 for (i = 0; i < npes/2; ++i) {
|
cannam@167
|
377 int dummy = sched[pe][i];
|
cannam@167
|
378 sched[pe][i] = sched[pe][npes-1-i];
|
cannam@167
|
379 sched[pe][npes-1-i] = dummy;
|
cannam@167
|
380 }
|
cannam@167
|
381 }
|
cannam@167
|
382
|
cannam@167
|
383 /* Sort the schedule for sort_pe in ascending order of processor
|
cannam@167
|
384 index. Unfortunately, for odd npes (when schedule has a stall
|
cannam@167
|
385 to begin with) this will introduce an extra stall due to
|
cannam@167
|
386 the motion of the self-communication past a stall. We could
|
cannam@167
|
387 fix this if it were really important. Actually, we don't
|
cannam@167
|
388 get an extra stall when sort_pe == 0 or npes-1, which is sufficient
|
cannam@167
|
389 for our purposes. */
|
cannam@167
|
390 void sort_comm_schedule(int **sched, int npes, int sort_pe)
|
cannam@167
|
391 {
|
cannam@167
|
392 int i,j,pe;
|
cannam@167
|
393
|
cannam@167
|
394 /* Note that we can do this sort in O(npes) swaps because we know
|
cannam@167
|
395 that the numbers we are sorting are just 0...npes-1. But we'll
|
cannam@167
|
396 just do a bubble sort for simplicity here. */
|
cannam@167
|
397
|
cannam@167
|
398 for (i = 0; i < npes - 1; ++i)
|
cannam@167
|
399 for (j = i + 1; j < npes; ++j)
|
cannam@167
|
400 if (sched[sort_pe][i] > sched[sort_pe][j]) {
|
cannam@167
|
401 for (pe = 0; pe < npes; ++pe) {
|
cannam@167
|
402 int s = sched[pe][i];
|
cannam@167
|
403 sched[pe][i] = sched[pe][j];
|
cannam@167
|
404 sched[pe][j] = s;
|
cannam@167
|
405 }
|
cannam@167
|
406 }
|
cannam@167
|
407 }
|
cannam@167
|
408
|
cannam@167
|
409 /* print the schedule (for debugging purposes) */
|
cannam@167
|
410 void print_comm_schedule(int **sched, int npes)
|
cannam@167
|
411 {
|
cannam@167
|
412 int pe, i, width;
|
cannam@167
|
413
|
cannam@167
|
414 if (npes < 10)
|
cannam@167
|
415 width = 1;
|
cannam@167
|
416 else if (npes < 100)
|
cannam@167
|
417 width = 2;
|
cannam@167
|
418 else
|
cannam@167
|
419 width = 3;
|
cannam@167
|
420
|
cannam@167
|
421 for (pe = 0; pe < npes; ++pe) {
|
cannam@167
|
422 printf("pe %*d schedule:", width, pe);
|
cannam@167
|
423 for (i = 0; sched[pe][i] != -1; ++i)
|
cannam@167
|
424 printf(" %*d",width,sched[pe][i]);
|
cannam@167
|
425 printf("\n");
|
cannam@167
|
426 }
|
cannam@167
|
427 }
|
cannam@167
|
428
|
cannam@167
|
429 int main(int argc, char **argv)
|
cannam@167
|
430 {
|
cannam@167
|
431 int **sched;
|
cannam@167
|
432 int npes = -1, sortpe = -1, steps, i;
|
cannam@167
|
433
|
cannam@167
|
434 if (argc >= 2) {
|
cannam@167
|
435 npes = atoi(argv[1]);
|
cannam@167
|
436 if (npes <= 0) {
|
cannam@167
|
437 fprintf(stderr,"npes must be positive!");
|
cannam@167
|
438 return 1;
|
cannam@167
|
439 }
|
cannam@167
|
440 }
|
cannam@167
|
441 if (argc >= 3) {
|
cannam@167
|
442 sortpe = atoi(argv[2]);
|
cannam@167
|
443 if (sortpe < 0 || sortpe >= npes) {
|
cannam@167
|
444 fprintf(stderr,"sortpe must be between 0 and npes-1.\n");
|
cannam@167
|
445 return 1;
|
cannam@167
|
446 }
|
cannam@167
|
447 }
|
cannam@167
|
448
|
cannam@167
|
449 if (npes != -1) {
|
cannam@167
|
450 printf("Computing schedule for npes = %d:\n",npes);
|
cannam@167
|
451 sched = make_comm_schedule(npes);
|
cannam@167
|
452 if (!sched) {
|
cannam@167
|
453 fprintf(stderr,"Out of memory!");
|
cannam@167
|
454 return 6;
|
cannam@167
|
455 }
|
cannam@167
|
456
|
cannam@167
|
457 if (steps = check_comm_schedule(sched,npes))
|
cannam@167
|
458 printf("schedule OK (takes %d steps to complete).\n", steps);
|
cannam@167
|
459 else
|
cannam@167
|
460 printf("schedule not OK.\n");
|
cannam@167
|
461
|
cannam@167
|
462 print_comm_schedule(sched, npes);
|
cannam@167
|
463
|
cannam@167
|
464 if (sortpe != -1) {
|
cannam@167
|
465 printf("\nRe-creating schedule for pe = %d...\n", sortpe);
|
cannam@167
|
466 int *sched1 = (int*) malloc(sizeof(int) * npes);
|
cannam@167
|
467 for (i = 0; i < npes; ++i) sched1[i] = -1;
|
cannam@167
|
468 fill1_comm_sched(sched1, sortpe, npes);
|
cannam@167
|
469 printf(" =");
|
cannam@167
|
470 for (i = 0; i < npes; ++i)
|
cannam@167
|
471 printf(" %*d", npes < 10 ? 1 : (npes < 100 ? 2 : 3),
|
cannam@167
|
472 sched1[i]);
|
cannam@167
|
473 printf("\n");
|
cannam@167
|
474
|
cannam@167
|
475 printf("\nSorting schedule for sortpe = %d...\n", sortpe);
|
cannam@167
|
476 sort_comm_schedule(sched,npes,sortpe);
|
cannam@167
|
477
|
cannam@167
|
478 if (steps = check_comm_schedule(sched,npes))
|
cannam@167
|
479 printf("schedule OK (takes %d steps to complete).\n",
|
cannam@167
|
480 steps);
|
cannam@167
|
481 else
|
cannam@167
|
482 printf("schedule not OK.\n");
|
cannam@167
|
483
|
cannam@167
|
484 print_comm_schedule(sched, npes);
|
cannam@167
|
485
|
cannam@167
|
486 printf("\nInverting schedule...\n");
|
cannam@167
|
487 invert_comm_schedule(sched,npes);
|
cannam@167
|
488
|
cannam@167
|
489 if (steps = check_comm_schedule(sched,npes))
|
cannam@167
|
490 printf("schedule OK (takes %d steps to complete).\n",
|
cannam@167
|
491 steps);
|
cannam@167
|
492 else
|
cannam@167
|
493 printf("schedule not OK.\n");
|
cannam@167
|
494
|
cannam@167
|
495 print_comm_schedule(sched, npes);
|
cannam@167
|
496
|
cannam@167
|
497 free_comm_schedule(sched,npes);
|
cannam@167
|
498
|
cannam@167
|
499 free(sched1);
|
cannam@167
|
500 }
|
cannam@167
|
501 }
|
cannam@167
|
502 else {
|
cannam@167
|
503 printf("Doing infinite tests...\n");
|
cannam@167
|
504 for (npes = 1; ; ++npes) {
|
cannam@167
|
505 int *sched1 = (int*) malloc(sizeof(int) * npes);
|
cannam@167
|
506 printf("npes = %d...",npes);
|
cannam@167
|
507 sched = make_comm_schedule(npes);
|
cannam@167
|
508 if (!sched) {
|
cannam@167
|
509 fprintf(stderr,"Out of memory!\n");
|
cannam@167
|
510 return 5;
|
cannam@167
|
511 }
|
cannam@167
|
512 for (sortpe = 0; sortpe < npes; ++sortpe) {
|
cannam@167
|
513 empty_comm_schedule(sched,npes);
|
cannam@167
|
514 fill_comm_schedule(sched,npes);
|
cannam@167
|
515 if (!check_comm_schedule(sched,npes)) {
|
cannam@167
|
516 fprintf(stderr,
|
cannam@167
|
517 "\n -- fill error for sortpe = %d!\n",sortpe);
|
cannam@167
|
518 return 2;
|
cannam@167
|
519 }
|
cannam@167
|
520
|
cannam@167
|
521 for (i = 0; i < npes; ++i) sched1[i] = -1;
|
cannam@167
|
522 fill1_comm_sched(sched1, sortpe, npes);
|
cannam@167
|
523 for (i = 0; i < npes; ++i)
|
cannam@167
|
524 if (sched1[i] != sched[sortpe][i])
|
cannam@167
|
525 fprintf(stderr,
|
cannam@167
|
526 "\n -- fill1 error for pe = %d!\n",
|
cannam@167
|
527 sortpe);
|
cannam@167
|
528
|
cannam@167
|
529 sort_comm_schedule(sched,npes,sortpe);
|
cannam@167
|
530 if (!check_comm_schedule(sched,npes)) {
|
cannam@167
|
531 fprintf(stderr,
|
cannam@167
|
532 "\n -- sort error for sortpe = %d!\n",sortpe);
|
cannam@167
|
533 return 3;
|
cannam@167
|
534 }
|
cannam@167
|
535 invert_comm_schedule(sched,npes);
|
cannam@167
|
536 if (!check_comm_schedule(sched,npes)) {
|
cannam@167
|
537 fprintf(stderr,
|
cannam@167
|
538 "\n -- invert error for sortpe = %d!\n",
|
cannam@167
|
539 sortpe);
|
cannam@167
|
540 return 4;
|
cannam@167
|
541 }
|
cannam@167
|
542 }
|
cannam@167
|
543 free_comm_schedule(sched,npes);
|
cannam@167
|
544 printf("OK\n");
|
cannam@167
|
545 if (npes % 50 == 0)
|
cannam@167
|
546 printf("(...Hit Ctrl-C to stop...)\n");
|
cannam@167
|
547 free(sched1);
|
cannam@167
|
548 }
|
cannam@167
|
549 }
|
cannam@167
|
550
|
cannam@167
|
551 return 0;
|
cannam@167
|
552 }
|