annotate src/fftw-3.3.5/mpi/testsched.c @ 169:223a55898ab9 tip default

Add null config files
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 02 Mar 2020 14:03:47 +0000
parents 7867fa7e1b6b
children
rev   line source
cannam@127 1 /*
cannam@127 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
cannam@127 3 * Copyright (c) 1999-2003, 2007-8 Massachusetts Institute of Technology
cannam@127 4 *
cannam@127 5 * This program is free software; you can redistribute it and/or modify
cannam@127 6 * it under the terms of the GNU General Public License as published by
cannam@127 7 * the Free Software Foundation; either version 2 of the License, or
cannam@127 8 * (at your option) any later version.
cannam@127 9 *
cannam@127 10 * This program is distributed in the hope that it will be useful,
cannam@127 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cannam@127 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cannam@127 13 * GNU General Public License for more details.
cannam@127 14 *
cannam@127 15 * You should have received a copy of the GNU General Public License
cannam@127 16 * along with this program; if not, write to the Free Software
cannam@127 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
cannam@127 18 *
cannam@127 19 */
cannam@127 20
cannam@127 21 /**********************************************************************/
cannam@127 22 /* This is a modified and combined version of the sched.c and
cannam@127 23 test_sched.c files shipped with FFTW 2, written to implement and
cannam@127 24 test various all-to-all communications scheduling patterns.
cannam@127 25
cannam@127 26 It is not used in FFTW 3, but I keep it around in case we ever want
cannam@127 27 to play with this again or to change algorithms. In particular, I
cannam@127 28 used it to implement and test the fill1_comm_sched routine in
cannam@127 29 transpose-pairwise.c, which allows us to create a schedule for one
cannam@127 30 process at a time and is much more compact than the FFTW 2 code.
cannam@127 31
cannam@127 32 Note that the scheduling algorithm is somewhat modified from that
cannam@127 33 of FFTW 2. Originally, I thought that one "stall" in the schedule
cannam@127 34 was unavoidable for odd numbers of processes, since this is the
cannam@127 35 case for the soccer-timetabling problem. However, because of the
cannam@127 36 self-communication step, we can use the self-communication to fill
cannam@127 37 in the stalls. (Thanks to Ralf Wildenhues for pointing this out.)
cannam@127 38 This greatly simplifies the process re-sorting algorithm. */
cannam@127 39
cannam@127 40 /**********************************************************************/
cannam@127 41
cannam@127 42 #include <stdio.h>
cannam@127 43 #include <stdlib.h>
cannam@127 44
cannam@127 45 /* This file contains routines to compute communications schedules for
cannam@127 46 all-to-all communications (complete exchanges) that are performed
cannam@127 47 in-place. (That is, the block that processor x sends to processor
cannam@127 48 y gets replaced on processor x by a block received from processor y.)
cannam@127 49
cannam@127 50 A schedule, int **sched, is a two-dimensional array where
cannam@127 51 sched[pe][i] is the processor that pe expects to exchange a message
cannam@127 52 with on the i-th step of the exchange. sched[pe][i] == -1 for the
cannam@127 53 i after the last exchange scheduled on pe.
cannam@127 54
cannam@127 55 Here, processors (pe's, for processing elements), are numbered from
cannam@127 56 0 to npes-1.
cannam@127 57
cannam@127 58 There are a couple of constraints that a schedule should satisfy
cannam@127 59 (besides the obvious one that every processor has to communicate
cannam@127 60 with every other processor exactly once).
cannam@127 61
cannam@127 62 * First, and most importantly, there must be no deadlocks.
cannam@127 63
cannam@127 64 * Second, we would like to overlap communications as much as possible,
cannam@127 65 so that all exchanges occur in parallel. It turns out that perfect
cannam@127 66 overlap is possible for all number of processes (npes).
cannam@127 67
cannam@127 68 It turns out that this scheduling problem is actually well-studied,
cannam@127 69 and good solutions are known. The problem is known as a
cannam@127 70 "time-tabling" problem, and is specifically the problem of
cannam@127 71 scheduling a sports competition (where n teams must compete exactly
cannam@127 72 once with every other team). The problem is discussed and
cannam@127 73 algorithms are presented in:
cannam@127 74
cannam@127 75 [1] J. A. M. Schreuder, "Constructing Timetables for Sport
cannam@127 76 Competitions," Mathematical Programming Study 13, pp. 58-67 (1980).
cannam@127 77
cannam@127 78 [2] A. Schaerf, "Scheduling Sport Tournaments using Constraint
cannam@127 79 Logic Programming," Proc. of 12th Europ. Conf. on
cannam@127 80 Artif. Intell. (ECAI-96), pp. 634-639 (Budapest 1996).
cannam@127 81 http://hermes.dis.uniromal.it/~aschaerf/publications.html
cannam@127 82
cannam@127 83 (These people actually impose a lot of additional constraints that
cannam@127 84 we don't care about, so they are solving harder problems. [1] gives
cannam@127 85 a simple enough algorithm for our purposes, though.)
cannam@127 86
cannam@127 87 In the timetabling problem, N teams can all play one another in N-1
cannam@127 88 steps if N is even, and N steps if N is odd. Here, however,
cannam@127 89 there is a "self-communication" step (a team must also "play itself")
cannam@127 90 and so we can always make an optimal N-step schedule regardless of N.
cannam@127 91
cannam@127 92 However, we have to do more: for a particular processor, the
cannam@127 93 communications schedule must be sorted in ascending or descending
cannam@127 94 order of processor index. (This is necessary so that the data
cannam@127 95 coming in for the transpose does not overwrite data that will be
cannam@127 96 sent later; for that processor the incoming and outgoing blocks are
cannam@127 97 of different non-zero sizes.) Fortunately, because the schedule
cannam@127 98 is stall free, each parallel step of the schedule is independent
cannam@127 99 of every other step, and we can reorder the steps arbitrarily
cannam@127 100 to achieve any desired order on a particular process.
cannam@127 101 */
cannam@127 102
cannam@127 103 void free_comm_schedule(int **sched, int npes)
cannam@127 104 {
cannam@127 105 if (sched) {
cannam@127 106 int i;
cannam@127 107
cannam@127 108 for (i = 0; i < npes; ++i)
cannam@127 109 free(sched[i]);
cannam@127 110 free(sched);
cannam@127 111 }
cannam@127 112 }
cannam@127 113
cannam@127 114 void empty_comm_schedule(int **sched, int npes)
cannam@127 115 {
cannam@127 116 int i;
cannam@127 117 for (i = 0; i < npes; ++i)
cannam@127 118 sched[i][0] = -1;
cannam@127 119 }
cannam@127 120
cannam@127 121 extern void fill_comm_schedule(int **sched, int npes);
cannam@127 122
cannam@127 123 /* Create a new communications schedule for a given number of processors.
cannam@127 124 The schedule is initialized to a deadlock-free, maximum overlap
cannam@127 125 schedule. Returns NULL on an error (may print a message to
cannam@127 126 stderr if there is a program bug detected). */
cannam@127 127 int **make_comm_schedule(int npes)
cannam@127 128 {
cannam@127 129 int **sched;
cannam@127 130 int i;
cannam@127 131
cannam@127 132 sched = (int **) malloc(sizeof(int *) * npes);
cannam@127 133 if (!sched)
cannam@127 134 return NULL;
cannam@127 135
cannam@127 136 for (i = 0; i < npes; ++i)
cannam@127 137 sched[i] = NULL;
cannam@127 138
cannam@127 139 for (i = 0; i < npes; ++i) {
cannam@127 140 sched[i] = (int *) malloc(sizeof(int) * 10 * (npes + 1));
cannam@127 141 if (!sched[i]) {
cannam@127 142 free_comm_schedule(sched,npes);
cannam@127 143 return NULL;
cannam@127 144 }
cannam@127 145 }
cannam@127 146
cannam@127 147 empty_comm_schedule(sched,npes);
cannam@127 148 fill_comm_schedule(sched,npes);
cannam@127 149
cannam@127 150 if (!check_comm_schedule(sched,npes)) {
cannam@127 151 free_comm_schedule(sched,npes);
cannam@127 152 return NULL;
cannam@127 153 }
cannam@127 154
cannam@127 155 return sched;
cannam@127 156 }
cannam@127 157
cannam@127 158 static void add_dest_to_comm_schedule(int **sched, int pe, int dest)
cannam@127 159 {
cannam@127 160 int i;
cannam@127 161
cannam@127 162 for (i = 0; sched[pe][i] != -1; ++i)
cannam@127 163 ;
cannam@127 164
cannam@127 165 sched[pe][i] = dest;
cannam@127 166 sched[pe][i+1] = -1;
cannam@127 167 }
cannam@127 168
cannam@127 169 static void add_pair_to_comm_schedule(int **sched, int pe1, int pe2)
cannam@127 170 {
cannam@127 171 add_dest_to_comm_schedule(sched, pe1, pe2);
cannam@127 172 if (pe1 != pe2)
cannam@127 173 add_dest_to_comm_schedule(sched, pe2, pe1);
cannam@127 174 }
cannam@127 175
cannam@127 176 /* Simplification of algorithm presented in [1] (we have fewer
cannam@127 177 constraints). Produces a perfect schedule (npes steps). */
cannam@127 178
cannam@127 179 void fill_comm_schedule(int **sched, int npes)
cannam@127 180 {
cannam@127 181 int pe, i, n;
cannam@127 182
cannam@127 183 if (npes % 2 == 0) {
cannam@127 184 n = npes;
cannam@127 185 for (pe = 0; pe < npes; ++pe)
cannam@127 186 add_pair_to_comm_schedule(sched,pe,pe);
cannam@127 187 }
cannam@127 188 else
cannam@127 189 n = npes + 1;
cannam@127 190
cannam@127 191 for (pe = 0; pe < n - 1; ++pe) {
cannam@127 192 add_pair_to_comm_schedule(sched, pe, npes % 2 == 0 ? npes - 1 : pe);
cannam@127 193
cannam@127 194 for (i = 1; i < n/2; ++i) {
cannam@127 195 int pe_a, pe_b;
cannam@127 196
cannam@127 197 pe_a = pe - i;
cannam@127 198 if (pe_a < 0)
cannam@127 199 pe_a += n - 1;
cannam@127 200
cannam@127 201 pe_b = (pe + i) % (n - 1);
cannam@127 202
cannam@127 203 add_pair_to_comm_schedule(sched,pe_a,pe_b);
cannam@127 204 }
cannam@127 205 }
cannam@127 206 }
cannam@127 207
cannam@127 208 /* given an array sched[npes], fills it with the communications
cannam@127 209 schedule for process pe. */
cannam@127 210 void fill1_comm_sched(int *sched, int which_pe, int npes)
cannam@127 211 {
cannam@127 212 int pe, i, n, s = 0;
cannam@127 213 if (npes % 2 == 0) {
cannam@127 214 n = npes;
cannam@127 215 sched[s++] = which_pe;
cannam@127 216 }
cannam@127 217 else
cannam@127 218 n = npes + 1;
cannam@127 219 for (pe = 0; pe < n - 1; ++pe) {
cannam@127 220 if (npes % 2 == 0) {
cannam@127 221 if (pe == which_pe) sched[s++] = npes - 1;
cannam@127 222 else if (npes - 1 == which_pe) sched[s++] = pe;
cannam@127 223 }
cannam@127 224 else if (pe == which_pe) sched[s++] = pe;
cannam@127 225
cannam@127 226 if (pe != which_pe && which_pe < n - 1) {
cannam@127 227 i = (pe - which_pe + (n - 1)) % (n - 1);
cannam@127 228 if (i < n/2)
cannam@127 229 sched[s++] = (pe + i) % (n - 1);
cannam@127 230
cannam@127 231 i = (which_pe - pe + (n - 1)) % (n - 1);
cannam@127 232 if (i < n/2)
cannam@127 233 sched[s++] = (pe - i + (n - 1)) % (n - 1);
cannam@127 234 }
cannam@127 235 }
cannam@127 236 if (s != npes) {
cannam@127 237 fprintf(stderr, "bug in fill1_com_schedule (%d, %d/%d)\n",
cannam@127 238 s, which_pe, npes);
cannam@127 239 exit(EXIT_FAILURE);
cannam@127 240 }
cannam@127 241 }
cannam@127 242
cannam@127 243 /* sort the communication schedule sched for npes so that the schedule
cannam@127 244 on process sortpe is ascending or descending (!ascending). */
cannam@127 245 static void sort1_comm_sched(int *sched, int npes, int sortpe, int ascending)
cannam@127 246 {
cannam@127 247 int *sortsched, i;
cannam@127 248 sortsched = (int *) malloc(npes * sizeof(int) * 2);
cannam@127 249 fill1_comm_sched(sortsched, sortpe, npes);
cannam@127 250 if (ascending)
cannam@127 251 for (i = 0; i < npes; ++i)
cannam@127 252 sortsched[npes + sortsched[i]] = sched[i];
cannam@127 253 else
cannam@127 254 for (i = 0; i < npes; ++i)
cannam@127 255 sortsched[2*npes - 1 - sortsched[i]] = sched[i];
cannam@127 256 for (i = 0; i < npes; ++i)
cannam@127 257 sched[i] = sortsched[npes + i];
cannam@127 258 free(sortsched);
cannam@127 259 }
cannam@127 260
cannam@127 261 /* Below, we have various checks in case of bugs: */
cannam@127 262
cannam@127 263 /* check for deadlocks by simulating the schedule and looking for
cannam@127 264 cycles in the dependency list; returns 0 if there are deadlocks
cannam@127 265 (or other errors) */
cannam@127 266 static int check_schedule_deadlock(int **sched, int npes)
cannam@127 267 {
cannam@127 268 int *step, *depend, *visited, pe, pe2, period, done = 0;
cannam@127 269 int counter = 0;
cannam@127 270
cannam@127 271 /* step[pe] is the step in the schedule that a given pe is on */
cannam@127 272 step = (int *) malloc(sizeof(int) * npes);
cannam@127 273
cannam@127 274 /* depend[pe] is the pe' that pe is currently waiting for a message
cannam@127 275 from (-1 if none) */
cannam@127 276 depend = (int *) malloc(sizeof(int) * npes);
cannam@127 277
cannam@127 278 /* visited[pe] tells whether we have visited the current pe already
cannam@127 279 when we are looking for cycles. */
cannam@127 280 visited = (int *) malloc(sizeof(int) * npes);
cannam@127 281
cannam@127 282 if (!step || !depend || !visited) {
cannam@127 283 free(step); free(depend); free(visited);
cannam@127 284 return 0;
cannam@127 285 }
cannam@127 286
cannam@127 287 for (pe = 0; pe < npes; ++pe)
cannam@127 288 step[pe] = 0;
cannam@127 289
cannam@127 290 while (!done) {
cannam@127 291 ++counter;
cannam@127 292
cannam@127 293 for (pe = 0; pe < npes; ++pe)
cannam@127 294 depend[pe] = sched[pe][step[pe]];
cannam@127 295
cannam@127 296 /* now look for cycles in the dependencies with period > 2: */
cannam@127 297 for (pe = 0; pe < npes; ++pe)
cannam@127 298 if (depend[pe] != -1) {
cannam@127 299 for (pe2 = 0; pe2 < npes; ++pe2)
cannam@127 300 visited[pe2] = 0;
cannam@127 301
cannam@127 302 period = 0;
cannam@127 303 pe2 = pe;
cannam@127 304 do {
cannam@127 305 visited[pe2] = period + 1;
cannam@127 306 pe2 = depend[pe2];
cannam@127 307 period++;
cannam@127 308 } while (pe2 != -1 && !visited[pe2]);
cannam@127 309
cannam@127 310 if (pe2 == -1) {
cannam@127 311 fprintf(stderr,
cannam@127 312 "BUG: unterminated cycle in schedule!\n");
cannam@127 313 free(step); free(depend);
cannam@127 314 free(visited);
cannam@127 315 return 0;
cannam@127 316 }
cannam@127 317 if (period - (visited[pe2] - 1) > 2) {
cannam@127 318 fprintf(stderr,"BUG: deadlock in schedule!\n");
cannam@127 319 free(step); free(depend);
cannam@127 320 free(visited);
cannam@127 321 return 0;
cannam@127 322 }
cannam@127 323
cannam@127 324 if (pe2 == pe)
cannam@127 325 step[pe]++;
cannam@127 326 }
cannam@127 327
cannam@127 328 done = 1;
cannam@127 329 for (pe = 0; pe < npes; ++pe)
cannam@127 330 if (sched[pe][step[pe]] != -1) {
cannam@127 331 done = 0;
cannam@127 332 break;
cannam@127 333 }
cannam@127 334 }
cannam@127 335
cannam@127 336 free(step); free(depend); free(visited);
cannam@127 337 return (counter > 0 ? counter : 1);
cannam@127 338 }
cannam@127 339
cannam@127 340 /* sanity checks; prints message and returns 0 on failure.
cannam@127 341 undocumented feature: the return value on success is actually the
cannam@127 342 number of steps required for the schedule to complete, counting
cannam@127 343 stalls. */
cannam@127 344 int check_comm_schedule(int **sched, int npes)
cannam@127 345 {
cannam@127 346 int pe, i, comm_pe;
cannam@127 347
cannam@127 348 for (pe = 0; pe < npes; ++pe) {
cannam@127 349 for (comm_pe = 0; comm_pe < npes; ++comm_pe) {
cannam@127 350 for (i = 0; sched[pe][i] != -1 && sched[pe][i] != comm_pe; ++i)
cannam@127 351 ;
cannam@127 352 if (sched[pe][i] == -1) {
cannam@127 353 fprintf(stderr,"BUG: schedule never sends message from "
cannam@127 354 "%d to %d.\n",pe,comm_pe);
cannam@127 355 return 0; /* never send message to comm_pe */
cannam@127 356 }
cannam@127 357 }
cannam@127 358 for (i = 0; sched[pe][i] != -1; ++i)
cannam@127 359 ;
cannam@127 360 if (i != npes) {
cannam@127 361 fprintf(stderr,"BUG: schedule sends too many messages from "
cannam@127 362 "%d\n",pe);
cannam@127 363 return 0;
cannam@127 364 }
cannam@127 365 }
cannam@127 366 return check_schedule_deadlock(sched,npes);
cannam@127 367 }
cannam@127 368
cannam@127 369 /* invert the order of all the schedules; this has no effect on
cannam@127 370 its required properties. */
cannam@127 371 void invert_comm_schedule(int **sched, int npes)
cannam@127 372 {
cannam@127 373 int pe, i;
cannam@127 374
cannam@127 375 for (pe = 0; pe < npes; ++pe)
cannam@127 376 for (i = 0; i < npes/2; ++i) {
cannam@127 377 int dummy = sched[pe][i];
cannam@127 378 sched[pe][i] = sched[pe][npes-1-i];
cannam@127 379 sched[pe][npes-1-i] = dummy;
cannam@127 380 }
cannam@127 381 }
cannam@127 382
cannam@127 383 /* Sort the schedule for sort_pe in ascending order of processor
cannam@127 384 index. Unfortunately, for odd npes (when schedule has a stall
cannam@127 385 to begin with) this will introduce an extra stall due to
cannam@127 386 the motion of the self-communication past a stall. We could
cannam@127 387 fix this if it were really important. Actually, we don't
cannam@127 388 get an extra stall when sort_pe == 0 or npes-1, which is sufficient
cannam@127 389 for our purposes. */
cannam@127 390 void sort_comm_schedule(int **sched, int npes, int sort_pe)
cannam@127 391 {
cannam@127 392 int i,j,pe;
cannam@127 393
cannam@127 394 /* Note that we can do this sort in O(npes) swaps because we know
cannam@127 395 that the numbers we are sorting are just 0...npes-1. But we'll
cannam@127 396 just do a bubble sort for simplicity here. */
cannam@127 397
cannam@127 398 for (i = 0; i < npes - 1; ++i)
cannam@127 399 for (j = i + 1; j < npes; ++j)
cannam@127 400 if (sched[sort_pe][i] > sched[sort_pe][j]) {
cannam@127 401 for (pe = 0; pe < npes; ++pe) {
cannam@127 402 int s = sched[pe][i];
cannam@127 403 sched[pe][i] = sched[pe][j];
cannam@127 404 sched[pe][j] = s;
cannam@127 405 }
cannam@127 406 }
cannam@127 407 }
cannam@127 408
cannam@127 409 /* print the schedule (for debugging purposes) */
cannam@127 410 void print_comm_schedule(int **sched, int npes)
cannam@127 411 {
cannam@127 412 int pe, i, width;
cannam@127 413
cannam@127 414 if (npes < 10)
cannam@127 415 width = 1;
cannam@127 416 else if (npes < 100)
cannam@127 417 width = 2;
cannam@127 418 else
cannam@127 419 width = 3;
cannam@127 420
cannam@127 421 for (pe = 0; pe < npes; ++pe) {
cannam@127 422 printf("pe %*d schedule:", width, pe);
cannam@127 423 for (i = 0; sched[pe][i] != -1; ++i)
cannam@127 424 printf(" %*d",width,sched[pe][i]);
cannam@127 425 printf("\n");
cannam@127 426 }
cannam@127 427 }
cannam@127 428
cannam@127 429 int main(int argc, char **argv)
cannam@127 430 {
cannam@127 431 int **sched;
cannam@127 432 int npes = -1, sortpe = -1, steps, i;
cannam@127 433
cannam@127 434 if (argc >= 2) {
cannam@127 435 npes = atoi(argv[1]);
cannam@127 436 if (npes <= 0) {
cannam@127 437 fprintf(stderr,"npes must be positive!");
cannam@127 438 return 1;
cannam@127 439 }
cannam@127 440 }
cannam@127 441 if (argc >= 3) {
cannam@127 442 sortpe = atoi(argv[2]);
cannam@127 443 if (sortpe < 0 || sortpe >= npes) {
cannam@127 444 fprintf(stderr,"sortpe must be between 0 and npes-1.\n");
cannam@127 445 return 1;
cannam@127 446 }
cannam@127 447 }
cannam@127 448
cannam@127 449 if (npes != -1) {
cannam@127 450 printf("Computing schedule for npes = %d:\n",npes);
cannam@127 451 sched = make_comm_schedule(npes);
cannam@127 452 if (!sched) {
cannam@127 453 fprintf(stderr,"Out of memory!");
cannam@127 454 return 6;
cannam@127 455 }
cannam@127 456
cannam@127 457 if (steps = check_comm_schedule(sched,npes))
cannam@127 458 printf("schedule OK (takes %d steps to complete).\n", steps);
cannam@127 459 else
cannam@127 460 printf("schedule not OK.\n");
cannam@127 461
cannam@127 462 print_comm_schedule(sched, npes);
cannam@127 463
cannam@127 464 if (sortpe != -1) {
cannam@127 465 printf("\nRe-creating schedule for pe = %d...\n", sortpe);
cannam@127 466 int *sched1 = (int*) malloc(sizeof(int) * npes);
cannam@127 467 for (i = 0; i < npes; ++i) sched1[i] = -1;
cannam@127 468 fill1_comm_sched(sched1, sortpe, npes);
cannam@127 469 printf(" =");
cannam@127 470 for (i = 0; i < npes; ++i)
cannam@127 471 printf(" %*d", npes < 10 ? 1 : (npes < 100 ? 2 : 3),
cannam@127 472 sched1[i]);
cannam@127 473 printf("\n");
cannam@127 474
cannam@127 475 printf("\nSorting schedule for sortpe = %d...\n", sortpe);
cannam@127 476 sort_comm_schedule(sched,npes,sortpe);
cannam@127 477
cannam@127 478 if (steps = check_comm_schedule(sched,npes))
cannam@127 479 printf("schedule OK (takes %d steps to complete).\n",
cannam@127 480 steps);
cannam@127 481 else
cannam@127 482 printf("schedule not OK.\n");
cannam@127 483
cannam@127 484 print_comm_schedule(sched, npes);
cannam@127 485
cannam@127 486 printf("\nInverting schedule...\n");
cannam@127 487 invert_comm_schedule(sched,npes);
cannam@127 488
cannam@127 489 if (steps = check_comm_schedule(sched,npes))
cannam@127 490 printf("schedule OK (takes %d steps to complete).\n",
cannam@127 491 steps);
cannam@127 492 else
cannam@127 493 printf("schedule not OK.\n");
cannam@127 494
cannam@127 495 print_comm_schedule(sched, npes);
cannam@127 496
cannam@127 497 free_comm_schedule(sched,npes);
cannam@127 498
cannam@127 499 free(sched1);
cannam@127 500 }
cannam@127 501 }
cannam@127 502 else {
cannam@127 503 printf("Doing infinite tests...\n");
cannam@127 504 for (npes = 1; ; ++npes) {
cannam@127 505 int *sched1 = (int*) malloc(sizeof(int) * npes);
cannam@127 506 printf("npes = %d...",npes);
cannam@127 507 sched = make_comm_schedule(npes);
cannam@127 508 if (!sched) {
cannam@127 509 fprintf(stderr,"Out of memory!\n");
cannam@127 510 return 5;
cannam@127 511 }
cannam@127 512 for (sortpe = 0; sortpe < npes; ++sortpe) {
cannam@127 513 empty_comm_schedule(sched,npes);
cannam@127 514 fill_comm_schedule(sched,npes);
cannam@127 515 if (!check_comm_schedule(sched,npes)) {
cannam@127 516 fprintf(stderr,
cannam@127 517 "\n -- fill error for sortpe = %d!\n",sortpe);
cannam@127 518 return 2;
cannam@127 519 }
cannam@127 520
cannam@127 521 for (i = 0; i < npes; ++i) sched1[i] = -1;
cannam@127 522 fill1_comm_sched(sched1, sortpe, npes);
cannam@127 523 for (i = 0; i < npes; ++i)
cannam@127 524 if (sched1[i] != sched[sortpe][i])
cannam@127 525 fprintf(stderr,
cannam@127 526 "\n -- fill1 error for pe = %d!\n",
cannam@127 527 sortpe);
cannam@127 528
cannam@127 529 sort_comm_schedule(sched,npes,sortpe);
cannam@127 530 if (!check_comm_schedule(sched,npes)) {
cannam@127 531 fprintf(stderr,
cannam@127 532 "\n -- sort error for sortpe = %d!\n",sortpe);
cannam@127 533 return 3;
cannam@127 534 }
cannam@127 535 invert_comm_schedule(sched,npes);
cannam@127 536 if (!check_comm_schedule(sched,npes)) {
cannam@127 537 fprintf(stderr,
cannam@127 538 "\n -- invert error for sortpe = %d!\n",
cannam@127 539 sortpe);
cannam@127 540 return 4;
cannam@127 541 }
cannam@127 542 }
cannam@127 543 free_comm_schedule(sched,npes);
cannam@127 544 printf("OK\n");
cannam@127 545 if (npes % 50 == 0)
cannam@127 546 printf("(...Hit Ctrl-C to stop...)\n");
cannam@127 547 free(sched1);
cannam@127 548 }
cannam@127 549 }
cannam@127 550
cannam@127 551 return 0;
cannam@127 552 }