annotate src/fftw-3.3.5/mpi/testsched.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 1999-2003, 2007-8 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /**********************************************************************/
Chris@42 22 /* This is a modified and combined version of the sched.c and
Chris@42 23 test_sched.c files shipped with FFTW 2, written to implement and
Chris@42 24 test various all-to-all communications scheduling patterns.
Chris@42 25
Chris@42 26 It is not used in FFTW 3, but I keep it around in case we ever want
Chris@42 27 to play with this again or to change algorithms. In particular, I
Chris@42 28 used it to implement and test the fill1_comm_sched routine in
Chris@42 29 transpose-pairwise.c, which allows us to create a schedule for one
Chris@42 30 process at a time and is much more compact than the FFTW 2 code.
Chris@42 31
Chris@42 32 Note that the scheduling algorithm is somewhat modified from that
Chris@42 33 of FFTW 2. Originally, I thought that one "stall" in the schedule
Chris@42 34 was unavoidable for odd numbers of processes, since this is the
Chris@42 35 case for the soccer-timetabling problem. However, because of the
Chris@42 36 self-communication step, we can use the self-communication to fill
Chris@42 37 in the stalls. (Thanks to Ralf Wildenhues for pointing this out.)
Chris@42 38 This greatly simplifies the process re-sorting algorithm. */
Chris@42 39
Chris@42 40 /**********************************************************************/
Chris@42 41
Chris@42 42 #include <stdio.h>
Chris@42 43 #include <stdlib.h>
Chris@42 44
Chris@42 45 /* This file contains routines to compute communications schedules for
Chris@42 46 all-to-all communications (complete exchanges) that are performed
Chris@42 47 in-place. (That is, the block that processor x sends to processor
Chris@42 48 y gets replaced on processor x by a block received from processor y.)
Chris@42 49
Chris@42 50 A schedule, int **sched, is a two-dimensional array where
Chris@42 51 sched[pe][i] is the processor that pe expects to exchange a message
Chris@42 52 with on the i-th step of the exchange. sched[pe][i] == -1 for the
Chris@42 53 i after the last exchange scheduled on pe.
Chris@42 54
Chris@42 55 Here, processors (pe's, for processing elements), are numbered from
Chris@42 56 0 to npes-1.
Chris@42 57
Chris@42 58 There are a couple of constraints that a schedule should satisfy
Chris@42 59 (besides the obvious one that every processor has to communicate
Chris@42 60 with every other processor exactly once).
Chris@42 61
Chris@42 62 * First, and most importantly, there must be no deadlocks.
Chris@42 63
Chris@42 64 * Second, we would like to overlap communications as much as possible,
Chris@42 65 so that all exchanges occur in parallel. It turns out that perfect
Chris@42 66 overlap is possible for all number of processes (npes).
Chris@42 67
Chris@42 68 It turns out that this scheduling problem is actually well-studied,
Chris@42 69 and good solutions are known. The problem is known as a
Chris@42 70 "time-tabling" problem, and is specifically the problem of
Chris@42 71 scheduling a sports competition (where n teams must compete exactly
Chris@42 72 once with every other team). The problem is discussed and
Chris@42 73 algorithms are presented in:
Chris@42 74
Chris@42 75 [1] J. A. M. Schreuder, "Constructing Timetables for Sport
Chris@42 76 Competitions," Mathematical Programming Study 13, pp. 58-67 (1980).
Chris@42 77
Chris@42 78 [2] A. Schaerf, "Scheduling Sport Tournaments using Constraint
Chris@42 79 Logic Programming," Proc. of 12th Europ. Conf. on
Chris@42 80 Artif. Intell. (ECAI-96), pp. 634-639 (Budapest 1996).
Chris@42 81 http://hermes.dis.uniromal.it/~aschaerf/publications.html
Chris@42 82
Chris@42 83 (These people actually impose a lot of additional constraints that
Chris@42 84 we don't care about, so they are solving harder problems. [1] gives
Chris@42 85 a simple enough algorithm for our purposes, though.)
Chris@42 86
Chris@42 87 In the timetabling problem, N teams can all play one another in N-1
Chris@42 88 steps if N is even, and N steps if N is odd. Here, however,
Chris@42 89 there is a "self-communication" step (a team must also "play itself")
Chris@42 90 and so we can always make an optimal N-step schedule regardless of N.
Chris@42 91
Chris@42 92 However, we have to do more: for a particular processor, the
Chris@42 93 communications schedule must be sorted in ascending or descending
Chris@42 94 order of processor index. (This is necessary so that the data
Chris@42 95 coming in for the transpose does not overwrite data that will be
Chris@42 96 sent later; for that processor the incoming and outgoing blocks are
Chris@42 97 of different non-zero sizes.) Fortunately, because the schedule
Chris@42 98 is stall free, each parallel step of the schedule is independent
Chris@42 99 of every other step, and we can reorder the steps arbitrarily
Chris@42 100 to achieve any desired order on a particular process.
Chris@42 101 */
Chris@42 102
Chris@42 103 void free_comm_schedule(int **sched, int npes)
Chris@42 104 {
Chris@42 105 if (sched) {
Chris@42 106 int i;
Chris@42 107
Chris@42 108 for (i = 0; i < npes; ++i)
Chris@42 109 free(sched[i]);
Chris@42 110 free(sched);
Chris@42 111 }
Chris@42 112 }
Chris@42 113
Chris@42 114 void empty_comm_schedule(int **sched, int npes)
Chris@42 115 {
Chris@42 116 int i;
Chris@42 117 for (i = 0; i < npes; ++i)
Chris@42 118 sched[i][0] = -1;
Chris@42 119 }
Chris@42 120
Chris@42 121 extern void fill_comm_schedule(int **sched, int npes);
Chris@42 122
Chris@42 123 /* Create a new communications schedule for a given number of processors.
Chris@42 124 The schedule is initialized to a deadlock-free, maximum overlap
Chris@42 125 schedule. Returns NULL on an error (may print a message to
Chris@42 126 stderr if there is a program bug detected). */
Chris@42 127 int **make_comm_schedule(int npes)
Chris@42 128 {
Chris@42 129 int **sched;
Chris@42 130 int i;
Chris@42 131
Chris@42 132 sched = (int **) malloc(sizeof(int *) * npes);
Chris@42 133 if (!sched)
Chris@42 134 return NULL;
Chris@42 135
Chris@42 136 for (i = 0; i < npes; ++i)
Chris@42 137 sched[i] = NULL;
Chris@42 138
Chris@42 139 for (i = 0; i < npes; ++i) {
Chris@42 140 sched[i] = (int *) malloc(sizeof(int) * 10 * (npes + 1));
Chris@42 141 if (!sched[i]) {
Chris@42 142 free_comm_schedule(sched,npes);
Chris@42 143 return NULL;
Chris@42 144 }
Chris@42 145 }
Chris@42 146
Chris@42 147 empty_comm_schedule(sched,npes);
Chris@42 148 fill_comm_schedule(sched,npes);
Chris@42 149
Chris@42 150 if (!check_comm_schedule(sched,npes)) {
Chris@42 151 free_comm_schedule(sched,npes);
Chris@42 152 return NULL;
Chris@42 153 }
Chris@42 154
Chris@42 155 return sched;
Chris@42 156 }
Chris@42 157
Chris@42 158 static void add_dest_to_comm_schedule(int **sched, int pe, int dest)
Chris@42 159 {
Chris@42 160 int i;
Chris@42 161
Chris@42 162 for (i = 0; sched[pe][i] != -1; ++i)
Chris@42 163 ;
Chris@42 164
Chris@42 165 sched[pe][i] = dest;
Chris@42 166 sched[pe][i+1] = -1;
Chris@42 167 }
Chris@42 168
Chris@42 169 static void add_pair_to_comm_schedule(int **sched, int pe1, int pe2)
Chris@42 170 {
Chris@42 171 add_dest_to_comm_schedule(sched, pe1, pe2);
Chris@42 172 if (pe1 != pe2)
Chris@42 173 add_dest_to_comm_schedule(sched, pe2, pe1);
Chris@42 174 }
Chris@42 175
Chris@42 176 /* Simplification of algorithm presented in [1] (we have fewer
Chris@42 177 constraints). Produces a perfect schedule (npes steps). */
Chris@42 178
Chris@42 179 void fill_comm_schedule(int **sched, int npes)
Chris@42 180 {
Chris@42 181 int pe, i, n;
Chris@42 182
Chris@42 183 if (npes % 2 == 0) {
Chris@42 184 n = npes;
Chris@42 185 for (pe = 0; pe < npes; ++pe)
Chris@42 186 add_pair_to_comm_schedule(sched,pe,pe);
Chris@42 187 }
Chris@42 188 else
Chris@42 189 n = npes + 1;
Chris@42 190
Chris@42 191 for (pe = 0; pe < n - 1; ++pe) {
Chris@42 192 add_pair_to_comm_schedule(sched, pe, npes % 2 == 0 ? npes - 1 : pe);
Chris@42 193
Chris@42 194 for (i = 1; i < n/2; ++i) {
Chris@42 195 int pe_a, pe_b;
Chris@42 196
Chris@42 197 pe_a = pe - i;
Chris@42 198 if (pe_a < 0)
Chris@42 199 pe_a += n - 1;
Chris@42 200
Chris@42 201 pe_b = (pe + i) % (n - 1);
Chris@42 202
Chris@42 203 add_pair_to_comm_schedule(sched,pe_a,pe_b);
Chris@42 204 }
Chris@42 205 }
Chris@42 206 }
Chris@42 207
Chris@42 208 /* given an array sched[npes], fills it with the communications
Chris@42 209 schedule for process pe. */
Chris@42 210 void fill1_comm_sched(int *sched, int which_pe, int npes)
Chris@42 211 {
Chris@42 212 int pe, i, n, s = 0;
Chris@42 213 if (npes % 2 == 0) {
Chris@42 214 n = npes;
Chris@42 215 sched[s++] = which_pe;
Chris@42 216 }
Chris@42 217 else
Chris@42 218 n = npes + 1;
Chris@42 219 for (pe = 0; pe < n - 1; ++pe) {
Chris@42 220 if (npes % 2 == 0) {
Chris@42 221 if (pe == which_pe) sched[s++] = npes - 1;
Chris@42 222 else if (npes - 1 == which_pe) sched[s++] = pe;
Chris@42 223 }
Chris@42 224 else if (pe == which_pe) sched[s++] = pe;
Chris@42 225
Chris@42 226 if (pe != which_pe && which_pe < n - 1) {
Chris@42 227 i = (pe - which_pe + (n - 1)) % (n - 1);
Chris@42 228 if (i < n/2)
Chris@42 229 sched[s++] = (pe + i) % (n - 1);
Chris@42 230
Chris@42 231 i = (which_pe - pe + (n - 1)) % (n - 1);
Chris@42 232 if (i < n/2)
Chris@42 233 sched[s++] = (pe - i + (n - 1)) % (n - 1);
Chris@42 234 }
Chris@42 235 }
Chris@42 236 if (s != npes) {
Chris@42 237 fprintf(stderr, "bug in fill1_com_schedule (%d, %d/%d)\n",
Chris@42 238 s, which_pe, npes);
Chris@42 239 exit(EXIT_FAILURE);
Chris@42 240 }
Chris@42 241 }
Chris@42 242
Chris@42 243 /* sort the communication schedule sched for npes so that the schedule
Chris@42 244 on process sortpe is ascending or descending (!ascending). */
Chris@42 245 static void sort1_comm_sched(int *sched, int npes, int sortpe, int ascending)
Chris@42 246 {
Chris@42 247 int *sortsched, i;
Chris@42 248 sortsched = (int *) malloc(npes * sizeof(int) * 2);
Chris@42 249 fill1_comm_sched(sortsched, sortpe, npes);
Chris@42 250 if (ascending)
Chris@42 251 for (i = 0; i < npes; ++i)
Chris@42 252 sortsched[npes + sortsched[i]] = sched[i];
Chris@42 253 else
Chris@42 254 for (i = 0; i < npes; ++i)
Chris@42 255 sortsched[2*npes - 1 - sortsched[i]] = sched[i];
Chris@42 256 for (i = 0; i < npes; ++i)
Chris@42 257 sched[i] = sortsched[npes + i];
Chris@42 258 free(sortsched);
Chris@42 259 }
Chris@42 260
Chris@42 261 /* Below, we have various checks in case of bugs: */
Chris@42 262
Chris@42 263 /* check for deadlocks by simulating the schedule and looking for
Chris@42 264 cycles in the dependency list; returns 0 if there are deadlocks
Chris@42 265 (or other errors) */
Chris@42 266 static int check_schedule_deadlock(int **sched, int npes)
Chris@42 267 {
Chris@42 268 int *step, *depend, *visited, pe, pe2, period, done = 0;
Chris@42 269 int counter = 0;
Chris@42 270
Chris@42 271 /* step[pe] is the step in the schedule that a given pe is on */
Chris@42 272 step = (int *) malloc(sizeof(int) * npes);
Chris@42 273
Chris@42 274 /* depend[pe] is the pe' that pe is currently waiting for a message
Chris@42 275 from (-1 if none) */
Chris@42 276 depend = (int *) malloc(sizeof(int) * npes);
Chris@42 277
Chris@42 278 /* visited[pe] tells whether we have visited the current pe already
Chris@42 279 when we are looking for cycles. */
Chris@42 280 visited = (int *) malloc(sizeof(int) * npes);
Chris@42 281
Chris@42 282 if (!step || !depend || !visited) {
Chris@42 283 free(step); free(depend); free(visited);
Chris@42 284 return 0;
Chris@42 285 }
Chris@42 286
Chris@42 287 for (pe = 0; pe < npes; ++pe)
Chris@42 288 step[pe] = 0;
Chris@42 289
Chris@42 290 while (!done) {
Chris@42 291 ++counter;
Chris@42 292
Chris@42 293 for (pe = 0; pe < npes; ++pe)
Chris@42 294 depend[pe] = sched[pe][step[pe]];
Chris@42 295
Chris@42 296 /* now look for cycles in the dependencies with period > 2: */
Chris@42 297 for (pe = 0; pe < npes; ++pe)
Chris@42 298 if (depend[pe] != -1) {
Chris@42 299 for (pe2 = 0; pe2 < npes; ++pe2)
Chris@42 300 visited[pe2] = 0;
Chris@42 301
Chris@42 302 period = 0;
Chris@42 303 pe2 = pe;
Chris@42 304 do {
Chris@42 305 visited[pe2] = period + 1;
Chris@42 306 pe2 = depend[pe2];
Chris@42 307 period++;
Chris@42 308 } while (pe2 != -1 && !visited[pe2]);
Chris@42 309
Chris@42 310 if (pe2 == -1) {
Chris@42 311 fprintf(stderr,
Chris@42 312 "BUG: unterminated cycle in schedule!\n");
Chris@42 313 free(step); free(depend);
Chris@42 314 free(visited);
Chris@42 315 return 0;
Chris@42 316 }
Chris@42 317 if (period - (visited[pe2] - 1) > 2) {
Chris@42 318 fprintf(stderr,"BUG: deadlock in schedule!\n");
Chris@42 319 free(step); free(depend);
Chris@42 320 free(visited);
Chris@42 321 return 0;
Chris@42 322 }
Chris@42 323
Chris@42 324 if (pe2 == pe)
Chris@42 325 step[pe]++;
Chris@42 326 }
Chris@42 327
Chris@42 328 done = 1;
Chris@42 329 for (pe = 0; pe < npes; ++pe)
Chris@42 330 if (sched[pe][step[pe]] != -1) {
Chris@42 331 done = 0;
Chris@42 332 break;
Chris@42 333 }
Chris@42 334 }
Chris@42 335
Chris@42 336 free(step); free(depend); free(visited);
Chris@42 337 return (counter > 0 ? counter : 1);
Chris@42 338 }
Chris@42 339
Chris@42 340 /* sanity checks; prints message and returns 0 on failure.
Chris@42 341 undocumented feature: the return value on success is actually the
Chris@42 342 number of steps required for the schedule to complete, counting
Chris@42 343 stalls. */
Chris@42 344 int check_comm_schedule(int **sched, int npes)
Chris@42 345 {
Chris@42 346 int pe, i, comm_pe;
Chris@42 347
Chris@42 348 for (pe = 0; pe < npes; ++pe) {
Chris@42 349 for (comm_pe = 0; comm_pe < npes; ++comm_pe) {
Chris@42 350 for (i = 0; sched[pe][i] != -1 && sched[pe][i] != comm_pe; ++i)
Chris@42 351 ;
Chris@42 352 if (sched[pe][i] == -1) {
Chris@42 353 fprintf(stderr,"BUG: schedule never sends message from "
Chris@42 354 "%d to %d.\n",pe,comm_pe);
Chris@42 355 return 0; /* never send message to comm_pe */
Chris@42 356 }
Chris@42 357 }
Chris@42 358 for (i = 0; sched[pe][i] != -1; ++i)
Chris@42 359 ;
Chris@42 360 if (i != npes) {
Chris@42 361 fprintf(stderr,"BUG: schedule sends too many messages from "
Chris@42 362 "%d\n",pe);
Chris@42 363 return 0;
Chris@42 364 }
Chris@42 365 }
Chris@42 366 return check_schedule_deadlock(sched,npes);
Chris@42 367 }
Chris@42 368
Chris@42 369 /* invert the order of all the schedules; this has no effect on
Chris@42 370 its required properties. */
Chris@42 371 void invert_comm_schedule(int **sched, int npes)
Chris@42 372 {
Chris@42 373 int pe, i;
Chris@42 374
Chris@42 375 for (pe = 0; pe < npes; ++pe)
Chris@42 376 for (i = 0; i < npes/2; ++i) {
Chris@42 377 int dummy = sched[pe][i];
Chris@42 378 sched[pe][i] = sched[pe][npes-1-i];
Chris@42 379 sched[pe][npes-1-i] = dummy;
Chris@42 380 }
Chris@42 381 }
Chris@42 382
Chris@42 383 /* Sort the schedule for sort_pe in ascending order of processor
Chris@42 384 index. Unfortunately, for odd npes (when schedule has a stall
Chris@42 385 to begin with) this will introduce an extra stall due to
Chris@42 386 the motion of the self-communication past a stall. We could
Chris@42 387 fix this if it were really important. Actually, we don't
Chris@42 388 get an extra stall when sort_pe == 0 or npes-1, which is sufficient
Chris@42 389 for our purposes. */
Chris@42 390 void sort_comm_schedule(int **sched, int npes, int sort_pe)
Chris@42 391 {
Chris@42 392 int i,j,pe;
Chris@42 393
Chris@42 394 /* Note that we can do this sort in O(npes) swaps because we know
Chris@42 395 that the numbers we are sorting are just 0...npes-1. But we'll
Chris@42 396 just do a bubble sort for simplicity here. */
Chris@42 397
Chris@42 398 for (i = 0; i < npes - 1; ++i)
Chris@42 399 for (j = i + 1; j < npes; ++j)
Chris@42 400 if (sched[sort_pe][i] > sched[sort_pe][j]) {
Chris@42 401 for (pe = 0; pe < npes; ++pe) {
Chris@42 402 int s = sched[pe][i];
Chris@42 403 sched[pe][i] = sched[pe][j];
Chris@42 404 sched[pe][j] = s;
Chris@42 405 }
Chris@42 406 }
Chris@42 407 }
Chris@42 408
Chris@42 409 /* print the schedule (for debugging purposes) */
Chris@42 410 void print_comm_schedule(int **sched, int npes)
Chris@42 411 {
Chris@42 412 int pe, i, width;
Chris@42 413
Chris@42 414 if (npes < 10)
Chris@42 415 width = 1;
Chris@42 416 else if (npes < 100)
Chris@42 417 width = 2;
Chris@42 418 else
Chris@42 419 width = 3;
Chris@42 420
Chris@42 421 for (pe = 0; pe < npes; ++pe) {
Chris@42 422 printf("pe %*d schedule:", width, pe);
Chris@42 423 for (i = 0; sched[pe][i] != -1; ++i)
Chris@42 424 printf(" %*d",width,sched[pe][i]);
Chris@42 425 printf("\n");
Chris@42 426 }
Chris@42 427 }
Chris@42 428
Chris@42 429 int main(int argc, char **argv)
Chris@42 430 {
Chris@42 431 int **sched;
Chris@42 432 int npes = -1, sortpe = -1, steps, i;
Chris@42 433
Chris@42 434 if (argc >= 2) {
Chris@42 435 npes = atoi(argv[1]);
Chris@42 436 if (npes <= 0) {
Chris@42 437 fprintf(stderr,"npes must be positive!");
Chris@42 438 return 1;
Chris@42 439 }
Chris@42 440 }
Chris@42 441 if (argc >= 3) {
Chris@42 442 sortpe = atoi(argv[2]);
Chris@42 443 if (sortpe < 0 || sortpe >= npes) {
Chris@42 444 fprintf(stderr,"sortpe must be between 0 and npes-1.\n");
Chris@42 445 return 1;
Chris@42 446 }
Chris@42 447 }
Chris@42 448
Chris@42 449 if (npes != -1) {
Chris@42 450 printf("Computing schedule for npes = %d:\n",npes);
Chris@42 451 sched = make_comm_schedule(npes);
Chris@42 452 if (!sched) {
Chris@42 453 fprintf(stderr,"Out of memory!");
Chris@42 454 return 6;
Chris@42 455 }
Chris@42 456
Chris@42 457 if (steps = check_comm_schedule(sched,npes))
Chris@42 458 printf("schedule OK (takes %d steps to complete).\n", steps);
Chris@42 459 else
Chris@42 460 printf("schedule not OK.\n");
Chris@42 461
Chris@42 462 print_comm_schedule(sched, npes);
Chris@42 463
Chris@42 464 if (sortpe != -1) {
Chris@42 465 printf("\nRe-creating schedule for pe = %d...\n", sortpe);
Chris@42 466 int *sched1 = (int*) malloc(sizeof(int) * npes);
Chris@42 467 for (i = 0; i < npes; ++i) sched1[i] = -1;
Chris@42 468 fill1_comm_sched(sched1, sortpe, npes);
Chris@42 469 printf(" =");
Chris@42 470 for (i = 0; i < npes; ++i)
Chris@42 471 printf(" %*d", npes < 10 ? 1 : (npes < 100 ? 2 : 3),
Chris@42 472 sched1[i]);
Chris@42 473 printf("\n");
Chris@42 474
Chris@42 475 printf("\nSorting schedule for sortpe = %d...\n", sortpe);
Chris@42 476 sort_comm_schedule(sched,npes,sortpe);
Chris@42 477
Chris@42 478 if (steps = check_comm_schedule(sched,npes))
Chris@42 479 printf("schedule OK (takes %d steps to complete).\n",
Chris@42 480 steps);
Chris@42 481 else
Chris@42 482 printf("schedule not OK.\n");
Chris@42 483
Chris@42 484 print_comm_schedule(sched, npes);
Chris@42 485
Chris@42 486 printf("\nInverting schedule...\n");
Chris@42 487 invert_comm_schedule(sched,npes);
Chris@42 488
Chris@42 489 if (steps = check_comm_schedule(sched,npes))
Chris@42 490 printf("schedule OK (takes %d steps to complete).\n",
Chris@42 491 steps);
Chris@42 492 else
Chris@42 493 printf("schedule not OK.\n");
Chris@42 494
Chris@42 495 print_comm_schedule(sched, npes);
Chris@42 496
Chris@42 497 free_comm_schedule(sched,npes);
Chris@42 498
Chris@42 499 free(sched1);
Chris@42 500 }
Chris@42 501 }
Chris@42 502 else {
Chris@42 503 printf("Doing infinite tests...\n");
Chris@42 504 for (npes = 1; ; ++npes) {
Chris@42 505 int *sched1 = (int*) malloc(sizeof(int) * npes);
Chris@42 506 printf("npes = %d...",npes);
Chris@42 507 sched = make_comm_schedule(npes);
Chris@42 508 if (!sched) {
Chris@42 509 fprintf(stderr,"Out of memory!\n");
Chris@42 510 return 5;
Chris@42 511 }
Chris@42 512 for (sortpe = 0; sortpe < npes; ++sortpe) {
Chris@42 513 empty_comm_schedule(sched,npes);
Chris@42 514 fill_comm_schedule(sched,npes);
Chris@42 515 if (!check_comm_schedule(sched,npes)) {
Chris@42 516 fprintf(stderr,
Chris@42 517 "\n -- fill error for sortpe = %d!\n",sortpe);
Chris@42 518 return 2;
Chris@42 519 }
Chris@42 520
Chris@42 521 for (i = 0; i < npes; ++i) sched1[i] = -1;
Chris@42 522 fill1_comm_sched(sched1, sortpe, npes);
Chris@42 523 for (i = 0; i < npes; ++i)
Chris@42 524 if (sched1[i] != sched[sortpe][i])
Chris@42 525 fprintf(stderr,
Chris@42 526 "\n -- fill1 error for pe = %d!\n",
Chris@42 527 sortpe);
Chris@42 528
Chris@42 529 sort_comm_schedule(sched,npes,sortpe);
Chris@42 530 if (!check_comm_schedule(sched,npes)) {
Chris@42 531 fprintf(stderr,
Chris@42 532 "\n -- sort error for sortpe = %d!\n",sortpe);
Chris@42 533 return 3;
Chris@42 534 }
Chris@42 535 invert_comm_schedule(sched,npes);
Chris@42 536 if (!check_comm_schedule(sched,npes)) {
Chris@42 537 fprintf(stderr,
Chris@42 538 "\n -- invert error for sortpe = %d!\n",
Chris@42 539 sortpe);
Chris@42 540 return 4;
Chris@42 541 }
Chris@42 542 }
Chris@42 543 free_comm_schedule(sched,npes);
Chris@42 544 printf("OK\n");
Chris@42 545 if (npes % 50 == 0)
Chris@42 546 printf("(...Hit Ctrl-C to stop...)\n");
Chris@42 547 free(sched1);
Chris@42 548 }
Chris@42 549 }
Chris@42 550
Chris@42 551 return 0;
Chris@42 552 }