annotate src/fftw-3.3.8/threads/threads.c @ 169:223a55898ab9 tip default

Add null config files
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 02 Mar 2020 14:03:47 +0000
parents bd3cc4d1df30
children
rev   line source
cannam@167 1 /*
cannam@167 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
cannam@167 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
cannam@167 4 *
cannam@167 5 * This program is free software; you can redistribute it and/or modify
cannam@167 6 * it under the terms of the GNU General Public License as published by
cannam@167 7 * the Free Software Foundation; either version 2 of the License, or
cannam@167 8 * (at your option) any later version.
cannam@167 9 *
cannam@167 10 * This program is distributed in the hope that it will be useful,
cannam@167 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cannam@167 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cannam@167 13 * GNU General Public License for more details.
cannam@167 14 *
cannam@167 15 * You should have received a copy of the GNU General Public License
cannam@167 16 * along with this program; if not, write to the Free Software
cannam@167 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
cannam@167 18 *
cannam@167 19 */
cannam@167 20
cannam@167 21 /* threads.c: Portable thread spawning for loops, via the X(spawn_loop)
cannam@167 22 function. The first portion of this file is a set of macros to
cannam@167 23 spawn and join threads on various systems. */
cannam@167 24
cannam@167 25 #include "threads/threads.h"
cannam@167 26 #include "api/api.h"
cannam@167 27
cannam@167 28 #if defined(USING_POSIX_THREADS)
cannam@167 29
cannam@167 30 #include <pthread.h>
cannam@167 31
cannam@167 32 #ifdef HAVE_UNISTD_H
cannam@167 33 # include <unistd.h>
cannam@167 34 #endif
cannam@167 35
cannam@167 36 /* implementation of semaphores and mutexes: */
cannam@167 37 #if (defined(_POSIX_SEMAPHORES) && (_POSIX_SEMAPHORES >= 200112L))
cannam@167 38
cannam@167 39 /* If optional POSIX semaphores are supported, use them to
cannam@167 40 implement both semaphores and mutexes. */
cannam@167 41 # include <semaphore.h>
cannam@167 42 # include <errno.h>
cannam@167 43
/* Counting semaphore, backed directly by a POSIX unnamed semaphore. */
typedef sem_t os_sem_t;

/* Set up *s as a semaphore that is not shared between processes and
   whose count starts at zero.  The result of sem_init() is not
   checked. */
static void os_sem_init(os_sem_t *s)
{
    const int process_shared = 0;
    const unsigned int initial_count = 0;

    sem_init(s, process_shared, initial_count);
}

/* Tear down a semaphore previously set up with os_sem_init(). */
static void os_sem_destroy(os_sem_t *s)
{
    sem_destroy(s);
}
cannam@167 48
cannam@167 49 static void os_sem_down(os_sem_t *s)
cannam@167 50 {
cannam@167 51 int err;
cannam@167 52 do {
cannam@167 53 err = sem_wait(s);
cannam@167 54 } while (err == -1 && errno == EINTR);
cannam@167 55 CK(err == 0);
cannam@167 56 }
cannam@167 57
cannam@167 58 static void os_sem_up(os_sem_t *s) { sem_post(s); }
cannam@167 59
/*
 * Mutexes are built on sem_t as well, rather than on pthread_mutex_t.
 * The reason given by the original author: mysterious hangs were seen
 * with glibc-2.7 and linux-2.6.22 when using pthread_mutex_t, but no
 * hangs with sem_t, nor with linux >= 2.6.24.  For lack of better
 * information, sem_t looked like the safest choice.
 */
typedef sem_t os_mutex_t;

/* Set up *s as an unlocked mutex: a process-private semaphore whose
   count starts at 1, so that the first "down" succeeds immediately. */
static void os_mutex_init(os_mutex_t *s)
{
    const int process_shared = 0;
    const unsigned int unlocked = 1;

    sem_init(s, process_shared, unlocked);
}

/* The remaining mutex operations are the semaphore operations. */
#define os_mutex_lock os_sem_down
#define os_mutex_unlock os_sem_up
#define os_mutex_destroy os_sem_destroy
cannam@167 72
cannam@167 73 #else
cannam@167 74
cannam@167 75 /* If optional POSIX semaphores are not defined, use pthread
cannam@167 76 mutexes for mutexes, and simulate semaphores with condition
cannam@167 77 variables */
/* Mutex type for the fallback path: a plain pthread mutex. */
typedef pthread_mutex_t os_mutex_t;

/* Initialize *s with the default mutex attributes. */
static void os_mutex_init(os_mutex_t *s)
{
    const pthread_mutexattr_t *default_attr = 0;

    pthread_mutex_init(s, default_attr);
}

/* Release the resources of a mutex set up with os_mutex_init(). */
static void os_mutex_destroy(os_mutex_t *s)
{
    pthread_mutex_destroy(s);
}

/* Acquire the mutex, blocking until it is available. */
static void os_mutex_lock(os_mutex_t *s)
{
    pthread_mutex_lock(s);
}

/* Release a mutex held by the caller. */
static void os_mutex_unlock(os_mutex_t *s)
{
    pthread_mutex_unlock(s);
}
cannam@167 88
/* Counting semaphore emulated with a mutex and a condition variable. */
typedef struct {
    pthread_mutex_t m;   /* protects x */
    pthread_cond_t c;    /* signalled each time x is incremented */
    volatile int x;      /* current count */
} os_sem_t;

/* Initialize the mutex and condition variable with default attributes
   and set the count to zero. */
static void os_sem_init(os_sem_t *s)
{
    const pthread_mutexattr_t *mutex_attr = 0;
    const pthread_condattr_t *cond_attr = 0;

    pthread_mutex_init(&s->m, mutex_attr);
    pthread_cond_init(&s->c, cond_attr);

    /* The store to the count is done under the lock so that it
       benefits from the release semantics of pthread_mutex_unlock(). */
    pthread_mutex_lock(&s->m);
    s->x = 0;
    pthread_mutex_unlock(&s->m);
}

/* Destroy the mutex first, then the condition variable. */
static void os_sem_destroy(os_sem_t *s)
{
    pthread_mutex_destroy(&s->m);
    pthread_cond_destroy(&s->c);
}

/* Wait until the count is positive, then take one unit from it. */
static void os_sem_down(os_sem_t *s)
{
    pthread_mutex_lock(&s->m);

    /* re-test after every wakeup: the wait only ends once the count
       is really positive */
    while (!(s->x > 0)) {
        pthread_cond_wait(&s->c, &s->m);
    }
    s->x -= 1;

    pthread_mutex_unlock(&s->m);
}

/* Add one unit to the count and signal the condition variable. */
static void os_sem_up(os_sem_t *s)
{
    pthread_mutex_lock(&s->m);

    s->x += 1;
    pthread_cond_signal(&s->c);

    pthread_mutex_unlock(&s->m);
}
cannam@167 129
cannam@167 130 #endif
cannam@167 131
/* Return type of a thread entry point in the pthreads build. */
#define FFTW_WORKER void *

/* Start a new thread running worker(arg).

   The thread is created detached and with system contention scope.
   Its id is discarded, and none of the pthread return codes are
   checked, so a failure to create the thread goes unreported. */
static void os_create_thread(FFTW_WORKER (*worker)(void *arg),
                             void *arg)
{
    pthread_t tid;
    pthread_attr_t attr;

    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);

    pthread_create(&tid, &attr, worker, arg);

    /* the attribute object is only needed during creation */
    pthread_attr_destroy(&attr);
}
cannam@167 147
/* Terminate the calling thread with a null exit value.  Control does
   not come back from pthread_exit(). */
static void os_destroy_thread(void)
{
    void *no_result = 0;

    pthread_exit(no_result);
}
cannam@167 152
/* Statically-initializable mutexes: pthreads provides these natively,
   so a variable of this type can be initialized at its definition with
   OS_STATIC_MUTEX_INITIALIZER and needs no run-time init call. */
typedef pthread_mutex_t os_static_mutex_t;
#define OS_STATIC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER

/* Acquire a static mutex, blocking until it is available. */
static void os_static_mutex_lock(os_static_mutex_t *s)
{
    pthread_mutex_lock(s);
}

/* Release a static mutex held by the caller. */
static void os_static_mutex_unlock(os_static_mutex_t *s)
{
    pthread_mutex_unlock(s);
}
cannam@167 158
cannam@167 159 #elif defined(__WIN32__) || defined(_WIN32) || defined(_WINDOWS)
cannam@167 160 /* hack: windef.h defines INT for its own purposes and this causes
cannam@167 161 a conflict with our own INT in ifftw.h. Divert the windows
cannam@167 162 definition into another name unlikely to cause a conflict */
cannam@167 163 #define INT magnus_ab_INTegro_seclorum_nascitur_ordo
cannam@167 164 #include <windows.h>
cannam@167 165 #include <process.h>
cannam@167 166 #include <intrin.h>
cannam@167 167 #undef INT
cannam@167 168
/* Mutex type for the Win32 build: a handle to a kernel mutex object. */
typedef HANDLE os_mutex_t;

/* Create an unnamed mutex with default security that is not initially
   owned by the caller, and store its handle in *s.  The handle is not
   checked for failure. */
static void os_mutex_init(os_mutex_t *s)
{
    HANDLE h = CreateMutex(NULL, FALSE, NULL);

    *s = h;
}

/* Close the mutex handle stored in *s. */
static void os_mutex_destroy(os_mutex_t *s)
{
    HANDLE h = *s;

    CloseHandle(h);
}

/* Acquire the mutex, waiting without a timeout. */
static void os_mutex_lock(os_mutex_t *s)
{
    HANDLE h = *s;

    WaitForSingleObject(h, INFINITE);
}

/* Release a mutex owned by the calling thread. */
static void os_mutex_unlock(os_mutex_t *s)
{
    HANDLE h = *s;

    ReleaseMutex(h);
}
cannam@167 190
/* Semaphore type for the Win32 build: a handle to a kernel semaphore. */
typedef HANDLE os_sem_t;

/* Create an unnamed semaphore with default security, an initial count
   of 0 and a maximum count of 0x7FFFFFFF, and store its handle in *s.
   The handle is not checked for failure. */
static void os_sem_init(os_sem_t *s)
{
    HANDLE h = CreateSemaphore(NULL, 0, 0x7FFFFFFFL, NULL);

    *s = h;
}

/* Close the semaphore handle stored in *s. */
static void os_sem_destroy(os_sem_t *s)
{
    HANDLE h = *s;

    CloseHandle(h);
}

/* Decrement the semaphore, waiting without a timeout while its count
   is zero. */
static void os_sem_down(os_sem_t *s)
{
    HANDLE h = *s;

    WaitForSingleObject(h, INFINITE);
}

/* Increment the semaphore count by one; the previous count is not
   requested. */
static void os_sem_up(os_sem_t *s)
{
    HANDLE h = *s;

    ReleaseSemaphore(h, 1, NULL);
}
cannam@167 212
/* Return type and calling convention of a thread entry point in the
   Win32 build, and the matching function-pointer type expected by
   _beginthreadex(). */
#define FFTW_WORKER unsigned __stdcall
typedef unsigned (__stdcall *winthread_start) (void *);

/* Start a new thread running worker(arg) with default security, the
   default stack size and no creation flags; the thread id is not
   requested.

   _beginthreadex() returns a handle that the caller owns.  Nothing in
   this file ever waits on or otherwise uses that handle (the pthreads
   counterpart likewise creates its threads detached), so it is closed
   right away: previously it was dropped on the floor, leaking one
   kernel handle per worker thread.  Closing the handle does not affect
   the running thread.

   If thread creation fails, _beginthreadex() returns 0 and there is
   nothing to close; as before, the failure is not reported. */
static void os_create_thread(winthread_start worker,
                             void *arg)
{
    uintptr_t h = _beginthreadex((void *)NULL,      /* security attrib */
                                 0,                 /* stack size */
                                 worker,            /* start address */
                                 arg,               /* parameters */
                                 0,                 /* creation flags */
                                 (unsigned *)NULL); /* tid */

    if (h != 0)
        CloseHandle((HANDLE)h);
}
cannam@167 226
/* Terminate the calling thread with exit code 0. */
static void os_destroy_thread(void)
{
    const unsigned exit_code = 0;

    _endthreadex(exit_code);
}
cannam@167 231
/* Windows does not have statically-initialized mutexes, so a spinlock
   is faked on top of an interlocked LONG: 0 means free, 1 means held. */
typedef volatile LONG os_static_mutex_t;
#define OS_STATIC_MUTEX_INITIALIZER 0

/* Spin until the lock is acquired.  The atomic exchange writes 1 and
   returns the previous value: anything other than 1 means the lock was
   free and now belongs to the caller.  Between attempts the thread
   yields the processor and gives up its time slice. */
static void os_static_mutex_lock(os_static_mutex_t *s)
{
    for (;;) {
        if (InterlockedExchange(s, 1) != 1)
            break;
        YieldProcessor();
        Sleep(0);
    }
}

/* Release the lock by atomically storing 0.  The previous value is
   passed to A(), which is expected to check that the lock was in fact
   held. */
static void os_static_mutex_unlock(os_static_mutex_t *s)
{
    LONG old = InterlockedExchange(s, 0);
    A(old == 1);
}
cannam@167 248 #else
cannam@167 249 #error "No threading layer defined"
cannam@167 250 #endif
cannam@167 251
cannam@167 252 /************************************************************************/
cannam@167 253
cannam@167 254 /* Main code: */
cannam@167 255 struct worker {
cannam@167 256 os_sem_t ready;
cannam@167 257 os_sem_t done;
cannam@167 258 struct work *w;
cannam@167 259 struct worker *cdr;
cannam@167 260 };
cannam@167 261
cannam@167 262 static struct worker *make_worker(void)
cannam@167 263 {
cannam@167 264 struct worker *q = (struct worker *)MALLOC(sizeof(*q), OTHER);
cannam@167 265 os_sem_init(&q->ready);
cannam@167 266 os_sem_init(&q->done);
cannam@167 267 return q;
cannam@167 268 }
cannam@167 269
cannam@167 270 static void unmake_worker(struct worker *q)
cannam@167 271 {
cannam@167 272 os_sem_destroy(&q->done);
cannam@167 273 os_sem_destroy(&q->ready);
cannam@167 274 X(ifree)(q);
cannam@167 275 }
cannam@167 276
cannam@167 277 struct work {
cannam@167 278 spawn_function proc;
cannam@167 279 spawn_data d;
cannam@167 280 struct worker *q; /* the worker responsible for performing this work */
cannam@167 281 };
cannam@167 282
cannam@167 283 static os_mutex_t queue_lock;
cannam@167 284 static os_sem_t termination_semaphore;
cannam@167 285
cannam@167 286 static struct worker *worker_queue;
/* Execute WHAT while holding queue_lock.

   The expansion is wrapped in do { ... } while (0) so that
   "WITH_QUEUE_LOCK(...);" is exactly one statement.  A bare { ... }
   block followed by the caller's ';' is two statements, which breaks
   when the macro is used as the unbraced body of an if/else.

   WHAT must not jump out of the macro (return, break, goto): the
   unlock would then be skipped. */
#define WITH_QUEUE_LOCK(what)                   \
do {                                            \
    os_mutex_lock(&queue_lock);                 \
    what;                                       \
    os_mutex_unlock(&queue_lock);               \
} while (0)
cannam@167 293
cannam@167 294 static FFTW_WORKER worker(void *arg)
cannam@167 295 {
cannam@167 296 struct worker *ego = (struct worker *)arg;
cannam@167 297 struct work *w;
cannam@167 298
cannam@167 299 for (;;) {
cannam@167 300 /* wait until work becomes available */
cannam@167 301 os_sem_down(&ego->ready);
cannam@167 302
cannam@167 303 w = ego->w;
cannam@167 304
cannam@167 305 /* !w->proc ==> terminate worker */
cannam@167 306 if (!w->proc) break;
cannam@167 307
cannam@167 308 /* do the work */
cannam@167 309 w->proc(&w->d);
cannam@167 310
cannam@167 311 /* signal that work is done */
cannam@167 312 os_sem_up(&ego->done);
cannam@167 313 }
cannam@167 314
cannam@167 315 /* termination protocol */
cannam@167 316 os_sem_up(&termination_semaphore);
cannam@167 317
cannam@167 318 os_destroy_thread();
cannam@167 319 /* UNREACHABLE */
cannam@167 320 return 0;
cannam@167 321 }
cannam@167 322
cannam@167 323 static void enqueue(struct worker *q)
cannam@167 324 {
cannam@167 325 WITH_QUEUE_LOCK({
cannam@167 326 q->cdr = worker_queue;
cannam@167 327 worker_queue = q;
cannam@167 328 });
cannam@167 329 }
cannam@167 330
cannam@167 331 static struct worker *dequeue(void)
cannam@167 332 {
cannam@167 333 struct worker *q;
cannam@167 334
cannam@167 335 WITH_QUEUE_LOCK({
cannam@167 336 q = worker_queue;
cannam@167 337 if (q)
cannam@167 338 worker_queue = q->cdr;
cannam@167 339 });
cannam@167 340
cannam@167 341 if (!q) {
cannam@167 342 /* no worker is available. Create one */
cannam@167 343 q = make_worker();
cannam@167 344 os_create_thread(worker, q);
cannam@167 345 }
cannam@167 346
cannam@167 347 return q;
cannam@167 348 }
cannam@167 349
cannam@167 350
cannam@167 351 static void kill_workforce(void)
cannam@167 352 {
cannam@167 353 struct work w;
cannam@167 354
cannam@167 355 w.proc = 0;
cannam@167 356
cannam@167 357 WITH_QUEUE_LOCK({
cannam@167 358 /* tell all workers that they must terminate.
cannam@167 359
cannam@167 360 Because workers enqueue themselves before signaling the
cannam@167 361 completion of the work, all workers belong to the worker queue
cannam@167 362 if we get here. Also, all workers are waiting at
cannam@167 363 os_sem_down(ready), so we can hold the queue lock without
cannam@167 364 deadlocking */
cannam@167 365 while (worker_queue) {
cannam@167 366 struct worker *q = worker_queue;
cannam@167 367 worker_queue = q->cdr;
cannam@167 368 q->w = &w;
cannam@167 369 os_sem_up(&q->ready);
cannam@167 370 os_sem_down(&termination_semaphore);
cannam@167 371 unmake_worker(q);
cannam@167 372 }
cannam@167 373 });
cannam@167 374 }
cannam@167 375
cannam@167 376 static os_static_mutex_t initialization_mutex = OS_STATIC_MUTEX_INITIALIZER;
cannam@167 377
cannam@167 378 int X(ithreads_init)(void)
cannam@167 379 {
cannam@167 380 os_static_mutex_lock(&initialization_mutex); {
cannam@167 381 os_mutex_init(&queue_lock);
cannam@167 382 os_sem_init(&termination_semaphore);
cannam@167 383
cannam@167 384 WITH_QUEUE_LOCK({
cannam@167 385 worker_queue = 0;
cannam@167 386 });
cannam@167 387 } os_static_mutex_unlock(&initialization_mutex);
cannam@167 388
cannam@167 389 return 0; /* no error */
cannam@167 390 }
cannam@167 391
cannam@167 392 /* Distribute a loop from 0 to loopmax-1 over nthreads threads.
cannam@167 393 proc(d) is called to execute a block of iterations from d->min
cannam@167 394 to d->max-1. d->thr_num indicate the number of the thread
cannam@167 395 that is executing proc (from 0 to nthreads-1), and d->data is
cannam@167 396 the same as the data parameter passed to X(spawn_loop).
cannam@167 397
cannam@167 398 This function returns only after all the threads have completed. */
cannam@167 399 void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data)
cannam@167 400 {
cannam@167 401 int block_size;
cannam@167 402 struct work *r;
cannam@167 403 int i;
cannam@167 404
cannam@167 405 A(loopmax >= 0);
cannam@167 406 A(nthr > 0);
cannam@167 407 A(proc);
cannam@167 408
cannam@167 409 if (!loopmax) return;
cannam@167 410
cannam@167 411 /* Choose the block size and number of threads in order to (1)
cannam@167 412 minimize the critical path and (2) use the fewest threads that
cannam@167 413 achieve the same critical path (to minimize overhead).
cannam@167 414 e.g. if loopmax is 5 and nthr is 4, we should use only 3
cannam@167 415 threads with block sizes of 2, 2, and 1. */
cannam@167 416 block_size = (loopmax + nthr - 1) / nthr;
cannam@167 417 nthr = (loopmax + block_size - 1) / block_size;
cannam@167 418
cannam@167 419 STACK_MALLOC(struct work *, r, sizeof(struct work) * nthr);
cannam@167 420
cannam@167 421 /* distribute work: */
cannam@167 422 for (i = 0; i < nthr; ++i) {
cannam@167 423 struct work *w = &r[i];
cannam@167 424 spawn_data *d = &w->d;
cannam@167 425
cannam@167 426 d->max = (d->min = i * block_size) + block_size;
cannam@167 427 if (d->max > loopmax)
cannam@167 428 d->max = loopmax;
cannam@167 429 d->thr_num = i;
cannam@167 430 d->data = data;
cannam@167 431 w->proc = proc;
cannam@167 432
cannam@167 433 if (i == nthr - 1) {
cannam@167 434 /* do the work ourselves */
cannam@167 435 proc(d);
cannam@167 436 } else {
cannam@167 437 /* assign a worker to W */
cannam@167 438 w->q = dequeue();
cannam@167 439
cannam@167 440 /* tell worker w->q to do it */
cannam@167 441 w->q->w = w; /* Dirac could have written this */
cannam@167 442 os_sem_up(&w->q->ready);
cannam@167 443 }
cannam@167 444 }
cannam@167 445
cannam@167 446 for (i = 0; i < nthr - 1; ++i) {
cannam@167 447 struct work *w = &r[i];
cannam@167 448 os_sem_down(&w->q->done);
cannam@167 449 enqueue(w->q);
cannam@167 450 }
cannam@167 451
cannam@167 452 STACK_FREE(r);
cannam@167 453 }
cannam@167 454
cannam@167 455 void X(threads_cleanup)(void)
cannam@167 456 {
cannam@167 457 kill_workforce();
cannam@167 458 os_mutex_destroy(&queue_lock);
cannam@167 459 os_sem_destroy(&termination_semaphore);
cannam@167 460 }
cannam@167 461
cannam@167 462 static os_static_mutex_t install_planner_hooks_mutex = OS_STATIC_MUTEX_INITIALIZER;
cannam@167 463 static os_mutex_t planner_mutex;
cannam@167 464 static int planner_hooks_installed = 0;
cannam@167 465
cannam@167 466 static void lock_planner_mutex(void)
cannam@167 467 {
cannam@167 468 os_mutex_lock(&planner_mutex);
cannam@167 469 }
cannam@167 470
cannam@167 471 static void unlock_planner_mutex(void)
cannam@167 472 {
cannam@167 473 os_mutex_unlock(&planner_mutex);
cannam@167 474 }
cannam@167 475
cannam@167 476 void X(threads_register_planner_hooks)(void)
cannam@167 477 {
cannam@167 478 os_static_mutex_lock(&install_planner_hooks_mutex); {
cannam@167 479 if (!planner_hooks_installed) {
cannam@167 480 os_mutex_init(&planner_mutex);
cannam@167 481 X(set_planner_hooks)(lock_planner_mutex, unlock_planner_mutex);
cannam@167 482 planner_hooks_installed = 1;
cannam@167 483 }
cannam@167 484 } os_static_mutex_unlock(&install_planner_hooks_mutex);
cannam@167 485 }