annotate src/fftw-3.3.3/threads/threads.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* threads.c: Portable thread spawning for loops, via the X(spawn_loop)
Chris@10 22 function. The first portion of this file is a set of macros to
Chris@10 23 spawn and join threads on various systems. */
Chris@10 24
Chris@10 25 #include "threads.h"
Chris@10 26
Chris@10 27 #if defined(USING_POSIX_THREADS)
Chris@10 28
Chris@10 29 #include <pthread.h>
Chris@10 30
Chris@10 31 #ifdef HAVE_UNISTD_H
Chris@10 32 # include <unistd.h>
Chris@10 33 #endif
Chris@10 34
Chris@10 35 /* imlementation of semaphores and mutexes: */
Chris@10 36 #if (defined(_POSIX_SEMAPHORES) && (_POSIX_SEMAPHORES >= 200112L))
Chris@10 37
Chris@10 38 /* If optional POSIX semaphores are supported, use them to
Chris@10 39 implement both semaphores and mutexes. */
Chris@10 40 # include <semaphore.h>
Chris@10 41 # include <errno.h>
Chris@10 42
Chris@10 43 typedef sem_t os_sem_t;
Chris@10 44
Chris@10 45 static void os_sem_init(os_sem_t *s) { sem_init(s, 0, 0); }
Chris@10 46 static void os_sem_destroy(os_sem_t *s) { sem_destroy(s); }
Chris@10 47
Chris@10 48 static void os_sem_down(os_sem_t *s)
Chris@10 49 {
Chris@10 50 int err;
Chris@10 51 do {
Chris@10 52 err = sem_wait(s);
Chris@10 53 } while (err == -1 && errno == EINTR);
Chris@10 54 CK(err == 0);
Chris@10 55 }
Chris@10 56
Chris@10 57 static void os_sem_up(os_sem_t *s) { sem_post(s); }
Chris@10 58
Chris@10 59 /*
Chris@10 60 The reason why we use sem_t to implement mutexes is that I have
Chris@10 61 seen mysterious hangs with glibc-2.7 and linux-2.6.22 when using
Chris@10 62 pthread_mutex_t, but no hangs with sem_t or with linux >=
Chris@10 63 2.6.24. For lack of better information, sem_t looks like the
Chris@10 64 safest choice.
Chris@10 65 */
Chris@10 66 typedef sem_t os_mutex_t;
Chris@10 67 static void os_mutex_init(os_mutex_t *s) { sem_init(s, 0, 1); }
Chris@10 68 #define os_mutex_destroy os_sem_destroy
Chris@10 69 #define os_mutex_lock os_sem_down
Chris@10 70 #define os_mutex_unlock os_sem_up
Chris@10 71
Chris@10 72 #else
Chris@10 73
Chris@10 74 /* If optional POSIX semaphores are not defined, use pthread
Chris@10 75 mutexes for mutexes, and simulate semaphores with condition
Chris@10 76 variables */
Chris@10 77 typedef pthread_mutex_t os_mutex_t;
Chris@10 78
Chris@10 79 static void os_mutex_init(os_mutex_t *s)
Chris@10 80 {
Chris@10 81 pthread_mutex_init(s, (pthread_mutexattr_t *)0);
Chris@10 82 }
Chris@10 83
Chris@10 84 static void os_mutex_destroy(os_mutex_t *s) { pthread_mutex_destroy(s); }
Chris@10 85 static void os_mutex_lock(os_mutex_t *s) { pthread_mutex_lock(s); }
Chris@10 86 static void os_mutex_unlock(os_mutex_t *s) { pthread_mutex_unlock(s); }
Chris@10 87
Chris@10 88 typedef struct {
Chris@10 89 pthread_mutex_t m;
Chris@10 90 pthread_cond_t c;
Chris@10 91 volatile int x;
Chris@10 92 } os_sem_t;
Chris@10 93
Chris@10 94 static void os_sem_init(os_sem_t *s)
Chris@10 95 {
Chris@10 96 pthread_mutex_init(&s->m, (pthread_mutexattr_t *)0);
Chris@10 97 pthread_cond_init(&s->c, (pthread_condattr_t *)0);
Chris@10 98
Chris@10 99 /* wrap initialization in lock to exploit the release
Chris@10 100 semantics of pthread_mutex_unlock() */
Chris@10 101 pthread_mutex_lock(&s->m);
Chris@10 102 s->x = 0;
Chris@10 103 pthread_mutex_unlock(&s->m);
Chris@10 104 }
Chris@10 105
Chris@10 106 static void os_sem_destroy(os_sem_t *s)
Chris@10 107 {
Chris@10 108 pthread_mutex_destroy(&s->m);
Chris@10 109 pthread_cond_destroy(&s->c);
Chris@10 110 }
Chris@10 111
Chris@10 112 static void os_sem_down(os_sem_t *s)
Chris@10 113 {
Chris@10 114 pthread_mutex_lock(&s->m);
Chris@10 115 while (s->x <= 0)
Chris@10 116 pthread_cond_wait(&s->c, &s->m);
Chris@10 117 --s->x;
Chris@10 118 pthread_mutex_unlock(&s->m);
Chris@10 119 }
Chris@10 120
Chris@10 121 static void os_sem_up(os_sem_t *s)
Chris@10 122 {
Chris@10 123 pthread_mutex_lock(&s->m);
Chris@10 124 ++s->x;
Chris@10 125 pthread_cond_signal(&s->c);
Chris@10 126 pthread_mutex_unlock(&s->m);
Chris@10 127 }
Chris@10 128
Chris@10 129 #endif
Chris@10 130
Chris@10 131 #define FFTW_WORKER void *
Chris@10 132
Chris@10 133 static void os_create_thread(FFTW_WORKER (*worker)(void *arg),
Chris@10 134 void *arg)
Chris@10 135 {
Chris@10 136 pthread_attr_t attr;
Chris@10 137 pthread_t tid;
Chris@10 138
Chris@10 139 pthread_attr_init(&attr);
Chris@10 140 pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
Chris@10 141 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
Chris@10 142
Chris@10 143 pthread_create(&tid, &attr, worker, (void *)arg);
Chris@10 144 pthread_attr_destroy(&attr);
Chris@10 145 }
Chris@10 146
Chris@10 147 static void os_destroy_thread(void)
Chris@10 148 {
Chris@10 149 pthread_exit((void *)0);
Chris@10 150 }
Chris@10 151
Chris@10 152 #elif defined(__WIN32__) || defined(_WIN32) || defined(_WINDOWS)
Chris@10 153 /* hack: windef.h defines INT for its own purposes and this causes
Chris@10 154 a conflict with our own INT in ifftw.h. Divert the windows
Chris@10 155 definition into another name unlikely to cause a conflict */
Chris@10 156 #define INT magnus_ab_INTegro_seclorum_nascitur_ordo
Chris@10 157 #include <windows.h>
Chris@10 158 #include <process.h>
Chris@10 159 #undef INT
Chris@10 160
Chris@10 161 typedef HANDLE os_mutex_t;
Chris@10 162
Chris@10 163 static void os_mutex_init(os_mutex_t *s)
Chris@10 164 {
Chris@10 165 *s = CreateMutex(NULL, FALSE, NULL);
Chris@10 166 }
Chris@10 167
Chris@10 168 static void os_mutex_destroy(os_mutex_t *s)
Chris@10 169 {
Chris@10 170 CloseHandle(*s);
Chris@10 171 }
Chris@10 172
Chris@10 173 static void os_mutex_lock(os_mutex_t *s)
Chris@10 174 {
Chris@10 175 WaitForSingleObject(*s, INFINITE);
Chris@10 176 }
Chris@10 177
Chris@10 178 static void os_mutex_unlock(os_mutex_t *s)
Chris@10 179 {
Chris@10 180 ReleaseMutex(*s);
Chris@10 181 }
Chris@10 182
Chris@10 183 typedef HANDLE os_sem_t;
Chris@10 184
Chris@10 185 static void os_sem_init(os_sem_t *s)
Chris@10 186 {
Chris@10 187 *s = CreateSemaphore(NULL, 0, 0x7FFFFFFFL, NULL);
Chris@10 188 }
Chris@10 189
Chris@10 190 static void os_sem_destroy(os_sem_t *s)
Chris@10 191 {
Chris@10 192 CloseHandle(*s);
Chris@10 193 }
Chris@10 194
Chris@10 195 static void os_sem_down(os_sem_t *s)
Chris@10 196 {
Chris@10 197 WaitForSingleObject(*s, INFINITE);
Chris@10 198 }
Chris@10 199
Chris@10 200 static void os_sem_up(os_sem_t *s)
Chris@10 201 {
Chris@10 202 ReleaseSemaphore(*s, 1, NULL);
Chris@10 203 }
Chris@10 204
Chris@10 205 #define FFTW_WORKER unsigned __stdcall
Chris@10 206 typedef unsigned (__stdcall *winthread_start) (void *);
Chris@10 207
Chris@10 208 static void os_create_thread(winthread_start worker,
Chris@10 209 void *arg)
Chris@10 210 {
Chris@10 211 _beginthreadex((void *)NULL, /* security attrib */
Chris@10 212 0, /* stack size */
Chris@10 213 worker, /* start address */
Chris@10 214 arg, /* parameters */
Chris@10 215 0, /* creation flags */
Chris@10 216 (unsigned *)NULL); /* tid */
Chris@10 217 }
Chris@10 218
Chris@10 219 static void os_destroy_thread(void)
Chris@10 220 {
Chris@10 221 _endthreadex(0);
Chris@10 222 }
Chris@10 223
Chris@10 224
Chris@10 225 #else
Chris@10 226 #error "No threading layer defined"
Chris@10 227 #endif
Chris@10 228
Chris@10 229 /************************************************************************/
Chris@10 230
Chris@10 231 /* Main code: */
Chris@10 232 struct worker {
Chris@10 233 os_sem_t ready;
Chris@10 234 os_sem_t done;
Chris@10 235 struct work *w;
Chris@10 236 struct worker *cdr;
Chris@10 237 };
Chris@10 238
Chris@10 239 static struct worker *make_worker(void)
Chris@10 240 {
Chris@10 241 struct worker *q = (struct worker *)MALLOC(sizeof(*q), OTHER);
Chris@10 242 os_sem_init(&q->ready);
Chris@10 243 os_sem_init(&q->done);
Chris@10 244 return q;
Chris@10 245 }
Chris@10 246
Chris@10 247 static void unmake_worker(struct worker *q)
Chris@10 248 {
Chris@10 249 os_sem_destroy(&q->done);
Chris@10 250 os_sem_destroy(&q->ready);
Chris@10 251 X(ifree)(q);
Chris@10 252 }
Chris@10 253
Chris@10 254 struct work {
Chris@10 255 spawn_function proc;
Chris@10 256 spawn_data d;
Chris@10 257 struct worker *q; /* the worker responsible for performing this work */
Chris@10 258 };
Chris@10 259
Chris@10 260 static os_mutex_t queue_lock;
Chris@10 261 static os_sem_t termination_semaphore;
Chris@10 262
Chris@10 263 static struct worker *worker_queue;
Chris@10 264 #define WITH_QUEUE_LOCK(what) \
Chris@10 265 { \
Chris@10 266 os_mutex_lock(&queue_lock); \
Chris@10 267 what; \
Chris@10 268 os_mutex_unlock(&queue_lock); \
Chris@10 269 }
Chris@10 270
Chris@10 271 static FFTW_WORKER worker(void *arg)
Chris@10 272 {
Chris@10 273 struct worker *ego = (struct worker *)arg;
Chris@10 274 struct work *w;
Chris@10 275
Chris@10 276 for (;;) {
Chris@10 277 /* wait until work becomes available */
Chris@10 278 os_sem_down(&ego->ready);
Chris@10 279
Chris@10 280 w = ego->w;
Chris@10 281
Chris@10 282 /* !w->proc ==> terminate worker */
Chris@10 283 if (!w->proc) break;
Chris@10 284
Chris@10 285 /* do the work */
Chris@10 286 w->proc(&w->d);
Chris@10 287
Chris@10 288 /* signal that work is done */
Chris@10 289 os_sem_up(&ego->done);
Chris@10 290 }
Chris@10 291
Chris@10 292 /* termination protocol */
Chris@10 293 os_sem_up(&termination_semaphore);
Chris@10 294
Chris@10 295 os_destroy_thread();
Chris@10 296 /* UNREACHABLE */
Chris@10 297 return 0;
Chris@10 298 }
Chris@10 299
Chris@10 300 static void enqueue(struct worker *q)
Chris@10 301 {
Chris@10 302 WITH_QUEUE_LOCK({
Chris@10 303 q->cdr = worker_queue;
Chris@10 304 worker_queue = q;
Chris@10 305 });
Chris@10 306 }
Chris@10 307
Chris@10 308 static struct worker *dequeue(void)
Chris@10 309 {
Chris@10 310 struct worker *q;
Chris@10 311
Chris@10 312 WITH_QUEUE_LOCK({
Chris@10 313 q = worker_queue;
Chris@10 314 if (q)
Chris@10 315 worker_queue = q->cdr;
Chris@10 316 });
Chris@10 317
Chris@10 318 if (!q) {
Chris@10 319 /* no worker is available. Create one */
Chris@10 320 q = make_worker();
Chris@10 321 os_create_thread(worker, q);
Chris@10 322 }
Chris@10 323
Chris@10 324 return q;
Chris@10 325 }
Chris@10 326
Chris@10 327
Chris@10 328 static void kill_workforce(void)
Chris@10 329 {
Chris@10 330 struct work w;
Chris@10 331
Chris@10 332 w.proc = 0;
Chris@10 333
Chris@10 334 THREAD_ON; /* needed for debugging mode: since make_worker
Chris@10 335 is called from dequeue which is only called in
Chris@10 336 thread_on mode, we need to unmake_worker in thread_on. */
Chris@10 337 WITH_QUEUE_LOCK({
Chris@10 338 /* tell all workers that they must terminate.
Chris@10 339
Chris@10 340 Because workers enqueue themselves before signaling the
Chris@10 341 completion of the work, all workers belong to the worker queue
Chris@10 342 if we get here. Also, all workers are waiting at
Chris@10 343 os_sem_down(ready), so we can hold the queue lock without
Chris@10 344 deadlocking */
Chris@10 345 while (worker_queue) {
Chris@10 346 struct worker *q = worker_queue;
Chris@10 347 worker_queue = q->cdr;
Chris@10 348 q->w = &w;
Chris@10 349 os_sem_up(&q->ready);
Chris@10 350 os_sem_down(&termination_semaphore);
Chris@10 351 unmake_worker(q);
Chris@10 352 }
Chris@10 353 });
Chris@10 354 THREAD_OFF;
Chris@10 355 }
Chris@10 356
Chris@10 357 int X(ithreads_init)(void)
Chris@10 358 {
Chris@10 359 os_mutex_init(&queue_lock);
Chris@10 360 os_sem_init(&termination_semaphore);
Chris@10 361
Chris@10 362 WITH_QUEUE_LOCK({
Chris@10 363 worker_queue = 0;
Chris@10 364 })
Chris@10 365
Chris@10 366 return 0; /* no error */
Chris@10 367 }
Chris@10 368
Chris@10 369 /* Distribute a loop from 0 to loopmax-1 over nthreads threads.
Chris@10 370 proc(d) is called to execute a block of iterations from d->min
Chris@10 371 to d->max-1. d->thr_num indicate the number of the thread
Chris@10 372 that is executing proc (from 0 to nthreads-1), and d->data is
Chris@10 373 the same as the data parameter passed to X(spawn_loop).
Chris@10 374
Chris@10 375 This function returns only after all the threads have completed. */
Chris@10 376 void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data)
Chris@10 377 {
Chris@10 378 int block_size;
Chris@10 379 struct work *r;
Chris@10 380 int i;
Chris@10 381
Chris@10 382 A(loopmax >= 0);
Chris@10 383 A(nthr > 0);
Chris@10 384 A(proc);
Chris@10 385
Chris@10 386 if (!loopmax) return;
Chris@10 387
Chris@10 388 /* Choose the block size and number of threads in order to (1)
Chris@10 389 minimize the critical path and (2) use the fewest threads that
Chris@10 390 achieve the same critical path (to minimize overhead).
Chris@10 391 e.g. if loopmax is 5 and nthr is 4, we should use only 3
Chris@10 392 threads with block sizes of 2, 2, and 1. */
Chris@10 393 block_size = (loopmax + nthr - 1) / nthr;
Chris@10 394 nthr = (loopmax + block_size - 1) / block_size;
Chris@10 395
Chris@10 396 THREAD_ON; /* prevent debugging mode from failing under threads */
Chris@10 397 STACK_MALLOC(struct work *, r, sizeof(struct work) * nthr);
Chris@10 398
Chris@10 399 /* distribute work: */
Chris@10 400 for (i = 0; i < nthr; ++i) {
Chris@10 401 struct work *w = &r[i];
Chris@10 402 spawn_data *d = &w->d;
Chris@10 403
Chris@10 404 d->max = (d->min = i * block_size) + block_size;
Chris@10 405 if (d->max > loopmax)
Chris@10 406 d->max = loopmax;
Chris@10 407 d->thr_num = i;
Chris@10 408 d->data = data;
Chris@10 409 w->proc = proc;
Chris@10 410
Chris@10 411 if (i == nthr - 1) {
Chris@10 412 /* do the work ourselves */
Chris@10 413 proc(d);
Chris@10 414 } else {
Chris@10 415 /* assign a worker to W */
Chris@10 416 w->q = dequeue();
Chris@10 417
Chris@10 418 /* tell worker w->q to do it */
Chris@10 419 w->q->w = w; /* Dirac could have written this */
Chris@10 420 os_sem_up(&w->q->ready);
Chris@10 421 }
Chris@10 422 }
Chris@10 423
Chris@10 424 for (i = 0; i < nthr - 1; ++i) {
Chris@10 425 struct work *w = &r[i];
Chris@10 426 os_sem_down(&w->q->done);
Chris@10 427 enqueue(w->q);
Chris@10 428 }
Chris@10 429
Chris@10 430 STACK_FREE(r);
Chris@10 431 THREAD_OFF; /* prevent debugging mode from failing under threads */
Chris@10 432 }
Chris@10 433
Chris@10 434 void X(threads_cleanup)(void)
Chris@10 435 {
Chris@10 436 kill_workforce();
Chris@10 437 os_mutex_destroy(&queue_lock);
Chris@10 438 os_sem_destroy(&termination_semaphore);
Chris@10 439 }