annotate fft/fftw/fftw-3.3.4/threads/threads.c @ 40:223f770b5341 kissfft-double tip

Try a double-precision kissfft
author Chris Cannam
date Wed, 07 Sep 2016 10:40:32 +0100
parents 26056e866c29
children
rev   line source
Chris@19 1 /*
Chris@19 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@19 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@19 4 *
Chris@19 5 * This program is free software; you can redistribute it and/or modify
Chris@19 6 * it under the terms of the GNU General Public License as published by
Chris@19 7 * the Free Software Foundation; either version 2 of the License, or
Chris@19 8 * (at your option) any later version.
Chris@19 9 *
Chris@19 10 * This program is distributed in the hope that it will be useful,
Chris@19 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@19 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@19 13 * GNU General Public License for more details.
Chris@19 14 *
Chris@19 15 * You should have received a copy of the GNU General Public License
Chris@19 16 * along with this program; if not, write to the Free Software
Chris@19 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@19 18 *
Chris@19 19 */
Chris@19 20
Chris@19 21 /* threads.c: Portable thread spawning for loops, via the X(spawn_loop)
Chris@19 22 function. The first portion of this file is a set of macros to
Chris@19 23 spawn and join threads on various systems. */
Chris@19 24
Chris@19 25 #include "threads.h"
Chris@19 26
Chris@19 27 #if defined(USING_POSIX_THREADS)
Chris@19 28
Chris@19 29 #include <pthread.h>
Chris@19 30
Chris@19 31 #ifdef HAVE_UNISTD_H
Chris@19 32 # include <unistd.h>
Chris@19 33 #endif
Chris@19 34
Chris@19 35 /* imlementation of semaphores and mutexes: */
Chris@19 36 #if (defined(_POSIX_SEMAPHORES) && (_POSIX_SEMAPHORES >= 200112L))
Chris@19 37
Chris@19 38 /* If optional POSIX semaphores are supported, use them to
Chris@19 39 implement both semaphores and mutexes. */
Chris@19 40 # include <semaphore.h>
Chris@19 41 # include <errno.h>
Chris@19 42
Chris@19 43 typedef sem_t os_sem_t;
Chris@19 44
Chris@19 45 static void os_sem_init(os_sem_t *s) { sem_init(s, 0, 0); }
Chris@19 46 static void os_sem_destroy(os_sem_t *s) { sem_destroy(s); }
Chris@19 47
Chris@19 48 static void os_sem_down(os_sem_t *s)
Chris@19 49 {
Chris@19 50 int err;
Chris@19 51 do {
Chris@19 52 err = sem_wait(s);
Chris@19 53 } while (err == -1 && errno == EINTR);
Chris@19 54 CK(err == 0);
Chris@19 55 }
Chris@19 56
Chris@19 57 static void os_sem_up(os_sem_t *s) { sem_post(s); }
Chris@19 58
Chris@19 59 /*
Chris@19 60 The reason why we use sem_t to implement mutexes is that I have
Chris@19 61 seen mysterious hangs with glibc-2.7 and linux-2.6.22 when using
Chris@19 62 pthread_mutex_t, but no hangs with sem_t or with linux >=
Chris@19 63 2.6.24. For lack of better information, sem_t looks like the
Chris@19 64 safest choice.
Chris@19 65 */
Chris@19 66 typedef sem_t os_mutex_t;
Chris@19 67 static void os_mutex_init(os_mutex_t *s) { sem_init(s, 0, 1); }
Chris@19 68 #define os_mutex_destroy os_sem_destroy
Chris@19 69 #define os_mutex_lock os_sem_down
Chris@19 70 #define os_mutex_unlock os_sem_up
Chris@19 71
Chris@19 72 #else
Chris@19 73
Chris@19 74 /* If optional POSIX semaphores are not defined, use pthread
Chris@19 75 mutexes for mutexes, and simulate semaphores with condition
Chris@19 76 variables */
Chris@19 77 typedef pthread_mutex_t os_mutex_t;
Chris@19 78
Chris@19 79 static void os_mutex_init(os_mutex_t *s)
Chris@19 80 {
Chris@19 81 pthread_mutex_init(s, (pthread_mutexattr_t *)0);
Chris@19 82 }
Chris@19 83
Chris@19 84 static void os_mutex_destroy(os_mutex_t *s) { pthread_mutex_destroy(s); }
Chris@19 85 static void os_mutex_lock(os_mutex_t *s) { pthread_mutex_lock(s); }
Chris@19 86 static void os_mutex_unlock(os_mutex_t *s) { pthread_mutex_unlock(s); }
Chris@19 87
Chris@19 88 typedef struct {
Chris@19 89 pthread_mutex_t m;
Chris@19 90 pthread_cond_t c;
Chris@19 91 volatile int x;
Chris@19 92 } os_sem_t;
Chris@19 93
Chris@19 94 static void os_sem_init(os_sem_t *s)
Chris@19 95 {
Chris@19 96 pthread_mutex_init(&s->m, (pthread_mutexattr_t *)0);
Chris@19 97 pthread_cond_init(&s->c, (pthread_condattr_t *)0);
Chris@19 98
Chris@19 99 /* wrap initialization in lock to exploit the release
Chris@19 100 semantics of pthread_mutex_unlock() */
Chris@19 101 pthread_mutex_lock(&s->m);
Chris@19 102 s->x = 0;
Chris@19 103 pthread_mutex_unlock(&s->m);
Chris@19 104 }
Chris@19 105
Chris@19 106 static void os_sem_destroy(os_sem_t *s)
Chris@19 107 {
Chris@19 108 pthread_mutex_destroy(&s->m);
Chris@19 109 pthread_cond_destroy(&s->c);
Chris@19 110 }
Chris@19 111
Chris@19 112 static void os_sem_down(os_sem_t *s)
Chris@19 113 {
Chris@19 114 pthread_mutex_lock(&s->m);
Chris@19 115 while (s->x <= 0)
Chris@19 116 pthread_cond_wait(&s->c, &s->m);
Chris@19 117 --s->x;
Chris@19 118 pthread_mutex_unlock(&s->m);
Chris@19 119 }
Chris@19 120
Chris@19 121 static void os_sem_up(os_sem_t *s)
Chris@19 122 {
Chris@19 123 pthread_mutex_lock(&s->m);
Chris@19 124 ++s->x;
Chris@19 125 pthread_cond_signal(&s->c);
Chris@19 126 pthread_mutex_unlock(&s->m);
Chris@19 127 }
Chris@19 128
Chris@19 129 #endif
Chris@19 130
Chris@19 131 #define FFTW_WORKER void *
Chris@19 132
Chris@19 133 static void os_create_thread(FFTW_WORKER (*worker)(void *arg),
Chris@19 134 void *arg)
Chris@19 135 {
Chris@19 136 pthread_attr_t attr;
Chris@19 137 pthread_t tid;
Chris@19 138
Chris@19 139 pthread_attr_init(&attr);
Chris@19 140 pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
Chris@19 141 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
Chris@19 142
Chris@19 143 pthread_create(&tid, &attr, worker, (void *)arg);
Chris@19 144 pthread_attr_destroy(&attr);
Chris@19 145 }
Chris@19 146
Chris@19 147 static void os_destroy_thread(void)
Chris@19 148 {
Chris@19 149 pthread_exit((void *)0);
Chris@19 150 }
Chris@19 151
Chris@19 152 #elif defined(__WIN32__) || defined(_WIN32) || defined(_WINDOWS)
Chris@19 153 /* hack: windef.h defines INT for its own purposes and this causes
Chris@19 154 a conflict with our own INT in ifftw.h. Divert the windows
Chris@19 155 definition into another name unlikely to cause a conflict */
Chris@19 156 #define INT magnus_ab_INTegro_seclorum_nascitur_ordo
Chris@19 157 #include <windows.h>
Chris@19 158 #include <process.h>
Chris@19 159 #undef INT
Chris@19 160
Chris@19 161 typedef HANDLE os_mutex_t;
Chris@19 162
Chris@19 163 static void os_mutex_init(os_mutex_t *s)
Chris@19 164 {
Chris@19 165 *s = CreateMutex(NULL, FALSE, NULL);
Chris@19 166 }
Chris@19 167
Chris@19 168 static void os_mutex_destroy(os_mutex_t *s)
Chris@19 169 {
Chris@19 170 CloseHandle(*s);
Chris@19 171 }
Chris@19 172
Chris@19 173 static void os_mutex_lock(os_mutex_t *s)
Chris@19 174 {
Chris@19 175 WaitForSingleObject(*s, INFINITE);
Chris@19 176 }
Chris@19 177
Chris@19 178 static void os_mutex_unlock(os_mutex_t *s)
Chris@19 179 {
Chris@19 180 ReleaseMutex(*s);
Chris@19 181 }
Chris@19 182
Chris@19 183 typedef HANDLE os_sem_t;
Chris@19 184
Chris@19 185 static void os_sem_init(os_sem_t *s)
Chris@19 186 {
Chris@19 187 *s = CreateSemaphore(NULL, 0, 0x7FFFFFFFL, NULL);
Chris@19 188 }
Chris@19 189
Chris@19 190 static void os_sem_destroy(os_sem_t *s)
Chris@19 191 {
Chris@19 192 CloseHandle(*s);
Chris@19 193 }
Chris@19 194
Chris@19 195 static void os_sem_down(os_sem_t *s)
Chris@19 196 {
Chris@19 197 WaitForSingleObject(*s, INFINITE);
Chris@19 198 }
Chris@19 199
Chris@19 200 static void os_sem_up(os_sem_t *s)
Chris@19 201 {
Chris@19 202 ReleaseSemaphore(*s, 1, NULL);
Chris@19 203 }
Chris@19 204
Chris@19 205 #define FFTW_WORKER unsigned __stdcall
Chris@19 206 typedef unsigned (__stdcall *winthread_start) (void *);
Chris@19 207
Chris@19 208 static void os_create_thread(winthread_start worker,
Chris@19 209 void *arg)
Chris@19 210 {
Chris@19 211 _beginthreadex((void *)NULL, /* security attrib */
Chris@19 212 0, /* stack size */
Chris@19 213 worker, /* start address */
Chris@19 214 arg, /* parameters */
Chris@19 215 0, /* creation flags */
Chris@19 216 (unsigned *)NULL); /* tid */
Chris@19 217 }
Chris@19 218
Chris@19 219 static void os_destroy_thread(void)
Chris@19 220 {
Chris@19 221 _endthreadex(0);
Chris@19 222 }
Chris@19 223
Chris@19 224
Chris@19 225 #else
Chris@19 226 #error "No threading layer defined"
Chris@19 227 #endif
Chris@19 228
Chris@19 229 /************************************************************************/
Chris@19 230
Chris@19 231 /* Main code: */
Chris@19 232 struct worker {
Chris@19 233 os_sem_t ready;
Chris@19 234 os_sem_t done;
Chris@19 235 struct work *w;
Chris@19 236 struct worker *cdr;
Chris@19 237 };
Chris@19 238
Chris@19 239 static struct worker *make_worker(void)
Chris@19 240 {
Chris@19 241 struct worker *q = (struct worker *)MALLOC(sizeof(*q), OTHER);
Chris@19 242 os_sem_init(&q->ready);
Chris@19 243 os_sem_init(&q->done);
Chris@19 244 return q;
Chris@19 245 }
Chris@19 246
Chris@19 247 static void unmake_worker(struct worker *q)
Chris@19 248 {
Chris@19 249 os_sem_destroy(&q->done);
Chris@19 250 os_sem_destroy(&q->ready);
Chris@19 251 X(ifree)(q);
Chris@19 252 }
Chris@19 253
Chris@19 254 struct work {
Chris@19 255 spawn_function proc;
Chris@19 256 spawn_data d;
Chris@19 257 struct worker *q; /* the worker responsible for performing this work */
Chris@19 258 };
Chris@19 259
Chris@19 260 static os_mutex_t queue_lock;
Chris@19 261 static os_sem_t termination_semaphore;
Chris@19 262
Chris@19 263 static struct worker *worker_queue;
Chris@19 264 #define WITH_QUEUE_LOCK(what) \
Chris@19 265 { \
Chris@19 266 os_mutex_lock(&queue_lock); \
Chris@19 267 what; \
Chris@19 268 os_mutex_unlock(&queue_lock); \
Chris@19 269 }
Chris@19 270
Chris@19 271 static FFTW_WORKER worker(void *arg)
Chris@19 272 {
Chris@19 273 struct worker *ego = (struct worker *)arg;
Chris@19 274 struct work *w;
Chris@19 275
Chris@19 276 for (;;) {
Chris@19 277 /* wait until work becomes available */
Chris@19 278 os_sem_down(&ego->ready);
Chris@19 279
Chris@19 280 w = ego->w;
Chris@19 281
Chris@19 282 /* !w->proc ==> terminate worker */
Chris@19 283 if (!w->proc) break;
Chris@19 284
Chris@19 285 /* do the work */
Chris@19 286 w->proc(&w->d);
Chris@19 287
Chris@19 288 /* signal that work is done */
Chris@19 289 os_sem_up(&ego->done);
Chris@19 290 }
Chris@19 291
Chris@19 292 /* termination protocol */
Chris@19 293 os_sem_up(&termination_semaphore);
Chris@19 294
Chris@19 295 os_destroy_thread();
Chris@19 296 /* UNREACHABLE */
Chris@19 297 return 0;
Chris@19 298 }
Chris@19 299
Chris@19 300 static void enqueue(struct worker *q)
Chris@19 301 {
Chris@19 302 WITH_QUEUE_LOCK({
Chris@19 303 q->cdr = worker_queue;
Chris@19 304 worker_queue = q;
Chris@19 305 });
Chris@19 306 }
Chris@19 307
Chris@19 308 static struct worker *dequeue(void)
Chris@19 309 {
Chris@19 310 struct worker *q;
Chris@19 311
Chris@19 312 WITH_QUEUE_LOCK({
Chris@19 313 q = worker_queue;
Chris@19 314 if (q)
Chris@19 315 worker_queue = q->cdr;
Chris@19 316 });
Chris@19 317
Chris@19 318 if (!q) {
Chris@19 319 /* no worker is available. Create one */
Chris@19 320 q = make_worker();
Chris@19 321 os_create_thread(worker, q);
Chris@19 322 }
Chris@19 323
Chris@19 324 return q;
Chris@19 325 }
Chris@19 326
Chris@19 327
Chris@19 328 static void kill_workforce(void)
Chris@19 329 {
Chris@19 330 struct work w;
Chris@19 331
Chris@19 332 w.proc = 0;
Chris@19 333
Chris@19 334 THREAD_ON; /* needed for debugging mode: since make_worker
Chris@19 335 is called from dequeue which is only called in
Chris@19 336 thread_on mode, we need to unmake_worker in thread_on. */
Chris@19 337 WITH_QUEUE_LOCK({
Chris@19 338 /* tell all workers that they must terminate.
Chris@19 339
Chris@19 340 Because workers enqueue themselves before signaling the
Chris@19 341 completion of the work, all workers belong to the worker queue
Chris@19 342 if we get here. Also, all workers are waiting at
Chris@19 343 os_sem_down(ready), so we can hold the queue lock without
Chris@19 344 deadlocking */
Chris@19 345 while (worker_queue) {
Chris@19 346 struct worker *q = worker_queue;
Chris@19 347 worker_queue = q->cdr;
Chris@19 348 q->w = &w;
Chris@19 349 os_sem_up(&q->ready);
Chris@19 350 os_sem_down(&termination_semaphore);
Chris@19 351 unmake_worker(q);
Chris@19 352 }
Chris@19 353 });
Chris@19 354 THREAD_OFF;
Chris@19 355 }
Chris@19 356
Chris@19 357 int X(ithreads_init)(void)
Chris@19 358 {
Chris@19 359 os_mutex_init(&queue_lock);
Chris@19 360 os_sem_init(&termination_semaphore);
Chris@19 361
Chris@19 362 WITH_QUEUE_LOCK({
Chris@19 363 worker_queue = 0;
Chris@19 364 })
Chris@19 365
Chris@19 366 return 0; /* no error */
Chris@19 367 }
Chris@19 368
Chris@19 369 /* Distribute a loop from 0 to loopmax-1 over nthreads threads.
Chris@19 370 proc(d) is called to execute a block of iterations from d->min
Chris@19 371 to d->max-1. d->thr_num indicate the number of the thread
Chris@19 372 that is executing proc (from 0 to nthreads-1), and d->data is
Chris@19 373 the same as the data parameter passed to X(spawn_loop).
Chris@19 374
Chris@19 375 This function returns only after all the threads have completed. */
Chris@19 376 void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data)
Chris@19 377 {
Chris@19 378 int block_size;
Chris@19 379 struct work *r;
Chris@19 380 int i;
Chris@19 381
Chris@19 382 A(loopmax >= 0);
Chris@19 383 A(nthr > 0);
Chris@19 384 A(proc);
Chris@19 385
Chris@19 386 if (!loopmax) return;
Chris@19 387
Chris@19 388 /* Choose the block size and number of threads in order to (1)
Chris@19 389 minimize the critical path and (2) use the fewest threads that
Chris@19 390 achieve the same critical path (to minimize overhead).
Chris@19 391 e.g. if loopmax is 5 and nthr is 4, we should use only 3
Chris@19 392 threads with block sizes of 2, 2, and 1. */
Chris@19 393 block_size = (loopmax + nthr - 1) / nthr;
Chris@19 394 nthr = (loopmax + block_size - 1) / block_size;
Chris@19 395
Chris@19 396 THREAD_ON; /* prevent debugging mode from failing under threads */
Chris@19 397 STACK_MALLOC(struct work *, r, sizeof(struct work) * nthr);
Chris@19 398
Chris@19 399 /* distribute work: */
Chris@19 400 for (i = 0; i < nthr; ++i) {
Chris@19 401 struct work *w = &r[i];
Chris@19 402 spawn_data *d = &w->d;
Chris@19 403
Chris@19 404 d->max = (d->min = i * block_size) + block_size;
Chris@19 405 if (d->max > loopmax)
Chris@19 406 d->max = loopmax;
Chris@19 407 d->thr_num = i;
Chris@19 408 d->data = data;
Chris@19 409 w->proc = proc;
Chris@19 410
Chris@19 411 if (i == nthr - 1) {
Chris@19 412 /* do the work ourselves */
Chris@19 413 proc(d);
Chris@19 414 } else {
Chris@19 415 /* assign a worker to W */
Chris@19 416 w->q = dequeue();
Chris@19 417
Chris@19 418 /* tell worker w->q to do it */
Chris@19 419 w->q->w = w; /* Dirac could have written this */
Chris@19 420 os_sem_up(&w->q->ready);
Chris@19 421 }
Chris@19 422 }
Chris@19 423
Chris@19 424 for (i = 0; i < nthr - 1; ++i) {
Chris@19 425 struct work *w = &r[i];
Chris@19 426 os_sem_down(&w->q->done);
Chris@19 427 enqueue(w->q);
Chris@19 428 }
Chris@19 429
Chris@19 430 STACK_FREE(r);
Chris@19 431 THREAD_OFF; /* prevent debugging mode from failing under threads */
Chris@19 432 }
Chris@19 433
Chris@19 434 void X(threads_cleanup)(void)
Chris@19 435 {
Chris@19 436 kill_workforce();
Chris@19 437 os_mutex_destroy(&queue_lock);
Chris@19 438 os_sem_destroy(&termination_semaphore);
Chris@19 439 }