cannam@127: /* cannam@127: * Copyright (c) 2003, 2007-14 Matteo Frigo cannam@127: * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology cannam@127: * cannam@127: * This program is free software; you can redistribute it and/or modify cannam@127: * it under the terms of the GNU General Public License as published by cannam@127: * the Free Software Foundation; either version 2 of the License, or cannam@127: * (at your option) any later version. cannam@127: * cannam@127: * This program is distributed in the hope that it will be useful, cannam@127: * but WITHOUT ANY WARRANTY; without even the implied warranty of cannam@127: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the cannam@127: * GNU General Public License for more details. cannam@127: * cannam@127: * You should have received a copy of the GNU General Public License cannam@127: * along with this program; if not, write to the Free Software cannam@127: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA cannam@127: * cannam@127: */ cannam@127: cannam@127: /* threads.c: Portable thread spawning for loops, via the X(spawn_loop) cannam@127: function. The first portion of this file is a set of macros to cannam@127: spawn and join threads on various systems. */ cannam@127: cannam@127: #include "threads.h" cannam@127: #include "api.h" cannam@127: cannam@127: #if defined(USING_POSIX_THREADS) cannam@127: cannam@127: #include cannam@127: cannam@127: #ifdef HAVE_UNISTD_H cannam@127: # include cannam@127: #endif cannam@127: cannam@127: /* imlementation of semaphores and mutexes: */ cannam@127: #if (defined(_POSIX_SEMAPHORES) && (_POSIX_SEMAPHORES >= 200112L)) cannam@127: cannam@127: /* If optional POSIX semaphores are supported, use them to cannam@127: implement both semaphores and mutexes. */ cannam@127: # include cannam@127: # include cannam@127: cannam@127: typedef sem_t os_sem_t; cannam@127: cannam@127: static void os_sem_init(os_sem_t *s) { sem_init(s, 0, 0); } cannam@127: static void os_sem_destroy(os_sem_t *s) { sem_destroy(s); } cannam@127: cannam@127: static void os_sem_down(os_sem_t *s) cannam@127: { cannam@127: int err; cannam@127: do { cannam@127: err = sem_wait(s); cannam@127: } while (err == -1 && errno == EINTR); cannam@127: CK(err == 0); cannam@127: } cannam@127: cannam@127: static void os_sem_up(os_sem_t *s) { sem_post(s); } cannam@127: cannam@127: /* cannam@127: The reason why we use sem_t to implement mutexes is that I have cannam@127: seen mysterious hangs with glibc-2.7 and linux-2.6.22 when using cannam@127: pthread_mutex_t, but no hangs with sem_t or with linux >= cannam@127: 2.6.24. For lack of better information, sem_t looks like the cannam@127: safest choice. cannam@127: */ cannam@127: typedef sem_t os_mutex_t; cannam@127: static void os_mutex_init(os_mutex_t *s) { sem_init(s, 0, 1); } cannam@127: #define os_mutex_destroy os_sem_destroy cannam@127: #define os_mutex_lock os_sem_down cannam@127: #define os_mutex_unlock os_sem_up cannam@127: cannam@127: #else cannam@127: cannam@127: /* If optional POSIX semaphores are not defined, use pthread cannam@127: mutexes for mutexes, and simulate semaphores with condition cannam@127: variables */ cannam@127: typedef pthread_mutex_t os_mutex_t; cannam@127: cannam@127: static void os_mutex_init(os_mutex_t *s) cannam@127: { cannam@127: pthread_mutex_init(s, (pthread_mutexattr_t *)0); cannam@127: } cannam@127: cannam@127: static void os_mutex_destroy(os_mutex_t *s) { pthread_mutex_destroy(s); } cannam@127: static void os_mutex_lock(os_mutex_t *s) { pthread_mutex_lock(s); } cannam@127: static void os_mutex_unlock(os_mutex_t *s) { pthread_mutex_unlock(s); } cannam@127: cannam@127: typedef struct { cannam@127: pthread_mutex_t m; cannam@127: pthread_cond_t c; cannam@127: volatile int x; cannam@127: } os_sem_t; cannam@127: cannam@127: static void os_sem_init(os_sem_t *s) cannam@127: { cannam@127: pthread_mutex_init(&s->m, (pthread_mutexattr_t *)0); cannam@127: pthread_cond_init(&s->c, (pthread_condattr_t *)0); cannam@127: cannam@127: /* wrap initialization in lock to exploit the release cannam@127: semantics of pthread_mutex_unlock() */ cannam@127: pthread_mutex_lock(&s->m); cannam@127: s->x = 0; cannam@127: pthread_mutex_unlock(&s->m); cannam@127: } cannam@127: cannam@127: static void os_sem_destroy(os_sem_t *s) cannam@127: { cannam@127: pthread_mutex_destroy(&s->m); cannam@127: pthread_cond_destroy(&s->c); cannam@127: } cannam@127: cannam@127: static void os_sem_down(os_sem_t *s) cannam@127: { cannam@127: pthread_mutex_lock(&s->m); cannam@127: while (s->x <= 0) cannam@127: pthread_cond_wait(&s->c, &s->m); cannam@127: --s->x; cannam@127: pthread_mutex_unlock(&s->m); cannam@127: } cannam@127: cannam@127: static void os_sem_up(os_sem_t *s) cannam@127: { cannam@127: pthread_mutex_lock(&s->m); cannam@127: ++s->x; cannam@127: pthread_cond_signal(&s->c); cannam@127: pthread_mutex_unlock(&s->m); cannam@127: } cannam@127: cannam@127: #endif cannam@127: cannam@127: #define FFTW_WORKER void * cannam@127: cannam@127: static void os_create_thread(FFTW_WORKER (*worker)(void *arg), cannam@127: void *arg) cannam@127: { cannam@127: pthread_attr_t attr; cannam@127: pthread_t tid; cannam@127: cannam@127: pthread_attr_init(&attr); cannam@127: pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); cannam@127: pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); cannam@127: cannam@127: pthread_create(&tid, &attr, worker, (void *)arg); cannam@127: pthread_attr_destroy(&attr); cannam@127: } cannam@127: cannam@127: static void os_destroy_thread(void) cannam@127: { cannam@127: pthread_exit((void *)0); cannam@127: } cannam@127: cannam@127: /* support for static mutexes */ cannam@127: typedef pthread_mutex_t os_static_mutex_t; cannam@127: #define OS_STATIC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER cannam@127: static void os_static_mutex_lock(os_static_mutex_t *s) { pthread_mutex_lock(s); } cannam@127: static void os_static_mutex_unlock(os_static_mutex_t *s) { pthread_mutex_unlock(s); } cannam@127: cannam@127: #elif defined(__WIN32__) || defined(_WIN32) || defined(_WINDOWS) cannam@127: /* hack: windef.h defines INT for its own purposes and this causes cannam@127: a conflict with our own INT in ifftw.h. Divert the windows cannam@127: definition into another name unlikely to cause a conflict */ cannam@127: #define INT magnus_ab_INTegro_seclorum_nascitur_ordo cannam@127: #include cannam@127: #include cannam@127: #undef INT cannam@127: cannam@127: typedef HANDLE os_mutex_t; cannam@127: cannam@127: static void os_mutex_init(os_mutex_t *s) cannam@127: { cannam@127: *s = CreateMutex(NULL, FALSE, NULL); cannam@127: } cannam@127: cannam@127: static void os_mutex_destroy(os_mutex_t *s) cannam@127: { cannam@127: CloseHandle(*s); cannam@127: } cannam@127: cannam@127: static void os_mutex_lock(os_mutex_t *s) cannam@127: { cannam@127: WaitForSingleObject(*s, INFINITE); cannam@127: } cannam@127: cannam@127: static void os_mutex_unlock(os_mutex_t *s) cannam@127: { cannam@127: ReleaseMutex(*s); cannam@127: } cannam@127: cannam@127: typedef HANDLE os_sem_t; cannam@127: cannam@127: static void os_sem_init(os_sem_t *s) cannam@127: { cannam@127: *s = CreateSemaphore(NULL, 0, 0x7FFFFFFFL, NULL); cannam@127: } cannam@127: cannam@127: static void os_sem_destroy(os_sem_t *s) cannam@127: { cannam@127: CloseHandle(*s); cannam@127: } cannam@127: cannam@127: static void os_sem_down(os_sem_t *s) cannam@127: { cannam@127: WaitForSingleObject(*s, INFINITE); cannam@127: } cannam@127: cannam@127: static void os_sem_up(os_sem_t *s) cannam@127: { cannam@127: ReleaseSemaphore(*s, 1, NULL); cannam@127: } cannam@127: cannam@127: #define FFTW_WORKER unsigned __stdcall cannam@127: typedef unsigned (__stdcall *winthread_start) (void *); cannam@127: cannam@127: static void os_create_thread(winthread_start worker, cannam@127: void *arg) cannam@127: { cannam@127: _beginthreadex((void *)NULL, /* security attrib */ cannam@127: 0, /* stack size */ cannam@127: worker, /* start address */ cannam@127: arg, /* parameters */ cannam@127: 0, /* creation flags */ cannam@127: (unsigned *)NULL); /* tid */ cannam@127: } cannam@127: cannam@127: static void os_destroy_thread(void) cannam@127: { cannam@127: _endthreadex(0); cannam@127: } cannam@127: cannam@127: /* windows does not have statically-initialized mutexes---fake a cannam@127: spinlock */ cannam@127: typedef volatile LONG os_static_mutex_t; cannam@127: #define OS_STATIC_MUTEX_INITIALIZER 0 cannam@127: static void os_static_mutex_lock(os_static_mutex_t *s) cannam@127: { cannam@127: while (InterlockedExchange(s, 1) == 1) { cannam@127: YieldProcessor(); cannam@127: Sleep(0); cannam@127: } cannam@127: } cannam@127: static void os_static_mutex_unlock(os_static_mutex_t *s) cannam@127: { cannam@127: LONG old = InterlockedExchange(s, 0); cannam@127: A(old == 1); cannam@127: } cannam@127: #else cannam@127: #error "No threading layer defined" cannam@127: #endif cannam@127: cannam@127: /************************************************************************/ cannam@127: cannam@127: /* Main code: */ cannam@127: struct worker { cannam@127: os_sem_t ready; cannam@127: os_sem_t done; cannam@127: struct work *w; cannam@127: struct worker *cdr; cannam@127: }; cannam@127: cannam@127: static struct worker *make_worker(void) cannam@127: { cannam@127: struct worker *q = (struct worker *)MALLOC(sizeof(*q), OTHER); cannam@127: os_sem_init(&q->ready); cannam@127: os_sem_init(&q->done); cannam@127: return q; cannam@127: } cannam@127: cannam@127: static void unmake_worker(struct worker *q) cannam@127: { cannam@127: os_sem_destroy(&q->done); cannam@127: os_sem_destroy(&q->ready); cannam@127: X(ifree)(q); cannam@127: } cannam@127: cannam@127: struct work { cannam@127: spawn_function proc; cannam@127: spawn_data d; cannam@127: struct worker *q; /* the worker responsible for performing this work */ cannam@127: }; cannam@127: cannam@127: static os_mutex_t queue_lock; cannam@127: static os_sem_t termination_semaphore; cannam@127: cannam@127: static struct worker *worker_queue; cannam@127: #define WITH_QUEUE_LOCK(what) \ cannam@127: { \ cannam@127: os_mutex_lock(&queue_lock); \ cannam@127: what; \ cannam@127: os_mutex_unlock(&queue_lock); \ cannam@127: } cannam@127: cannam@127: static FFTW_WORKER worker(void *arg) cannam@127: { cannam@127: struct worker *ego = (struct worker *)arg; cannam@127: struct work *w; cannam@127: cannam@127: for (;;) { cannam@127: /* wait until work becomes available */ cannam@127: os_sem_down(&ego->ready); cannam@127: cannam@127: w = ego->w; cannam@127: cannam@127: /* !w->proc ==> terminate worker */ cannam@127: if (!w->proc) break; cannam@127: cannam@127: /* do the work */ cannam@127: w->proc(&w->d); cannam@127: cannam@127: /* signal that work is done */ cannam@127: os_sem_up(&ego->done); cannam@127: } cannam@127: cannam@127: /* termination protocol */ cannam@127: os_sem_up(&termination_semaphore); cannam@127: cannam@127: os_destroy_thread(); cannam@127: /* UNREACHABLE */ cannam@127: return 0; cannam@127: } cannam@127: cannam@127: static void enqueue(struct worker *q) cannam@127: { cannam@127: WITH_QUEUE_LOCK({ cannam@127: q->cdr = worker_queue; cannam@127: worker_queue = q; cannam@127: }); cannam@127: } cannam@127: cannam@127: static struct worker *dequeue(void) cannam@127: { cannam@127: struct worker *q; cannam@127: cannam@127: WITH_QUEUE_LOCK({ cannam@127: q = worker_queue; cannam@127: if (q) cannam@127: worker_queue = q->cdr; cannam@127: }); cannam@127: cannam@127: if (!q) { cannam@127: /* no worker is available. Create one */ cannam@127: q = make_worker(); cannam@127: os_create_thread(worker, q); cannam@127: } cannam@127: cannam@127: return q; cannam@127: } cannam@127: cannam@127: cannam@127: static void kill_workforce(void) cannam@127: { cannam@127: struct work w; cannam@127: cannam@127: w.proc = 0; cannam@127: cannam@127: THREAD_ON; /* needed for debugging mode: since make_worker cannam@127: is called from dequeue which is only called in cannam@127: thread_on mode, we need to unmake_worker in thread_on. */ cannam@127: WITH_QUEUE_LOCK({ cannam@127: /* tell all workers that they must terminate. cannam@127: cannam@127: Because workers enqueue themselves before signaling the cannam@127: completion of the work, all workers belong to the worker queue cannam@127: if we get here. Also, all workers are waiting at cannam@127: os_sem_down(ready), so we can hold the queue lock without cannam@127: deadlocking */ cannam@127: while (worker_queue) { cannam@127: struct worker *q = worker_queue; cannam@127: worker_queue = q->cdr; cannam@127: q->w = &w; cannam@127: os_sem_up(&q->ready); cannam@127: os_sem_down(&termination_semaphore); cannam@127: unmake_worker(q); cannam@127: } cannam@127: }); cannam@127: THREAD_OFF; cannam@127: } cannam@127: cannam@127: static os_static_mutex_t initialization_mutex = OS_STATIC_MUTEX_INITIALIZER; cannam@127: cannam@127: int X(ithreads_init)(void) cannam@127: { cannam@127: os_static_mutex_lock(&initialization_mutex); { cannam@127: os_mutex_init(&queue_lock); cannam@127: os_sem_init(&termination_semaphore); cannam@127: cannam@127: WITH_QUEUE_LOCK({ cannam@127: worker_queue = 0; cannam@127: }); cannam@127: } os_static_mutex_unlock(&initialization_mutex); cannam@127: cannam@127: return 0; /* no error */ cannam@127: } cannam@127: cannam@127: /* Distribute a loop from 0 to loopmax-1 over nthreads threads. cannam@127: proc(d) is called to execute a block of iterations from d->min cannam@127: to d->max-1. d->thr_num indicate the number of the thread cannam@127: that is executing proc (from 0 to nthreads-1), and d->data is cannam@127: the same as the data parameter passed to X(spawn_loop). cannam@127: cannam@127: This function returns only after all the threads have completed. */ cannam@127: void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) cannam@127: { cannam@127: int block_size; cannam@127: struct work *r; cannam@127: int i; cannam@127: cannam@127: A(loopmax >= 0); cannam@127: A(nthr > 0); cannam@127: A(proc); cannam@127: cannam@127: if (!loopmax) return; cannam@127: cannam@127: /* Choose the block size and number of threads in order to (1) cannam@127: minimize the critical path and (2) use the fewest threads that cannam@127: achieve the same critical path (to minimize overhead). cannam@127: e.g. if loopmax is 5 and nthr is 4, we should use only 3 cannam@127: threads with block sizes of 2, 2, and 1. */ cannam@127: block_size = (loopmax + nthr - 1) / nthr; cannam@127: nthr = (loopmax + block_size - 1) / block_size; cannam@127: cannam@127: THREAD_ON; /* prevent debugging mode from failing under threads */ cannam@127: STACK_MALLOC(struct work *, r, sizeof(struct work) * nthr); cannam@127: cannam@127: /* distribute work: */ cannam@127: for (i = 0; i < nthr; ++i) { cannam@127: struct work *w = &r[i]; cannam@127: spawn_data *d = &w->d; cannam@127: cannam@127: d->max = (d->min = i * block_size) + block_size; cannam@127: if (d->max > loopmax) cannam@127: d->max = loopmax; cannam@127: d->thr_num = i; cannam@127: d->data = data; cannam@127: w->proc = proc; cannam@127: cannam@127: if (i == nthr - 1) { cannam@127: /* do the work ourselves */ cannam@127: proc(d); cannam@127: } else { cannam@127: /* assign a worker to W */ cannam@127: w->q = dequeue(); cannam@127: cannam@127: /* tell worker w->q to do it */ cannam@127: w->q->w = w; /* Dirac could have written this */ cannam@127: os_sem_up(&w->q->ready); cannam@127: } cannam@127: } cannam@127: cannam@127: for (i = 0; i < nthr - 1; ++i) { cannam@127: struct work *w = &r[i]; cannam@127: os_sem_down(&w->q->done); cannam@127: enqueue(w->q); cannam@127: } cannam@127: cannam@127: STACK_FREE(r); cannam@127: THREAD_OFF; /* prevent debugging mode from failing under threads */ cannam@127: } cannam@127: cannam@127: void X(threads_cleanup)(void) cannam@127: { cannam@127: kill_workforce(); cannam@127: os_mutex_destroy(&queue_lock); cannam@127: os_sem_destroy(&termination_semaphore); cannam@127: } cannam@127: cannam@127: static os_static_mutex_t install_planner_hooks_mutex = OS_STATIC_MUTEX_INITIALIZER; cannam@127: static os_mutex_t planner_mutex; cannam@127: static int planner_hooks_installed = 0; cannam@127: cannam@127: static void lock_planner_mutex(void) cannam@127: { cannam@127: os_mutex_lock(&planner_mutex); cannam@127: } cannam@127: cannam@127: static void unlock_planner_mutex(void) cannam@127: { cannam@127: os_mutex_unlock(&planner_mutex); cannam@127: } cannam@127: cannam@127: void X(threads_register_planner_hooks)(void) cannam@127: { cannam@127: os_static_mutex_lock(&install_planner_hooks_mutex); { cannam@127: if (!planner_hooks_installed) { cannam@127: os_mutex_init(&planner_mutex); cannam@127: X(set_planner_hooks)(lock_planner_mutex, unlock_planner_mutex); cannam@127: planner_hooks_installed = 1; cannam@127: } cannam@127: } os_static_mutex_unlock(&install_planner_hooks_mutex); cannam@127: }