Chris@19: /* Chris@19: * Copyright (c) 2003, 2007-14 Matteo Frigo Chris@19: * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology Chris@19: * Chris@19: * This program is free software; you can redistribute it and/or modify Chris@19: * it under the terms of the GNU General Public License as published by Chris@19: * the Free Software Foundation; either version 2 of the License, or Chris@19: * (at your option) any later version. Chris@19: * Chris@19: * This program is distributed in the hope that it will be useful, Chris@19: * but WITHOUT ANY WARRANTY; without even the implied warranty of Chris@19: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Chris@19: * GNU General Public License for more details. Chris@19: * Chris@19: * You should have received a copy of the GNU General Public License Chris@19: * along with this program; if not, write to the Free Software Chris@19: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Chris@19: * Chris@19: */ Chris@19: Chris@19: /* threads.c: Portable thread spawning for loops, via the X(spawn_loop) Chris@19: function. The first portion of this file is a set of macros to Chris@19: spawn and join threads on various systems. */ Chris@19: Chris@19: #include "threads.h" Chris@19: Chris@19: #if defined(USING_POSIX_THREADS) Chris@19: Chris@19: #include Chris@19: Chris@19: #ifdef HAVE_UNISTD_H Chris@19: # include Chris@19: #endif Chris@19: Chris@19: /* imlementation of semaphores and mutexes: */ Chris@19: #if (defined(_POSIX_SEMAPHORES) && (_POSIX_SEMAPHORES >= 200112L)) Chris@19: Chris@19: /* If optional POSIX semaphores are supported, use them to Chris@19: implement both semaphores and mutexes. */ Chris@19: # include Chris@19: # include Chris@19: Chris@19: typedef sem_t os_sem_t; Chris@19: Chris@19: static void os_sem_init(os_sem_t *s) { sem_init(s, 0, 0); } Chris@19: static void os_sem_destroy(os_sem_t *s) { sem_destroy(s); } Chris@19: Chris@19: static void os_sem_down(os_sem_t *s) Chris@19: { Chris@19: int err; Chris@19: do { Chris@19: err = sem_wait(s); Chris@19: } while (err == -1 && errno == EINTR); Chris@19: CK(err == 0); Chris@19: } Chris@19: Chris@19: static void os_sem_up(os_sem_t *s) { sem_post(s); } Chris@19: Chris@19: /* Chris@19: The reason why we use sem_t to implement mutexes is that I have Chris@19: seen mysterious hangs with glibc-2.7 and linux-2.6.22 when using Chris@19: pthread_mutex_t, but no hangs with sem_t or with linux >= Chris@19: 2.6.24. For lack of better information, sem_t looks like the Chris@19: safest choice. Chris@19: */ Chris@19: typedef sem_t os_mutex_t; Chris@19: static void os_mutex_init(os_mutex_t *s) { sem_init(s, 0, 1); } Chris@19: #define os_mutex_destroy os_sem_destroy Chris@19: #define os_mutex_lock os_sem_down Chris@19: #define os_mutex_unlock os_sem_up Chris@19: Chris@19: #else Chris@19: Chris@19: /* If optional POSIX semaphores are not defined, use pthread Chris@19: mutexes for mutexes, and simulate semaphores with condition Chris@19: variables */ Chris@19: typedef pthread_mutex_t os_mutex_t; Chris@19: Chris@19: static void os_mutex_init(os_mutex_t *s) Chris@19: { Chris@19: pthread_mutex_init(s, (pthread_mutexattr_t *)0); Chris@19: } Chris@19: Chris@19: static void os_mutex_destroy(os_mutex_t *s) { pthread_mutex_destroy(s); } Chris@19: static void os_mutex_lock(os_mutex_t *s) { pthread_mutex_lock(s); } Chris@19: static void os_mutex_unlock(os_mutex_t *s) { pthread_mutex_unlock(s); } Chris@19: Chris@19: typedef struct { Chris@19: pthread_mutex_t m; Chris@19: pthread_cond_t c; Chris@19: volatile int x; Chris@19: } os_sem_t; Chris@19: Chris@19: static void os_sem_init(os_sem_t *s) Chris@19: { Chris@19: pthread_mutex_init(&s->m, (pthread_mutexattr_t *)0); Chris@19: pthread_cond_init(&s->c, (pthread_condattr_t *)0); Chris@19: Chris@19: /* wrap initialization in lock to exploit the release Chris@19: semantics of pthread_mutex_unlock() */ Chris@19: pthread_mutex_lock(&s->m); Chris@19: s->x = 0; Chris@19: pthread_mutex_unlock(&s->m); Chris@19: } Chris@19: Chris@19: static void os_sem_destroy(os_sem_t *s) Chris@19: { Chris@19: pthread_mutex_destroy(&s->m); Chris@19: pthread_cond_destroy(&s->c); Chris@19: } Chris@19: Chris@19: static void os_sem_down(os_sem_t *s) Chris@19: { Chris@19: pthread_mutex_lock(&s->m); Chris@19: while (s->x <= 0) Chris@19: pthread_cond_wait(&s->c, &s->m); Chris@19: --s->x; Chris@19: pthread_mutex_unlock(&s->m); Chris@19: } Chris@19: Chris@19: static void os_sem_up(os_sem_t *s) Chris@19: { Chris@19: pthread_mutex_lock(&s->m); Chris@19: ++s->x; Chris@19: pthread_cond_signal(&s->c); Chris@19: pthread_mutex_unlock(&s->m); Chris@19: } Chris@19: Chris@19: #endif Chris@19: Chris@19: #define FFTW_WORKER void * Chris@19: Chris@19: static void os_create_thread(FFTW_WORKER (*worker)(void *arg), Chris@19: void *arg) Chris@19: { Chris@19: pthread_attr_t attr; Chris@19: pthread_t tid; Chris@19: Chris@19: pthread_attr_init(&attr); Chris@19: pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); Chris@19: pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); Chris@19: Chris@19: pthread_create(&tid, &attr, worker, (void *)arg); Chris@19: pthread_attr_destroy(&attr); Chris@19: } Chris@19: Chris@19: static void os_destroy_thread(void) Chris@19: { Chris@19: pthread_exit((void *)0); Chris@19: } Chris@19: Chris@19: #elif defined(__WIN32__) || defined(_WIN32) || defined(_WINDOWS) Chris@19: /* hack: windef.h defines INT for its own purposes and this causes Chris@19: a conflict with our own INT in ifftw.h. Divert the windows Chris@19: definition into another name unlikely to cause a conflict */ Chris@19: #define INT magnus_ab_INTegro_seclorum_nascitur_ordo Chris@19: #include Chris@19: #include Chris@19: #undef INT Chris@19: Chris@19: typedef HANDLE os_mutex_t; Chris@19: Chris@19: static void os_mutex_init(os_mutex_t *s) Chris@19: { Chris@19: *s = CreateMutex(NULL, FALSE, NULL); Chris@19: } Chris@19: Chris@19: static void os_mutex_destroy(os_mutex_t *s) Chris@19: { Chris@19: CloseHandle(*s); Chris@19: } Chris@19: Chris@19: static void os_mutex_lock(os_mutex_t *s) Chris@19: { Chris@19: WaitForSingleObject(*s, INFINITE); Chris@19: } Chris@19: Chris@19: static void os_mutex_unlock(os_mutex_t *s) Chris@19: { Chris@19: ReleaseMutex(*s); Chris@19: } Chris@19: Chris@19: typedef HANDLE os_sem_t; Chris@19: Chris@19: static void os_sem_init(os_sem_t *s) Chris@19: { Chris@19: *s = CreateSemaphore(NULL, 0, 0x7FFFFFFFL, NULL); Chris@19: } Chris@19: Chris@19: static void os_sem_destroy(os_sem_t *s) Chris@19: { Chris@19: CloseHandle(*s); Chris@19: } Chris@19: Chris@19: static void os_sem_down(os_sem_t *s) Chris@19: { Chris@19: WaitForSingleObject(*s, INFINITE); Chris@19: } Chris@19: Chris@19: static void os_sem_up(os_sem_t *s) Chris@19: { Chris@19: ReleaseSemaphore(*s, 1, NULL); Chris@19: } Chris@19: Chris@19: #define FFTW_WORKER unsigned __stdcall Chris@19: typedef unsigned (__stdcall *winthread_start) (void *); Chris@19: Chris@19: static void os_create_thread(winthread_start worker, Chris@19: void *arg) Chris@19: { Chris@19: _beginthreadex((void *)NULL, /* security attrib */ Chris@19: 0, /* stack size */ Chris@19: worker, /* start address */ Chris@19: arg, /* parameters */ Chris@19: 0, /* creation flags */ Chris@19: (unsigned *)NULL); /* tid */ Chris@19: } Chris@19: Chris@19: static void os_destroy_thread(void) Chris@19: { Chris@19: _endthreadex(0); Chris@19: } Chris@19: Chris@19: Chris@19: #else Chris@19: #error "No threading layer defined" Chris@19: #endif Chris@19: Chris@19: /************************************************************************/ Chris@19: Chris@19: /* Main code: */ Chris@19: struct worker { Chris@19: os_sem_t ready; Chris@19: os_sem_t done; Chris@19: struct work *w; Chris@19: struct worker *cdr; Chris@19: }; Chris@19: Chris@19: static struct worker *make_worker(void) Chris@19: { Chris@19: struct worker *q = (struct worker *)MALLOC(sizeof(*q), OTHER); Chris@19: os_sem_init(&q->ready); Chris@19: os_sem_init(&q->done); Chris@19: return q; Chris@19: } Chris@19: Chris@19: static void unmake_worker(struct worker *q) Chris@19: { Chris@19: os_sem_destroy(&q->done); Chris@19: os_sem_destroy(&q->ready); Chris@19: X(ifree)(q); Chris@19: } Chris@19: Chris@19: struct work { Chris@19: spawn_function proc; Chris@19: spawn_data d; Chris@19: struct worker *q; /* the worker responsible for performing this work */ Chris@19: }; Chris@19: Chris@19: static os_mutex_t queue_lock; Chris@19: static os_sem_t termination_semaphore; Chris@19: Chris@19: static struct worker *worker_queue; Chris@19: #define WITH_QUEUE_LOCK(what) \ Chris@19: { \ Chris@19: os_mutex_lock(&queue_lock); \ Chris@19: what; \ Chris@19: os_mutex_unlock(&queue_lock); \ Chris@19: } Chris@19: Chris@19: static FFTW_WORKER worker(void *arg) Chris@19: { Chris@19: struct worker *ego = (struct worker *)arg; Chris@19: struct work *w; Chris@19: Chris@19: for (;;) { Chris@19: /* wait until work becomes available */ Chris@19: os_sem_down(&ego->ready); Chris@19: Chris@19: w = ego->w; Chris@19: Chris@19: /* !w->proc ==> terminate worker */ Chris@19: if (!w->proc) break; Chris@19: Chris@19: /* do the work */ Chris@19: w->proc(&w->d); Chris@19: Chris@19: /* signal that work is done */ Chris@19: os_sem_up(&ego->done); Chris@19: } Chris@19: Chris@19: /* termination protocol */ Chris@19: os_sem_up(&termination_semaphore); Chris@19: Chris@19: os_destroy_thread(); Chris@19: /* UNREACHABLE */ Chris@19: return 0; Chris@19: } Chris@19: Chris@19: static void enqueue(struct worker *q) Chris@19: { Chris@19: WITH_QUEUE_LOCK({ Chris@19: q->cdr = worker_queue; Chris@19: worker_queue = q; Chris@19: }); Chris@19: } Chris@19: Chris@19: static struct worker *dequeue(void) Chris@19: { Chris@19: struct worker *q; Chris@19: Chris@19: WITH_QUEUE_LOCK({ Chris@19: q = worker_queue; Chris@19: if (q) Chris@19: worker_queue = q->cdr; Chris@19: }); Chris@19: Chris@19: if (!q) { Chris@19: /* no worker is available. Create one */ Chris@19: q = make_worker(); Chris@19: os_create_thread(worker, q); Chris@19: } Chris@19: Chris@19: return q; Chris@19: } Chris@19: Chris@19: Chris@19: static void kill_workforce(void) Chris@19: { Chris@19: struct work w; Chris@19: Chris@19: w.proc = 0; Chris@19: Chris@19: THREAD_ON; /* needed for debugging mode: since make_worker Chris@19: is called from dequeue which is only called in Chris@19: thread_on mode, we need to unmake_worker in thread_on. */ Chris@19: WITH_QUEUE_LOCK({ Chris@19: /* tell all workers that they must terminate. Chris@19: Chris@19: Because workers enqueue themselves before signaling the Chris@19: completion of the work, all workers belong to the worker queue Chris@19: if we get here. Also, all workers are waiting at Chris@19: os_sem_down(ready), so we can hold the queue lock without Chris@19: deadlocking */ Chris@19: while (worker_queue) { Chris@19: struct worker *q = worker_queue; Chris@19: worker_queue = q->cdr; Chris@19: q->w = &w; Chris@19: os_sem_up(&q->ready); Chris@19: os_sem_down(&termination_semaphore); Chris@19: unmake_worker(q); Chris@19: } Chris@19: }); Chris@19: THREAD_OFF; Chris@19: } Chris@19: Chris@19: int X(ithreads_init)(void) Chris@19: { Chris@19: os_mutex_init(&queue_lock); Chris@19: os_sem_init(&termination_semaphore); Chris@19: Chris@19: WITH_QUEUE_LOCK({ Chris@19: worker_queue = 0; Chris@19: }) Chris@19: Chris@19: return 0; /* no error */ Chris@19: } Chris@19: Chris@19: /* Distribute a loop from 0 to loopmax-1 over nthreads threads. Chris@19: proc(d) is called to execute a block of iterations from d->min Chris@19: to d->max-1. d->thr_num indicate the number of the thread Chris@19: that is executing proc (from 0 to nthreads-1), and d->data is Chris@19: the same as the data parameter passed to X(spawn_loop). Chris@19: Chris@19: This function returns only after all the threads have completed. */ Chris@19: void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data) Chris@19: { Chris@19: int block_size; Chris@19: struct work *r; Chris@19: int i; Chris@19: Chris@19: A(loopmax >= 0); Chris@19: A(nthr > 0); Chris@19: A(proc); Chris@19: Chris@19: if (!loopmax) return; Chris@19: Chris@19: /* Choose the block size and number of threads in order to (1) Chris@19: minimize the critical path and (2) use the fewest threads that Chris@19: achieve the same critical path (to minimize overhead). Chris@19: e.g. if loopmax is 5 and nthr is 4, we should use only 3 Chris@19: threads with block sizes of 2, 2, and 1. */ Chris@19: block_size = (loopmax + nthr - 1) / nthr; Chris@19: nthr = (loopmax + block_size - 1) / block_size; Chris@19: Chris@19: THREAD_ON; /* prevent debugging mode from failing under threads */ Chris@19: STACK_MALLOC(struct work *, r, sizeof(struct work) * nthr); Chris@19: Chris@19: /* distribute work: */ Chris@19: for (i = 0; i < nthr; ++i) { Chris@19: struct work *w = &r[i]; Chris@19: spawn_data *d = &w->d; Chris@19: Chris@19: d->max = (d->min = i * block_size) + block_size; Chris@19: if (d->max > loopmax) Chris@19: d->max = loopmax; Chris@19: d->thr_num = i; Chris@19: d->data = data; Chris@19: w->proc = proc; Chris@19: Chris@19: if (i == nthr - 1) { Chris@19: /* do the work ourselves */ Chris@19: proc(d); Chris@19: } else { Chris@19: /* assign a worker to W */ Chris@19: w->q = dequeue(); Chris@19: Chris@19: /* tell worker w->q to do it */ Chris@19: w->q->w = w; /* Dirac could have written this */ Chris@19: os_sem_up(&w->q->ready); Chris@19: } Chris@19: } Chris@19: Chris@19: for (i = 0; i < nthr - 1; ++i) { Chris@19: struct work *w = &r[i]; Chris@19: os_sem_down(&w->q->done); Chris@19: enqueue(w->q); Chris@19: } Chris@19: Chris@19: STACK_FREE(r); Chris@19: THREAD_OFF; /* prevent debugging mode from failing under threads */ Chris@19: } Chris@19: Chris@19: void X(threads_cleanup)(void) Chris@19: { Chris@19: kill_workforce(); Chris@19: os_mutex_destroy(&queue_lock); Chris@19: os_sem_destroy(&termination_semaphore); Chris@19: }