diff src/fftw-3.3.3/threads/threads.c @ 10:37bf6b4a2645

Add FFTW3
author Chris Cannam
date Wed, 20 Mar 2013 15:35:50 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/fftw-3.3.3/threads/threads.c	Wed Mar 20 15:35:50 2013 +0000
@@ -0,0 +1,439 @@
+/*
+ * Copyright (c) 2003, 2007-11 Matteo Frigo
+ * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+/* threads.c: Portable thread spawning for loops, via the X(spawn_loop)
+   function.  The first portion of this file is a set of macros to
+   spawn and join threads on various systems. */
+
+#include "threads.h"
+
+#if defined(USING_POSIX_THREADS)
+
+#include <pthread.h>
+
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>
+#endif
+
+/* imlementation of semaphores and mutexes: */
+#if (defined(_POSIX_SEMAPHORES) && (_POSIX_SEMAPHORES >= 200112L))
+
+   /* If optional POSIX semaphores are supported, use them to
+      implement both semaphores and mutexes. */
+#  include <semaphore.h>
+#  include <errno.h>
+
+   typedef sem_t os_sem_t;
+
+   static void os_sem_init(os_sem_t *s) { sem_init(s, 0, 0); }
+   static void os_sem_destroy(os_sem_t *s) { sem_destroy(s); }
+
+   static void os_sem_down(os_sem_t *s)
+   {
+	int err;
+	do {
+	     err = sem_wait(s);
+	} while (err == -1 && errno == EINTR);
+	CK(err == 0);
+   }
+
+   static void os_sem_up(os_sem_t *s) { sem_post(s); }
+
+   /*
+      The reason why we use sem_t to implement mutexes is that I have
+      seen mysterious hangs with glibc-2.7 and linux-2.6.22 when using
+      pthread_mutex_t, but no hangs with sem_t or with linux >=
+      2.6.24.  For lack of better information, sem_t looks like the
+      safest choice.
+   */
+   typedef sem_t os_mutex_t;
+   static void os_mutex_init(os_mutex_t *s) { sem_init(s, 0, 1); }
+   #define os_mutex_destroy os_sem_destroy
+   #define os_mutex_lock os_sem_down
+   #define os_mutex_unlock os_sem_up
+
+#else
+
+   /* If optional POSIX semaphores are not defined, use pthread
+      mutexes for mutexes, and simulate semaphores with condition
+      variables */
+   typedef pthread_mutex_t os_mutex_t;
+
+   static void os_mutex_init(os_mutex_t *s) 
+   { 
+	pthread_mutex_init(s, (pthread_mutexattr_t *)0);
+   }
+
+   static void os_mutex_destroy(os_mutex_t *s) { pthread_mutex_destroy(s); }
+   static void os_mutex_lock(os_mutex_t *s) { pthread_mutex_lock(s); }
+   static void os_mutex_unlock(os_mutex_t *s) { pthread_mutex_unlock(s); }
+
+   typedef struct {
+	pthread_mutex_t m;
+	pthread_cond_t c;
+	volatile int x;
+   } os_sem_t; 
+
+   static void os_sem_init(os_sem_t *s)
+   {
+	pthread_mutex_init(&s->m, (pthread_mutexattr_t *)0);
+	pthread_cond_init(&s->c, (pthread_condattr_t *)0);
+
+	/* wrap initialization in lock to exploit the release
+	   semantics of pthread_mutex_unlock() */
+	pthread_mutex_lock(&s->m);
+	s->x = 0;
+	pthread_mutex_unlock(&s->m);
+   }
+
+   static void os_sem_destroy(os_sem_t *s)
+   {
+	pthread_mutex_destroy(&s->m);
+	pthread_cond_destroy(&s->c);
+   }
+
+   static void os_sem_down(os_sem_t *s)
+   {
+	pthread_mutex_lock(&s->m);
+	while (s->x <= 0) 
+	     pthread_cond_wait(&s->c, &s->m);
+	--s->x;
+	pthread_mutex_unlock(&s->m);
+   }
+
+   static void os_sem_up(os_sem_t *s)
+   {
+	pthread_mutex_lock(&s->m);
+	++s->x;
+	pthread_cond_signal(&s->c);
+	pthread_mutex_unlock(&s->m);
+   }
+
+#endif
+
+#define FFTW_WORKER void *
+
+static void os_create_thread(FFTW_WORKER (*worker)(void *arg), 
+			     void *arg)
+{
+     pthread_attr_t attr;
+     pthread_t tid;
+
+     pthread_attr_init(&attr);
+     pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); 
+     pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+     pthread_create(&tid, &attr, worker, (void *)arg);
+     pthread_attr_destroy(&attr);
+}
+
+static void os_destroy_thread(void)
+{
+     pthread_exit((void *)0);
+}
+
+#elif defined(__WIN32__) || defined(_WIN32) || defined(_WINDOWS)
+/* hack: windef.h defines INT for its own purposes and this causes
+   a conflict with our own INT in ifftw.h.  Divert the windows
+   definition into another name unlikely to cause a conflict */
+#define INT magnus_ab_INTegro_seclorum_nascitur_ordo
+#include <windows.h>
+#include <process.h>
+#undef INT
+
+typedef HANDLE os_mutex_t;
+
+static void os_mutex_init(os_mutex_t *s) 
+{ 
+     *s = CreateMutex(NULL, FALSE, NULL);
+}
+
+static void os_mutex_destroy(os_mutex_t *s) 
+{ 
+     CloseHandle(*s);
+}
+
+static void os_mutex_lock(os_mutex_t *s)
+{ 
+     WaitForSingleObject(*s, INFINITE);
+}
+
+static void os_mutex_unlock(os_mutex_t *s) 
+{ 
+     ReleaseMutex(*s);
+}
+
+typedef HANDLE os_sem_t;
+
+static void os_sem_init(os_sem_t *s) 
+{
+     *s = CreateSemaphore(NULL, 0, 0x7FFFFFFFL, NULL);
+}
+
+static void os_sem_destroy(os_sem_t *s) 
+{ 
+     CloseHandle(*s);
+}
+
+static void os_sem_down(os_sem_t *s) 
+{ 
+     WaitForSingleObject(*s, INFINITE);
+}
+
+static void os_sem_up(os_sem_t *s) 
+{
+     ReleaseSemaphore(*s, 1, NULL);
+}
+
+#define FFTW_WORKER unsigned __stdcall
+typedef unsigned (__stdcall *winthread_start) (void *);
+
+static void os_create_thread(winthread_start worker,
+			     void *arg)
+{
+     _beginthreadex((void *)NULL,               /* security attrib */
+		    0,				/* stack size */
+		    worker,                     /* start address */
+		    arg,			/* parameters */
+		    0,				/* creation flags */
+		    (unsigned *)NULL);		/* tid */
+}
+
+static void os_destroy_thread(void)
+{
+     _endthreadex(0);
+}
+
+
+#else
+#error "No threading layer defined"
+#endif
+
+/************************************************************************/
+
+/* Main code: */
+struct worker {
+     os_sem_t ready;
+     os_sem_t done;
+     struct work *w;
+     struct worker *cdr;
+};
+
+static struct worker *make_worker(void)
+{
+     struct worker *q = (struct worker *)MALLOC(sizeof(*q), OTHER);
+     os_sem_init(&q->ready);
+     os_sem_init(&q->done);
+     return q;
+}
+
+static void unmake_worker(struct worker *q)
+{
+     os_sem_destroy(&q->done);
+     os_sem_destroy(&q->ready);
+     X(ifree)(q);
+}
+
+struct work {
+     spawn_function proc;
+     spawn_data d;
+     struct worker *q; /* the worker responsible for performing this work */
+};
+
+static os_mutex_t queue_lock;
+static os_sem_t termination_semaphore;
+
+static struct worker *worker_queue;
+#define WITH_QUEUE_LOCK(what)			\
+{						\
+     os_mutex_lock(&queue_lock);		\
+     what;					\
+     os_mutex_unlock(&queue_lock);		\
+}
+
+static FFTW_WORKER worker(void *arg)
+{
+     struct worker *ego = (struct worker *)arg;
+     struct work *w;
+
+     for (;;) {
+	  /* wait until work becomes available */
+	  os_sem_down(&ego->ready);
+
+	  w = ego->w;
+
+	  /* !w->proc ==> terminate worker */
+	  if (!w->proc) break;
+
+	  /* do the work */
+          w->proc(&w->d);
+
+	  /* signal that work is done */
+	  os_sem_up(&ego->done);
+     }
+
+     /* termination protocol */
+     os_sem_up(&termination_semaphore);
+
+     os_destroy_thread();
+     /* UNREACHABLE */
+     return 0;
+}
+
+static void enqueue(struct worker *q)
+{
+     WITH_QUEUE_LOCK({
+	  q->cdr = worker_queue;
+	  worker_queue = q;
+     });
+}
+
+static struct worker *dequeue(void)
+{
+     struct worker *q;
+
+     WITH_QUEUE_LOCK({
+	  q = worker_queue;
+	  if (q) 
+	       worker_queue = q->cdr;
+     });
+
+     if (!q) {
+	  /* no worker is available.  Create one */
+	  q = make_worker();
+	  os_create_thread(worker, q);
+     }
+
+     return q;
+}
+
+
+static void kill_workforce(void)
+{
+     struct work w;
+
+     w.proc = 0;
+
+     THREAD_ON; /* needed for debugging mode: since make_worker
+		   is called from dequeue which is only called in
+		   thread_on mode, we need to unmake_worker in thread_on. */
+     WITH_QUEUE_LOCK({
+	  /* tell all workers that they must terminate.  
+
+	     Because workers enqueue themselves before signaling the
+	     completion of the work, all workers belong to the worker queue
+	     if we get here.  Also, all workers are waiting at
+	     os_sem_down(ready), so we can hold the queue lock without
+	     deadlocking */
+	  while (worker_queue) {
+	       struct worker *q = worker_queue;
+	       worker_queue = q->cdr;
+	       q->w = &w;
+	       os_sem_up(&q->ready);
+	       os_sem_down(&termination_semaphore);
+	       unmake_worker(q);
+	  }
+     });
+     THREAD_OFF;
+}
+
+int X(ithreads_init)(void)
+{
+     os_mutex_init(&queue_lock);
+     os_sem_init(&termination_semaphore);
+
+     WITH_QUEUE_LOCK({
+	  worker_queue = 0;
+     })
+
+     return 0; /* no error */
+}
+
+/* Distribute a loop from 0 to loopmax-1 over nthreads threads.
+   proc(d) is called to execute a block of iterations from d->min
+   to d->max-1.  d->thr_num indicate the number of the thread
+   that is executing proc (from 0 to nthreads-1), and d->data is
+   the same as the data parameter passed to X(spawn_loop).
+
+   This function returns only after all the threads have completed. */
+void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data)
+{
+     int block_size;
+     struct work *r;
+     int i;
+
+     A(loopmax >= 0);
+     A(nthr > 0);
+     A(proc);
+
+     if (!loopmax) return;
+
+     /* Choose the block size and number of threads in order to (1)
+        minimize the critical path and (2) use the fewest threads that
+        achieve the same critical path (to minimize overhead).
+        e.g. if loopmax is 5 and nthr is 4, we should use only 3
+        threads with block sizes of 2, 2, and 1. */
+     block_size = (loopmax + nthr - 1) / nthr;
+     nthr = (loopmax + block_size - 1) / block_size;
+
+     THREAD_ON; /* prevent debugging mode from failing under threads */
+     STACK_MALLOC(struct work *, r, sizeof(struct work) * nthr);
+	  
+     /* distribute work: */
+     for (i = 0; i < nthr; ++i) {
+	  struct work *w = &r[i];
+	  spawn_data *d = &w->d;
+
+	  d->max = (d->min = i * block_size) + block_size;
+	  if (d->max > loopmax)
+	       d->max = loopmax;
+	  d->thr_num = i;
+	  d->data = data;
+	  w->proc = proc;
+	   
+	  if (i == nthr - 1) {
+	       /* do the work ourselves */
+	       proc(d);
+	  } else {
+	       /* assign a worker to W */
+	       w->q = dequeue();
+
+	       /* tell worker w->q to do it */
+	       w->q->w = w; /* Dirac could have written this */
+	       os_sem_up(&w->q->ready);
+	  }
+     }
+
+     for (i = 0; i < nthr - 1; ++i) { 
+	  struct work *w = &r[i];
+	  os_sem_down(&w->q->done);
+	  enqueue(w->q);
+     }
+
+     STACK_FREE(r);
+     THREAD_OFF; /* prevent debugging mode from failing under threads */
+}
+
+void X(threads_cleanup)(void)
+{
+     kill_workforce();
+     os_mutex_destroy(&queue_lock);
+     os_sem_destroy(&termination_semaphore);
+}