Chris@10
|
1 /*
|
Chris@10
|
2 * Copyright (c) 2003, 2007-11 Matteo Frigo
|
Chris@10
|
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
|
Chris@10
|
4 *
|
Chris@10
|
5 * This program is free software; you can redistribute it and/or modify
|
Chris@10
|
6 * it under the terms of the GNU General Public License as published by
|
Chris@10
|
7 * the Free Software Foundation; either version 2 of the License, or
|
Chris@10
|
8 * (at your option) any later version.
|
Chris@10
|
9 *
|
Chris@10
|
10 * This program is distributed in the hope that it will be useful,
|
Chris@10
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@10
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@10
|
13 * GNU General Public License for more details.
|
Chris@10
|
14 *
|
Chris@10
|
15 * You should have received a copy of the GNU General Public License
|
Chris@10
|
16 * along with this program; if not, write to the Free Software
|
Chris@10
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Chris@10
|
18 *
|
Chris@10
|
19 */
|
Chris@10
|
20
|
Chris@10
|
21 /* threads.c: Portable thread spawning for loops, via the X(spawn_loop)
|
Chris@10
|
22 function. The first portion of this file is a set of macros to
|
Chris@10
|
23 spawn and join threads on various systems. */
|
Chris@10
|
24
|
Chris@10
|
25 #include "threads.h"
|
Chris@10
|
26
|
Chris@10
|
27 #if defined(USING_POSIX_THREADS)
|
Chris@10
|
28
|
Chris@10
|
29 #include <pthread.h>
|
Chris@10
|
30
|
Chris@10
|
31 #ifdef HAVE_UNISTD_H
|
Chris@10
|
32 # include <unistd.h>
|
Chris@10
|
33 #endif
|
Chris@10
|
34
|
Chris@10
|
35 /* imlementation of semaphores and mutexes: */
|
Chris@10
|
36 #if (defined(_POSIX_SEMAPHORES) && (_POSIX_SEMAPHORES >= 200112L))
|
Chris@10
|
37
|
Chris@10
|
38 /* If optional POSIX semaphores are supported, use them to
|
Chris@10
|
39 implement both semaphores and mutexes. */
|
Chris@10
|
40 # include <semaphore.h>
|
Chris@10
|
41 # include <errno.h>
|
Chris@10
|
42
|
Chris@10
|
43 typedef sem_t os_sem_t;
|
Chris@10
|
44
|
Chris@10
|
45 static void os_sem_init(os_sem_t *s) { sem_init(s, 0, 0); }
|
Chris@10
|
46 static void os_sem_destroy(os_sem_t *s) { sem_destroy(s); }
|
Chris@10
|
47
|
Chris@10
|
48 static void os_sem_down(os_sem_t *s)
|
Chris@10
|
49 {
|
Chris@10
|
50 int err;
|
Chris@10
|
51 do {
|
Chris@10
|
52 err = sem_wait(s);
|
Chris@10
|
53 } while (err == -1 && errno == EINTR);
|
Chris@10
|
54 CK(err == 0);
|
Chris@10
|
55 }
|
Chris@10
|
56
|
Chris@10
|
57 static void os_sem_up(os_sem_t *s) { sem_post(s); }
|
Chris@10
|
58
|
Chris@10
|
59 /*
|
Chris@10
|
60 The reason why we use sem_t to implement mutexes is that I have
|
Chris@10
|
61 seen mysterious hangs with glibc-2.7 and linux-2.6.22 when using
|
Chris@10
|
62 pthread_mutex_t, but no hangs with sem_t or with linux >=
|
Chris@10
|
63 2.6.24. For lack of better information, sem_t looks like the
|
Chris@10
|
64 safest choice.
|
Chris@10
|
65 */
|
Chris@10
|
66 typedef sem_t os_mutex_t;
|
Chris@10
|
67 static void os_mutex_init(os_mutex_t *s) { sem_init(s, 0, 1); }
|
Chris@10
|
68 #define os_mutex_destroy os_sem_destroy
|
Chris@10
|
69 #define os_mutex_lock os_sem_down
|
Chris@10
|
70 #define os_mutex_unlock os_sem_up
|
Chris@10
|
71
|
Chris@10
|
72 #else
|
Chris@10
|
73
|
Chris@10
|
74 /* If optional POSIX semaphores are not defined, use pthread
|
Chris@10
|
75 mutexes for mutexes, and simulate semaphores with condition
|
Chris@10
|
76 variables */
|
Chris@10
|
77 typedef pthread_mutex_t os_mutex_t;
|
Chris@10
|
78
|
Chris@10
|
79 static void os_mutex_init(os_mutex_t *s)
|
Chris@10
|
80 {
|
Chris@10
|
81 pthread_mutex_init(s, (pthread_mutexattr_t *)0);
|
Chris@10
|
82 }
|
Chris@10
|
83
|
Chris@10
|
84 static void os_mutex_destroy(os_mutex_t *s) { pthread_mutex_destroy(s); }
|
Chris@10
|
85 static void os_mutex_lock(os_mutex_t *s) { pthread_mutex_lock(s); }
|
Chris@10
|
86 static void os_mutex_unlock(os_mutex_t *s) { pthread_mutex_unlock(s); }
|
Chris@10
|
87
|
Chris@10
|
88 typedef struct {
|
Chris@10
|
89 pthread_mutex_t m;
|
Chris@10
|
90 pthread_cond_t c;
|
Chris@10
|
91 volatile int x;
|
Chris@10
|
92 } os_sem_t;
|
Chris@10
|
93
|
Chris@10
|
94 static void os_sem_init(os_sem_t *s)
|
Chris@10
|
95 {
|
Chris@10
|
96 pthread_mutex_init(&s->m, (pthread_mutexattr_t *)0);
|
Chris@10
|
97 pthread_cond_init(&s->c, (pthread_condattr_t *)0);
|
Chris@10
|
98
|
Chris@10
|
99 /* wrap initialization in lock to exploit the release
|
Chris@10
|
100 semantics of pthread_mutex_unlock() */
|
Chris@10
|
101 pthread_mutex_lock(&s->m);
|
Chris@10
|
102 s->x = 0;
|
Chris@10
|
103 pthread_mutex_unlock(&s->m);
|
Chris@10
|
104 }
|
Chris@10
|
105
|
Chris@10
|
106 static void os_sem_destroy(os_sem_t *s)
|
Chris@10
|
107 {
|
Chris@10
|
108 pthread_mutex_destroy(&s->m);
|
Chris@10
|
109 pthread_cond_destroy(&s->c);
|
Chris@10
|
110 }
|
Chris@10
|
111
|
Chris@10
|
112 static void os_sem_down(os_sem_t *s)
|
Chris@10
|
113 {
|
Chris@10
|
114 pthread_mutex_lock(&s->m);
|
Chris@10
|
115 while (s->x <= 0)
|
Chris@10
|
116 pthread_cond_wait(&s->c, &s->m);
|
Chris@10
|
117 --s->x;
|
Chris@10
|
118 pthread_mutex_unlock(&s->m);
|
Chris@10
|
119 }
|
Chris@10
|
120
|
Chris@10
|
121 static void os_sem_up(os_sem_t *s)
|
Chris@10
|
122 {
|
Chris@10
|
123 pthread_mutex_lock(&s->m);
|
Chris@10
|
124 ++s->x;
|
Chris@10
|
125 pthread_cond_signal(&s->c);
|
Chris@10
|
126 pthread_mutex_unlock(&s->m);
|
Chris@10
|
127 }
|
Chris@10
|
128
|
Chris@10
|
129 #endif
|
Chris@10
|
130
|
Chris@10
|
131 #define FFTW_WORKER void *
|
Chris@10
|
132
|
Chris@10
|
133 static void os_create_thread(FFTW_WORKER (*worker)(void *arg),
|
Chris@10
|
134 void *arg)
|
Chris@10
|
135 {
|
Chris@10
|
136 pthread_attr_t attr;
|
Chris@10
|
137 pthread_t tid;
|
Chris@10
|
138
|
Chris@10
|
139 pthread_attr_init(&attr);
|
Chris@10
|
140 pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
|
Chris@10
|
141 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
|
Chris@10
|
142
|
Chris@10
|
143 pthread_create(&tid, &attr, worker, (void *)arg);
|
Chris@10
|
144 pthread_attr_destroy(&attr);
|
Chris@10
|
145 }
|
Chris@10
|
146
|
Chris@10
|
147 static void os_destroy_thread(void)
|
Chris@10
|
148 {
|
Chris@10
|
149 pthread_exit((void *)0);
|
Chris@10
|
150 }
|
Chris@10
|
151
|
Chris@10
|
152 #elif defined(__WIN32__) || defined(_WIN32) || defined(_WINDOWS)
|
Chris@10
|
153 /* hack: windef.h defines INT for its own purposes and this causes
|
Chris@10
|
154 a conflict with our own INT in ifftw.h. Divert the windows
|
Chris@10
|
155 definition into another name unlikely to cause a conflict */
|
Chris@10
|
156 #define INT magnus_ab_INTegro_seclorum_nascitur_ordo
|
Chris@10
|
157 #include <windows.h>
|
Chris@10
|
158 #include <process.h>
|
Chris@10
|
159 #undef INT
|
Chris@10
|
160
|
Chris@10
|
161 typedef HANDLE os_mutex_t;
|
Chris@10
|
162
|
Chris@10
|
163 static void os_mutex_init(os_mutex_t *s)
|
Chris@10
|
164 {
|
Chris@10
|
165 *s = CreateMutex(NULL, FALSE, NULL);
|
Chris@10
|
166 }
|
Chris@10
|
167
|
Chris@10
|
168 static void os_mutex_destroy(os_mutex_t *s)
|
Chris@10
|
169 {
|
Chris@10
|
170 CloseHandle(*s);
|
Chris@10
|
171 }
|
Chris@10
|
172
|
Chris@10
|
173 static void os_mutex_lock(os_mutex_t *s)
|
Chris@10
|
174 {
|
Chris@10
|
175 WaitForSingleObject(*s, INFINITE);
|
Chris@10
|
176 }
|
Chris@10
|
177
|
Chris@10
|
178 static void os_mutex_unlock(os_mutex_t *s)
|
Chris@10
|
179 {
|
Chris@10
|
180 ReleaseMutex(*s);
|
Chris@10
|
181 }
|
Chris@10
|
182
|
Chris@10
|
183 typedef HANDLE os_sem_t;
|
Chris@10
|
184
|
Chris@10
|
185 static void os_sem_init(os_sem_t *s)
|
Chris@10
|
186 {
|
Chris@10
|
187 *s = CreateSemaphore(NULL, 0, 0x7FFFFFFFL, NULL);
|
Chris@10
|
188 }
|
Chris@10
|
189
|
Chris@10
|
190 static void os_sem_destroy(os_sem_t *s)
|
Chris@10
|
191 {
|
Chris@10
|
192 CloseHandle(*s);
|
Chris@10
|
193 }
|
Chris@10
|
194
|
Chris@10
|
195 static void os_sem_down(os_sem_t *s)
|
Chris@10
|
196 {
|
Chris@10
|
197 WaitForSingleObject(*s, INFINITE);
|
Chris@10
|
198 }
|
Chris@10
|
199
|
Chris@10
|
200 static void os_sem_up(os_sem_t *s)
|
Chris@10
|
201 {
|
Chris@10
|
202 ReleaseSemaphore(*s, 1, NULL);
|
Chris@10
|
203 }
|
Chris@10
|
204
|
Chris@10
|
205 #define FFTW_WORKER unsigned __stdcall
|
Chris@10
|
206 typedef unsigned (__stdcall *winthread_start) (void *);
|
Chris@10
|
207
|
Chris@10
|
208 static void os_create_thread(winthread_start worker,
|
Chris@10
|
209 void *arg)
|
Chris@10
|
210 {
|
Chris@10
|
211 _beginthreadex((void *)NULL, /* security attrib */
|
Chris@10
|
212 0, /* stack size */
|
Chris@10
|
213 worker, /* start address */
|
Chris@10
|
214 arg, /* parameters */
|
Chris@10
|
215 0, /* creation flags */
|
Chris@10
|
216 (unsigned *)NULL); /* tid */
|
Chris@10
|
217 }
|
Chris@10
|
218
|
Chris@10
|
219 static void os_destroy_thread(void)
|
Chris@10
|
220 {
|
Chris@10
|
221 _endthreadex(0);
|
Chris@10
|
222 }
|
Chris@10
|
223
|
Chris@10
|
224
|
Chris@10
|
225 #else
|
Chris@10
|
226 #error "No threading layer defined"
|
Chris@10
|
227 #endif
|
Chris@10
|
228
|
Chris@10
|
229 /************************************************************************/
|
Chris@10
|
230
|
Chris@10
|
231 /* Main code: */
|
Chris@10
|
232 struct worker {
|
Chris@10
|
233 os_sem_t ready;
|
Chris@10
|
234 os_sem_t done;
|
Chris@10
|
235 struct work *w;
|
Chris@10
|
236 struct worker *cdr;
|
Chris@10
|
237 };
|
Chris@10
|
238
|
Chris@10
|
239 static struct worker *make_worker(void)
|
Chris@10
|
240 {
|
Chris@10
|
241 struct worker *q = (struct worker *)MALLOC(sizeof(*q), OTHER);
|
Chris@10
|
242 os_sem_init(&q->ready);
|
Chris@10
|
243 os_sem_init(&q->done);
|
Chris@10
|
244 return q;
|
Chris@10
|
245 }
|
Chris@10
|
246
|
Chris@10
|
247 static void unmake_worker(struct worker *q)
|
Chris@10
|
248 {
|
Chris@10
|
249 os_sem_destroy(&q->done);
|
Chris@10
|
250 os_sem_destroy(&q->ready);
|
Chris@10
|
251 X(ifree)(q);
|
Chris@10
|
252 }
|
Chris@10
|
253
|
Chris@10
|
254 struct work {
|
Chris@10
|
255 spawn_function proc;
|
Chris@10
|
256 spawn_data d;
|
Chris@10
|
257 struct worker *q; /* the worker responsible for performing this work */
|
Chris@10
|
258 };
|
Chris@10
|
259
|
Chris@10
|
260 static os_mutex_t queue_lock;
|
Chris@10
|
261 static os_sem_t termination_semaphore;
|
Chris@10
|
262
|
Chris@10
|
263 static struct worker *worker_queue;
|
Chris@10
|
264 #define WITH_QUEUE_LOCK(what) \
|
Chris@10
|
265 { \
|
Chris@10
|
266 os_mutex_lock(&queue_lock); \
|
Chris@10
|
267 what; \
|
Chris@10
|
268 os_mutex_unlock(&queue_lock); \
|
Chris@10
|
269 }
|
Chris@10
|
270
|
Chris@10
|
271 static FFTW_WORKER worker(void *arg)
|
Chris@10
|
272 {
|
Chris@10
|
273 struct worker *ego = (struct worker *)arg;
|
Chris@10
|
274 struct work *w;
|
Chris@10
|
275
|
Chris@10
|
276 for (;;) {
|
Chris@10
|
277 /* wait until work becomes available */
|
Chris@10
|
278 os_sem_down(&ego->ready);
|
Chris@10
|
279
|
Chris@10
|
280 w = ego->w;
|
Chris@10
|
281
|
Chris@10
|
282 /* !w->proc ==> terminate worker */
|
Chris@10
|
283 if (!w->proc) break;
|
Chris@10
|
284
|
Chris@10
|
285 /* do the work */
|
Chris@10
|
286 w->proc(&w->d);
|
Chris@10
|
287
|
Chris@10
|
288 /* signal that work is done */
|
Chris@10
|
289 os_sem_up(&ego->done);
|
Chris@10
|
290 }
|
Chris@10
|
291
|
Chris@10
|
292 /* termination protocol */
|
Chris@10
|
293 os_sem_up(&termination_semaphore);
|
Chris@10
|
294
|
Chris@10
|
295 os_destroy_thread();
|
Chris@10
|
296 /* UNREACHABLE */
|
Chris@10
|
297 return 0;
|
Chris@10
|
298 }
|
Chris@10
|
299
|
Chris@10
|
300 static void enqueue(struct worker *q)
|
Chris@10
|
301 {
|
Chris@10
|
302 WITH_QUEUE_LOCK({
|
Chris@10
|
303 q->cdr = worker_queue;
|
Chris@10
|
304 worker_queue = q;
|
Chris@10
|
305 });
|
Chris@10
|
306 }
|
Chris@10
|
307
|
Chris@10
|
308 static struct worker *dequeue(void)
|
Chris@10
|
309 {
|
Chris@10
|
310 struct worker *q;
|
Chris@10
|
311
|
Chris@10
|
312 WITH_QUEUE_LOCK({
|
Chris@10
|
313 q = worker_queue;
|
Chris@10
|
314 if (q)
|
Chris@10
|
315 worker_queue = q->cdr;
|
Chris@10
|
316 });
|
Chris@10
|
317
|
Chris@10
|
318 if (!q) {
|
Chris@10
|
319 /* no worker is available. Create one */
|
Chris@10
|
320 q = make_worker();
|
Chris@10
|
321 os_create_thread(worker, q);
|
Chris@10
|
322 }
|
Chris@10
|
323
|
Chris@10
|
324 return q;
|
Chris@10
|
325 }
|
Chris@10
|
326
|
Chris@10
|
327
|
Chris@10
|
328 static void kill_workforce(void)
|
Chris@10
|
329 {
|
Chris@10
|
330 struct work w;
|
Chris@10
|
331
|
Chris@10
|
332 w.proc = 0;
|
Chris@10
|
333
|
Chris@10
|
334 THREAD_ON; /* needed for debugging mode: since make_worker
|
Chris@10
|
335 is called from dequeue which is only called in
|
Chris@10
|
336 thread_on mode, we need to unmake_worker in thread_on. */
|
Chris@10
|
337 WITH_QUEUE_LOCK({
|
Chris@10
|
338 /* tell all workers that they must terminate.
|
Chris@10
|
339
|
Chris@10
|
340 Because workers enqueue themselves before signaling the
|
Chris@10
|
341 completion of the work, all workers belong to the worker queue
|
Chris@10
|
342 if we get here. Also, all workers are waiting at
|
Chris@10
|
343 os_sem_down(ready), so we can hold the queue lock without
|
Chris@10
|
344 deadlocking */
|
Chris@10
|
345 while (worker_queue) {
|
Chris@10
|
346 struct worker *q = worker_queue;
|
Chris@10
|
347 worker_queue = q->cdr;
|
Chris@10
|
348 q->w = &w;
|
Chris@10
|
349 os_sem_up(&q->ready);
|
Chris@10
|
350 os_sem_down(&termination_semaphore);
|
Chris@10
|
351 unmake_worker(q);
|
Chris@10
|
352 }
|
Chris@10
|
353 });
|
Chris@10
|
354 THREAD_OFF;
|
Chris@10
|
355 }
|
Chris@10
|
356
|
Chris@10
|
357 int X(ithreads_init)(void)
|
Chris@10
|
358 {
|
Chris@10
|
359 os_mutex_init(&queue_lock);
|
Chris@10
|
360 os_sem_init(&termination_semaphore);
|
Chris@10
|
361
|
Chris@10
|
362 WITH_QUEUE_LOCK({
|
Chris@10
|
363 worker_queue = 0;
|
Chris@10
|
364 })
|
Chris@10
|
365
|
Chris@10
|
366 return 0; /* no error */
|
Chris@10
|
367 }
|
Chris@10
|
368
|
Chris@10
|
369 /* Distribute a loop from 0 to loopmax-1 over nthreads threads.
|
Chris@10
|
370 proc(d) is called to execute a block of iterations from d->min
|
Chris@10
|
371 to d->max-1. d->thr_num indicate the number of the thread
|
Chris@10
|
372 that is executing proc (from 0 to nthreads-1), and d->data is
|
Chris@10
|
373 the same as the data parameter passed to X(spawn_loop).
|
Chris@10
|
374
|
Chris@10
|
375 This function returns only after all the threads have completed. */
|
Chris@10
|
376 void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data)
|
Chris@10
|
377 {
|
Chris@10
|
378 int block_size;
|
Chris@10
|
379 struct work *r;
|
Chris@10
|
380 int i;
|
Chris@10
|
381
|
Chris@10
|
382 A(loopmax >= 0);
|
Chris@10
|
383 A(nthr > 0);
|
Chris@10
|
384 A(proc);
|
Chris@10
|
385
|
Chris@10
|
386 if (!loopmax) return;
|
Chris@10
|
387
|
Chris@10
|
388 /* Choose the block size and number of threads in order to (1)
|
Chris@10
|
389 minimize the critical path and (2) use the fewest threads that
|
Chris@10
|
390 achieve the same critical path (to minimize overhead).
|
Chris@10
|
391 e.g. if loopmax is 5 and nthr is 4, we should use only 3
|
Chris@10
|
392 threads with block sizes of 2, 2, and 1. */
|
Chris@10
|
393 block_size = (loopmax + nthr - 1) / nthr;
|
Chris@10
|
394 nthr = (loopmax + block_size - 1) / block_size;
|
Chris@10
|
395
|
Chris@10
|
396 THREAD_ON; /* prevent debugging mode from failing under threads */
|
Chris@10
|
397 STACK_MALLOC(struct work *, r, sizeof(struct work) * nthr);
|
Chris@10
|
398
|
Chris@10
|
399 /* distribute work: */
|
Chris@10
|
400 for (i = 0; i < nthr; ++i) {
|
Chris@10
|
401 struct work *w = &r[i];
|
Chris@10
|
402 spawn_data *d = &w->d;
|
Chris@10
|
403
|
Chris@10
|
404 d->max = (d->min = i * block_size) + block_size;
|
Chris@10
|
405 if (d->max > loopmax)
|
Chris@10
|
406 d->max = loopmax;
|
Chris@10
|
407 d->thr_num = i;
|
Chris@10
|
408 d->data = data;
|
Chris@10
|
409 w->proc = proc;
|
Chris@10
|
410
|
Chris@10
|
411 if (i == nthr - 1) {
|
Chris@10
|
412 /* do the work ourselves */
|
Chris@10
|
413 proc(d);
|
Chris@10
|
414 } else {
|
Chris@10
|
415 /* assign a worker to W */
|
Chris@10
|
416 w->q = dequeue();
|
Chris@10
|
417
|
Chris@10
|
418 /* tell worker w->q to do it */
|
Chris@10
|
419 w->q->w = w; /* Dirac could have written this */
|
Chris@10
|
420 os_sem_up(&w->q->ready);
|
Chris@10
|
421 }
|
Chris@10
|
422 }
|
Chris@10
|
423
|
Chris@10
|
424 for (i = 0; i < nthr - 1; ++i) {
|
Chris@10
|
425 struct work *w = &r[i];
|
Chris@10
|
426 os_sem_down(&w->q->done);
|
Chris@10
|
427 enqueue(w->q);
|
Chris@10
|
428 }
|
Chris@10
|
429
|
Chris@10
|
430 STACK_FREE(r);
|
Chris@10
|
431 THREAD_OFF; /* prevent debugging mode from failing under threads */
|
Chris@10
|
432 }
|
Chris@10
|
433
|
Chris@10
|
434 void X(threads_cleanup)(void)
|
Chris@10
|
435 {
|
Chris@10
|
436 kill_workforce();
|
Chris@10
|
437 os_mutex_destroy(&queue_lock);
|
Chris@10
|
438 os_sem_destroy(&termination_semaphore);
|
Chris@10
|
439 }
|