sofia@0
|
1 /*
|
sofia@0
|
2 GPU multi-rate FIR filter bank example software
|
sofia@0
|
3
|
sofia@0
|
4 Oxford e-Research Centre, Oxford University
|
sofia@0
|
5
|
sofia@0
|
6 Centre for Digital Music, Queen Mary, University of London.
|
sofia@0
|
7
|
sofia@0
|
8 This program is free software: you can redistribute it and/or modify
|
sofia@0
|
9 it under the terms of the GNU General Public License as published by
|
sofia@0
|
10 the Free Software Foundation, either version 3 of the License,
|
sofia@0
|
11 or (at your option) any later version.
|
sofia@0
|
12 See the file COPYING included with this distribution for more information.
|
sofia@0
|
13 */
|
sofia@0
|
14
|
sofia@0
|
15
|
sofia@0
|
16 #ifndef FILTERS_H
|
sofia@0
|
17 #define FILTERS_H
|
sofia@0
|
18
|
sofia@0
|
19 #include <cuda_runtime.h>
|
sofia@0
|
20
|
sofia@0
|
// Filter parameters

/*
 * Selects the shared-memory GPU kernel. Per the original author's note, that
 * kernel appears to be faster and can be used with long filters.
 */
#define CUDA_SHM 1 //!< Use shared memory GPU kernel

#define SIZE_MUL 1 //!< Multiplier of 11 to derive filter length

/*
 * The replacement list is fully parenthesised so that B_SIZE always behaves
 * as a single operand. Without the parentheses, `n % B_SIZE` would expand to
 * `n % 11 * SIZE_MUL`, and a SIZE_MUL overridden with an expression such as
 * `1 + 1` would bind incorrectly.
 */
#define B_SIZE (11 * (SIZE_MUL)) //!< filter length

#define A_SIZE (B_SIZE - 1) //!< For IIR filter. not used currently

#define OFFSET 20 //!< Offset for CPU filter input buffer

#define MAX_RATES 10 //!< Maximum number of sampling rates
#define MAX_FILTERS 1024 //!< Maximum number of filters
|
sofia@0
|
36
|
sofia@0
|
37 //general params
|
sofia@0
|
38 typedef struct{
|
sofia@0
|
39 int nfilters; //!< total number of filters
|
sofia@0
|
40 int fsize; //!< filter size
|
sofia@0
|
41 int nrates; //!< number of input sampling rates to process
|
sofia@0
|
42 int rnumf[MAX_RATES]; //!< number of filters for each sampling rate
|
sofia@0
|
43 int streams; //!< use streams?
|
sofia@0
|
44 } params;
|
sofia@0
|
45
|
sofia@0
|
46 //structs of arrays preferred to allow contiguous memory accesses.
|
sofia@0
|
47
|
sofia@0
|
48 typedef struct{
|
sofia@0
|
49 float buf_in[MAX_FILTERS][B_SIZE + OFFSET]; //!< CPU buffer
|
sofia@0
|
50 float buf_out[MAX_FILTERS][A_SIZE + OFFSET]; //!< for IIR filter
|
sofia@0
|
51 int m_offb[MAX_FILTERS]; //!< offset counter for CPU buffers
|
sofia@0
|
52 float *bk; //!< filter coefficients array
|
sofia@0
|
53 } filter_arrays;
|
sofia@0
|
54
|
sofia@0
|
55 typedef struct{
|
sofia@0
|
56 float *d_filters[MAX_RATES]; //!< filters to be stored on the GPU
|
sofia@0
|
57 float *d_in[MAX_RATES]; //!< GPU input array for each sampling rate
|
sofia@0
|
58 int isize[MAX_RATES]; //!< input size for each sampling rate
|
sofia@0
|
59 float *d_out[MAX_RATES]; //!< GPU output array for each sampling rate
|
sofia@0
|
60 int osize[MAX_RATES]; //!< total output size for each set of filters
|
sofia@0
|
61 float *h_in[MAX_RATES]; //!< host input arrays for GPU transfer
|
sofia@0
|
62 float *h_out[MAX_RATES]; //!< host output arrays for GPU transfer
|
sofia@0
|
63 cudaStream_t stream[MAX_RATES]; //!< CUDA stream objects
|
sofia@0
|
64 } gpu_arrays;
|
sofia@0
|
65
|
sofia@0
|
66 // command line arguments
|
sofia@0
|
/** Values collected from the command line. */
typedef struct {
    int nrates; /**< how many sampling rates to process */
    int nf;     /**< total number of filters to process */
    int insize; /**< input size before resampling */
    int rconst; /**< for nrates=1 keep the initial input size */
    int tim;    /**< time process */
} cmd_args;
|
sofia@0
|
74
|
sofia@0
|
75 static int rdiv[MAX_RATES] = {2, 10, 50, 4, 16, 32, 8, 24, 36, 42}; //!<Decimation factors for multiple input rates
|
sofia@0
|
76
|
sofia@0
|
77 // host cuda prototypes
|
sofia@0
|
78 void compute_ref( float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N);
|
sofia@0
|
79
|
sofia@0
|
80 void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N);
|
sofia@0
|
81
|
sofia@0
|
82 void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N);
|
sofia@0
|
83
|
sofia@0
|
84 void read_command_line(int argc, char *argv[], cmd_args *args);
|
sofia@0
|
85
|
sofia@0
|
86 void print_usage();
|
sofia@0
|
87
|
sofia@0
|
88 #ifdef __cplusplus
|
sofia@0
|
89
|
sofia@0
|
90 extern "C" {
|
sofia@0
|
91
|
sofia@0
|
92 void cudaMultiFilterFirStreams(gpu_arrays *arrays, params *params);
|
sofia@0
|
93
|
sofia@0
|
94 void cudaMultiFilterFirInit(float **in, float **out, float *filters, params *params, gpu_arrays *arrays);
|
sofia@0
|
95
|
sofia@0
|
96 void cudaMultiFilterFirClose(params *params, gpu_arrays *arrays);
|
sofia@0
|
97 }
|
sofia@0
|
98
|
sofia@0
|
99 #endif
|
sofia@0
|
100
|
sofia@0
|
101 #endif // FILTERS_H
|