filtermain.cpp
Go to the documentation of this file.
1 /*
2  GPU multi-rate FIR filter bank example software
3 
4  Oxford e-Research Centre, Oxford University
5 
6  Centre for Digital Music, Queen Mary, University of London.
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License,
11  or (at your option) any later version.
12  See the file COPYING included with this distribution for more information.
13 */
14 
15 #include <cstdio>
16 #include <cstdlib>
17 #include <cstring>
18 
19 #include <sys/time.h>
20 #include "filters.h"
21 
23 // Program main
25 
26 using namespace std;
27 
28 
29 int main( int argc, char** argv)
30 {
31  unsigned int N = 100; // total number of input blocks before resampling
32 
33  int bufb_size = B_SIZE + OFFSET;
34 
35 // the total input signal to loop through is
36 // essentially used to check results and provide multiple
37 // block iterations.
38 
39  float *h_in[MAX_RATES], *h_out[MAX_RATES], *h_reference[MAX_RATES]; // input and output arrays
40  int out_sz[MAX_RATES]; //total output size for all filters
41 
42  //timing
43  struct timeval t_start, t_end;
44  double t_gpu = 0.0, t_mcpu = 0.0;
45 
46  // parameters, filter and GPU arrays structures
47  // object pointers can be passed to these for C++ code
48  filter_arrays farr;
49  params gparams;
50  gpu_arrays gpuarrays;
51  int ratenumf[MAX_RATES];
52 
53  // command line arguments
54  cmd_args args;
55 
56  //deaults
57  args.nrates = 3;
58  args.nf = 60;
59  args.insize = 1024;
60  args.rconst = 0;
61  args.tim=1;
62 
63  read_command_line(argc, argv, &args);
64 
65  //initialise parameters
66  int nf = args.nf;
67  int numrates = args.nrates;
68  int rem= nf%numrates;
69 
70  printf("\nGPU FIR filter parameters\n------\n");
71  printf("\nTotal number of input blocks = %d\n", N);
72  gparams.nfilters = nf; // not sure if this is needed
73  printf("\nTotal number of filters = %d\n", gparams.nfilters);
74  gparams.fsize = B_SIZE;
75  printf("\nFilter size = %d\n", gparams.fsize);
76  gparams.nrates = numrates;
77  printf("\nNumber of sampling rates = %d\n", gparams.nrates);
78  gparams.streams = 1;
79  printf("\nCUDA streams flag = %d\n", gparams.streams);
80 
81  //dividing sampling rates equally...
82  for (int i=0; i< numrates; ++i){
83  ratenumf[i] = nf/numrates;
84  }
85  if (rem > 0){
86  for (int i=0; i<rem; ++i)
87  ratenumf[i]++;
88  }
89 
90  int in_sz = args.insize;
91  if (numrates == 1 && args.rconst==1){
92  gpuarrays.osize[0] = in_sz;
93  }
94  else{
95  for (int i=0; i< numrates; ++i){
96  gpuarrays.osize[i] = in_sz/rdiv[i];
97  if (gpuarrays.osize[i] < B_SIZE ){
98  printf("\n\nInput size for rate %d is shorter than filter size.\nChose a longer input block or a shorter filter.\n", i);
99  printf("\nFilter size = %d\n", B_SIZE);
100  printf("\nDecimated size = %d\n", gpuarrays.osize[i]);
101  printf("\nInitial input block size = %d\n", in_sz);
102  printf("\nDecimation factor = %d\n\n", rdiv[i]);
103  exit(EXIT_FAILURE);
104  }
105  }
106  }
107 
108  int out_blk_sz = 0;
109 
110  for (int i=0; i<numrates; i++){
111  gparams.rnumf[i] = ratenumf[i];
112  printf("\nNumber of filters for rate %d = %d\n", i, gparams.rnumf[i]);
113  gpuarrays.isize[i] = gpuarrays.osize[i] + gparams.fsize;
114  printf("\nGPU Input size for rate %d = %d\n", i, gpuarrays.isize[i]);
115  out_sz[i] = N * gpuarrays.osize[i];
116  out_blk_sz +=ratenumf[i] *gpuarrays.osize[i];
117  printf("\nGPU output size for 1 block for this rate: %d\n", ratenumf[i] *gpuarrays.osize[i]);
118  }
119  printf("\ntotal output size for 1 block for all rates: %d\n", out_blk_sz);
120  printf("----------------\n");
121 
122  int oindex = 0;
123  int pos[MAX_RATES];
124  int sum = 0;
125  pos[0] = 0;
126  for (int r=1; r<numrates; r++){
127  sum+=ratenumf[r-1];
128  pos[r] = sum;
129  }
130 
131  // Initialize arrays
132  for (int i=0; i<numrates; i++){
133  h_in[i] = (float*) malloc((gpuarrays.osize[i]*N + gparams.fsize)*sizeof(float));
134 
135  h_out[i] = (float*) malloc(gpuarrays.osize[i]*ratenumf[i]*N*sizeof(float));
136 
137  h_reference[i] = (float*) malloc(gpuarrays.osize[i]*ratenumf[i]*N*sizeof(float));
138 
139  for (int n=0; n<gparams.fsize; ++n)
140  h_in[i][n] = 0.0f;
141 
142  for (int n=0; n<gpuarrays.osize[i]*N; ++n)
143  h_in[i][n + gparams.fsize] = rand() / (float)RAND_MAX;
144 
145  for (int n=0; n<gpuarrays.osize[i]*ratenumf[i]*N; ++n){
146  h_out[i][n] = 0.0f;
147  h_reference[i][n] = 0.0f;
148  }
149  }
150 
151  // initialize filters with random numbers
152  farr.bk = (float*) malloc(nf*gparams.fsize*sizeof(float));
153  for (int i=0; i < nf; ++i){
154  farr.m_offb[i] = OFFSET;
155  for (int b=0; b < gparams.fsize; ++b)
156  farr.bk[i*gparams.fsize+b] = rand() / (float)RAND_MAX;
157  }
158 
159  //initialize cpu buffers for each filter
160  for (int f=0; f < nf; ++f){
161  for (int i=0; i < bufb_size ; i++){
162  farr.buf_in[f][i] = 0.0;
163  }
164  }
165 
166  printf("\nRunning multirate filter bank test on GPU, CPU, and CPU OpenMP\n");
167  // printf("----------------\n");
168  // compute reference solution
169  printf("\ncompute reference solution (1 CPU)\n");
170 
171  compute_ref( h_in, h_reference, &gpuarrays, &gparams, &args, &farr, N);
172 
173  //reset CPU buffers
174  for (int f=0; f < nf; ++f){
175  farr.m_offb[f] = OFFSET;
176  for (int i=0; i < bufb_size ; ++i)
177  farr.buf_in[f][i] = 0.0;
178  }
179 
180 // compute OPENMP solution
181  printf("\nCompute OPENMP solution\n");
182 
183  compute_omp( h_in, h_reference, &gpuarrays, &gparams, &args, &farr, N);
184 
185  // compute CUDA solution
186  printf("\nCompute cuda solution\n");
187 
188 // init
189  cudaMultiFilterFirInit(h_in, h_out, farr.bk, &gparams, &gpuarrays);
190  for (int i=0; i<numrates; i++)
191  memcpy(gpuarrays.h_in[i], &h_in[i][0], gpuarrays.isize[i]*sizeof(float) );
192  //time execution
193  oindex = 0;
194  if (args.tim)
195  gettimeofday(&t_start, NULL);
196 
197  for (int ii=0; ii<N; ++ii){ // loop through input blocks
198 
199  //need to copy data to pinned memory for streams...
200  for (int i=0; i<numrates; i++){
201  if (ii==0)
202  memcpy(gpuarrays.h_in[i], &h_in[i][0], gpuarrays.isize[i]*sizeof(float) );
203  else
204  memcpy(gpuarrays.h_in[i], &h_in[i][ii*gpuarrays.osize[i]], gpuarrays.isize[i]*sizeof(float) );
205 }
206 
207  //call GPU function
208  cudaMultiFilterFirStreams(&gpuarrays, &gparams);
209 
210  // ... and copy data back from pinned memory...
211  for (int i=0; i<numrates; i++){
212  oindex = ii*ratenumf[i]*gpuarrays.osize[i];
213  cudaDeviceSynchronize();
214  memcpy(&h_out[i][oindex], gpuarrays.h_out[i], gpuarrays.osize[i]* ratenumf[i] *sizeof(float) );
215  }
216  }
217 
218  if (args.tim){
219  cudaDeviceSynchronize();
220  gettimeofday(&t_end, NULL);
221  t_gpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0;
222  printf("Finished GPU FIR\nProcessing gpu took: %f ms\n", t_gpu/(double)N);
223  }
224 
225  // check results
226  check_results(h_reference, h_out, &gpuarrays, &gparams, N);
227 
228  //close GPU
229  cudaMultiFilterFirClose(&gparams, &gpuarrays);
230 
231  // cleanup memory
232  for (int i = 0; i < numrates; ++i){
233  free(h_out[i]);
234  free(h_in[i]);
235  free(h_reference[i]);
236  }
237 
238  free(farr.bk);
239 
240 }
static int rdiv[MAX_RATES]
Decimation factors for multiple input rates.
Definition: filters.h:75
int rnumf[MAX_RATES]
number of filters for each sampling rate
Definition: filters.h:42
float * bk
filter coefficients array
Definition: filters.h:52
int streams
use streams?
Definition: filters.h:43
void read_command_line(int argc, char *argv[], cmd_args *args)
float buf_in[MAX_FILTERS][B_SIZE+OFFSET]
CPU buffer.
Definition: filters.h:49
int fsize
filter size
Definition: filters.h:40
float * h_in[MAX_RATES]
host input arrays for GPU transfer
Definition: filters.h:61
int tim
time process
Definition: filters.h:72
int nrates
how many sampling rates to process
Definition: filters.h:68
void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
int rconst
for nrates=1 keep the initial input size
Definition: filters.h:71
int nrates
number of input sampling rates to process
Definition: filters.h:41
int m_offb[MAX_FILTERS]
offset counter for CPU buffers
Definition: filters.h:51
int isize[MAX_RATES]
input size for each sampling rate
Definition: filters.h:58
int nf
total number of filters to process
Definition: filters.h:69
int main(int argc, char **argv)
Definition: filtermain.cpp:29
int nfilters
total number of filters
Definition: filters.h:39
void compute_ref(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
int insize
input size before resampling
Definition: filters.h:70
void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N)
#define MAX_RATES
Maximum number of sampling rates.
Definition: filters.h:34
Definition: filters.h:38
#define OFFSET
Offset for CPU filter input buffer.
Definition: filters.h:32
#define B_SIZE
filter length
Definition: filters.h:28
int osize[MAX_RATES]
total output size for each set of filters
Definition: filters.h:60
float * h_out[MAX_RATES]
host output arrays for GPU transfer
Definition: filters.h:62