filters_host.cpp
Go to the documentation of this file.
1 /*
2  GPU multi-rate FIR filter bank example software
3 
4  Oxford e-Research Centre, Oxford University
5 
6  Centre for Digital Music, Queen Mary, University of London.
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License,
11  or (at your option) any later version.
12  See the file COPYING included with this distribution for more information.
13 */
14 
15 #include <iostream>
16 #include <cstdio>
17 #include <cstdlib>
18 #include <cmath>
19 
20 #include <getopt.h>
21 #include <sys/time.h>
22 #include <omp.h>
23 
24 #include "filters.h"
25 
27 // CPU routines
29 
30 void filterCpuFir(float* out, float* in, filter_arrays *farr, int n, int nf)
41 {
42  // Initialize variables and buffers
43  int m_offb = farr->m_offb[nf];
44  int m_sz = B_SIZE;
45 
46  //main loop
47  for (int s = 0; s < n; ++s) {
48  if (m_offb > 0) --m_offb;
49  else {
50  for (int i = m_sz - 2; i >= 0; --i) {
51  farr->buf_in[nf][i + OFFSET + 1] = farr->buf_in[nf][i];
52  }
53  m_offb = OFFSET;
54  }
55  farr->buf_in[nf][m_offb] = in[s];
56 
57  float b_sum = 0.0f;
58  for (int i = 0; i < m_sz; ++i) {
59  b_sum += farr->bk[nf*m_sz+i] * farr->buf_in[nf][i + m_offb];
60  }
61 
62  out[s] = b_sum;
63  }
64  farr->m_offb[nf] = m_offb;
65 
66 }
67 
68 
69 void compute_ref( float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
70 {
82  int oindex = 0;
83  int pos[MAX_RATES];
84  int sum = 0;
85  struct timeval t_start, t_end;
86  double t_cpu = 0.0;
87 
88  int numrates = gparams->nrates;
89 
90  pos[0] = 0;
91  for (int r=1; r<numrates; r++){
92  sum+= gparams->rnumf[r-1];
93  pos[r] = sum;
94  }
95 
96  //start test
97  if (args->tim){
98  gettimeofday(&t_start, NULL);
99  }
100 
101  for (int ii=0; ii<N; ++ii){ // loop through input blocks
102 
103  for (int i=0; i<gparams->nrates; ++i){ //through number of resampled per block
104  oindex = ii* gparams->rnumf[i]*gpuarrays->osize[i];
105 
106  for (int f=0; f< gparams->rnumf[i]; ++f){ // through filters per resampled
107  // run one process per filter serially
108  filterCpuFir(&h_reference[i][oindex + f*gpuarrays->osize[i]], &h_in[i][ii*gpuarrays->osize[i] + gparams->fsize ], farr, gpuarrays->osize[i], pos[i] + f);
109  }
110  }
111  }
112 
113  if (args->tim){
114  gettimeofday(&t_end, NULL);
115  t_cpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0;
116 
117  printf("\nFinished host single CPU FIR\nProcessing with single CPU took: %f ms\n", t_cpu/(double)N);
118 
119  }
120 }
121 
122 
123 void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
134 {
135  int pos[MAX_RATES];
136  int sum = 0;
137  int oindex = 0;
138  int tid;
139  int numrates = gparams->nrates;
140 
141  struct timeval t_start, t_end;
142  double t_mcpu = 0.0;
143 
144  pos[0] = 0;
145  for (int r=1; r<numrates; r++){
146  sum+= gparams->rnumf[r-1];
147  pos[r] = sum;
148  }
149 
150  omp_set_dynamic(0);
151  int nthreads = omp_get_max_threads();
152  printf("max omp threads: %d\n", nthreads);
153  omp_set_num_threads(nthreads);
154 
155  if (args->tim)
156  gettimeofday(&t_start, NULL);
157 
158  for (int ii=0; ii<N; ++ii){ // loop through input blocks
159  for (int i=0; i<numrates; i++){
160  int nfilters = gparams->rnumf[i];
161  int o_sz = gpuarrays->osize[i];
162  int fpos = pos[i];
163  int ratecount = i;
164  int blockcount = ii;
165  int fsize = gparams->fsize;
166 
167 #pragma omp parallel for firstprivate(nfilters, o_sz, fpos, ratecount, blockcount, fsize )
168 
169  for (int f=0; f<nfilters; ++f){
170 
171  filterCpuFir(&h_reference[ratecount][ blockcount*nfilters*o_sz + f*o_sz], &h_in[ratecount][blockcount*o_sz + fsize], farr, o_sz, fpos + f);
172 
173 
174  }
175  }
176 
177  }
178 
179  if (args->tim){
180  gettimeofday(&t_end, NULL);
181  t_mcpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0;
182 
183  printf("Finished host multi core CPU FIR\nProcessing OPENMP took: %f ms\n", t_mcpu/(double)N);
184 
185  }
186 }
187 
188 void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N)
199 {
200  float err=0.0;
201  float diff, ref;
202  int oindex = 0;
203  int numrates = gparams->nrates;
204 
205  //this loop helps with debugging
206  for (int b = 0; b < N; ++b) { //loop through input blocks
207 
208  for (int i = 0; i < numrates; ++i) { //loop through resampled
209  oindex = b* gparams->rnumf[i]*gpuarrays->osize[i];
210 
211  for (int f = 0; f < gparams->rnumf[i]; ++f) { // and individual filters
212 
213  for (int p = 0; p < gpuarrays->osize[i]; ++p) { //and points per filter per block
214  diff = h_out[i][ oindex + f*gpuarrays->osize[i] + p ] - h_reference[i][ oindex + f*gpuarrays->osize[i] + p ];
215  err += diff*diff;
216  ref += h_reference[i][ oindex + f*gpuarrays->osize[i] + p] * h_reference[i][ oindex + f*gpuarrays->osize[i] + p ];
217  }
218  }
219  }
220 
221  }
222 
223  float normref = sqrtf(ref);
224  float normerr = sqrtf(err);
225  err = normerr / normref;
226 
227  printf("\nL2 error = %f\n\n",err);
228 
229 }
230 
231 void read_command_line(int argc, char *argv[], cmd_args *args)
235 {
236  static struct option long_options[] = {
237  /* name has_arg flag val */
238  /* ------------------------------------------ */
239  {"help", no_argument, NULL, 0 },
240  {"nrates", required_argument, NULL, 1 },
241  {"nf", required_argument, NULL, 2 },
242  {"insize", required_argument, NULL, 3 },
243  {"rconst", no_argument, NULL, 4 },
244  {"tim", no_argument, NULL, 5 },
245  { 0, 0, 0, 0 }
246  };
247 
248  int long_index =0;
249  int opt;
250 
251  while ((opt = getopt_long_only(argc, argv,"", long_options, &long_index )) != -1) {
252  switch (opt) {
253 
254  case 0:
255  print_usage();
256  exit(EXIT_FAILURE);
257 
258  case 1 :
259  args->nrates = atoi(optarg);
260  break;
261 
262  case 2 :
263  args->nf = atoi(optarg);
264  break;
265 
266  case 3 :
267  args->insize = atoi(optarg);
268  break;
269 
270  case 4 :
271  args->rconst = 1;
272  break;
273 
274  case 5 :
275  args->tim = 1;
276  break;
277 
278  default:
279  exit(EXIT_FAILURE);
280  }
281  }
282 
283  if (optind < argc || optind == 1) {
284  print_usage();
285 
286  printf("\n\nNo arguments given, run with default values? (y/n): ");
287  while (1)
288  {
289  int c, answer;
290 
291  /* Read the first character of the line.
292  This should be the answer character, but might not be. */
293  c = tolower (fgetc (stdin));
294  answer = c;
295  /* Discard rest of input line. */
296  while (c != '\n' && c != EOF)
297  c = fgetc (stdin);
298  /* Obey the answer if it was valid. */
299  if (answer == 'y'){
300  printf("\nRunning with default values\n");
301  break;
302  }
303  if (answer == 'n'){
304  printf("\nAborting.\n\n");
305  exit(EXIT_SUCCESS);
306  }
307  /* Answer was invalid: ask for valid answer. */
308  fputs ("Please answer y or n: ", stdout);
309  }
310  }
311 
312 }
313 
318 {
319  printf("\nUsage: ./filter [-nrates <n> -nf <n> -insize <n> -rconst -tim]\n-------\n");
320  printf("\n\t-nrates [n]\tNumber of sampling rates (default 3)\n");
321  printf("\n\t-nf [n]\t\tTotal number of filters (default 60)\n");
322  printf("\n\t-rconst\t\tUse constant rate equal to input size (default no)\n");
323  printf("\n\t-insize [n]\tSize of input block (default 1024)\n");
324 
325  printf("\n\t-tim\t\tMeasure execution time (default yes)\n\n");
326 }
int rnumf[MAX_RATES]
number of filters for each sampling rate
Definition: filters.h:42
float * bk
filter coefficients array
Definition: filters.h:52
void filterCpuFir(float *out, float *in, filter_arrays *farr, int n, int nf)
float buf_in[MAX_FILTERS][B_SIZE+OFFSET]
CPU buffer.
Definition: filters.h:49
int fsize
filter size
Definition: filters.h:40
int tim
time process
Definition: filters.h:72
int nrates
how many sampling rates to process
Definition: filters.h:68
void compute_ref(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N)
int rconst
for nrates=1 keep the initial input size
Definition: filters.h:71
int nrates
number of input sampling rates to process
Definition: filters.h:41
int m_offb[MAX_FILTERS]
offset counter for CPU buffers
Definition: filters.h:51
int nf
total number of filters to process
Definition: filters.h:69
void print_usage()
int insize
input size before resampling
Definition: filters.h:70
#define MAX_RATES
Maximum number of sampling rates.
Definition: filters.h:34
Definition: filters.h:38
#define OFFSET
Offset for CPU filter input buffer.
Definition: filters.h:32
#define B_SIZE
filter length
Definition: filters.h:28
void read_command_line(int argc, char *argv[], cmd_args *args)
int osize[MAX_RATES]
total output size for each set of filters
Definition: filters.h:60
void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)