43   int m_offb = farr->
m_offb[nf];
    47   for (
int s = 0; s < n; ++s) {
    48     if (m_offb > 0) --m_offb;
    50       for (
int i = m_sz - 2; i >= 0; --i) {
    55     farr->
buf_in[nf][m_offb] = in[s];
    58     for (
int i = 0; i < m_sz; ++i) {
    59             b_sum += farr->
bk[nf*m_sz+i] * farr->
buf_in[nf][i + m_offb];
    85   struct timeval t_start, t_end;
    88   int numrates = gparams->
nrates;
    91   for (
int r=1; r<numrates; r++){
    92     sum+= gparams->
rnumf[r-1];
    98     gettimeofday(&t_start, NULL);
   101   for (
int ii=0; ii<N; ++ii){ 
   103     for (
int i=0; i<gparams->
nrates; ++i){  
   104       oindex = ii* gparams->
rnumf[i]*gpuarrays->
osize[i];
   106       for (
int f=0; f< gparams->
rnumf[i]; ++f){ 
   114     gettimeofday(&t_end, NULL); 
   115     t_cpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0)  - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0; 
   117     printf(
"\nFinished host single CPU FIR\nProcessing with single CPU took: %f ms\n", t_cpu/(
double)N);
   139   int numrates = gparams->
nrates;
   141   struct timeval t_start, t_end;
   145   for (
int r=1; r<numrates; r++){
   146     sum+= gparams->
rnumf[r-1];
   151   int nthreads =  omp_get_max_threads();
   152   printf(
"max omp threads: %d\n", nthreads);
   153   omp_set_num_threads(nthreads);
   156     gettimeofday(&t_start, NULL);
   158   for (
int ii=0; ii<N; ++ii){ 
   159     for (
int i=0; i<numrates; i++){  
   160       int nfilters =  gparams->
rnumf[i];
   161       int o_sz = gpuarrays->
osize[i];
   165       int fsize = gparams->
fsize;
   167 #pragma omp parallel  for  firstprivate(nfilters, o_sz, fpos, ratecount, blockcount, fsize )    169       for (
int f=0; f<nfilters; ++f){ 
   171     filterCpuFir(&h_reference[ratecount][ blockcount*nfilters*o_sz + f*o_sz], &h_in[ratecount][blockcount*o_sz + fsize], farr, o_sz, fpos + f); 
   180     gettimeofday(&t_end, NULL); 
   181     t_mcpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0)  - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0; 
   183     printf(
"Finished host multi core CPU FIR\nProcessing OPENMP took: %f ms\n", t_mcpu/(
double)N);
   203   int numrates = gparams->
nrates;
   206   for (
int b = 0; b < N; ++b) { 
   208     for (
int i = 0; i < numrates; ++i) { 
   209       oindex = b* gparams->
rnumf[i]*gpuarrays->
osize[i];
   211       for (
int f = 0; f <  gparams->
rnumf[i]; ++f) { 
   213     for (
int p = 0; p < gpuarrays->
osize[i]; ++p) { 
   214       diff = h_out[i][ oindex + f*gpuarrays->
osize[i] + p ] - h_reference[i][ oindex +  f*gpuarrays->
osize[i] + p ];
   216       ref += h_reference[i][ oindex + f*gpuarrays->
osize[i] + p] * h_reference[i][ oindex + f*gpuarrays->
osize[i] + p ];
   223   float normref = sqrtf(ref);
   224   float normerr = sqrtf(err);
   225   err = normerr / normref;
   227   printf(
"\nL2 error  = %f\n\n",err);
   236   static struct option long_options[] = {
   239     {
"help", no_argument,   NULL,   0 },
   240     {
"nrates", required_argument,   NULL,   1 },
   241     {
"nf", required_argument,   NULL,   2 },
   242     {
"insize", required_argument,   NULL,   3 },
   243     {
"rconst", no_argument,   NULL,   4 },
   244     {
"tim", no_argument,   NULL,   5 },
   251   while ((opt = getopt_long_only(argc, argv,
"", long_options, &long_index )) != -1) {
   259       args->
nrates = atoi(optarg); 
   263       args->
nf = atoi(optarg); 
   267       args->
insize = atoi(optarg); 
   283   if (optind < argc || optind == 1) {
   286     printf(
"\n\nNo arguments given, run with default values? (y/n): ");
   293     c = tolower (fgetc (stdin));
   296     while (c != 
'\n' && c != EOF)
   300       printf(
"\nRunning with default values\n");
   304       printf(
"\nAborting.\n\n");
   308     fputs (
"Please answer y or n: ", stdout);
   319       printf(
"\nUsage: ./filter [-nrates <n> -nf <n> -insize <n> -rconst -tim]\n-------\n");
   320       printf(
"\n\t-nrates [n]\tNumber of sampling rates (default 3)\n");
   321       printf(
"\n\t-nf [n]\t\tTotal number of filters (default 60)\n");
   322       printf(
"\n\t-rconst\t\tUse constant rate equal to input size (default no)\n");
   323       printf(
"\n\t-insize [n]\tSize of input block (default 1024)\n");
   325       printf(
"\n\t-tim\t\tMeasure execution time (default yes)\n\n");
 int rnumf[MAX_RATES]
number of filters for each sampling rate 
float * bk
filter coefficients array 
void filterCpuFir(float *out, float *in, filter_arrays *farr, int n, int nf)
float buf_in[MAX_FILTERS][B_SIZE+OFFSET]
CPU buffer. 
int nrates
how many sampling rates to process 
void compute_ref(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N)
int rconst
for nrates=1 keep the initial input size 
int nrates
number of input sampling rates to process 
int m_offb[MAX_FILTERS]
offset counter for CPU buffers 
int nf
total number of filters to process 
int insize
input size before resampling 
#define MAX_RATES
Maximum number of sampling rates. 
#define OFFSET
Offset for CPU filter input buffer. 
#define B_SIZE
filter length 
void read_command_line(int argc, char *argv[], cmd_args *args)
int osize[MAX_RATES]
total output size for each set of filters 
void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)