view filters_host.cpp @ 0:2b63f74a3010

First commit
author Sofia Dimoudi <sofia.dimoudi@gmail.com>
date Fri, 16 Sep 2016 15:38:38 +0100
parents
children
line wrap: on
line source
/*
  GPU multi-rate FIR filter bank example software

  Oxford e-Research Centre, Oxford University

  Centre for Digital Music, Queen Mary, University of London.

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, 
  or (at your option) any later version.
  See the file COPYING included with this distribution for more information.
*/

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cmath>

#include <getopt.h>
#include <sys/time.h>
#include <omp.h>

#include "filters.h"

///////////////////////////////////////////////////////////////////////////////
// CPU routines
///////////////////////////////////////////////////////////////////////////////

void filterCpuFir(float* out, float* in, filter_arrays *farr, int n, int nf)
/**
 * Basic serial implementation of an FIR filter. 
 * Based on the Tipic Vamp plugin .

 * @param out: the output array from a single filter
 * @param in: the input array
 * @param farr: structure containing filter object
 * @param n: length of input/output 
 * @param nf: the number of the filter being processed 
*/
{
  // Initialize variables and buffers
  int m_offb = farr->m_offb[nf];
  int m_sz = B_SIZE;

  //main loop
  for (int s = 0; s < n; ++s) {
    if (m_offb > 0) --m_offb;
    else {
      for (int i = m_sz - 2; i >= 0; --i) {
	 farr->buf_in[nf][i + OFFSET + 1] =  farr->buf_in[nf][i];
      }
      m_offb = OFFSET;
    }
    farr->buf_in[nf][m_offb] = in[s];

    float b_sum = 0.0f;
    for (int i = 0; i < m_sz; ++i) {
            b_sum += farr->bk[nf*m_sz+i] * farr->buf_in[nf][i + m_offb];
    }

    out[s] = b_sum;
  }
  farr->m_offb[nf] = m_offb;

}


void compute_ref( float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
{
  /**
     Execution of the multirate filter configuration serially.
     * @param h_in: array containing all input blocks of the MR filter 
     * @param h_reference: array containing the MR filter output
     * @param gpuarrays: structure containg the gpu arrays and parameters
     * @param gparams: sructure containg parameters
     * @param args: structure containing command line arguments
     * @param farr: structure containg the filters
     * @param N: the number of input blocks to be processed.
  */

  int oindex = 0;
  int pos[MAX_RATES];
  int sum = 0;
  struct timeval t_start, t_end;
  double t_cpu = 0.0;

  int numrates = gparams->nrates;

  pos[0] = 0;
  for (int r=1; r<numrates; r++){
    sum+= gparams->rnumf[r-1];
    pos[r] = sum;
  }

    //start test  
  if (args->tim){    
    gettimeofday(&t_start, NULL);
  }

  for (int ii=0; ii<N; ++ii){ // loop through input blocks
 
    for (int i=0; i<gparams->nrates; ++i){  //through number of resampled per block
      oindex = ii* gparams->rnumf[i]*gpuarrays->osize[i];
      
      for (int f=0; f< gparams->rnumf[i]; ++f){ // through filters per resampled
	// run one process per filter serially
	filterCpuFir(&h_reference[i][oindex + f*gpuarrays->osize[i]], &h_in[i][ii*gpuarrays->osize[i] + gparams->fsize ], farr, gpuarrays->osize[i], pos[i] + f); 
      }
    }
  }

  if (args->tim){
    gettimeofday(&t_end, NULL); 
    t_cpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0)  - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0; 
    
    printf("\nFinished host single CPU FIR\nProcessing with single CPU took: %f ms\n", t_cpu/(double)N);

  }
}


void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
  /**
     Execution of the multirate filter configuration with OpenMP.
     * @param h_in: array containing all input blocks of the MR filter 
     * @param h_reference: array containing the MR filter output
     * @param gpuarrays: structure containg the gpu arrays and parameters
     * @param gparams: sructure containg parameters
     * @param args: structure containing command line arguments
     * @param farr: structure containg the filters
     * @param N: the number of input blocks to be processed.
  */
{
  int pos[MAX_RATES];
  int sum = 0;
  int oindex = 0;
  int tid;
  int numrates = gparams->nrates;

  struct timeval t_start, t_end;
  double t_mcpu = 0.0;

  pos[0] = 0;
  for (int r=1; r<numrates; r++){
    sum+= gparams->rnumf[r-1];
    pos[r] = sum;
  }

  omp_set_dynamic(0);
  int nthreads =  omp_get_max_threads();
  printf("max omp threads: %d\n", nthreads);
  omp_set_num_threads(nthreads);

  if (args->tim)
    gettimeofday(&t_start, NULL);

  for (int ii=0; ii<N; ++ii){ // loop through input blocks
    for (int i=0; i<numrates; i++){  
      int nfilters =  gparams->rnumf[i];
      int o_sz = gpuarrays->osize[i];
      int fpos = pos[i];
      int ratecount = i;
      int blockcount = ii;      
      int fsize = gparams->fsize;

#pragma omp parallel  for  firstprivate(nfilters, o_sz, fpos, ratecount, blockcount, fsize ) 

      for (int f=0; f<nfilters; ++f){ 
	
	filterCpuFir(&h_reference[ratecount][ blockcount*nfilters*o_sz + f*o_sz], &h_in[ratecount][blockcount*o_sz + fsize], farr, o_sz, fpos + f); 
	  
	
      }
    }
    
  }
 
  if (args->tim){
    gettimeofday(&t_end, NULL); 
    t_mcpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0)  - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0; 
    
    printf("Finished host multi core CPU FIR\nProcessing OPENMP took: %f ms\n", t_mcpu/(double)N);

  }
}

void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N)
/**
 * Check the relative error between CPU and GPU filter computation results
 
 * @param h_in: array containing all input blocks of the MR filter 
 * @param h_reference: array containing the MR filter CPU output
 * @param h_out: array containing the MR filter GPU output
 * @param gpuarrays: structure containg the gpu arrays and parameters
 * @param gparams: sructure containg parameters
 * @param N: the number of input blocks to be processed.
 */
{
  float err=0.0;
  float diff, ref;
  int oindex = 0;
  int numrates = gparams->nrates;

  //this loop helps with debugging
  for (int b = 0; b < N; ++b) { //loop through input blocks

    for (int i = 0; i < numrates; ++i) { //loop through resampled
      oindex = b* gparams->rnumf[i]*gpuarrays->osize[i];

      for (int f = 0; f <  gparams->rnumf[i]; ++f) { // and individual filters 

  	for (int p = 0; p < gpuarrays->osize[i]; ++p) { //and points per filter per block
  	  diff = h_out[i][ oindex + f*gpuarrays->osize[i] + p ] - h_reference[i][ oindex +  f*gpuarrays->osize[i] + p ];
  	  err += diff*diff;   
  	  ref += h_reference[i][ oindex + f*gpuarrays->osize[i] + p] * h_reference[i][ oindex + f*gpuarrays->osize[i] + p ];
  	}
      }
    }
   
  }
  
  float normref = sqrtf(ref);
  float normerr = sqrtf(err);
  err = normerr / normref;
  
  printf("\nL2 error  = %f\n\n",err);
  
}

void read_command_line(int argc, char *argv[], cmd_args *args)
/**
  * Read arguments from command line
  */
{
  static struct option long_options[] = {
    /* name         has_arg      flag   val */
    /* ------------------------------------------ */
    {"help", no_argument,   NULL,   0 },
    {"nrates", required_argument,   NULL,   1 },
    {"nf", required_argument,   NULL,   2 },
    {"insize", required_argument,   NULL,   3 },
    {"rconst", no_argument,   NULL,   4 },
    {"tim", no_argument,   NULL,   5 },
    {   0,            0,           0,    0  }
  };

  int long_index =0;
  int opt;

  while ((opt = getopt_long_only(argc, argv,"", long_options, &long_index )) != -1) {
    switch (opt) {

    case 0:
      print_usage();
      exit(EXIT_FAILURE);

    case 1 : 
      args->nrates = atoi(optarg); 
      break;

    case 2 :
      args->nf = atoi(optarg); 
      break;

    case 3 :
      args->insize = atoi(optarg); 
      break;
 
    case 4 :
      args->rconst = 1;
      break;

    case 5 :
      args->tim = 1;
      break;

    default:
      exit(EXIT_FAILURE);
    }
  }

  if (optind < argc || optind == 1) {
    print_usage(); 

    printf("\n\nNo arguments given, run with default values? (y/n): ");
    while (1)
      {
	int c, answer;

	/* Read the first character of the line.
	   This should be the answer character, but might not be. */
	c = tolower (fgetc (stdin));
	answer = c;
	/* Discard rest of input line. */
	while (c != '\n' && c != EOF)
	  c = fgetc (stdin);
	/* Obey the answer if it was valid. */
	if (answer == 'y'){
	  printf("\nRunning with default values\n");
	  break;
	}
	if (answer == 'n'){
	  printf("\nAborting.\n\n");
	  exit(EXIT_SUCCESS);
	}
	/* Answer was invalid: ask for valid answer. */
	fputs ("Please answer y or n: ", stdout);
      }
  }

}

void print_usage()
/**
 * Print command line usage.
 */
{
      printf("\nUsage: ./filter [-nrates <n> -nf <n> -insize <n> -rconst -tim]\n-------\n");
      printf("\n\t-nrates [n]\tNumber of sampling rates (default 3)\n");
      printf("\n\t-nf [n]\t\tTotal number of filters (default 60)\n");
      printf("\n\t-rconst\t\tUse constant rate equal to input size (default no)\n");
      printf("\n\t-insize [n]\tSize of input block (default 1024)\n");

      printf("\n\t-tim\t\tMeasure execution time (default yes)\n\n");
}