view filters.h @ 1:ae0773634e0b tip

project report.
author Sofia Dimoudi <sofia.dimoudi@gmail.com>
date Fri, 16 Sep 2016 15:49:19 +0100
parents 2b63f74a3010
children
line wrap: on
line source
/*
  GPU multi-rate FIR filter bank example software

  Oxford e-Research Centre, Oxford University

  Centre for Digital Music, Queen Mary, University of London.

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, 
  or (at your option) any later version.
  See the file COPYING included with this distribution for more information.
*/


#ifndef FILTERS_H
#define FILTERS_H

#include <cuda_runtime.h> 

// Filter parameters

// use shared memory GPU kernel?
// it appears to be faster and can be used with long filters
#define CUDA_SHM 1 //!< Use shared memory GPU kernel

#define SIZE_MUL 1 //!< Multiplier of 11 to derive filter length

// The expansion is parenthesised so that B_SIZE behaves as a single operand
// in any surrounding expression (e.g. `n / B_SIZE`, `n % B_SIZE`); without the
// parentheses those expressions mis-bind as soon as SIZE_MUL is not 1.
#define B_SIZE (11 * SIZE_MUL) //!< filter length

#define A_SIZE (B_SIZE - 1) //!< For IIR filter. not used currently

#define OFFSET 20 //!< Offset for CPU filter input buffer

// Upper bounds used to size the fixed arrays in the structs of this header.
#define MAX_RATES 10 //!< Maximum number of sampling rates
#define MAX_FILTERS 1024 //!< Maximum number of filters
 
//general params
// Filter bank configuration passed (by pointer) to the host reference
// routines and to the cudaMultiFilterFir* entry points declared in this header.
typedef struct{
  int nfilters; //!< total number of filters
  int fsize; //!< filter size
  int nrates; //!<  number of input sampling rates to process
  int rnumf[MAX_RATES]; //!<  number of filters for each sampling rate (one slot per rate, MAX_RATES slots)
  int streams; //!<  use streams? (presumably a zero/non-zero flag -- TODO confirm against callers)
} params;

//structs of arrays preferred to allow contiguous memory accesses.

// Per-filter state: one row per filter (MAX_FILTERS rows) in each buffer,
// plus a pointer to the filter coefficients.
typedef struct{
  float buf_in[MAX_FILTERS][B_SIZE + OFFSET]; //!< CPU buffer, B_SIZE + OFFSET samples per filter
  float buf_out[MAX_FILTERS][A_SIZE + OFFSET]; //!<  for IIR filter, A_SIZE + OFFSET samples per filter
  int m_offb[MAX_FILTERS]; //!<  offset counter for CPU buffers, one per filter
  float *bk; //!< filter coefficients array (allocation and ownership are not visible in this header)
 } filter_arrays;

// Per-sampling-rate buffers and stream handles. Every member is an array
// with MAX_RATES slots, i.e. one slot per sampling rate.
typedef struct{
  float *d_filters[MAX_RATES]; //!<  filters to be stored on the GPU
  float *d_in[MAX_RATES]; //!<  GPU input array for each sampling rate
  int isize[MAX_RATES]; //!<  input size for each sampling rate
  float *d_out[MAX_RATES]; //!<  GPU output array for each sampling rate
  int osize[MAX_RATES]; //!< total output size for each set of filters 
  float *h_in[MAX_RATES]; //!<  host input arrays for GPU transfer
  float *h_out[MAX_RATES]; //!<  host output arrays for GPU transfer
  cudaStream_t stream[MAX_RATES]; //!<  CUDA stream objects
} gpu_arrays;

// command line arguments
// Filled in through read_command_line(), which receives a pointer to this struct.
typedef struct {
  int nrates; //!<  how many sampling rates to process
  int nf; //!<  total number of filters to process
  int insize; //!<  input size before resampling
  int rconst; //!<  for nrates=1 keep the initial input size
  int tim; //!< time process (presumably a flag enabling timing -- TODO confirm)
}cmd_args;

// NOTE(review): this is a non-const `static` array defined in a header, so
// every translation unit that includes filters.h gets its own private,
// writable copy. Confirm no code relies on modifications being shared.
static  int rdiv[MAX_RATES] = {2, 10, 50, 4, 16, 32, 8, 24, 36, 42}; //!<Decimation factors for multiple input rates

// host cuda prototypes
// The definitions live outside this header; the notes below are based on the
// names and signatures only and should be confirmed against the definitions.

// Presumably the single-threaded CPU reference: takes the host inputs h_in
// and writes h_reference -- TODO confirm. N is not described here.
void compute_ref( float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr,  int N);

// Same signature as compute_ref(); presumably the OpenMP variant -- TODO confirm.
void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N);

// Presumably compares h_out against h_reference -- TODO confirm.
void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N);

// Receives argc/argv and a cmd_args to populate.
void read_command_line(int argc, char *argv[], cmd_args *args);

// Declared with an explicit (void) parameter list: in C, empty parentheses
// declare a function with unspecified arguments and disable argument
// checking; (void) means "no arguments" in both C and C++.
void print_usage(void);

#ifdef __cplusplus

extern "C" {

  void cudaMultiFilterFirStreams(gpu_arrays *arrays, params *params);

  void cudaMultiFilterFirInit(float **in, float **out, float *filters, params *params, gpu_arrays *arrays);

  void cudaMultiFilterFirClose(params *params, gpu_arrays *arrays);
}

#endif

#endif // FILTERS_H