sofia@0: /*
sofia@0:   GPU multi-rate FIR filter bank example software
sofia@0: 
sofia@0:   Oxford e-Research Centre, Oxford University
sofia@0: 
sofia@0:   Centre for Digital Music, Queen Mary, University of London.
sofia@0: 
sofia@0:   This program is free software: you can redistribute it and/or modify
sofia@0:   it under the terms of the GNU General Public License as published by
sofia@0:   the Free Software Foundation, either version 3 of the License, 
sofia@0:   or (at your option) any later version.
sofia@0:   See the file COPYING included with this distribution for more information.
sofia@0: */
sofia@0: 
sofia@0: 
sofia@0: #ifndef FILTERS_H
sofia@0: #define FILTERS_H
sofia@0: 
sofia@0: #include <cuda_runtime.h> 
sofia@0: 
sofia@0: // Filter parameters
sofia@0: 
sofia@0: // Use the shared memory GPU kernel?
sofia@0: // It appears to be faster and can be used with long filters.
sofia@0: #define CUDA_SHM 1 //!< Use shared memory GPU kernel
sofia@0: 
sofia@0: #define SIZE_MUL 1 //!< Multiplier of 11 to derive filter length
sofia@0: // The product is parenthesised so that B_SIZE always expands as a single
sofia@0: // operand: without the parentheses, expressions such as `x / B_SIZE`,
sofia@0: // `x % B_SIZE` or `(float)B_SIZE` bind to the 11 only and then multiply
sofia@0: // by SIZE_MUL, which is wrong for any SIZE_MUL other than 1.
sofia@0: #define B_SIZE (11 * (SIZE_MUL)) //!< filter length
sofia@0: 
sofia@0: #define A_SIZE (B_SIZE - 1) //!< For IIR filter. not used currently
sofia@0: 
sofia@0: #define OFFSET 20 //!< Offset for CPU filter input buffer
sofia@0: 
sofia@0: #define MAX_RATES 10 //!< Maximum number of sampling rates
sofia@0: #define MAX_FILTERS 1024 //!< Maximum number of filters
sofia@0:  
sofia@0: //! General parameters.
sofia@0: typedef struct {
sofia@0:   //! total number of filters
sofia@0:   int nfilters;
sofia@0:   //! filter size
sofia@0:   int fsize;
sofia@0:   //! number of input sampling rates to process
sofia@0:   int nrates;
sofia@0:   //! number of filters for each sampling rate (MAX_RATES entries)
sofia@0:   int rnumf[MAX_RATES];
sofia@0:   //! use streams?
sofia@0:   int streams;
sofia@0: } params;
sofia@0: 
sofia@0: // Structs of arrays are preferred, to allow contiguous memory accesses.
sofia@0: 
sofia@0: //! CPU-side filter arrays; the fixed-size members are dimensioned by MAX_FILTERS.
sofia@0: typedef struct {
sofia@0:   //! CPU buffer
sofia@0:   float buf_in[MAX_FILTERS][B_SIZE + OFFSET];
sofia@0:   //! for IIR filter
sofia@0:   float buf_out[MAX_FILTERS][A_SIZE + OFFSET];
sofia@0:   //! offset counter for CPU buffers
sofia@0:   int m_offb[MAX_FILTERS];
sofia@0:   //! filter coefficients array
sofia@0:   float *bk;
sofia@0: } filter_arrays;
sofia@0: 
sofia@0: //! Arrays used for GPU processing; every member has one entry per sampling rate (MAX_RATES).
sofia@0: typedef struct {
sofia@0:   //! filters to be stored on the GPU
sofia@0:   float *d_filters[MAX_RATES];
sofia@0:   //! GPU input array for each sampling rate
sofia@0:   float *d_in[MAX_RATES];
sofia@0:   //! input size for each sampling rate
sofia@0:   int isize[MAX_RATES];
sofia@0:   //! GPU output array for each sampling rate
sofia@0:   float *d_out[MAX_RATES];
sofia@0:   //! total output size for each set of filters
sofia@0:   int osize[MAX_RATES];
sofia@0:   //! host input arrays for GPU transfer
sofia@0:   float *h_in[MAX_RATES];
sofia@0:   //! host output arrays for GPU transfer
sofia@0:   float *h_out[MAX_RATES];
sofia@0:   //! CUDA stream objects
sofia@0:   cudaStream_t stream[MAX_RATES];
sofia@0: } gpu_arrays;
sofia@0: 
sofia@0: //! Command line arguments.
sofia@0: typedef struct {
sofia@0:   //! how many sampling rates to process
sofia@0:   int nrates;
sofia@0:   //! total number of filters to process
sofia@0:   int nf;
sofia@0:   //! input size before resampling
sofia@0:   int insize;
sofia@0:   //! for nrates=1 keep the initial input size
sofia@0:   int rconst;
sofia@0:   //! time process
sofia@0:   int tim;
sofia@0: } cmd_args;
sofia@0: 
sofia@0: //! Decimation factors for multiple input rates
sofia@0: static int rdiv[MAX_RATES] = {
sofia@0:   2, 10, 50, 4, 16,
sofia@0:   32, 8, 24, 36, 42
sofia@0: };
sofia@0: 
sofia@0: // Host prototypes. The definitions are not part of this header.
sofia@0: 
sofia@0: // NOTE(review): judging by the name, this presumably produces the reference
sofia@0: // output written to h_reference - confirm against the definition.
sofia@0: void compute_ref( float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr,  int N);
sofia@0: 
sofia@0: // Takes the same arguments as compute_ref.
sofia@0: // NOTE(review): presumably an OpenMP variant of it - confirm against the definition.
sofia@0: void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N);
sofia@0: 
sofia@0: // NOTE(review): presumably compares h_out against h_reference - confirm against the definition.
sofia@0: void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N);
sofia@0: 
sofia@0: // NOTE(review): presumably fills *args from argc/argv - confirm against the definition.
sofia@0: void read_command_line(int argc, char *argv[], cmd_args *args);
sofia@0: 
sofia@0: // Declared with (void) so that it is a real prototype in C as well as C++;
sofia@0: // an empty parameter list in C leaves the arguments unchecked.
sofia@0: void print_usage(void);
sofia@0: 
sofia@0: // cudaMultiFilterFir* entry points, declared with C linkage.
sofia@0: // Only the linkage specification is conditional on __cplusplus, so C
sofia@0: // translation units that include this header see the prototypes too
sofia@0: // (previously the whole section was hidden from C).
sofia@0: #ifdef __cplusplus
sofia@0: extern "C" {
sofia@0: #endif
sofia@0: 
sofia@0:   void cudaMultiFilterFirStreams(gpu_arrays *arrays, params *params);
sofia@0: 
sofia@0:   void cudaMultiFilterFirInit(float **in, float **out, float *filters, params *params, gpu_arrays *arrays);
sofia@0: 
sofia@0:   void cudaMultiFilterFirClose(params *params, gpu_arrays *arrays);
sofia@0: 
sofia@0: #ifdef __cplusplus
sofia@0: }
sofia@0: #endif
sofia@0: 
sofia@0: #endif // FILTERS_H