sofia@0: /* sofia@0: GPU multi-rate FIR filter bank example software sofia@0: sofia@0: Oxford e-Research Centre, Oxford University sofia@0: sofia@0: Centre for Digital Music, Queen Mary, University of London. sofia@0: sofia@0: This program is free software: you can redistribute it and/or modify sofia@0: it under the terms of the GNU General Public License as published by sofia@0: the Free Software Foundation, either version 3 of the License, sofia@0: or (at your option) any later version. sofia@0: See the file COPYING included with this distribution for more information. sofia@0: */ sofia@0: sofia@0: sofia@0: #ifndef FILTERS_H sofia@0: #define FILTERS_H sofia@0: sofia@0: #include sofia@0: sofia@0: // Filter parameters sofia@0: sofia@0: // use shared memory GPU kernel? sofia@0: // it appears to be faster and can be used with long filters sofia@0: #define CUDA_SHM 1 //!< Use shared memory GPU kernel sofia@0: sofia@0: #define SIZE_MUL 1 //!< Multiplier of 11 to derive filter length sofia@0: #define B_SIZE 11 * SIZE_MUL //!< filter length sofia@0: sofia@0: #define A_SIZE (B_SIZE - 1) //!< For IIR filter. not used currently sofia@0: sofia@0: #define OFFSET 20 //!< Offset for CPU filter input buffer sofia@0: sofia@0: #define MAX_RATES 10 //!< Maximum number of sampling rates sofia@0: #define MAX_FILTERS 1024 //!< Maximum number of filters sofia@0: sofia@0: //general params sofia@0: typedef struct{ sofia@0: int nfilters; //!< total number of filters sofia@0: int fsize; //!< filter size sofia@0: int nrates; //!< number of input sampling rates to process sofia@0: int rnumf[MAX_RATES]; //!< number of filters for each sampling rate sofia@0: int streams; //!< use streams? sofia@0: } params; sofia@0: sofia@0: //structs of arrays preferred to allow contiguous memory accesses. sofia@0: sofia@0: typedef struct{ sofia@0: float buf_in[MAX_FILTERS][B_SIZE + OFFSET]; //!< CPU buffer sofia@0: float buf_out[MAX_FILTERS][A_SIZE + OFFSET]; //!< for IIR filter sofia@0: int m_offb[MAX_FILTERS]; //!< offset counter for CPU buffers sofia@0: float *bk; //!< filter coefficients array sofia@0: } filter_arrays; sofia@0: sofia@0: typedef struct{ sofia@0: float *d_filters[MAX_RATES]; //!< filters to be stored on the GPU sofia@0: float *d_in[MAX_RATES]; //!< GPU input array for each sampling rate sofia@0: int isize[MAX_RATES]; //!< input size for each sampling rate sofia@0: float *d_out[MAX_RATES]; //!< GPU output array for each sampling rate sofia@0: int osize[MAX_RATES]; //!< total output size for each set of filters sofia@0: float *h_in[MAX_RATES]; //!< host input arrays for GPU transfer sofia@0: float *h_out[MAX_RATES]; //!< host output arrays for GPU transfer sofia@0: cudaStream_t stream[MAX_RATES]; //!< CUDA stream objects sofia@0: } gpu_arrays; sofia@0: sofia@0: // command line arguments sofia@0: typedef struct { sofia@0: int nrates; //!< how many sampling rates to process sofia@0: int nf; //!< total number of filters to process sofia@0: int insize; //!< input size before resampling sofia@0: int rconst; //!< for nrates=1 keep the initial input size sofia@0: int tim; //!< time process sofia@0: }cmd_args; sofia@0: sofia@0: static int rdiv[MAX_RATES] = {2, 10, 50, 4, 16, 32, 8, 24, 36, 42}; //!