Mercurial > hg > gpufilter
view filters.h @ 1:ae0773634e0b tip
project report.
author | Sofia Dimoudi <sofia.dimoudi@gmail.com> |
---|---|
date | Fri, 16 Sep 2016 15:49:19 +0100 |
parents | 2b63f74a3010 |
children |
line wrap: on
line source
/* GPU multi-rate FIR filter bank example software Oxford e-Research Centre, Oxford University Centre for Digital Music, Queen Mary, University of London. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. See the file COPYING included with this distribution for more information. */ #ifndef FILTERS_H #define FILTERS_H #include <cuda_runtime.h> // Filter parameters // use shared memory GPU kernel? // it appears to be faster and can be used with long filters #define CUDA_SHM 1 //!< Use shared memory GPU kernel #define SIZE_MUL 1 //!< Multiplier of 11 to derive filter length #define B_SIZE 11 * SIZE_MUL //!< filter length #define A_SIZE (B_SIZE - 1) //!< For IIR filter. not used currently #define OFFSET 20 //!< Offset for CPU filter input buffer #define MAX_RATES 10 //!< Maximum number of sampling rates #define MAX_FILTERS 1024 //!< Maximum number of filters //general params typedef struct{ int nfilters; //!< total number of filters int fsize; //!< filter size int nrates; //!< number of input sampling rates to process int rnumf[MAX_RATES]; //!< number of filters for each sampling rate int streams; //!< use streams? } params; //structs of arrays preferred to allow contiguous memory accesses. typedef struct{ float buf_in[MAX_FILTERS][B_SIZE + OFFSET]; //!< CPU buffer float buf_out[MAX_FILTERS][A_SIZE + OFFSET]; //!< for IIR filter int m_offb[MAX_FILTERS]; //!< offset counter for CPU buffers float *bk; //!< filter coefficients array } filter_arrays; typedef struct{ float *d_filters[MAX_RATES]; //!< filters to be stored on the GPU float *d_in[MAX_RATES]; //!< GPU input array for each sampling rate int isize[MAX_RATES]; //!< input size for each sampling rate float *d_out[MAX_RATES]; //!< GPU output array for each sampling rate int osize[MAX_RATES]; //!< total output size for each set of filters float *h_in[MAX_RATES]; //!< host input arrays for GPU transfer float *h_out[MAX_RATES]; //!< host output arrays for GPU transfer cudaStream_t stream[MAX_RATES]; //!< CUDA stream objects } gpu_arrays; // command line arguments typedef struct { int nrates; //!< how many sampling rates to process int nf; //!< total number of filters to process int insize; //!< input size before resampling int rconst; //!< for nrates=1 keep the initial input size int tim; //!< time process }cmd_args; static int rdiv[MAX_RATES] = {2, 10, 50, 4, 16, 32, 8, 24, 36, 42}; //!<Decimation factors for multiple input rates // host cuda prototypes void compute_ref( float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N); void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N); void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N); void read_command_line(int argc, char *argv[], cmd_args *args); void print_usage(); #ifdef __cplusplus extern "C" { void cudaMultiFilterFirStreams(gpu_arrays *arrays, params *params); void cudaMultiFilterFirInit(float **in, float **out, float *filters, params *params, gpu_arrays *arrays); void cudaMultiFilterFirClose(params *params, gpu_arrays *arrays); } #endif #endif // FILTERS_H