annotate filters.h @ 1:ae0773634e0b tip

project report.
author Sofia Dimoudi <sofia.dimoudi@gmail.com>
date Fri, 16 Sep 2016 15:49:19 +0100
parents 2b63f74a3010
children
rev   line source
sofia@0 1 /*
sofia@0 2 GPU multi-rate FIR filter bank example software
sofia@0 3
sofia@0 4 Oxford e-Research Centre, Oxford University
sofia@0 5
sofia@0 6 Centre for Digital Music, Queen Mary, University of London.
sofia@0 7
sofia@0 8 This program is free software: you can redistribute it and/or modify
sofia@0 9 it under the terms of the GNU General Public License as published by
sofia@0 10 the Free Software Foundation, either version 3 of the License,
sofia@0 11 or (at your option) any later version.
sofia@0 12 See the file COPYING included with this distribution for more information.
sofia@0 13 */
sofia@0 14
sofia@0 15
sofia@0 16 #ifndef FILTERS_H
sofia@0 17 #define FILTERS_H
sofia@0 18
sofia@0 19 #include <cuda_runtime.h>
sofia@0 20
// Filter parameters

// use shared memory GPU kernel?
// it appears to be faster and can be used with long filters
#define CUDA_SHM 1 //!< Use shared memory GPU kernel

#define SIZE_MUL 1 //!< Multiplier of 11 to derive filter length

// The expansion is parenthesised so that B_SIZE behaves as a single operand
// inside larger expressions (e.g. x / B_SIZE, x % B_SIZE) for any SIZE_MUL.
#define B_SIZE (11 * SIZE_MUL) //!< filter length

#define A_SIZE (B_SIZE - 1) //!< For IIR filter. not used currently

#define OFFSET 20 //!< Offset for CPU filter input buffer

#define MAX_RATES 10 //!< Maximum number of sampling rates
#define MAX_FILTERS 1024 //!< Maximum number of filters
sofia@0 36
sofia@0 37 //general params
sofia@0 38 typedef struct{
sofia@0 39 int nfilters; //!< total number of filters
sofia@0 40 int fsize; //!< filter size
sofia@0 41 int nrates; //!< number of input sampling rates to process
sofia@0 42 int rnumf[MAX_RATES]; //!< number of filters for each sampling rate
sofia@0 43 int streams; //!< use streams?
sofia@0 44 } params;
sofia@0 45
sofia@0 46 //structs of arrays preferred to allow contiguous memory accesses.
sofia@0 47
sofia@0 48 typedef struct{
sofia@0 49 float buf_in[MAX_FILTERS][B_SIZE + OFFSET]; //!< CPU buffer
sofia@0 50 float buf_out[MAX_FILTERS][A_SIZE + OFFSET]; //!< for IIR filter
sofia@0 51 int m_offb[MAX_FILTERS]; //!< offset counter for CPU buffers
sofia@0 52 float *bk; //!< filter coefficients array
sofia@0 53 } filter_arrays;
sofia@0 54
sofia@0 55 typedef struct{
sofia@0 56 float *d_filters[MAX_RATES]; //!< filters to be stored on the GPU
sofia@0 57 float *d_in[MAX_RATES]; //!< GPU input array for each sampling rate
sofia@0 58 int isize[MAX_RATES]; //!< input size for each sampling rate
sofia@0 59 float *d_out[MAX_RATES]; //!< GPU output array for each sampling rate
sofia@0 60 int osize[MAX_RATES]; //!< total output size for each set of filters
sofia@0 61 float *h_in[MAX_RATES]; //!< host input arrays for GPU transfer
sofia@0 62 float *h_out[MAX_RATES]; //!< host output arrays for GPU transfer
sofia@0 63 cudaStream_t stream[MAX_RATES]; //!< CUDA stream objects
sofia@0 64 } gpu_arrays;
sofia@0 65
/** Values taken from the command line. */
typedef struct {
    int nrates; /**< how many sampling rates to process */
    int nf;     /**< total number of filters to process */
    int insize; /**< input size before resampling */
    int rconst; /**< for nrates=1 keep the initial input size */
    int tim;    /**< time process */
} cmd_args;
sofia@0 74
sofia@0 75 static int rdiv[MAX_RATES] = {2, 10, 50, 4, 16, 32, 8, 24, 36, 42}; //!<Decimation factors for multiple input rates
sofia@0 76
sofia@0 77 // host cuda prototypes
sofia@0 78 void compute_ref( float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N);
sofia@0 79
sofia@0 80 void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N);
sofia@0 81
sofia@0 82 void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N);
sofia@0 83
sofia@0 84 void read_command_line(int argc, char *argv[], cmd_args *args);
sofia@0 85
/*
 * Takes no arguments and returns nothing.
 * Declared with (void) so that C compilers see a real prototype and reject
 * calls that pass arguments; the declaration is unchanged for C++.
 */
void print_usage(void);
sofia@0 87
sofia@0 88 #ifdef __cplusplus
sofia@0 89
sofia@0 90 extern "C" {
sofia@0 91
sofia@0 92 void cudaMultiFilterFirStreams(gpu_arrays *arrays, params *params);
sofia@0 93
sofia@0 94 void cudaMultiFilterFirInit(float **in, float **out, float *filters, params *params, gpu_arrays *arrays);
sofia@0 95
sofia@0 96 void cudaMultiFilterFirClose(params *params, gpu_arrays *arrays);
sofia@0 97 }
sofia@0 98
sofia@0 99 #endif
sofia@0 100
sofia@0 101 #endif // FILTERS_H