43 int m_offb = farr->
m_offb[nf];
47 for (
int s = 0; s < n; ++s) {
48 if (m_offb > 0) --m_offb;
50 for (
int i = m_sz - 2; i >= 0; --i) {
55 farr->
buf_in[nf][m_offb] = in[s];
58 for (
int i = 0; i < m_sz; ++i) {
59 b_sum += farr->
bk[nf*m_sz+i] * farr->
buf_in[nf][i + m_offb];
85 struct timeval t_start, t_end;
88 int numrates = gparams->
nrates;
91 for (
int r=1; r<numrates; r++){
92 sum+= gparams->
rnumf[r-1];
98 gettimeofday(&t_start, NULL);
101 for (
int ii=0; ii<N; ++ii){
103 for (
int i=0; i<gparams->
nrates; ++i){
104 oindex = ii* gparams->
rnumf[i]*gpuarrays->
osize[i];
106 for (
int f=0; f< gparams->
rnumf[i]; ++f){
114 gettimeofday(&t_end, NULL);
115 t_cpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0;
117 printf(
"\nFinished host single CPU FIR\nProcessing with single CPU took: %f ms\n", t_cpu/(
double)N);
139 int numrates = gparams->
nrates;
141 struct timeval t_start, t_end;
145 for (
int r=1; r<numrates; r++){
146 sum+= gparams->
rnumf[r-1];
151 int nthreads = omp_get_max_threads();
152 printf(
"max omp threads: %d\n", nthreads);
153 omp_set_num_threads(nthreads);
156 gettimeofday(&t_start, NULL);
158 for (
int ii=0; ii<N; ++ii){
159 for (
int i=0; i<numrates; i++){
160 int nfilters = gparams->
rnumf[i];
161 int o_sz = gpuarrays->
osize[i];
165 int fsize = gparams->
fsize;
167 #pragma omp parallel for firstprivate(nfilters, o_sz, fpos, ratecount, blockcount, fsize ) 169 for (
int f=0; f<nfilters; ++f){
171 filterCpuFir(&h_reference[ratecount][ blockcount*nfilters*o_sz + f*o_sz], &h_in[ratecount][blockcount*o_sz + fsize], farr, o_sz, fpos + f);
180 gettimeofday(&t_end, NULL);
181 t_mcpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0;
183 printf(
"Finished host multi core CPU FIR\nProcessing OPENMP took: %f ms\n", t_mcpu/(
double)N);
203 int numrates = gparams->
nrates;
206 for (
int b = 0; b < N; ++b) {
208 for (
int i = 0; i < numrates; ++i) {
209 oindex = b* gparams->
rnumf[i]*gpuarrays->
osize[i];
211 for (
int f = 0; f < gparams->
rnumf[i]; ++f) {
213 for (
int p = 0; p < gpuarrays->
osize[i]; ++p) {
214 diff = h_out[i][ oindex + f*gpuarrays->
osize[i] + p ] - h_reference[i][ oindex + f*gpuarrays->
osize[i] + p ];
216 ref += h_reference[i][ oindex + f*gpuarrays->
osize[i] + p] * h_reference[i][ oindex + f*gpuarrays->
osize[i] + p ];
223 float normref = sqrtf(ref);
224 float normerr = sqrtf(err);
225 err = normerr / normref;
227 printf(
"\nL2 error = %f\n\n",err);
236 static struct option long_options[] = {
239 {
"help", no_argument, NULL, 0 },
240 {
"nrates", required_argument, NULL, 1 },
241 {
"nf", required_argument, NULL, 2 },
242 {
"insize", required_argument, NULL, 3 },
243 {
"rconst", no_argument, NULL, 4 },
244 {
"tim", no_argument, NULL, 5 },
251 while ((opt = getopt_long_only(argc, argv,
"", long_options, &long_index )) != -1) {
259 args->
nrates = atoi(optarg);
263 args->
nf = atoi(optarg);
267 args->
insize = atoi(optarg);
283 if (optind < argc || optind == 1) {
286 printf(
"\n\nNo arguments given, run with default values? (y/n): ");
293 c = tolower (fgetc (stdin));
296 while (c !=
'\n' && c != EOF)
300 printf(
"\nRunning with default values\n");
304 printf(
"\nAborting.\n\n");
308 fputs (
"Please answer y or n: ", stdout);
319 printf(
"\nUsage: ./filter [-nrates <n> -nf <n> -insize <n> -rconst -tim]\n-------\n");
320 printf(
"\n\t-nrates [n]\tNumber of sampling rates (default 3)\n");
321 printf(
"\n\t-nf [n]\t\tTotal number of filters (default 60)\n");
322 printf(
"\n\t-rconst\t\tUse constant rate equal to input size (default no)\n");
323 printf(
"\n\t-insize [n]\tSize of input block (default 1024)\n");
325 printf(
"\n\t-tim\t\tMeasure execution time (default yes)\n\n");
int rnumf[MAX_RATES]
number of filters for each sampling rate
float * bk
filter coefficients array
void filterCpuFir(float *out, float *in, filter_arrays *farr, int n, int nf)
float buf_in[MAX_FILTERS][B_SIZE+OFFSET]
CPU input-sample buffer, one row per filter.
int nrates
how many sampling rates to process
void compute_ref(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N)
int rconst
when nrates is 1, keep the initial input size
int nrates
number of input sampling rates to process
int m_offb[MAX_FILTERS]
per-filter offset into the CPU input buffer at which the next sample is written
int nf
total number of filters to process
int insize
input size before resampling
#define MAX_RATES
Maximum number of sampling rates.
#define OFFSET
Offset for CPU filter input buffer.
#define B_SIZE
filter length
void read_command_line(int argc, char *argv[], cmd_args *args)
int osize[MAX_RATES]
total output size for each set of filters
void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)