sofia@0
|
1 /*
|
sofia@0
|
2 GPU multi-rate FIR filter bank example software
|
sofia@0
|
3
|
sofia@0
|
4 Oxford e-Research Centre, Oxford University
|
sofia@0
|
5
|
sofia@0
|
6 Centre for Digital Music, Queen Mary, University of London.
|
sofia@0
|
7
|
sofia@0
|
8 This program is free software: you can redistribute it and/or modify
|
sofia@0
|
9 it under the terms of the GNU General Public License as published by
|
sofia@0
|
10 the Free Software Foundation, either version 3 of the License,
|
sofia@0
|
11 or (at your option) any later version.
|
sofia@0
|
12 See the file COPYING included with this distribution for more information.
|
sofia@0
|
13 */
|
sofia@0
|
14
|
sofia@0
|
15 #include <iostream>
|
sofia@0
|
16 #include <cstdio>
|
sofia@0
|
17 #include <cstdlib>
|
sofia@0
|
18 #include <cmath>
|
sofia@0
|
19
|
sofia@0
|
20 #include <getopt.h>
|
sofia@0
|
21 #include <sys/time.h>
|
sofia@0
|
22 #include <omp.h>
|
sofia@0
|
23
|
sofia@0
|
24 #include "filters.h"
|
sofia@0
|
25
|
sofia@0
|
26 ///////////////////////////////////////////////////////////////////////////////
|
sofia@0
|
27 // CPU routines
|
sofia@0
|
28 ///////////////////////////////////////////////////////////////////////////////
|
sofia@0
|
29
|
sofia@0
|
30 void filterCpuFir(float* out, float* in, filter_arrays *farr, int n, int nf)
|
sofia@0
|
31 /**
|
sofia@0
|
32 * Basic serial implementation of an FIR filter.
|
sofia@0
|
33 * Based on the Tipic Vamp plugin .
|
sofia@0
|
34
|
sofia@0
|
35 * @param out: the output array from a single filter
|
sofia@0
|
36 * @param in: the input array
|
sofia@0
|
37 * @param farr: structure containing filter object
|
sofia@0
|
38 * @param n: length of input/output
|
sofia@0
|
39 * @param nf: the number of the filter being processed
|
sofia@0
|
40 */
|
sofia@0
|
41 {
|
sofia@0
|
42 // Initialize variables and buffers
|
sofia@0
|
43 int m_offb = farr->m_offb[nf];
|
sofia@0
|
44 int m_sz = B_SIZE;
|
sofia@0
|
45
|
sofia@0
|
46 //main loop
|
sofia@0
|
47 for (int s = 0; s < n; ++s) {
|
sofia@0
|
48 if (m_offb > 0) --m_offb;
|
sofia@0
|
49 else {
|
sofia@0
|
50 for (int i = m_sz - 2; i >= 0; --i) {
|
sofia@0
|
51 farr->buf_in[nf][i + OFFSET + 1] = farr->buf_in[nf][i];
|
sofia@0
|
52 }
|
sofia@0
|
53 m_offb = OFFSET;
|
sofia@0
|
54 }
|
sofia@0
|
55 farr->buf_in[nf][m_offb] = in[s];
|
sofia@0
|
56
|
sofia@0
|
57 float b_sum = 0.0f;
|
sofia@0
|
58 for (int i = 0; i < m_sz; ++i) {
|
sofia@0
|
59 b_sum += farr->bk[nf*m_sz+i] * farr->buf_in[nf][i + m_offb];
|
sofia@0
|
60 }
|
sofia@0
|
61
|
sofia@0
|
62 out[s] = b_sum;
|
sofia@0
|
63 }
|
sofia@0
|
64 farr->m_offb[nf] = m_offb;
|
sofia@0
|
65
|
sofia@0
|
66 }
|
sofia@0
|
67
|
sofia@0
|
68
|
sofia@0
|
69 void compute_ref( float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
|
sofia@0
|
70 {
|
sofia@0
|
71 /**
|
sofia@0
|
72 Execution of the multirate filter configuration serially.
|
sofia@0
|
73 * @param h_in: array containing all input blocks of the MR filter
|
sofia@0
|
74 * @param h_reference: array containing the MR filter output
|
sofia@0
|
75 * @param gpuarrays: structure containg the gpu arrays and parameters
|
sofia@0
|
76 * @param gparams: sructure containg parameters
|
sofia@0
|
77 * @param args: structure containing command line arguments
|
sofia@0
|
78 * @param farr: structure containg the filters
|
sofia@0
|
79 * @param N: the number of input blocks to be processed.
|
sofia@0
|
80 */
|
sofia@0
|
81
|
sofia@0
|
82 int oindex = 0;
|
sofia@0
|
83 int pos[MAX_RATES];
|
sofia@0
|
84 int sum = 0;
|
sofia@0
|
85 struct timeval t_start, t_end;
|
sofia@0
|
86 double t_cpu = 0.0;
|
sofia@0
|
87
|
sofia@0
|
88 int numrates = gparams->nrates;
|
sofia@0
|
89
|
sofia@0
|
90 pos[0] = 0;
|
sofia@0
|
91 for (int r=1; r<numrates; r++){
|
sofia@0
|
92 sum+= gparams->rnumf[r-1];
|
sofia@0
|
93 pos[r] = sum;
|
sofia@0
|
94 }
|
sofia@0
|
95
|
sofia@0
|
96 //start test
|
sofia@0
|
97 if (args->tim){
|
sofia@0
|
98 gettimeofday(&t_start, NULL);
|
sofia@0
|
99 }
|
sofia@0
|
100
|
sofia@0
|
101 for (int ii=0; ii<N; ++ii){ // loop through input blocks
|
sofia@0
|
102
|
sofia@0
|
103 for (int i=0; i<gparams->nrates; ++i){ //through number of resampled per block
|
sofia@0
|
104 oindex = ii* gparams->rnumf[i]*gpuarrays->osize[i];
|
sofia@0
|
105
|
sofia@0
|
106 for (int f=0; f< gparams->rnumf[i]; ++f){ // through filters per resampled
|
sofia@0
|
107 // run one process per filter serially
|
sofia@0
|
108 filterCpuFir(&h_reference[i][oindex + f*gpuarrays->osize[i]], &h_in[i][ii*gpuarrays->osize[i] + gparams->fsize ], farr, gpuarrays->osize[i], pos[i] + f);
|
sofia@0
|
109 }
|
sofia@0
|
110 }
|
sofia@0
|
111 }
|
sofia@0
|
112
|
sofia@0
|
113 if (args->tim){
|
sofia@0
|
114 gettimeofday(&t_end, NULL);
|
sofia@0
|
115 t_cpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0;
|
sofia@0
|
116
|
sofia@0
|
117 printf("\nFinished host single CPU FIR\nProcessing with single CPU took: %f ms\n", t_cpu/(double)N);
|
sofia@0
|
118
|
sofia@0
|
119 }
|
sofia@0
|
120 }
|
sofia@0
|
121
|
sofia@0
|
122
|
sofia@0
|
123 void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
|
sofia@0
|
124 /**
|
sofia@0
|
125 Execution of the multirate filter configuration with OpenMP.
|
sofia@0
|
126 * @param h_in: array containing all input blocks of the MR filter
|
sofia@0
|
127 * @param h_reference: array containing the MR filter output
|
sofia@0
|
128 * @param gpuarrays: structure containg the gpu arrays and parameters
|
sofia@0
|
129 * @param gparams: sructure containg parameters
|
sofia@0
|
130 * @param args: structure containing command line arguments
|
sofia@0
|
131 * @param farr: structure containg the filters
|
sofia@0
|
132 * @param N: the number of input blocks to be processed.
|
sofia@0
|
133 */
|
sofia@0
|
134 {
|
sofia@0
|
135 int pos[MAX_RATES];
|
sofia@0
|
136 int sum = 0;
|
sofia@0
|
137 int oindex = 0;
|
sofia@0
|
138 int tid;
|
sofia@0
|
139 int numrates = gparams->nrates;
|
sofia@0
|
140
|
sofia@0
|
141 struct timeval t_start, t_end;
|
sofia@0
|
142 double t_mcpu = 0.0;
|
sofia@0
|
143
|
sofia@0
|
144 pos[0] = 0;
|
sofia@0
|
145 for (int r=1; r<numrates; r++){
|
sofia@0
|
146 sum+= gparams->rnumf[r-1];
|
sofia@0
|
147 pos[r] = sum;
|
sofia@0
|
148 }
|
sofia@0
|
149
|
sofia@0
|
150 omp_set_dynamic(0);
|
sofia@0
|
151 int nthreads = omp_get_max_threads();
|
sofia@0
|
152 printf("max omp threads: %d\n", nthreads);
|
sofia@0
|
153 omp_set_num_threads(nthreads);
|
sofia@0
|
154
|
sofia@0
|
155 if (args->tim)
|
sofia@0
|
156 gettimeofday(&t_start, NULL);
|
sofia@0
|
157
|
sofia@0
|
158 for (int ii=0; ii<N; ++ii){ // loop through input blocks
|
sofia@0
|
159 for (int i=0; i<numrates; i++){
|
sofia@0
|
160 int nfilters = gparams->rnumf[i];
|
sofia@0
|
161 int o_sz = gpuarrays->osize[i];
|
sofia@0
|
162 int fpos = pos[i];
|
sofia@0
|
163 int ratecount = i;
|
sofia@0
|
164 int blockcount = ii;
|
sofia@0
|
165 int fsize = gparams->fsize;
|
sofia@0
|
166
|
sofia@0
|
167 #pragma omp parallel for firstprivate(nfilters, o_sz, fpos, ratecount, blockcount, fsize )
|
sofia@0
|
168
|
sofia@0
|
169 for (int f=0; f<nfilters; ++f){
|
sofia@0
|
170
|
sofia@0
|
171 filterCpuFir(&h_reference[ratecount][ blockcount*nfilters*o_sz + f*o_sz], &h_in[ratecount][blockcount*o_sz + fsize], farr, o_sz, fpos + f);
|
sofia@0
|
172
|
sofia@0
|
173
|
sofia@0
|
174 }
|
sofia@0
|
175 }
|
sofia@0
|
176
|
sofia@0
|
177 }
|
sofia@0
|
178
|
sofia@0
|
179 if (args->tim){
|
sofia@0
|
180 gettimeofday(&t_end, NULL);
|
sofia@0
|
181 t_mcpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0;
|
sofia@0
|
182
|
sofia@0
|
183 printf("Finished host multi core CPU FIR\nProcessing OPENMP took: %f ms\n", t_mcpu/(double)N);
|
sofia@0
|
184
|
sofia@0
|
185 }
|
sofia@0
|
186 }
|
sofia@0
|
187
|
sofia@0
|
188 void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N)
|
sofia@0
|
189 /**
|
sofia@0
|
190 * Check the relative error between CPU and GPU filter computation results
|
sofia@0
|
191
|
sofia@0
|
192 * @param h_in: array containing all input blocks of the MR filter
|
sofia@0
|
193 * @param h_reference: array containing the MR filter CPU output
|
sofia@0
|
194 * @param h_out: array containing the MR filter GPU output
|
sofia@0
|
195 * @param gpuarrays: structure containg the gpu arrays and parameters
|
sofia@0
|
196 * @param gparams: sructure containg parameters
|
sofia@0
|
197 * @param N: the number of input blocks to be processed.
|
sofia@0
|
198 */
|
sofia@0
|
199 {
|
sofia@0
|
200 float err=0.0;
|
sofia@0
|
201 float diff, ref;
|
sofia@0
|
202 int oindex = 0;
|
sofia@0
|
203 int numrates = gparams->nrates;
|
sofia@0
|
204
|
sofia@0
|
205 //this loop helps with debugging
|
sofia@0
|
206 for (int b = 0; b < N; ++b) { //loop through input blocks
|
sofia@0
|
207
|
sofia@0
|
208 for (int i = 0; i < numrates; ++i) { //loop through resampled
|
sofia@0
|
209 oindex = b* gparams->rnumf[i]*gpuarrays->osize[i];
|
sofia@0
|
210
|
sofia@0
|
211 for (int f = 0; f < gparams->rnumf[i]; ++f) { // and individual filters
|
sofia@0
|
212
|
sofia@0
|
213 for (int p = 0; p < gpuarrays->osize[i]; ++p) { //and points per filter per block
|
sofia@0
|
214 diff = h_out[i][ oindex + f*gpuarrays->osize[i] + p ] - h_reference[i][ oindex + f*gpuarrays->osize[i] + p ];
|
sofia@0
|
215 err += diff*diff;
|
sofia@0
|
216 ref += h_reference[i][ oindex + f*gpuarrays->osize[i] + p] * h_reference[i][ oindex + f*gpuarrays->osize[i] + p ];
|
sofia@0
|
217 }
|
sofia@0
|
218 }
|
sofia@0
|
219 }
|
sofia@0
|
220
|
sofia@0
|
221 }
|
sofia@0
|
222
|
sofia@0
|
223 float normref = sqrtf(ref);
|
sofia@0
|
224 float normerr = sqrtf(err);
|
sofia@0
|
225 err = normerr / normref;
|
sofia@0
|
226
|
sofia@0
|
227 printf("\nL2 error = %f\n\n",err);
|
sofia@0
|
228
|
sofia@0
|
229 }
|
sofia@0
|
230
|
sofia@0
|
231 void read_command_line(int argc, char *argv[], cmd_args *args)
|
sofia@0
|
232 /**
|
sofia@0
|
233 * Read arguments from command line
|
sofia@0
|
234 */
|
sofia@0
|
235 {
|
sofia@0
|
236 static struct option long_options[] = {
|
sofia@0
|
237 /* name has_arg flag val */
|
sofia@0
|
238 /* ------------------------------------------ */
|
sofia@0
|
239 {"help", no_argument, NULL, 0 },
|
sofia@0
|
240 {"nrates", required_argument, NULL, 1 },
|
sofia@0
|
241 {"nf", required_argument, NULL, 2 },
|
sofia@0
|
242 {"insize", required_argument, NULL, 3 },
|
sofia@0
|
243 {"rconst", no_argument, NULL, 4 },
|
sofia@0
|
244 {"tim", no_argument, NULL, 5 },
|
sofia@0
|
245 { 0, 0, 0, 0 }
|
sofia@0
|
246 };
|
sofia@0
|
247
|
sofia@0
|
248 int long_index =0;
|
sofia@0
|
249 int opt;
|
sofia@0
|
250
|
sofia@0
|
251 while ((opt = getopt_long_only(argc, argv,"", long_options, &long_index )) != -1) {
|
sofia@0
|
252 switch (opt) {
|
sofia@0
|
253
|
sofia@0
|
254 case 0:
|
sofia@0
|
255 print_usage();
|
sofia@0
|
256 exit(EXIT_FAILURE);
|
sofia@0
|
257
|
sofia@0
|
258 case 1 :
|
sofia@0
|
259 args->nrates = atoi(optarg);
|
sofia@0
|
260 break;
|
sofia@0
|
261
|
sofia@0
|
262 case 2 :
|
sofia@0
|
263 args->nf = atoi(optarg);
|
sofia@0
|
264 break;
|
sofia@0
|
265
|
sofia@0
|
266 case 3 :
|
sofia@0
|
267 args->insize = atoi(optarg);
|
sofia@0
|
268 break;
|
sofia@0
|
269
|
sofia@0
|
270 case 4 :
|
sofia@0
|
271 args->rconst = 1;
|
sofia@0
|
272 break;
|
sofia@0
|
273
|
sofia@0
|
274 case 5 :
|
sofia@0
|
275 args->tim = 1;
|
sofia@0
|
276 break;
|
sofia@0
|
277
|
sofia@0
|
278 default:
|
sofia@0
|
279 exit(EXIT_FAILURE);
|
sofia@0
|
280 }
|
sofia@0
|
281 }
|
sofia@0
|
282
|
sofia@0
|
283 if (optind < argc || optind == 1) {
|
sofia@0
|
284 print_usage();
|
sofia@0
|
285
|
sofia@0
|
286 printf("\n\nNo arguments given, run with default values? (y/n): ");
|
sofia@0
|
287 while (1)
|
sofia@0
|
288 {
|
sofia@0
|
289 int c, answer;
|
sofia@0
|
290
|
sofia@0
|
291 /* Read the first character of the line.
|
sofia@0
|
292 This should be the answer character, but might not be. */
|
sofia@0
|
293 c = tolower (fgetc (stdin));
|
sofia@0
|
294 answer = c;
|
sofia@0
|
295 /* Discard rest of input line. */
|
sofia@0
|
296 while (c != '\n' && c != EOF)
|
sofia@0
|
297 c = fgetc (stdin);
|
sofia@0
|
298 /* Obey the answer if it was valid. */
|
sofia@0
|
299 if (answer == 'y'){
|
sofia@0
|
300 printf("\nRunning with default values\n");
|
sofia@0
|
301 break;
|
sofia@0
|
302 }
|
sofia@0
|
303 if (answer == 'n'){
|
sofia@0
|
304 printf("\nAborting.\n\n");
|
sofia@0
|
305 exit(EXIT_SUCCESS);
|
sofia@0
|
306 }
|
sofia@0
|
307 /* Answer was invalid: ask for valid answer. */
|
sofia@0
|
308 fputs ("Please answer y or n: ", stdout);
|
sofia@0
|
309 }
|
sofia@0
|
310 }
|
sofia@0
|
311
|
sofia@0
|
312 }
|
sofia@0
|
313
|
sofia@0
|
314 void print_usage()
|
sofia@0
|
315 /**
|
sofia@0
|
316 * Print command line usage.
|
sofia@0
|
317 */
|
sofia@0
|
318 {
|
sofia@0
|
319 printf("\nUsage: ./filter [-nrates <n> -nf <n> -insize <n> -rconst -tim]\n-------\n");
|
sofia@0
|
320 printf("\n\t-nrates [n]\tNumber of sampling rates (default 3)\n");
|
sofia@0
|
321 printf("\n\t-nf [n]\t\tTotal number of filters (default 60)\n");
|
sofia@0
|
322 printf("\n\t-rconst\t\tUse constant rate equal to input size (default no)\n");
|
sofia@0
|
323 printf("\n\t-insize [n]\tSize of input block (default 1024)\n");
|
sofia@0
|
324
|
sofia@0
|
325 printf("\n\t-tim\t\tMeasure execution time (default yes)\n\n");
|
sofia@0
|
326 }
|