Mercurial > hg > gpufilter
comparison filters_host.cpp @ 0:2b63f74a3010
First commit
author | Sofia Dimoudi <sofia.dimoudi@gmail.com> |
---|---|
date | Fri, 16 Sep 2016 15:38:38 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2b63f74a3010 |
---|---|
1 /* | |
2 GPU multi-rate FIR filter bank example software | |
3 | |
4 Oxford e-Research Centre, Oxford University | |
5 | |
6 Centre for Digital Music, Queen Mary, University of London. | |
7 | |
8 This program is free software: you can redistribute it and/or modify | |
9 it under the terms of the GNU General Public License as published by | |
10 the Free Software Foundation, either version 3 of the License, | |
11 or (at your option) any later version. | |
12 See the file COPYING included with this distribution for more information. | |
13 */ | |
14 | |
15 #include <iostream> | |
16 #include <cstdio> | |
17 #include <cstdlib> | |
18 #include <cmath> | |
19 | |
20 #include <getopt.h> | |
21 #include <sys/time.h> | |
22 #include <omp.h> | |
23 | |
24 #include "filters.h" | |
25 | |
26 /////////////////////////////////////////////////////////////////////////////// | |
27 // CPU routines | |
28 /////////////////////////////////////////////////////////////////////////////// | |
29 | |
30 void filterCpuFir(float* out, float* in, filter_arrays *farr, int n, int nf) | |
31 /** | |
32 * Basic serial implementation of an FIR filter. | |
33 * Based on the Tipic Vamp plugin . | |
34 | |
35 * @param out: the output array from a single filter | |
36 * @param in: the input array | |
37 * @param farr: structure containing filter object | |
38 * @param n: length of input/output | |
39 * @param nf: the number of the filter being processed | |
40 */ | |
41 { | |
42 // Initialize variables and buffers | |
43 int m_offb = farr->m_offb[nf]; | |
44 int m_sz = B_SIZE; | |
45 | |
46 //main loop | |
47 for (int s = 0; s < n; ++s) { | |
48 if (m_offb > 0) --m_offb; | |
49 else { | |
50 for (int i = m_sz - 2; i >= 0; --i) { | |
51 farr->buf_in[nf][i + OFFSET + 1] = farr->buf_in[nf][i]; | |
52 } | |
53 m_offb = OFFSET; | |
54 } | |
55 farr->buf_in[nf][m_offb] = in[s]; | |
56 | |
57 float b_sum = 0.0f; | |
58 for (int i = 0; i < m_sz; ++i) { | |
59 b_sum += farr->bk[nf*m_sz+i] * farr->buf_in[nf][i + m_offb]; | |
60 } | |
61 | |
62 out[s] = b_sum; | |
63 } | |
64 farr->m_offb[nf] = m_offb; | |
65 | |
66 } | |
67 | |
68 | |
69 void compute_ref( float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N) | |
70 { | |
71 /** | |
72 Execution of the multirate filter configuration serially. | |
73 * @param h_in: array containing all input blocks of the MR filter | |
74 * @param h_reference: array containing the MR filter output | |
75 * @param gpuarrays: structure containg the gpu arrays and parameters | |
76 * @param gparams: sructure containg parameters | |
77 * @param args: structure containing command line arguments | |
78 * @param farr: structure containg the filters | |
79 * @param N: the number of input blocks to be processed. | |
80 */ | |
81 | |
82 int oindex = 0; | |
83 int pos[MAX_RATES]; | |
84 int sum = 0; | |
85 struct timeval t_start, t_end; | |
86 double t_cpu = 0.0; | |
87 | |
88 int numrates = gparams->nrates; | |
89 | |
90 pos[0] = 0; | |
91 for (int r=1; r<numrates; r++){ | |
92 sum+= gparams->rnumf[r-1]; | |
93 pos[r] = sum; | |
94 } | |
95 | |
96 //start test | |
97 if (args->tim){ | |
98 gettimeofday(&t_start, NULL); | |
99 } | |
100 | |
101 for (int ii=0; ii<N; ++ii){ // loop through input blocks | |
102 | |
103 for (int i=0; i<gparams->nrates; ++i){ //through number of resampled per block | |
104 oindex = ii* gparams->rnumf[i]*gpuarrays->osize[i]; | |
105 | |
106 for (int f=0; f< gparams->rnumf[i]; ++f){ // through filters per resampled | |
107 // run one process per filter serially | |
108 filterCpuFir(&h_reference[i][oindex + f*gpuarrays->osize[i]], &h_in[i][ii*gpuarrays->osize[i] + gparams->fsize ], farr, gpuarrays->osize[i], pos[i] + f); | |
109 } | |
110 } | |
111 } | |
112 | |
113 if (args->tim){ | |
114 gettimeofday(&t_end, NULL); | |
115 t_cpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0; | |
116 | |
117 printf("\nFinished host single CPU FIR\nProcessing with single CPU took: %f ms\n", t_cpu/(double)N); | |
118 | |
119 } | |
120 } | |
121 | |
122 | |
123 void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N) | |
124 /** | |
125 Execution of the multirate filter configuration with OpenMP. | |
126 * @param h_in: array containing all input blocks of the MR filter | |
127 * @param h_reference: array containing the MR filter output | |
128 * @param gpuarrays: structure containg the gpu arrays and parameters | |
129 * @param gparams: sructure containg parameters | |
130 * @param args: structure containing command line arguments | |
131 * @param farr: structure containg the filters | |
132 * @param N: the number of input blocks to be processed. | |
133 */ | |
134 { | |
135 int pos[MAX_RATES]; | |
136 int sum = 0; | |
137 int oindex = 0; | |
138 int tid; | |
139 int numrates = gparams->nrates; | |
140 | |
141 struct timeval t_start, t_end; | |
142 double t_mcpu = 0.0; | |
143 | |
144 pos[0] = 0; | |
145 for (int r=1; r<numrates; r++){ | |
146 sum+= gparams->rnumf[r-1]; | |
147 pos[r] = sum; | |
148 } | |
149 | |
150 omp_set_dynamic(0); | |
151 int nthreads = omp_get_max_threads(); | |
152 printf("max omp threads: %d\n", nthreads); | |
153 omp_set_num_threads(nthreads); | |
154 | |
155 if (args->tim) | |
156 gettimeofday(&t_start, NULL); | |
157 | |
158 for (int ii=0; ii<N; ++ii){ // loop through input blocks | |
159 for (int i=0; i<numrates; i++){ | |
160 int nfilters = gparams->rnumf[i]; | |
161 int o_sz = gpuarrays->osize[i]; | |
162 int fpos = pos[i]; | |
163 int ratecount = i; | |
164 int blockcount = ii; | |
165 int fsize = gparams->fsize; | |
166 | |
167 #pragma omp parallel for firstprivate(nfilters, o_sz, fpos, ratecount, blockcount, fsize ) | |
168 | |
169 for (int f=0; f<nfilters; ++f){ | |
170 | |
171 filterCpuFir(&h_reference[ratecount][ blockcount*nfilters*o_sz + f*o_sz], &h_in[ratecount][blockcount*o_sz + fsize], farr, o_sz, fpos + f); | |
172 | |
173 | |
174 } | |
175 } | |
176 | |
177 } | |
178 | |
179 if (args->tim){ | |
180 gettimeofday(&t_end, NULL); | |
181 t_mcpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0; | |
182 | |
183 printf("Finished host multi core CPU FIR\nProcessing OPENMP took: %f ms\n", t_mcpu/(double)N); | |
184 | |
185 } | |
186 } | |
187 | |
188 void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N) | |
189 /** | |
190 * Check the relative error between CPU and GPU filter computation results | |
191 | |
192 * @param h_in: array containing all input blocks of the MR filter | |
193 * @param h_reference: array containing the MR filter CPU output | |
194 * @param h_out: array containing the MR filter GPU output | |
195 * @param gpuarrays: structure containg the gpu arrays and parameters | |
196 * @param gparams: sructure containg parameters | |
197 * @param N: the number of input blocks to be processed. | |
198 */ | |
199 { | |
200 float err=0.0; | |
201 float diff, ref; | |
202 int oindex = 0; | |
203 int numrates = gparams->nrates; | |
204 | |
205 //this loop helps with debugging | |
206 for (int b = 0; b < N; ++b) { //loop through input blocks | |
207 | |
208 for (int i = 0; i < numrates; ++i) { //loop through resampled | |
209 oindex = b* gparams->rnumf[i]*gpuarrays->osize[i]; | |
210 | |
211 for (int f = 0; f < gparams->rnumf[i]; ++f) { // and individual filters | |
212 | |
213 for (int p = 0; p < gpuarrays->osize[i]; ++p) { //and points per filter per block | |
214 diff = h_out[i][ oindex + f*gpuarrays->osize[i] + p ] - h_reference[i][ oindex + f*gpuarrays->osize[i] + p ]; | |
215 err += diff*diff; | |
216 ref += h_reference[i][ oindex + f*gpuarrays->osize[i] + p] * h_reference[i][ oindex + f*gpuarrays->osize[i] + p ]; | |
217 } | |
218 } | |
219 } | |
220 | |
221 } | |
222 | |
223 float normref = sqrtf(ref); | |
224 float normerr = sqrtf(err); | |
225 err = normerr / normref; | |
226 | |
227 printf("\nL2 error = %f\n\n",err); | |
228 | |
229 } | |
230 | |
231 void read_command_line(int argc, char *argv[], cmd_args *args) | |
232 /** | |
233 * Read arguments from command line | |
234 */ | |
235 { | |
236 static struct option long_options[] = { | |
237 /* name has_arg flag val */ | |
238 /* ------------------------------------------ */ | |
239 {"help", no_argument, NULL, 0 }, | |
240 {"nrates", required_argument, NULL, 1 }, | |
241 {"nf", required_argument, NULL, 2 }, | |
242 {"insize", required_argument, NULL, 3 }, | |
243 {"rconst", no_argument, NULL, 4 }, | |
244 {"tim", no_argument, NULL, 5 }, | |
245 { 0, 0, 0, 0 } | |
246 }; | |
247 | |
248 int long_index =0; | |
249 int opt; | |
250 | |
251 while ((opt = getopt_long_only(argc, argv,"", long_options, &long_index )) != -1) { | |
252 switch (opt) { | |
253 | |
254 case 0: | |
255 print_usage(); | |
256 exit(EXIT_FAILURE); | |
257 | |
258 case 1 : | |
259 args->nrates = atoi(optarg); | |
260 break; | |
261 | |
262 case 2 : | |
263 args->nf = atoi(optarg); | |
264 break; | |
265 | |
266 case 3 : | |
267 args->insize = atoi(optarg); | |
268 break; | |
269 | |
270 case 4 : | |
271 args->rconst = 1; | |
272 break; | |
273 | |
274 case 5 : | |
275 args->tim = 1; | |
276 break; | |
277 | |
278 default: | |
279 exit(EXIT_FAILURE); | |
280 } | |
281 } | |
282 | |
283 if (optind < argc || optind == 1) { | |
284 print_usage(); | |
285 | |
286 printf("\n\nNo arguments given, run with default values? (y/n): "); | |
287 while (1) | |
288 { | |
289 int c, answer; | |
290 | |
291 /* Read the first character of the line. | |
292 This should be the answer character, but might not be. */ | |
293 c = tolower (fgetc (stdin)); | |
294 answer = c; | |
295 /* Discard rest of input line. */ | |
296 while (c != '\n' && c != EOF) | |
297 c = fgetc (stdin); | |
298 /* Obey the answer if it was valid. */ | |
299 if (answer == 'y'){ | |
300 printf("\nRunning with default values\n"); | |
301 break; | |
302 } | |
303 if (answer == 'n'){ | |
304 printf("\nAborting.\n\n"); | |
305 exit(EXIT_SUCCESS); | |
306 } | |
307 /* Answer was invalid: ask for valid answer. */ | |
308 fputs ("Please answer y or n: ", stdout); | |
309 } | |
310 } | |
311 | |
312 } | |
313 | |
314 void print_usage() | |
315 /** | |
316 * Print command line usage. | |
317 */ | |
318 { | |
319 printf("\nUsage: ./filter [-nrates <n> -nf <n> -insize <n> -rconst -tim]\n-------\n"); | |
320 printf("\n\t-nrates [n]\tNumber of sampling rates (default 3)\n"); | |
321 printf("\n\t-nf [n]\t\tTotal number of filters (default 60)\n"); | |
322 printf("\n\t-rconst\t\tUse constant rate equal to input size (default no)\n"); | |
323 printf("\n\t-insize [n]\tSize of input block (default 1024)\n"); | |
324 | |
325 printf("\n\t-tim\t\tMeasure execution time (default yes)\n\n"); | |
326 } |