annotate filters_host.cpp @ 1:ae0773634e0b tip

project report.
author Sofia Dimoudi <sofia.dimoudi@gmail.com>
date Fri, 16 Sep 2016 15:49:19 +0100
parents 2b63f74a3010
children
rev   line source
sofia@0 1 /*
sofia@0 2 GPU multi-rate FIR filter bank example software
sofia@0 3
sofia@0 4 Oxford e-Research Centre, Oxford University
sofia@0 5
sofia@0 6 Centre for Digital Music, Queen Mary, University of London.
sofia@0 7
sofia@0 8 This program is free software: you can redistribute it and/or modify
sofia@0 9 it under the terms of the GNU General Public License as published by
sofia@0 10 the Free Software Foundation, either version 3 of the License,
sofia@0 11 or (at your option) any later version.
sofia@0 12 See the file COPYING included with this distribution for more information.
sofia@0 13 */
sofia@0 14
sofia@0 15 #include <iostream>
sofia@0 16 #include <cstdio>
sofia@0 17 #include <cstdlib>
sofia@0 18 #include <cmath>
sofia@0 19
sofia@0 20 #include <getopt.h>
sofia@0 21 #include <sys/time.h>
sofia@0 22 #include <omp.h>
sofia@0 23
sofia@0 24 #include "filters.h"
sofia@0 25
sofia@0 26 ///////////////////////////////////////////////////////////////////////////////
sofia@0 27 // CPU routines
sofia@0 28 ///////////////////////////////////////////////////////////////////////////////
sofia@0 29
sofia@0 30 void filterCpuFir(float* out, float* in, filter_arrays *farr, int n, int nf)
sofia@0 31 /**
sofia@0 32 * Basic serial implementation of an FIR filter.
sofia@0 33 * Based on the Tipic Vamp plugin .
sofia@0 34
sofia@0 35 * @param out: the output array from a single filter
sofia@0 36 * @param in: the input array
sofia@0 37 * @param farr: structure containing filter object
sofia@0 38 * @param n: length of input/output
sofia@0 39 * @param nf: the number of the filter being processed
sofia@0 40 */
sofia@0 41 {
sofia@0 42 // Initialize variables and buffers
sofia@0 43 int m_offb = farr->m_offb[nf];
sofia@0 44 int m_sz = B_SIZE;
sofia@0 45
sofia@0 46 //main loop
sofia@0 47 for (int s = 0; s < n; ++s) {
sofia@0 48 if (m_offb > 0) --m_offb;
sofia@0 49 else {
sofia@0 50 for (int i = m_sz - 2; i >= 0; --i) {
sofia@0 51 farr->buf_in[nf][i + OFFSET + 1] = farr->buf_in[nf][i];
sofia@0 52 }
sofia@0 53 m_offb = OFFSET;
sofia@0 54 }
sofia@0 55 farr->buf_in[nf][m_offb] = in[s];
sofia@0 56
sofia@0 57 float b_sum = 0.0f;
sofia@0 58 for (int i = 0; i < m_sz; ++i) {
sofia@0 59 b_sum += farr->bk[nf*m_sz+i] * farr->buf_in[nf][i + m_offb];
sofia@0 60 }
sofia@0 61
sofia@0 62 out[s] = b_sum;
sofia@0 63 }
sofia@0 64 farr->m_offb[nf] = m_offb;
sofia@0 65
sofia@0 66 }
sofia@0 67
sofia@0 68
sofia@0 69 void compute_ref( float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
sofia@0 70 {
sofia@0 71 /**
sofia@0 72 Execution of the multirate filter configuration serially.
sofia@0 73 * @param h_in: array containing all input blocks of the MR filter
sofia@0 74 * @param h_reference: array containing the MR filter output
sofia@0 75 * @param gpuarrays: structure containg the gpu arrays and parameters
sofia@0 76 * @param gparams: sructure containg parameters
sofia@0 77 * @param args: structure containing command line arguments
sofia@0 78 * @param farr: structure containg the filters
sofia@0 79 * @param N: the number of input blocks to be processed.
sofia@0 80 */
sofia@0 81
sofia@0 82 int oindex = 0;
sofia@0 83 int pos[MAX_RATES];
sofia@0 84 int sum = 0;
sofia@0 85 struct timeval t_start, t_end;
sofia@0 86 double t_cpu = 0.0;
sofia@0 87
sofia@0 88 int numrates = gparams->nrates;
sofia@0 89
sofia@0 90 pos[0] = 0;
sofia@0 91 for (int r=1; r<numrates; r++){
sofia@0 92 sum+= gparams->rnumf[r-1];
sofia@0 93 pos[r] = sum;
sofia@0 94 }
sofia@0 95
sofia@0 96 //start test
sofia@0 97 if (args->tim){
sofia@0 98 gettimeofday(&t_start, NULL);
sofia@0 99 }
sofia@0 100
sofia@0 101 for (int ii=0; ii<N; ++ii){ // loop through input blocks
sofia@0 102
sofia@0 103 for (int i=0; i<gparams->nrates; ++i){ //through number of resampled per block
sofia@0 104 oindex = ii* gparams->rnumf[i]*gpuarrays->osize[i];
sofia@0 105
sofia@0 106 for (int f=0; f< gparams->rnumf[i]; ++f){ // through filters per resampled
sofia@0 107 // run one process per filter serially
sofia@0 108 filterCpuFir(&h_reference[i][oindex + f*gpuarrays->osize[i]], &h_in[i][ii*gpuarrays->osize[i] + gparams->fsize ], farr, gpuarrays->osize[i], pos[i] + f);
sofia@0 109 }
sofia@0 110 }
sofia@0 111 }
sofia@0 112
sofia@0 113 if (args->tim){
sofia@0 114 gettimeofday(&t_end, NULL);
sofia@0 115 t_cpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0;
sofia@0 116
sofia@0 117 printf("\nFinished host single CPU FIR\nProcessing with single CPU took: %f ms\n", t_cpu/(double)N);
sofia@0 118
sofia@0 119 }
sofia@0 120 }
sofia@0 121
sofia@0 122
sofia@0 123 void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N)
sofia@0 124 /**
sofia@0 125 Execution of the multirate filter configuration with OpenMP.
sofia@0 126 * @param h_in: array containing all input blocks of the MR filter
sofia@0 127 * @param h_reference: array containing the MR filter output
sofia@0 128 * @param gpuarrays: structure containg the gpu arrays and parameters
sofia@0 129 * @param gparams: sructure containg parameters
sofia@0 130 * @param args: structure containing command line arguments
sofia@0 131 * @param farr: structure containg the filters
sofia@0 132 * @param N: the number of input blocks to be processed.
sofia@0 133 */
sofia@0 134 {
sofia@0 135 int pos[MAX_RATES];
sofia@0 136 int sum = 0;
sofia@0 137 int oindex = 0;
sofia@0 138 int tid;
sofia@0 139 int numrates = gparams->nrates;
sofia@0 140
sofia@0 141 struct timeval t_start, t_end;
sofia@0 142 double t_mcpu = 0.0;
sofia@0 143
sofia@0 144 pos[0] = 0;
sofia@0 145 for (int r=1; r<numrates; r++){
sofia@0 146 sum+= gparams->rnumf[r-1];
sofia@0 147 pos[r] = sum;
sofia@0 148 }
sofia@0 149
sofia@0 150 omp_set_dynamic(0);
sofia@0 151 int nthreads = omp_get_max_threads();
sofia@0 152 printf("max omp threads: %d\n", nthreads);
sofia@0 153 omp_set_num_threads(nthreads);
sofia@0 154
sofia@0 155 if (args->tim)
sofia@0 156 gettimeofday(&t_start, NULL);
sofia@0 157
sofia@0 158 for (int ii=0; ii<N; ++ii){ // loop through input blocks
sofia@0 159 for (int i=0; i<numrates; i++){
sofia@0 160 int nfilters = gparams->rnumf[i];
sofia@0 161 int o_sz = gpuarrays->osize[i];
sofia@0 162 int fpos = pos[i];
sofia@0 163 int ratecount = i;
sofia@0 164 int blockcount = ii;
sofia@0 165 int fsize = gparams->fsize;
sofia@0 166
sofia@0 167 #pragma omp parallel for firstprivate(nfilters, o_sz, fpos, ratecount, blockcount, fsize )
sofia@0 168
sofia@0 169 for (int f=0; f<nfilters; ++f){
sofia@0 170
sofia@0 171 filterCpuFir(&h_reference[ratecount][ blockcount*nfilters*o_sz + f*o_sz], &h_in[ratecount][blockcount*o_sz + fsize], farr, o_sz, fpos + f);
sofia@0 172
sofia@0 173
sofia@0 174 }
sofia@0 175 }
sofia@0 176
sofia@0 177 }
sofia@0 178
sofia@0 179 if (args->tim){
sofia@0 180 gettimeofday(&t_end, NULL);
sofia@0 181 t_mcpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0;
sofia@0 182
sofia@0 183 printf("Finished host multi core CPU FIR\nProcessing OPENMP took: %f ms\n", t_mcpu/(double)N);
sofia@0 184
sofia@0 185 }
sofia@0 186 }
sofia@0 187
sofia@0 188 void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N)
sofia@0 189 /**
sofia@0 190 * Check the relative error between CPU and GPU filter computation results
sofia@0 191
sofia@0 192 * @param h_in: array containing all input blocks of the MR filter
sofia@0 193 * @param h_reference: array containing the MR filter CPU output
sofia@0 194 * @param h_out: array containing the MR filter GPU output
sofia@0 195 * @param gpuarrays: structure containg the gpu arrays and parameters
sofia@0 196 * @param gparams: sructure containg parameters
sofia@0 197 * @param N: the number of input blocks to be processed.
sofia@0 198 */
sofia@0 199 {
sofia@0 200 float err=0.0;
sofia@0 201 float diff, ref;
sofia@0 202 int oindex = 0;
sofia@0 203 int numrates = gparams->nrates;
sofia@0 204
sofia@0 205 //this loop helps with debugging
sofia@0 206 for (int b = 0; b < N; ++b) { //loop through input blocks
sofia@0 207
sofia@0 208 for (int i = 0; i < numrates; ++i) { //loop through resampled
sofia@0 209 oindex = b* gparams->rnumf[i]*gpuarrays->osize[i];
sofia@0 210
sofia@0 211 for (int f = 0; f < gparams->rnumf[i]; ++f) { // and individual filters
sofia@0 212
sofia@0 213 for (int p = 0; p < gpuarrays->osize[i]; ++p) { //and points per filter per block
sofia@0 214 diff = h_out[i][ oindex + f*gpuarrays->osize[i] + p ] - h_reference[i][ oindex + f*gpuarrays->osize[i] + p ];
sofia@0 215 err += diff*diff;
sofia@0 216 ref += h_reference[i][ oindex + f*gpuarrays->osize[i] + p] * h_reference[i][ oindex + f*gpuarrays->osize[i] + p ];
sofia@0 217 }
sofia@0 218 }
sofia@0 219 }
sofia@0 220
sofia@0 221 }
sofia@0 222
sofia@0 223 float normref = sqrtf(ref);
sofia@0 224 float normerr = sqrtf(err);
sofia@0 225 err = normerr / normref;
sofia@0 226
sofia@0 227 printf("\nL2 error = %f\n\n",err);
sofia@0 228
sofia@0 229 }
sofia@0 230
sofia@0 231 void read_command_line(int argc, char *argv[], cmd_args *args)
sofia@0 232 /**
sofia@0 233 * Read arguments from command line
sofia@0 234 */
sofia@0 235 {
sofia@0 236 static struct option long_options[] = {
sofia@0 237 /* name has_arg flag val */
sofia@0 238 /* ------------------------------------------ */
sofia@0 239 {"help", no_argument, NULL, 0 },
sofia@0 240 {"nrates", required_argument, NULL, 1 },
sofia@0 241 {"nf", required_argument, NULL, 2 },
sofia@0 242 {"insize", required_argument, NULL, 3 },
sofia@0 243 {"rconst", no_argument, NULL, 4 },
sofia@0 244 {"tim", no_argument, NULL, 5 },
sofia@0 245 { 0, 0, 0, 0 }
sofia@0 246 };
sofia@0 247
sofia@0 248 int long_index =0;
sofia@0 249 int opt;
sofia@0 250
sofia@0 251 while ((opt = getopt_long_only(argc, argv,"", long_options, &long_index )) != -1) {
sofia@0 252 switch (opt) {
sofia@0 253
sofia@0 254 case 0:
sofia@0 255 print_usage();
sofia@0 256 exit(EXIT_FAILURE);
sofia@0 257
sofia@0 258 case 1 :
sofia@0 259 args->nrates = atoi(optarg);
sofia@0 260 break;
sofia@0 261
sofia@0 262 case 2 :
sofia@0 263 args->nf = atoi(optarg);
sofia@0 264 break;
sofia@0 265
sofia@0 266 case 3 :
sofia@0 267 args->insize = atoi(optarg);
sofia@0 268 break;
sofia@0 269
sofia@0 270 case 4 :
sofia@0 271 args->rconst = 1;
sofia@0 272 break;
sofia@0 273
sofia@0 274 case 5 :
sofia@0 275 args->tim = 1;
sofia@0 276 break;
sofia@0 277
sofia@0 278 default:
sofia@0 279 exit(EXIT_FAILURE);
sofia@0 280 }
sofia@0 281 }
sofia@0 282
sofia@0 283 if (optind < argc || optind == 1) {
sofia@0 284 print_usage();
sofia@0 285
sofia@0 286 printf("\n\nNo arguments given, run with default values? (y/n): ");
sofia@0 287 while (1)
sofia@0 288 {
sofia@0 289 int c, answer;
sofia@0 290
sofia@0 291 /* Read the first character of the line.
sofia@0 292 This should be the answer character, but might not be. */
sofia@0 293 c = tolower (fgetc (stdin));
sofia@0 294 answer = c;
sofia@0 295 /* Discard rest of input line. */
sofia@0 296 while (c != '\n' && c != EOF)
sofia@0 297 c = fgetc (stdin);
sofia@0 298 /* Obey the answer if it was valid. */
sofia@0 299 if (answer == 'y'){
sofia@0 300 printf("\nRunning with default values\n");
sofia@0 301 break;
sofia@0 302 }
sofia@0 303 if (answer == 'n'){
sofia@0 304 printf("\nAborting.\n\n");
sofia@0 305 exit(EXIT_SUCCESS);
sofia@0 306 }
sofia@0 307 /* Answer was invalid: ask for valid answer. */
sofia@0 308 fputs ("Please answer y or n: ", stdout);
sofia@0 309 }
sofia@0 310 }
sofia@0 311
sofia@0 312 }
sofia@0 313
sofia@0 314 void print_usage()
sofia@0 315 /**
sofia@0 316 * Print command line usage.
sofia@0 317 */
sofia@0 318 {
sofia@0 319 printf("\nUsage: ./filter [-nrates <n> -nf <n> -insize <n> -rconst -tim]\n-------\n");
sofia@0 320 printf("\n\t-nrates [n]\tNumber of sampling rates (default 3)\n");
sofia@0 321 printf("\n\t-nf [n]\t\tTotal number of filters (default 60)\n");
sofia@0 322 printf("\n\t-rconst\t\tUse constant rate equal to input size (default no)\n");
sofia@0 323 printf("\n\t-insize [n]\tSize of input block (default 1024)\n");
sofia@0 324
sofia@0 325 printf("\n\t-tim\t\tMeasure execution time (default yes)\n\n");
sofia@0 326 }