sofia@0: /* sofia@0: GPU multi-rate FIR filter bank example software sofia@0: sofia@0: Oxford e-Research Centre, Oxford University sofia@0: sofia@0: Centre for Digital Music, Queen Mary, University of London. sofia@0: sofia@0: This program is free software: you can redistribute it and/or modify sofia@0: it under the terms of the GNU General Public License as published by sofia@0: the Free Software Foundation, either version 3 of the License, sofia@0: or (at your option) any later version. sofia@0: See the file COPYING included with this distribution for more information. sofia@0: */ sofia@0: sofia@0: #include sofia@0: #include sofia@0: #include sofia@0: #include sofia@0: sofia@0: #include sofia@0: #include sofia@0: #include sofia@0: sofia@0: #include "filters.h" sofia@0: sofia@0: /////////////////////////////////////////////////////////////////////////////// sofia@0: // CPU routines sofia@0: /////////////////////////////////////////////////////////////////////////////// sofia@0: sofia@0: void filterCpuFir(float* out, float* in, filter_arrays *farr, int n, int nf) sofia@0: /** sofia@0: * Basic serial implementation of an FIR filter. sofia@0: * Based on the Tipic Vamp plugin . sofia@0: sofia@0: * @param out: the output array from a single filter sofia@0: * @param in: the input array sofia@0: * @param farr: structure containing filter object sofia@0: * @param n: length of input/output sofia@0: * @param nf: the number of the filter being processed sofia@0: */ sofia@0: { sofia@0: // Initialize variables and buffers sofia@0: int m_offb = farr->m_offb[nf]; sofia@0: int m_sz = B_SIZE; sofia@0: sofia@0: //main loop sofia@0: for (int s = 0; s < n; ++s) { sofia@0: if (m_offb > 0) --m_offb; sofia@0: else { sofia@0: for (int i = m_sz - 2; i >= 0; --i) { sofia@0: farr->buf_in[nf][i + OFFSET + 1] = farr->buf_in[nf][i]; sofia@0: } sofia@0: m_offb = OFFSET; sofia@0: } sofia@0: farr->buf_in[nf][m_offb] = in[s]; sofia@0: sofia@0: float b_sum = 0.0f; sofia@0: for (int i = 0; i < m_sz; ++i) { sofia@0: b_sum += farr->bk[nf*m_sz+i] * farr->buf_in[nf][i + m_offb]; sofia@0: } sofia@0: sofia@0: out[s] = b_sum; sofia@0: } sofia@0: farr->m_offb[nf] = m_offb; sofia@0: sofia@0: } sofia@0: sofia@0: sofia@0: void compute_ref( float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N) sofia@0: { sofia@0: /** sofia@0: Execution of the multirate filter configuration serially. sofia@0: * @param h_in: array containing all input blocks of the MR filter sofia@0: * @param h_reference: array containing the MR filter output sofia@0: * @param gpuarrays: structure containg the gpu arrays and parameters sofia@0: * @param gparams: sructure containg parameters sofia@0: * @param args: structure containing command line arguments sofia@0: * @param farr: structure containg the filters sofia@0: * @param N: the number of input blocks to be processed. sofia@0: */ sofia@0: sofia@0: int oindex = 0; sofia@0: int pos[MAX_RATES]; sofia@0: int sum = 0; sofia@0: struct timeval t_start, t_end; sofia@0: double t_cpu = 0.0; sofia@0: sofia@0: int numrates = gparams->nrates; sofia@0: sofia@0: pos[0] = 0; sofia@0: for (int r=1; rrnumf[r-1]; sofia@0: pos[r] = sum; sofia@0: } sofia@0: sofia@0: //start test sofia@0: if (args->tim){ sofia@0: gettimeofday(&t_start, NULL); sofia@0: } sofia@0: sofia@0: for (int ii=0; iinrates; ++i){ //through number of resampled per block sofia@0: oindex = ii* gparams->rnumf[i]*gpuarrays->osize[i]; sofia@0: sofia@0: for (int f=0; f< gparams->rnumf[i]; ++f){ // through filters per resampled sofia@0: // run one process per filter serially sofia@0: filterCpuFir(&h_reference[i][oindex + f*gpuarrays->osize[i]], &h_in[i][ii*gpuarrays->osize[i] + gparams->fsize ], farr, gpuarrays->osize[i], pos[i] + f); sofia@0: } sofia@0: } sofia@0: } sofia@0: sofia@0: if (args->tim){ sofia@0: gettimeofday(&t_end, NULL); sofia@0: t_cpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0; sofia@0: sofia@0: printf("\nFinished host single CPU FIR\nProcessing with single CPU took: %f ms\n", t_cpu/(double)N); sofia@0: sofia@0: } sofia@0: } sofia@0: sofia@0: sofia@0: void compute_omp(float *h_in[], float *h_reference[], gpu_arrays *gpuarrays, params *gparams, cmd_args *args, filter_arrays *farr, int N) sofia@0: /** sofia@0: Execution of the multirate filter configuration with OpenMP. sofia@0: * @param h_in: array containing all input blocks of the MR filter sofia@0: * @param h_reference: array containing the MR filter output sofia@0: * @param gpuarrays: structure containg the gpu arrays and parameters sofia@0: * @param gparams: sructure containg parameters sofia@0: * @param args: structure containing command line arguments sofia@0: * @param farr: structure containg the filters sofia@0: * @param N: the number of input blocks to be processed. sofia@0: */ sofia@0: { sofia@0: int pos[MAX_RATES]; sofia@0: int sum = 0; sofia@0: int oindex = 0; sofia@0: int tid; sofia@0: int numrates = gparams->nrates; sofia@0: sofia@0: struct timeval t_start, t_end; sofia@0: double t_mcpu = 0.0; sofia@0: sofia@0: pos[0] = 0; sofia@0: for (int r=1; rrnumf[r-1]; sofia@0: pos[r] = sum; sofia@0: } sofia@0: sofia@0: omp_set_dynamic(0); sofia@0: int nthreads = omp_get_max_threads(); sofia@0: printf("max omp threads: %d\n", nthreads); sofia@0: omp_set_num_threads(nthreads); sofia@0: sofia@0: if (args->tim) sofia@0: gettimeofday(&t_start, NULL); sofia@0: sofia@0: for (int ii=0; iirnumf[i]; sofia@0: int o_sz = gpuarrays->osize[i]; sofia@0: int fpos = pos[i]; sofia@0: int ratecount = i; sofia@0: int blockcount = ii; sofia@0: int fsize = gparams->fsize; sofia@0: sofia@0: #pragma omp parallel for firstprivate(nfilters, o_sz, fpos, ratecount, blockcount, fsize ) sofia@0: sofia@0: for (int f=0; ftim){ sofia@0: gettimeofday(&t_end, NULL); sofia@0: t_mcpu = (double) (t_end.tv_sec + (t_end.tv_usec / 1000000.0) - t_start.tv_sec - (t_start.tv_usec/ 1000000.0)) * 1000.0; sofia@0: sofia@0: printf("Finished host multi core CPU FIR\nProcessing OPENMP took: %f ms\n", t_mcpu/(double)N); sofia@0: sofia@0: } sofia@0: } sofia@0: sofia@0: void check_results(float *h_reference[], float *h_out[], gpu_arrays *gpuarrays, params *gparams, int N) sofia@0: /** sofia@0: * Check the relative error between CPU and GPU filter computation results sofia@0: sofia@0: * @param h_in: array containing all input blocks of the MR filter sofia@0: * @param h_reference: array containing the MR filter CPU output sofia@0: * @param h_out: array containing the MR filter GPU output sofia@0: * @param gpuarrays: structure containg the gpu arrays and parameters sofia@0: * @param gparams: sructure containg parameters sofia@0: * @param N: the number of input blocks to be processed. sofia@0: */ sofia@0: { sofia@0: float err=0.0; sofia@0: float diff, ref; sofia@0: int oindex = 0; sofia@0: int numrates = gparams->nrates; sofia@0: sofia@0: //this loop helps with debugging sofia@0: for (int b = 0; b < N; ++b) { //loop through input blocks sofia@0: sofia@0: for (int i = 0; i < numrates; ++i) { //loop through resampled sofia@0: oindex = b* gparams->rnumf[i]*gpuarrays->osize[i]; sofia@0: sofia@0: for (int f = 0; f < gparams->rnumf[i]; ++f) { // and individual filters sofia@0: sofia@0: for (int p = 0; p < gpuarrays->osize[i]; ++p) { //and points per filter per block sofia@0: diff = h_out[i][ oindex + f*gpuarrays->osize[i] + p ] - h_reference[i][ oindex + f*gpuarrays->osize[i] + p ]; sofia@0: err += diff*diff; sofia@0: ref += h_reference[i][ oindex + f*gpuarrays->osize[i] + p] * h_reference[i][ oindex + f*gpuarrays->osize[i] + p ]; sofia@0: } sofia@0: } sofia@0: } sofia@0: sofia@0: } sofia@0: sofia@0: float normref = sqrtf(ref); sofia@0: float normerr = sqrtf(err); sofia@0: err = normerr / normref; sofia@0: sofia@0: printf("\nL2 error = %f\n\n",err); sofia@0: sofia@0: } sofia@0: sofia@0: void read_command_line(int argc, char *argv[], cmd_args *args) sofia@0: /** sofia@0: * Read arguments from command line sofia@0: */ sofia@0: { sofia@0: static struct option long_options[] = { sofia@0: /* name has_arg flag val */ sofia@0: /* ------------------------------------------ */ sofia@0: {"help", no_argument, NULL, 0 }, sofia@0: {"nrates", required_argument, NULL, 1 }, sofia@0: {"nf", required_argument, NULL, 2 }, sofia@0: {"insize", required_argument, NULL, 3 }, sofia@0: {"rconst", no_argument, NULL, 4 }, sofia@0: {"tim", no_argument, NULL, 5 }, sofia@0: { 0, 0, 0, 0 } sofia@0: }; sofia@0: sofia@0: int long_index =0; sofia@0: int opt; sofia@0: sofia@0: while ((opt = getopt_long_only(argc, argv,"", long_options, &long_index )) != -1) { sofia@0: switch (opt) { sofia@0: sofia@0: case 0: sofia@0: print_usage(); sofia@0: exit(EXIT_FAILURE); sofia@0: sofia@0: case 1 : sofia@0: args->nrates = atoi(optarg); sofia@0: break; sofia@0: sofia@0: case 2 : sofia@0: args->nf = atoi(optarg); sofia@0: break; sofia@0: sofia@0: case 3 : sofia@0: args->insize = atoi(optarg); sofia@0: break; sofia@0: sofia@0: case 4 : sofia@0: args->rconst = 1; sofia@0: break; sofia@0: sofia@0: case 5 : sofia@0: args->tim = 1; sofia@0: break; sofia@0: sofia@0: default: sofia@0: exit(EXIT_FAILURE); sofia@0: } sofia@0: } sofia@0: sofia@0: if (optind < argc || optind == 1) { sofia@0: print_usage(); sofia@0: sofia@0: printf("\n\nNo arguments given, run with default values? (y/n): "); sofia@0: while (1) sofia@0: { sofia@0: int c, answer; sofia@0: sofia@0: /* Read the first character of the line. sofia@0: This should be the answer character, but might not be. */ sofia@0: c = tolower (fgetc (stdin)); sofia@0: answer = c; sofia@0: /* Discard rest of input line. */ sofia@0: while (c != '\n' && c != EOF) sofia@0: c = fgetc (stdin); sofia@0: /* Obey the answer if it was valid. */ sofia@0: if (answer == 'y'){ sofia@0: printf("\nRunning with default values\n"); sofia@0: break; sofia@0: } sofia@0: if (answer == 'n'){ sofia@0: printf("\nAborting.\n\n"); sofia@0: exit(EXIT_SUCCESS); sofia@0: } sofia@0: /* Answer was invalid: ask for valid answer. */ sofia@0: fputs ("Please answer y or n: ", stdout); sofia@0: } sofia@0: } sofia@0: sofia@0: } sofia@0: sofia@0: void print_usage() sofia@0: /** sofia@0: * Print command line usage. sofia@0: */ sofia@0: { sofia@0: printf("\nUsage: ./filter [-nrates -nf -insize -rconst -tim]\n-------\n"); sofia@0: printf("\n\t-nrates [n]\tNumber of sampling rates (default 3)\n"); sofia@0: printf("\n\t-nf [n]\t\tTotal number of filters (default 60)\n"); sofia@0: printf("\n\t-rconst\t\tUse constant rate equal to input size (default no)\n"); sofia@0: printf("\n\t-insize [n]\tSize of input block (default 1024)\n"); sofia@0: sofia@0: printf("\n\t-tim\t\tMeasure execution time (default yes)\n\n"); sofia@0: }