annotate ext/kissfft/tools/kiss_fastfir.c @ 416:1f3244a6884c

Add filter test
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 07 Oct 2015 10:07:30 +0100
parents 1f1999b0f577
children
rev   line source
c@409 1 /*
c@409 2 Copyright (c) 2003-2004, Mark Borgerding
c@409 3
c@409 4 All rights reserved.
c@409 5
c@409 6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
c@409 7
c@409 8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
c@409 9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
c@409 10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
c@409 11
c@409 12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
c@409 13 */
c@409 14
c@409 15 #include "_kiss_fft_guts.h"
c@409 16
c@409 17
c@409 18 /*
c@409 19 Some definitions that allow real or complex filtering
c@409 20 */
c@409 21 #ifdef REAL_FASTFIR
c@409 22 #define MIN_FFT_LEN 2048
c@409 23 #include "kiss_fftr.h"
c@409 24 typedef kiss_fft_scalar kffsamp_t;
c@409 25 typedef kiss_fftr_cfg kfcfg_t;
c@409 26 #define FFT_ALLOC kiss_fftr_alloc
c@409 27 #define FFTFWD kiss_fftr
c@409 28 #define FFTINV kiss_fftri
c@409 29 #else
c@409 30 #define MIN_FFT_LEN 1024
c@409 31 typedef kiss_fft_cpx kffsamp_t;
c@409 32 typedef kiss_fft_cfg kfcfg_t;
c@409 33 #define FFT_ALLOC kiss_fft_alloc
c@409 34 #define FFTFWD kiss_fft
c@409 35 #define FFTINV kiss_fft
c@409 36 #endif
c@409 37
c@409 38 typedef struct kiss_fastfir_state *kiss_fastfir_cfg;
c@409 39
c@409 40
c@409 41
c@409 42 kiss_fastfir_cfg kiss_fastfir_alloc(const kffsamp_t * imp_resp,size_t n_imp_resp,
c@409 43 size_t * nfft,void * mem,size_t*lenmem);
c@409 44
c@409 45 /* see do_file_filter for usage */
c@409 46 size_t kiss_fastfir( kiss_fastfir_cfg cfg, kffsamp_t * inbuf, kffsamp_t * outbuf, size_t n, size_t *offset);
c@409 47
c@409 48
c@409 49
c@409 50 static int verbose=0;
c@409 51
c@409 52
c@409 53 struct kiss_fastfir_state{
c@409 54 size_t nfft;
c@409 55 size_t ngood;
c@409 56 kfcfg_t fftcfg;
c@409 57 kfcfg_t ifftcfg;
c@409 58 kiss_fft_cpx * fir_freq_resp;
c@409 59 kiss_fft_cpx * freqbuf;
c@409 60 size_t n_freq_bins;
c@409 61 kffsamp_t * tmpbuf;
c@409 62 };
c@409 63
c@409 64
c@409 65 kiss_fastfir_cfg kiss_fastfir_alloc(
c@409 66 const kffsamp_t * imp_resp,size_t n_imp_resp,
c@409 67 size_t *pnfft, /* if <= 0, an appropriate size will be chosen */
c@409 68 void * mem,size_t*lenmem)
c@409 69 {
c@409 70 kiss_fastfir_cfg st = NULL;
c@409 71 size_t len_fftcfg,len_ifftcfg;
c@409 72 size_t memneeded = sizeof(struct kiss_fastfir_state);
c@409 73 char * ptr;
c@409 74 size_t i;
c@409 75 size_t nfft=0;
c@409 76 float scale;
c@409 77 int n_freq_bins;
c@409 78 if (pnfft)
c@409 79 nfft=*pnfft;
c@409 80
c@409 81 if (nfft<=0) {
c@409 82 /* determine fft size as next power of two at least 2x
c@409 83 the impulse response length*/
c@409 84 i=n_imp_resp-1;
c@409 85 nfft=2;
c@409 86 do{
c@409 87 nfft<<=1;
c@409 88 }while (i>>=1);
c@409 89 #ifdef MIN_FFT_LEN
c@409 90 if ( nfft < MIN_FFT_LEN )
c@409 91 nfft=MIN_FFT_LEN;
c@409 92 #endif
c@409 93 }
c@409 94 if (pnfft)
c@409 95 *pnfft = nfft;
c@409 96
c@409 97 #ifdef REAL_FASTFIR
c@409 98 n_freq_bins = nfft/2 + 1;
c@409 99 #else
c@409 100 n_freq_bins = nfft;
c@409 101 #endif
c@409 102 /*fftcfg*/
c@409 103 FFT_ALLOC (nfft, 0, NULL, &len_fftcfg);
c@409 104 memneeded += len_fftcfg;
c@409 105 /*ifftcfg*/
c@409 106 FFT_ALLOC (nfft, 1, NULL, &len_ifftcfg);
c@409 107 memneeded += len_ifftcfg;
c@409 108 /* tmpbuf */
c@409 109 memneeded += sizeof(kffsamp_t) * nfft;
c@409 110 /* fir_freq_resp */
c@409 111 memneeded += sizeof(kiss_fft_cpx) * n_freq_bins;
c@409 112 /* freqbuf */
c@409 113 memneeded += sizeof(kiss_fft_cpx) * n_freq_bins;
c@409 114
c@409 115 if (lenmem == NULL) {
c@409 116 st = (kiss_fastfir_cfg) malloc (memneeded);
c@409 117 } else {
c@409 118 if (*lenmem >= memneeded)
c@409 119 st = (kiss_fastfir_cfg) mem;
c@409 120 *lenmem = memneeded;
c@409 121 }
c@409 122 if (!st)
c@409 123 return NULL;
c@409 124
c@409 125 st->nfft = nfft;
c@409 126 st->ngood = nfft - n_imp_resp + 1;
c@409 127 st->n_freq_bins = n_freq_bins;
c@409 128 ptr=(char*)(st+1);
c@409 129
c@409 130 st->fftcfg = (kfcfg_t)ptr;
c@409 131 ptr += len_fftcfg;
c@409 132
c@409 133 st->ifftcfg = (kfcfg_t)ptr;
c@409 134 ptr += len_ifftcfg;
c@409 135
c@409 136 st->tmpbuf = (kffsamp_t*)ptr;
c@409 137 ptr += sizeof(kffsamp_t) * nfft;
c@409 138
c@409 139 st->freqbuf = (kiss_fft_cpx*)ptr;
c@409 140 ptr += sizeof(kiss_fft_cpx) * n_freq_bins;
c@409 141
c@409 142 st->fir_freq_resp = (kiss_fft_cpx*)ptr;
c@409 143 ptr += sizeof(kiss_fft_cpx) * n_freq_bins;
c@409 144
c@409 145 FFT_ALLOC (nfft,0,st->fftcfg , &len_fftcfg);
c@409 146 FFT_ALLOC (nfft,1,st->ifftcfg , &len_ifftcfg);
c@409 147
c@409 148 memset(st->tmpbuf,0,sizeof(kffsamp_t)*nfft);
c@409 149 /*zero pad in the middle to left-rotate the impulse response
c@409 150 This puts the scrap samples at the end of the inverse fft'd buffer */
c@409 151 st->tmpbuf[0] = imp_resp[ n_imp_resp - 1 ];
c@409 152 for (i=0;i<n_imp_resp - 1; ++i) {
c@409 153 st->tmpbuf[ nfft - n_imp_resp + 1 + i ] = imp_resp[ i ];
c@409 154 }
c@409 155
c@409 156 FFTFWD(st->fftcfg,st->tmpbuf,st->fir_freq_resp);
c@409 157
c@409 158 /* TODO: this won't work for fixed point */
c@409 159 scale = 1.0 / st->nfft;
c@409 160
c@409 161 for ( i=0; i < st->n_freq_bins; ++i ) {
c@409 162 #ifdef USE_SIMD
c@409 163 st->fir_freq_resp[i].r *= _mm_set1_ps(scale);
c@409 164 st->fir_freq_resp[i].i *= _mm_set1_ps(scale);
c@409 165 #else
c@409 166 st->fir_freq_resp[i].r *= scale;
c@409 167 st->fir_freq_resp[i].i *= scale;
c@409 168 #endif
c@409 169 }
c@409 170 return st;
c@409 171 }
c@409 172
c@409 173 static void fastconv1buf(const kiss_fastfir_cfg st,const kffsamp_t * in,kffsamp_t * out)
c@409 174 {
c@409 175 size_t i;
c@409 176 /* multiply the frequency response of the input signal by
c@409 177 that of the fir filter*/
c@409 178 FFTFWD( st->fftcfg, in , st->freqbuf );
c@409 179 for ( i=0; i<st->n_freq_bins; ++i ) {
c@409 180 kiss_fft_cpx tmpsamp;
c@409 181 C_MUL(tmpsamp,st->freqbuf[i],st->fir_freq_resp[i]);
c@409 182 st->freqbuf[i] = tmpsamp;
c@409 183 }
c@409 184
c@409 185 /* perform the inverse fft*/
c@409 186 FFTINV(st->ifftcfg,st->freqbuf,out);
c@409 187 }
c@409 188
c@409 189 /* n : the size of inbuf and outbuf in samples
c@409 190 return value: the number of samples completely processed
c@409 191 n-retval samples should be copied to the front of the next input buffer */
c@409 192 static size_t kff_nocopy(
c@409 193 kiss_fastfir_cfg st,
c@409 194 const kffsamp_t * inbuf,
c@409 195 kffsamp_t * outbuf,
c@409 196 size_t n)
c@409 197 {
c@409 198 size_t norig=n;
c@409 199 while (n >= st->nfft ) {
c@409 200 fastconv1buf(st,inbuf,outbuf);
c@409 201 inbuf += st->ngood;
c@409 202 outbuf += st->ngood;
c@409 203 n -= st->ngood;
c@409 204 }
c@409 205 return norig - n;
c@409 206 }
c@409 207
c@409 208 static
c@409 209 size_t kff_flush(kiss_fastfir_cfg st,const kffsamp_t * inbuf,kffsamp_t * outbuf,size_t n)
c@409 210 {
c@409 211 size_t zpad=0,ntmp;
c@409 212
c@409 213 ntmp = kff_nocopy(st,inbuf,outbuf,n);
c@409 214 n -= ntmp;
c@409 215 inbuf += ntmp;
c@409 216 outbuf += ntmp;
c@409 217
c@409 218 zpad = st->nfft - n;
c@409 219 memset(st->tmpbuf,0,sizeof(kffsamp_t)*st->nfft );
c@409 220 memcpy(st->tmpbuf,inbuf,sizeof(kffsamp_t)*n );
c@409 221
c@409 222 fastconv1buf(st,st->tmpbuf,st->tmpbuf);
c@409 223
c@409 224 memcpy(outbuf,st->tmpbuf,sizeof(kffsamp_t)*( st->ngood - zpad ));
c@409 225 return ntmp + st->ngood - zpad;
c@409 226 }
c@409 227
c@409 228 size_t kiss_fastfir(
c@409 229 kiss_fastfir_cfg vst,
c@409 230 kffsamp_t * inbuf,
c@409 231 kffsamp_t * outbuf,
c@409 232 size_t n_new,
c@409 233 size_t *offset)
c@409 234 {
c@409 235 size_t ntot = n_new + *offset;
c@409 236 if (n_new==0) {
c@409 237 return kff_flush(vst,inbuf,outbuf,ntot);
c@409 238 }else{
c@409 239 size_t nwritten = kff_nocopy(vst,inbuf,outbuf,ntot);
c@409 240 *offset = ntot - nwritten;
c@409 241 /*save the unused or underused samples at the front of the input buffer */
c@409 242 memcpy( inbuf , inbuf+nwritten , *offset * sizeof(kffsamp_t) );
c@409 243 return nwritten;
c@409 244 }
c@409 245 }
c@409 246
c@409 247 #ifdef FAST_FILT_UTIL
c@409 248 #include <unistd.h>
c@409 249 #include <sys/types.h>
c@409 250 #include <sys/mman.h>
c@409 251 #include <assert.h>
c@409 252
c@409 253 static
c@409 254 void direct_file_filter(
c@409 255 FILE * fin,
c@409 256 FILE * fout,
c@409 257 const kffsamp_t * imp_resp,
c@409 258 size_t n_imp_resp)
c@409 259 {
c@409 260 size_t nlag = n_imp_resp - 1;
c@409 261
c@409 262 const kffsamp_t *tmph;
c@409 263 kffsamp_t *buf, *circbuf;
c@409 264 kffsamp_t outval;
c@409 265 size_t nread;
c@409 266 size_t nbuf;
c@409 267 size_t oldestlag = 0;
c@409 268 size_t k, tap;
c@409 269 #ifndef REAL_FASTFIR
c@409 270 kffsamp_t tmp;
c@409 271 #endif
c@409 272
c@409 273 nbuf = 4096;
c@409 274 buf = (kffsamp_t *) malloc ( sizeof (kffsamp_t) * nbuf);
c@409 275 circbuf = (kffsamp_t *) malloc (sizeof (kffsamp_t) * nlag);
c@409 276 if (!circbuf || !buf) {
c@409 277 perror("circbuf allocation");
c@409 278 exit(1);
c@409 279 }
c@409 280
c@409 281 if ( fread (circbuf, sizeof (kffsamp_t), nlag, fin) != nlag ) {
c@409 282 perror ("insufficient data to overcome transient");
c@409 283 exit (1);
c@409 284 }
c@409 285
c@409 286 do {
c@409 287 nread = fread (buf, sizeof (kffsamp_t), nbuf, fin);
c@409 288 if (nread <= 0)
c@409 289 break;
c@409 290
c@409 291 for (k = 0; k < nread; ++k) {
c@409 292 tmph = imp_resp+nlag;
c@409 293 #ifdef REAL_FASTFIR
c@409 294 # ifdef USE_SIMD
c@409 295 outval = _mm_set1_ps(0);
c@409 296 #else
c@409 297 outval = 0;
c@409 298 #endif
c@409 299 for (tap = oldestlag; tap < nlag; ++tap)
c@409 300 outval += circbuf[tap] * *tmph--;
c@409 301 for (tap = 0; tap < oldestlag; ++tap)
c@409 302 outval += circbuf[tap] * *tmph--;
c@409 303 outval += buf[k] * *tmph;
c@409 304 #else
c@409 305 # ifdef USE_SIMD
c@409 306 outval.r = outval.i = _mm_set1_ps(0);
c@409 307 #else
c@409 308 outval.r = outval.i = 0;
c@409 309 #endif
c@409 310 for (tap = oldestlag; tap < nlag; ++tap){
c@409 311 C_MUL(tmp,circbuf[tap],*tmph);
c@409 312 --tmph;
c@409 313 C_ADDTO(outval,tmp);
c@409 314 }
c@409 315
c@409 316 for (tap = 0; tap < oldestlag; ++tap) {
c@409 317 C_MUL(tmp,circbuf[tap],*tmph);
c@409 318 --tmph;
c@409 319 C_ADDTO(outval,tmp);
c@409 320 }
c@409 321 C_MUL(tmp,buf[k],*tmph);
c@409 322 C_ADDTO(outval,tmp);
c@409 323 #endif
c@409 324
c@409 325 circbuf[oldestlag++] = buf[k];
c@409 326 buf[k] = outval;
c@409 327
c@409 328 if (oldestlag == nlag)
c@409 329 oldestlag = 0;
c@409 330 }
c@409 331
c@409 332 if (fwrite (buf, sizeof (buf[0]), nread, fout) != nread) {
c@409 333 perror ("short write");
c@409 334 exit (1);
c@409 335 }
c@409 336 } while (nread);
c@409 337 free (buf);
c@409 338 free (circbuf);
c@409 339 }
c@409 340
c@409 341 static
c@409 342 void do_file_filter(
c@409 343 FILE * fin,
c@409 344 FILE * fout,
c@409 345 const kffsamp_t * imp_resp,
c@409 346 size_t n_imp_resp,
c@409 347 size_t nfft )
c@409 348 {
c@409 349 int fdout;
c@409 350 size_t n_samps_buf;
c@409 351
c@409 352 kiss_fastfir_cfg cfg;
c@409 353 kffsamp_t *inbuf,*outbuf;
c@409 354 int nread,nwrite;
c@409 355 size_t idx_inbuf;
c@409 356
c@409 357 fdout = fileno(fout);
c@409 358
c@409 359 cfg=kiss_fastfir_alloc(imp_resp,n_imp_resp,&nfft,0,0);
c@409 360
c@409 361 /* use length to minimize buffer shift*/
c@409 362 n_samps_buf = 8*4096/sizeof(kffsamp_t);
c@409 363 n_samps_buf = nfft + 4*(nfft-n_imp_resp+1);
c@409 364
c@409 365 if (verbose) fprintf(stderr,"bufsize=%d\n",(int)(sizeof(kffsamp_t)*n_samps_buf) );
c@409 366
c@409 367
c@409 368 /*allocate space and initialize pointers */
c@409 369 inbuf = (kffsamp_t*)malloc(sizeof(kffsamp_t)*n_samps_buf);
c@409 370 outbuf = (kffsamp_t*)malloc(sizeof(kffsamp_t)*n_samps_buf);
c@409 371
c@409 372 idx_inbuf=0;
c@409 373 do{
c@409 374 /* start reading at inbuf[idx_inbuf] */
c@409 375 nread = fread( inbuf + idx_inbuf, sizeof(kffsamp_t), n_samps_buf - idx_inbuf,fin );
c@409 376
c@409 377 /* If nread==0, then this is a flush.
c@409 378 The total number of samples in input is idx_inbuf + nread . */
c@409 379 nwrite = kiss_fastfir(cfg, inbuf, outbuf,nread,&idx_inbuf) * sizeof(kffsamp_t);
c@409 380 /* kiss_fastfir moved any unused samples to the front of inbuf and updated idx_inbuf */
c@409 381
c@409 382 if ( write(fdout, outbuf, nwrite) != nwrite ) {
c@409 383 perror("short write");
c@409 384 exit(1);
c@409 385 }
c@409 386 }while ( nread );
c@409 387 free(cfg);
c@409 388 free(inbuf);
c@409 389 free(outbuf);
c@409 390 }
c@409 391
c@409 392 int main(int argc,char**argv)
c@409 393 {
c@409 394 kffsamp_t * h;
c@409 395 int use_direct=0;
c@409 396 size_t nh,nfft=0;
c@409 397 FILE *fin=stdin;
c@409 398 FILE *fout=stdout;
c@409 399 FILE *filtfile=NULL;
c@409 400 while (1) {
c@409 401 int c=getopt(argc,argv,"n:h:i:o:vd");
c@409 402 if (c==-1) break;
c@409 403 switch (c) {
c@409 404 case 'v':
c@409 405 verbose=1;
c@409 406 break;
c@409 407 case 'n':
c@409 408 nfft=atoi(optarg);
c@409 409 break;
c@409 410 case 'i':
c@409 411 fin = fopen(optarg,"rb");
c@409 412 if (fin==NULL) {
c@409 413 perror(optarg);
c@409 414 exit(1);
c@409 415 }
c@409 416 break;
c@409 417 case 'o':
c@409 418 fout = fopen(optarg,"w+b");
c@409 419 if (fout==NULL) {
c@409 420 perror(optarg);
c@409 421 exit(1);
c@409 422 }
c@409 423 break;
c@409 424 case 'h':
c@409 425 filtfile = fopen(optarg,"rb");
c@409 426 if (filtfile==NULL) {
c@409 427 perror(optarg);
c@409 428 exit(1);
c@409 429 }
c@409 430 break;
c@409 431 case 'd':
c@409 432 use_direct=1;
c@409 433 break;
c@409 434 case '?':
c@409 435 fprintf(stderr,"usage options:\n"
c@409 436 "\t-n nfft: fft size to use\n"
c@409 437 "\t-d : use direct FIR filtering, not fast convolution\n"
c@409 438 "\t-i filename: input file\n"
c@409 439 "\t-o filename: output(filtered) file\n"
c@409 440 "\t-n nfft: fft size to use\n"
c@409 441 "\t-h filename: impulse response\n");
c@409 442 exit (1);
c@409 443 default:fprintf(stderr,"bad %c\n",c);break;
c@409 444 }
c@409 445 }
c@409 446 if (filtfile==NULL) {
c@409 447 fprintf(stderr,"You must supply the FIR coeffs via -h\n");
c@409 448 exit(1);
c@409 449 }
c@409 450 fseek(filtfile,0,SEEK_END);
c@409 451 nh = ftell(filtfile) / sizeof(kffsamp_t);
c@409 452 if (verbose) fprintf(stderr,"%d samples in FIR filter\n",(int)nh);
c@409 453 h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh);
c@409 454 fseek(filtfile,0,SEEK_SET);
c@409 455 if (fread(h,sizeof(kffsamp_t),nh,filtfile) != nh)
c@409 456 fprintf(stderr,"short read on filter file\n");
c@409 457
c@409 458 fclose(filtfile);
c@409 459
c@409 460 if (use_direct)
c@409 461 direct_file_filter( fin, fout, h,nh);
c@409 462 else
c@409 463 do_file_filter( fin, fout, h,nh,nfft);
c@409 464
c@409 465 if (fout!=stdout) fclose(fout);
c@409 466 if (fin!=stdin) fclose(fin);
c@409 467
c@409 468 return 0;
c@409 469 }
c@409 470 #endif