annotate ext/kissfft/tools/kiss_fftr.c @ 515:08bcc06c38ec tip master

Remove fast-math
author Chris Cannam <cannam@all-day-breakfast.com>
date Tue, 28 Jan 2020 15:27:37 +0000
parents 1f1999b0f577
children
rev   line source
c@409 1 /*
c@409 2 Copyright (c) 2003-2004, Mark Borgerding
c@409 3
c@409 4 All rights reserved.
c@409 5
c@409 6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
c@409 7
c@409 8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
c@409 9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
c@409 10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
c@409 11
c@409 12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
c@409 13 */
c@409 14
c@409 15 #include "kiss_fftr.h"
c@409 16 #include "_kiss_fft_guts.h"
c@409 17
c@409 18 struct kiss_fftr_state{
c@409 19 kiss_fft_cfg substate;
c@409 20 kiss_fft_cpx * tmpbuf;
c@409 21 kiss_fft_cpx * super_twiddles;
c@409 22 #ifdef USE_SIMD
c@409 23 void * pad;
c@409 24 #endif
c@409 25 };
c@409 26
c@409 27 kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
c@409 28 {
c@409 29 int i;
c@409 30 kiss_fftr_cfg st = NULL;
c@409 31 size_t subsize, memneeded;
c@409 32
c@409 33 if (nfft & 1) {
c@409 34 fprintf(stderr,"Real FFT optimization must be even.\n");
c@409 35 return NULL;
c@409 36 }
c@409 37 nfft >>= 1;
c@409 38
c@409 39 kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
c@409 40 memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2);
c@409 41
c@409 42 if (lenmem == NULL) {
c@409 43 st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded);
c@409 44 } else {
c@409 45 if (*lenmem >= memneeded)
c@409 46 st = (kiss_fftr_cfg) mem;
c@409 47 *lenmem = memneeded;
c@409 48 }
c@409 49 if (!st)
c@409 50 return NULL;
c@409 51
c@409 52 st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
c@409 53 st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
c@409 54 st->super_twiddles = st->tmpbuf + nfft;
c@409 55 kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
c@409 56
c@409 57 for (i = 0; i < nfft/2; ++i) {
c@409 58 double phase =
c@409 59 -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5);
c@409 60 if (inverse_fft)
c@409 61 phase *= -1;
c@409 62 kf_cexp (st->super_twiddles+i,phase);
c@409 63 }
c@409 64 return st;
c@409 65 }
c@409 66
c@409 67 void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
c@409 68 {
c@409 69 /* input buffer timedata is stored row-wise */
c@409 70 int k,ncfft;
c@409 71 kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
c@409 72
c@409 73 if ( st->substate->inverse) {
c@409 74 fprintf(stderr,"kiss fft usage error: improper alloc\n");
c@409 75 exit(1);
c@409 76 }
c@409 77
c@409 78 ncfft = st->substate->nfft;
c@409 79
c@409 80 /*perform the parallel fft of two real signals packed in real,imag*/
c@409 81 kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
c@409 82 /* The real part of the DC element of the frequency spectrum in st->tmpbuf
c@409 83 * contains the sum of the even-numbered elements of the input time sequence
c@409 84 * The imag part is the sum of the odd-numbered elements
c@409 85 *
c@409 86 * The sum of tdc.r and tdc.i is the sum of the input time sequence.
c@409 87 * yielding DC of input time sequence
c@409 88 * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
c@409 89 * yielding Nyquist bin of input time sequence
c@409 90 */
c@409 91
c@409 92 tdc.r = st->tmpbuf[0].r;
c@409 93 tdc.i = st->tmpbuf[0].i;
c@409 94 C_FIXDIV(tdc,2);
c@409 95 CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
c@409 96 CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
c@409 97 freqdata[0].r = tdc.r + tdc.i;
c@409 98 freqdata[ncfft].r = tdc.r - tdc.i;
c@409 99 #ifdef USE_SIMD
c@409 100 freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
c@409 101 #else
c@409 102 freqdata[ncfft].i = freqdata[0].i = 0;
c@409 103 #endif
c@409 104
c@409 105 for ( k=1;k <= ncfft/2 ; ++k ) {
c@409 106 fpk = st->tmpbuf[k];
c@409 107 fpnk.r = st->tmpbuf[ncfft-k].r;
c@409 108 fpnk.i = - st->tmpbuf[ncfft-k].i;
c@409 109 C_FIXDIV(fpk,2);
c@409 110 C_FIXDIV(fpnk,2);
c@409 111
c@409 112 C_ADD( f1k, fpk , fpnk );
c@409 113 C_SUB( f2k, fpk , fpnk );
c@409 114 C_MUL( tw , f2k , st->super_twiddles[k-1]);
c@409 115
c@409 116 freqdata[k].r = HALF_OF(f1k.r + tw.r);
c@409 117 freqdata[k].i = HALF_OF(f1k.i + tw.i);
c@409 118 freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
c@409 119 freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
c@409 120 }
c@409 121 }
c@409 122
c@409 123 void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
c@409 124 {
c@409 125 /* input buffer timedata is stored row-wise */
c@409 126 int k, ncfft;
c@409 127
c@409 128 if (st->substate->inverse == 0) {
c@409 129 fprintf (stderr, "kiss fft usage error: improper alloc\n");
c@409 130 exit (1);
c@409 131 }
c@409 132
c@409 133 ncfft = st->substate->nfft;
c@409 134
c@409 135 st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
c@409 136 st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
c@409 137 C_FIXDIV(st->tmpbuf[0],2);
c@409 138
c@409 139 for (k = 1; k <= ncfft / 2; ++k) {
c@409 140 kiss_fft_cpx fk, fnkc, fek, fok, tmp;
c@409 141 fk = freqdata[k];
c@409 142 fnkc.r = freqdata[ncfft - k].r;
c@409 143 fnkc.i = -freqdata[ncfft - k].i;
c@409 144 C_FIXDIV( fk , 2 );
c@409 145 C_FIXDIV( fnkc , 2 );
c@409 146
c@409 147 C_ADD (fek, fk, fnkc);
c@409 148 C_SUB (tmp, fk, fnkc);
c@409 149 C_MUL (fok, tmp, st->super_twiddles[k-1]);
c@409 150 C_ADD (st->tmpbuf[k], fek, fok);
c@409 151 C_SUB (st->tmpbuf[ncfft - k], fek, fok);
c@409 152 #ifdef USE_SIMD
c@409 153 st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
c@409 154 #else
c@409 155 st->tmpbuf[ncfft - k].i *= -1;
c@409 156 #endif
c@409 157 }
c@409 158 kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
c@409 159 }