annotate ext/kissfft/tools/kiss_fftnd.c @ 409:1f1999b0f577

Bring in kissfft into this repo (formerly a subrepo, but the remote is not responding)
author Chris Cannam <c.cannam@qmul.ac.uk>
date Tue, 21 Jul 2015 07:34:15 +0100
parents
children
rev   line source
c@409 1
c@409 2
c@409 3 /*
c@409 4 Copyright (c) 2003-2004, Mark Borgerding
c@409 5
c@409 6 All rights reserved.
c@409 7
c@409 8 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
c@409 9
c@409 10 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
c@409 11 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
c@409 12 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
c@409 13
c@409 14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
c@409 15 */
c@409 16
c@409 17 #include "kiss_fftnd.h"
c@409 18 #include "_kiss_fft_guts.h"
c@409 19
c@409 20 struct kiss_fftnd_state{
c@409 21 int dimprod; /* dimsum would be mighty tasty right now */
c@409 22 int ndims;
c@409 23 int *dims;
c@409 24 kiss_fft_cfg *states; /* cfg states for each dimension */
c@409 25 kiss_fft_cpx * tmpbuf; /*buffer capable of hold the entire input */
c@409 26 };
c@409 27
c@409 28 kiss_fftnd_cfg kiss_fftnd_alloc(const int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem)
c@409 29 {
c@409 30 kiss_fftnd_cfg st = NULL;
c@409 31 int i;
c@409 32 int dimprod=1;
c@409 33 size_t memneeded = sizeof(struct kiss_fftnd_state);
c@409 34 char * ptr;
c@409 35
c@409 36 for (i=0;i<ndims;++i) {
c@409 37 size_t sublen=0;
c@409 38 kiss_fft_alloc (dims[i], inverse_fft, NULL, &sublen);
c@409 39 memneeded += sublen; /* st->states[i] */
c@409 40 dimprod *= dims[i];
c@409 41 }
c@409 42 memneeded += sizeof(int) * ndims;/* st->dims */
c@409 43 memneeded += sizeof(void*) * ndims;/* st->states */
c@409 44 memneeded += sizeof(kiss_fft_cpx) * dimprod; /* st->tmpbuf */
c@409 45
c@409 46 if (lenmem == NULL) {/* allocate for the caller*/
c@409 47 st = (kiss_fftnd_cfg) malloc (memneeded);
c@409 48 } else { /* initialize supplied buffer if big enough */
c@409 49 if (*lenmem >= memneeded)
c@409 50 st = (kiss_fftnd_cfg) mem;
c@409 51 *lenmem = memneeded; /*tell caller how big struct is (or would be) */
c@409 52 }
c@409 53 if (!st)
c@409 54 return NULL; /*malloc failed or buffer too small */
c@409 55
c@409 56 st->dimprod = dimprod;
c@409 57 st->ndims = ndims;
c@409 58 ptr=(char*)(st+1);
c@409 59
c@409 60 st->states = (kiss_fft_cfg *)ptr;
c@409 61 ptr += sizeof(void*) * ndims;
c@409 62
c@409 63 st->dims = (int*)ptr;
c@409 64 ptr += sizeof(int) * ndims;
c@409 65
c@409 66 st->tmpbuf = (kiss_fft_cpx*)ptr;
c@409 67 ptr += sizeof(kiss_fft_cpx) * dimprod;
c@409 68
c@409 69 for (i=0;i<ndims;++i) {
c@409 70 size_t len;
c@409 71 st->dims[i] = dims[i];
c@409 72 kiss_fft_alloc (st->dims[i], inverse_fft, NULL, &len);
c@409 73 st->states[i] = kiss_fft_alloc (st->dims[i], inverse_fft, ptr,&len);
c@409 74 ptr += len;
c@409 75 }
c@409 76 /*
c@409 77 Hi there!
c@409 78
c@409 79 If you're looking at this particular code, it probably means you've got a brain-dead bounds checker
c@409 80 that thinks the above code overwrites the end of the array.
c@409 81
c@409 82 It doesn't.
c@409 83
c@409 84 -- Mark
c@409 85
c@409 86 P.S.
c@409 87 The below code might give you some warm fuzzies and help convince you.
c@409 88 */
c@409 89 if ( ptr - (char*)st != (int)memneeded ) {
c@409 90 fprintf(stderr,
c@409 91 "################################################################################\n"
c@409 92 "Internal error! Memory allocation miscalculation\n"
c@409 93 "################################################################################\n"
c@409 94 );
c@409 95 }
c@409 96 return st;
c@409 97 }
c@409 98
c@409 99 /*
c@409 100 This works by tackling one dimension at a time.
c@409 101
c@409 102 In effect,
c@409 103 Each stage starts out by reshaping the matrix into a DixSi 2d matrix.
c@409 104 A Di-sized fft is taken of each column, transposing the matrix as it goes.
c@409 105
c@409 106 Here's a 3-d example:
c@409 107 Take a 2x3x4 matrix, laid out in memory as a contiguous buffer
c@409 108 [ [ [ a b c d ] [ e f g h ] [ i j k l ] ]
c@409 109 [ [ m n o p ] [ q r s t ] [ u v w x ] ] ]
c@409 110
c@409 111 Stage 0 ( D=2): treat the buffer as a 2x12 matrix
c@409 112 [ [a b ... k l]
c@409 113 [m n ... w x] ]
c@409 114
c@409 115 FFT each column with size 2.
c@409 116 Transpose the matrix at the same time using kiss_fft_stride.
c@409 117
c@409 118 [ [ a+m a-m ]
c@409 119 [ b+n b-n]
c@409 120 ...
c@409 121 [ k+w k-w ]
c@409 122 [ l+x l-x ] ]
c@409 123
c@409 124 Note fft([x y]) == [x+y x-y]
c@409 125
c@409 126 Stage 1 ( D=3) treats the buffer (the output of stage D=2) as an 3x8 matrix,
c@409 127 [ [ a+m a-m b+n b-n c+o c-o d+p d-p ]
c@409 128 [ e+q e-q f+r f-r g+s g-s h+t h-t ]
c@409 129 [ i+u i-u j+v j-v k+w k-w l+x l-x ] ]
c@409 130
c@409 131 And perform FFTs (size=3) on each of the columns as above, transposing
c@409 132 the matrix as it goes. The output of stage 1 is
c@409 133 (Legend: ap = [ a+m e+q i+u ]
c@409 134 am = [ a-m e-q i-u ] )
c@409 135
c@409 136 [ [ sum(ap) fft(ap)[0] fft(ap)[1] ]
c@409 137 [ sum(am) fft(am)[0] fft(am)[1] ]
c@409 138 [ sum(bp) fft(bp)[0] fft(bp)[1] ]
c@409 139 [ sum(bm) fft(bm)[0] fft(bm)[1] ]
c@409 140 [ sum(cp) fft(cp)[0] fft(cp)[1] ]
c@409 141 [ sum(cm) fft(cm)[0] fft(cm)[1] ]
c@409 142 [ sum(dp) fft(dp)[0] fft(dp)[1] ]
c@409 143 [ sum(dm) fft(dm)[0] fft(dm)[1] ] ]
c@409 144
c@409 145 Stage 2 ( D=4) treats this buffer as a 4*6 matrix,
c@409 146 [ [ sum(ap) fft(ap)[0] fft(ap)[1] sum(am) fft(am)[0] fft(am)[1] ]
c@409 147 [ sum(bp) fft(bp)[0] fft(bp)[1] sum(bm) fft(bm)[0] fft(bm)[1] ]
c@409 148 [ sum(cp) fft(cp)[0] fft(cp)[1] sum(cm) fft(cm)[0] fft(cm)[1] ]
c@409 149 [ sum(dp) fft(dp)[0] fft(dp)[1] sum(dm) fft(dm)[0] fft(dm)[1] ] ]
c@409 150
c@409 151 Then FFTs each column, transposing as it goes.
c@409 152
c@409 153 The resulting matrix is the 3d FFT of the 2x3x4 input matrix.
c@409 154
c@409 155 Note as a sanity check that the first element of the final
c@409 156 stage's output (DC term) is
c@409 157 sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] )
c@409 158 , i.e. the summation of all 24 input elements.
c@409 159
c@409 160 */
c@409 161 void kiss_fftnd(kiss_fftnd_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
c@409 162 {
c@409 163 int i,k;
c@409 164 const kiss_fft_cpx * bufin=fin;
c@409 165 kiss_fft_cpx * bufout;
c@409 166
c@409 167 /*arrange it so the last bufout == fout*/
c@409 168 if ( st->ndims & 1 ) {
c@409 169 bufout = fout;
c@409 170 if (fin==fout) {
c@409 171 memcpy( st->tmpbuf, fin, sizeof(kiss_fft_cpx) * st->dimprod );
c@409 172 bufin = st->tmpbuf;
c@409 173 }
c@409 174 }else
c@409 175 bufout = st->tmpbuf;
c@409 176
c@409 177 for ( k=0; k < st->ndims; ++k) {
c@409 178 int curdim = st->dims[k];
c@409 179 int stride = st->dimprod / curdim;
c@409 180
c@409 181 for ( i=0 ; i<stride ; ++i )
c@409 182 kiss_fft_stride( st->states[k], bufin+i , bufout+i*curdim, stride );
c@409 183
c@409 184 /*toggle back and forth between the two buffers*/
c@409 185 if (bufout == st->tmpbuf){
c@409 186 bufout = fout;
c@409 187 bufin = st->tmpbuf;
c@409 188 }else{
c@409 189 bufout = st->tmpbuf;
c@409 190 bufin = fout;
c@409 191 }
c@409 192 }
c@409 193 }