cannam@154
|
1 /*Copyright (c) 2003-2004, Mark Borgerding
|
cannam@154
|
2 Lots of modifications by Jean-Marc Valin
|
cannam@154
|
3 Copyright (c) 2005-2007, Xiph.Org Foundation
|
cannam@154
|
4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
|
cannam@154
|
5
|
cannam@154
|
6 All rights reserved.
|
cannam@154
|
7
|
cannam@154
|
8 Redistribution and use in source and binary forms, with or without
|
cannam@154
|
9 modification, are permitted provided that the following conditions are met:
|
cannam@154
|
10
|
cannam@154
|
11 * Redistributions of source code must retain the above copyright notice,
|
cannam@154
|
12 this list of conditions and the following disclaimer.
|
cannam@154
|
13 * Redistributions in binary form must reproduce the above copyright notice,
|
cannam@154
|
14 this list of conditions and the following disclaimer in the
|
cannam@154
|
15 documentation and/or other materials provided with the distribution.
|
cannam@154
|
16
|
cannam@154
|
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
cannam@154
|
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
cannam@154
|
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
cannam@154
|
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
cannam@154
|
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
cannam@154
|
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
cannam@154
|
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
cannam@154
|
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
cannam@154
|
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
cannam@154
|
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
cannam@154
|
27 POSSIBILITY OF SUCH DAMAGE.*/
|
cannam@154
|
28
|
cannam@154
|
29 /* This code is originally from Mark Borgerding's KISS-FFT but has been
|
cannam@154
|
30 heavily modified to better suit Opus */
|
cannam@154
|
31
|
cannam@154
|
32 #ifndef SKIP_CONFIG_H
|
cannam@154
|
33 # ifdef HAVE_CONFIG_H
|
cannam@154
|
34 # include "config.h"
|
cannam@154
|
35 # endif
|
cannam@154
|
36 #endif
|
cannam@154
|
37
|
cannam@154
|
38 #include "_kiss_fft_guts.h"
|
cannam@154
|
39 #include "arch.h"
|
cannam@154
|
40 #include "os_support.h"
|
cannam@154
|
41 #include "mathops.h"
|
cannam@154
|
42 #include "stack_alloc.h"
|
cannam@154
|
43
|
cannam@154
|
44 /* The guts header contains all the multiplication and addition macros that are defined for
|
cannam@154
|
45 complex numbers. It also delares the kf_ internal functions.
|
cannam@154
|
46 */
|
cannam@154
|
47
|
cannam@154
|
48 static void kf_bfly2(
|
cannam@154
|
49 kiss_fft_cpx * Fout,
|
cannam@154
|
50 int m,
|
cannam@154
|
51 int N
|
cannam@154
|
52 )
|
cannam@154
|
53 {
|
cannam@154
|
54 kiss_fft_cpx * Fout2;
|
cannam@154
|
55 int i;
|
cannam@154
|
56 (void)m;
|
cannam@154
|
57 #ifdef CUSTOM_MODES
|
cannam@154
|
58 if (m==1)
|
cannam@154
|
59 {
|
cannam@154
|
60 celt_assert(m==1);
|
cannam@154
|
61 for (i=0;i<N;i++)
|
cannam@154
|
62 {
|
cannam@154
|
63 kiss_fft_cpx t;
|
cannam@154
|
64 Fout2 = Fout + 1;
|
cannam@154
|
65 t = *Fout2;
|
cannam@154
|
66 C_SUB( *Fout2 , *Fout , t );
|
cannam@154
|
67 C_ADDTO( *Fout , t );
|
cannam@154
|
68 Fout += 2;
|
cannam@154
|
69 }
|
cannam@154
|
70 } else
|
cannam@154
|
71 #endif
|
cannam@154
|
72 {
|
cannam@154
|
73 opus_val16 tw;
|
cannam@154
|
74 tw = QCONST16(0.7071067812f, 15);
|
cannam@154
|
75 /* We know that m==4 here because the radix-2 is just after a radix-4 */
|
cannam@154
|
76 celt_assert(m==4);
|
cannam@154
|
77 for (i=0;i<N;i++)
|
cannam@154
|
78 {
|
cannam@154
|
79 kiss_fft_cpx t;
|
cannam@154
|
80 Fout2 = Fout + 4;
|
cannam@154
|
81 t = Fout2[0];
|
cannam@154
|
82 C_SUB( Fout2[0] , Fout[0] , t );
|
cannam@154
|
83 C_ADDTO( Fout[0] , t );
|
cannam@154
|
84
|
cannam@154
|
85 t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
|
cannam@154
|
86 t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
|
cannam@154
|
87 C_SUB( Fout2[1] , Fout[1] , t );
|
cannam@154
|
88 C_ADDTO( Fout[1] , t );
|
cannam@154
|
89
|
cannam@154
|
90 t.r = Fout2[2].i;
|
cannam@154
|
91 t.i = -Fout2[2].r;
|
cannam@154
|
92 C_SUB( Fout2[2] , Fout[2] , t );
|
cannam@154
|
93 C_ADDTO( Fout[2] , t );
|
cannam@154
|
94
|
cannam@154
|
95 t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
|
cannam@154
|
96 t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
|
cannam@154
|
97 C_SUB( Fout2[3] , Fout[3] , t );
|
cannam@154
|
98 C_ADDTO( Fout[3] , t );
|
cannam@154
|
99 Fout += 8;
|
cannam@154
|
100 }
|
cannam@154
|
101 }
|
cannam@154
|
102 }
|
cannam@154
|
103
|
cannam@154
|
104 static void kf_bfly4(
|
cannam@154
|
105 kiss_fft_cpx * Fout,
|
cannam@154
|
106 const size_t fstride,
|
cannam@154
|
107 const kiss_fft_state *st,
|
cannam@154
|
108 int m,
|
cannam@154
|
109 int N,
|
cannam@154
|
110 int mm
|
cannam@154
|
111 )
|
cannam@154
|
112 {
|
cannam@154
|
113 int i;
|
cannam@154
|
114
|
cannam@154
|
115 if (m==1)
|
cannam@154
|
116 {
|
cannam@154
|
117 /* Degenerate case where all the twiddles are 1. */
|
cannam@154
|
118 for (i=0;i<N;i++)
|
cannam@154
|
119 {
|
cannam@154
|
120 kiss_fft_cpx scratch0, scratch1;
|
cannam@154
|
121
|
cannam@154
|
122 C_SUB( scratch0 , *Fout, Fout[2] );
|
cannam@154
|
123 C_ADDTO(*Fout, Fout[2]);
|
cannam@154
|
124 C_ADD( scratch1 , Fout[1] , Fout[3] );
|
cannam@154
|
125 C_SUB( Fout[2], *Fout, scratch1 );
|
cannam@154
|
126 C_ADDTO( *Fout , scratch1 );
|
cannam@154
|
127 C_SUB( scratch1 , Fout[1] , Fout[3] );
|
cannam@154
|
128
|
cannam@154
|
129 Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
|
cannam@154
|
130 Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
|
cannam@154
|
131 Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
|
cannam@154
|
132 Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
|
cannam@154
|
133 Fout+=4;
|
cannam@154
|
134 }
|
cannam@154
|
135 } else {
|
cannam@154
|
136 int j;
|
cannam@154
|
137 kiss_fft_cpx scratch[6];
|
cannam@154
|
138 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
|
cannam@154
|
139 const int m2=2*m;
|
cannam@154
|
140 const int m3=3*m;
|
cannam@154
|
141 kiss_fft_cpx * Fout_beg = Fout;
|
cannam@154
|
142 for (i=0;i<N;i++)
|
cannam@154
|
143 {
|
cannam@154
|
144 Fout = Fout_beg + i*mm;
|
cannam@154
|
145 tw3 = tw2 = tw1 = st->twiddles;
|
cannam@154
|
146 /* m is guaranteed to be a multiple of 4. */
|
cannam@154
|
147 for (j=0;j<m;j++)
|
cannam@154
|
148 {
|
cannam@154
|
149 C_MUL(scratch[0],Fout[m] , *tw1 );
|
cannam@154
|
150 C_MUL(scratch[1],Fout[m2] , *tw2 );
|
cannam@154
|
151 C_MUL(scratch[2],Fout[m3] , *tw3 );
|
cannam@154
|
152
|
cannam@154
|
153 C_SUB( scratch[5] , *Fout, scratch[1] );
|
cannam@154
|
154 C_ADDTO(*Fout, scratch[1]);
|
cannam@154
|
155 C_ADD( scratch[3] , scratch[0] , scratch[2] );
|
cannam@154
|
156 C_SUB( scratch[4] , scratch[0] , scratch[2] );
|
cannam@154
|
157 C_SUB( Fout[m2], *Fout, scratch[3] );
|
cannam@154
|
158 tw1 += fstride;
|
cannam@154
|
159 tw2 += fstride*2;
|
cannam@154
|
160 tw3 += fstride*3;
|
cannam@154
|
161 C_ADDTO( *Fout , scratch[3] );
|
cannam@154
|
162
|
cannam@154
|
163 Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
|
cannam@154
|
164 Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
|
cannam@154
|
165 Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
|
cannam@154
|
166 Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
|
cannam@154
|
167 ++Fout;
|
cannam@154
|
168 }
|
cannam@154
|
169 }
|
cannam@154
|
170 }
|
cannam@154
|
171 }
|
cannam@154
|
172
|
cannam@154
|
173
|
cannam@154
|
174 #ifndef RADIX_TWO_ONLY
|
cannam@154
|
175
|
cannam@154
|
176 static void kf_bfly3(
|
cannam@154
|
177 kiss_fft_cpx * Fout,
|
cannam@154
|
178 const size_t fstride,
|
cannam@154
|
179 const kiss_fft_state *st,
|
cannam@154
|
180 int m,
|
cannam@154
|
181 int N,
|
cannam@154
|
182 int mm
|
cannam@154
|
183 )
|
cannam@154
|
184 {
|
cannam@154
|
185 int i;
|
cannam@154
|
186 size_t k;
|
cannam@154
|
187 const size_t m2 = 2*m;
|
cannam@154
|
188 const kiss_twiddle_cpx *tw1,*tw2;
|
cannam@154
|
189 kiss_fft_cpx scratch[5];
|
cannam@154
|
190 kiss_twiddle_cpx epi3;
|
cannam@154
|
191
|
cannam@154
|
192 kiss_fft_cpx * Fout_beg = Fout;
|
cannam@154
|
193 #ifdef FIXED_POINT
|
cannam@154
|
194 /*epi3.r = -16384;*/ /* Unused */
|
cannam@154
|
195 epi3.i = -28378;
|
cannam@154
|
196 #else
|
cannam@154
|
197 epi3 = st->twiddles[fstride*m];
|
cannam@154
|
198 #endif
|
cannam@154
|
199 for (i=0;i<N;i++)
|
cannam@154
|
200 {
|
cannam@154
|
201 Fout = Fout_beg + i*mm;
|
cannam@154
|
202 tw1=tw2=st->twiddles;
|
cannam@154
|
203 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
|
cannam@154
|
204 k=m;
|
cannam@154
|
205 do {
|
cannam@154
|
206
|
cannam@154
|
207 C_MUL(scratch[1],Fout[m] , *tw1);
|
cannam@154
|
208 C_MUL(scratch[2],Fout[m2] , *tw2);
|
cannam@154
|
209
|
cannam@154
|
210 C_ADD(scratch[3],scratch[1],scratch[2]);
|
cannam@154
|
211 C_SUB(scratch[0],scratch[1],scratch[2]);
|
cannam@154
|
212 tw1 += fstride;
|
cannam@154
|
213 tw2 += fstride*2;
|
cannam@154
|
214
|
cannam@154
|
215 Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
|
cannam@154
|
216 Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
|
cannam@154
|
217
|
cannam@154
|
218 C_MULBYSCALAR( scratch[0] , epi3.i );
|
cannam@154
|
219
|
cannam@154
|
220 C_ADDTO(*Fout,scratch[3]);
|
cannam@154
|
221
|
cannam@154
|
222 Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
|
cannam@154
|
223 Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
|
cannam@154
|
224
|
cannam@154
|
225 Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
|
cannam@154
|
226 Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
|
cannam@154
|
227
|
cannam@154
|
228 ++Fout;
|
cannam@154
|
229 } while(--k);
|
cannam@154
|
230 }
|
cannam@154
|
231 }
|
cannam@154
|
232
|
cannam@154
|
233
|
cannam@154
|
234 #ifndef OVERRIDE_kf_bfly5
|
cannam@154
|
235 static void kf_bfly5(
|
cannam@154
|
236 kiss_fft_cpx * Fout,
|
cannam@154
|
237 const size_t fstride,
|
cannam@154
|
238 const kiss_fft_state *st,
|
cannam@154
|
239 int m,
|
cannam@154
|
240 int N,
|
cannam@154
|
241 int mm
|
cannam@154
|
242 )
|
cannam@154
|
243 {
|
cannam@154
|
244 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
|
cannam@154
|
245 int i, u;
|
cannam@154
|
246 kiss_fft_cpx scratch[13];
|
cannam@154
|
247 const kiss_twiddle_cpx *tw;
|
cannam@154
|
248 kiss_twiddle_cpx ya,yb;
|
cannam@154
|
249 kiss_fft_cpx * Fout_beg = Fout;
|
cannam@154
|
250
|
cannam@154
|
251 #ifdef FIXED_POINT
|
cannam@154
|
252 ya.r = 10126;
|
cannam@154
|
253 ya.i = -31164;
|
cannam@154
|
254 yb.r = -26510;
|
cannam@154
|
255 yb.i = -19261;
|
cannam@154
|
256 #else
|
cannam@154
|
257 ya = st->twiddles[fstride*m];
|
cannam@154
|
258 yb = st->twiddles[fstride*2*m];
|
cannam@154
|
259 #endif
|
cannam@154
|
260 tw=st->twiddles;
|
cannam@154
|
261
|
cannam@154
|
262 for (i=0;i<N;i++)
|
cannam@154
|
263 {
|
cannam@154
|
264 Fout = Fout_beg + i*mm;
|
cannam@154
|
265 Fout0=Fout;
|
cannam@154
|
266 Fout1=Fout0+m;
|
cannam@154
|
267 Fout2=Fout0+2*m;
|
cannam@154
|
268 Fout3=Fout0+3*m;
|
cannam@154
|
269 Fout4=Fout0+4*m;
|
cannam@154
|
270
|
cannam@154
|
271 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
|
cannam@154
|
272 for ( u=0; u<m; ++u ) {
|
cannam@154
|
273 scratch[0] = *Fout0;
|
cannam@154
|
274
|
cannam@154
|
275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
|
cannam@154
|
276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
|
cannam@154
|
277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
|
cannam@154
|
278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
|
cannam@154
|
279
|
cannam@154
|
280 C_ADD( scratch[7],scratch[1],scratch[4]);
|
cannam@154
|
281 C_SUB( scratch[10],scratch[1],scratch[4]);
|
cannam@154
|
282 C_ADD( scratch[8],scratch[2],scratch[3]);
|
cannam@154
|
283 C_SUB( scratch[9],scratch[2],scratch[3]);
|
cannam@154
|
284
|
cannam@154
|
285 Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
|
cannam@154
|
286 Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
|
cannam@154
|
287
|
cannam@154
|
288 scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
|
cannam@154
|
289 scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
|
cannam@154
|
290
|
cannam@154
|
291 scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
|
cannam@154
|
292 scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
|
cannam@154
|
293
|
cannam@154
|
294 C_SUB(*Fout1,scratch[5],scratch[6]);
|
cannam@154
|
295 C_ADD(*Fout4,scratch[5],scratch[6]);
|
cannam@154
|
296
|
cannam@154
|
297 scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
|
cannam@154
|
298 scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
|
cannam@154
|
299 scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
|
cannam@154
|
300 scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
|
cannam@154
|
301
|
cannam@154
|
302 C_ADD(*Fout2,scratch[11],scratch[12]);
|
cannam@154
|
303 C_SUB(*Fout3,scratch[11],scratch[12]);
|
cannam@154
|
304
|
cannam@154
|
305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
|
cannam@154
|
306 }
|
cannam@154
|
307 }
|
cannam@154
|
308 }
|
cannam@154
|
309 #endif /* OVERRIDE_kf_bfly5 */
|
cannam@154
|
310
|
cannam@154
|
311
|
cannam@154
|
312 #endif
|
cannam@154
|
313
|
cannam@154
|
314
|
cannam@154
|
315 #ifdef CUSTOM_MODES
|
cannam@154
|
316
|
cannam@154
|
317 static
|
cannam@154
|
318 void compute_bitrev_table(
|
cannam@154
|
319 int Fout,
|
cannam@154
|
320 opus_int16 *f,
|
cannam@154
|
321 const size_t fstride,
|
cannam@154
|
322 int in_stride,
|
cannam@154
|
323 opus_int16 * factors,
|
cannam@154
|
324 const kiss_fft_state *st
|
cannam@154
|
325 )
|
cannam@154
|
326 {
|
cannam@154
|
327 const int p=*factors++; /* the radix */
|
cannam@154
|
328 const int m=*factors++; /* stage's fft length/p */
|
cannam@154
|
329
|
cannam@154
|
330 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
|
cannam@154
|
331 if (m==1)
|
cannam@154
|
332 {
|
cannam@154
|
333 int j;
|
cannam@154
|
334 for (j=0;j<p;j++)
|
cannam@154
|
335 {
|
cannam@154
|
336 *f = Fout+j;
|
cannam@154
|
337 f += fstride*in_stride;
|
cannam@154
|
338 }
|
cannam@154
|
339 } else {
|
cannam@154
|
340 int j;
|
cannam@154
|
341 for (j=0;j<p;j++)
|
cannam@154
|
342 {
|
cannam@154
|
343 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
|
cannam@154
|
344 f += fstride*in_stride;
|
cannam@154
|
345 Fout += m;
|
cannam@154
|
346 }
|
cannam@154
|
347 }
|
cannam@154
|
348 }
|
cannam@154
|
349
|
cannam@154
|
350 /* facbuf is populated by p1,m1,p2,m2, ...
|
cannam@154
|
351 where
|
cannam@154
|
352 p[i] * m[i] = m[i-1]
|
cannam@154
|
353 m0 = n */
|
cannam@154
|
354 static
|
cannam@154
|
355 int kf_factor(int n,opus_int16 * facbuf)
|
cannam@154
|
356 {
|
cannam@154
|
357 int p=4;
|
cannam@154
|
358 int i;
|
cannam@154
|
359 int stages=0;
|
cannam@154
|
360 int nbak = n;
|
cannam@154
|
361
|
cannam@154
|
362 /*factor out powers of 4, powers of 2, then any remaining primes */
|
cannam@154
|
363 do {
|
cannam@154
|
364 while (n % p) {
|
cannam@154
|
365 switch (p) {
|
cannam@154
|
366 case 4: p = 2; break;
|
cannam@154
|
367 case 2: p = 3; break;
|
cannam@154
|
368 default: p += 2; break;
|
cannam@154
|
369 }
|
cannam@154
|
370 if (p>32000 || (opus_int32)p*(opus_int32)p > n)
|
cannam@154
|
371 p = n; /* no more factors, skip to end */
|
cannam@154
|
372 }
|
cannam@154
|
373 n /= p;
|
cannam@154
|
374 #ifdef RADIX_TWO_ONLY
|
cannam@154
|
375 if (p!=2 && p != 4)
|
cannam@154
|
376 #else
|
cannam@154
|
377 if (p>5)
|
cannam@154
|
378 #endif
|
cannam@154
|
379 {
|
cannam@154
|
380 return 0;
|
cannam@154
|
381 }
|
cannam@154
|
382 facbuf[2*stages] = p;
|
cannam@154
|
383 if (p==2 && stages > 1)
|
cannam@154
|
384 {
|
cannam@154
|
385 facbuf[2*stages] = 4;
|
cannam@154
|
386 facbuf[2] = 2;
|
cannam@154
|
387 }
|
cannam@154
|
388 stages++;
|
cannam@154
|
389 } while (n > 1);
|
cannam@154
|
390 n = nbak;
|
cannam@154
|
391 /* Reverse the order to get the radix 4 at the end, so we can use the
|
cannam@154
|
392 fast degenerate case. It turns out that reversing the order also
|
cannam@154
|
393 improves the noise behaviour. */
|
cannam@154
|
394 for (i=0;i<stages/2;i++)
|
cannam@154
|
395 {
|
cannam@154
|
396 int tmp;
|
cannam@154
|
397 tmp = facbuf[2*i];
|
cannam@154
|
398 facbuf[2*i] = facbuf[2*(stages-i-1)];
|
cannam@154
|
399 facbuf[2*(stages-i-1)] = tmp;
|
cannam@154
|
400 }
|
cannam@154
|
401 for (i=0;i<stages;i++)
|
cannam@154
|
402 {
|
cannam@154
|
403 n /= facbuf[2*i];
|
cannam@154
|
404 facbuf[2*i+1] = n;
|
cannam@154
|
405 }
|
cannam@154
|
406 return 1;
|
cannam@154
|
407 }
|
cannam@154
|
408
|
cannam@154
|
409 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
|
cannam@154
|
410 {
|
cannam@154
|
411 int i;
|
cannam@154
|
412 #ifdef FIXED_POINT
|
cannam@154
|
413 for (i=0;i<nfft;++i) {
|
cannam@154
|
414 opus_val32 phase = -i;
|
cannam@154
|
415 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
|
cannam@154
|
416 }
|
cannam@154
|
417 #else
|
cannam@154
|
418 for (i=0;i<nfft;++i) {
|
cannam@154
|
419 const double pi=3.14159265358979323846264338327;
|
cannam@154
|
420 double phase = ( -2*pi /nfft ) * i;
|
cannam@154
|
421 kf_cexp(twiddles+i, phase );
|
cannam@154
|
422 }
|
cannam@154
|
423 #endif
|
cannam@154
|
424 }
|
cannam@154
|
425
|
cannam@154
|
426 int opus_fft_alloc_arch_c(kiss_fft_state *st) {
|
cannam@154
|
427 (void)st;
|
cannam@154
|
428 return 0;
|
cannam@154
|
429 }
|
cannam@154
|
430
|
cannam@154
|
431 /*
|
cannam@154
|
432 *
|
cannam@154
|
433 * Allocates all necessary storage space for the fft and ifft.
|
cannam@154
|
434 * The return value is a contiguous block of memory. As such,
|
cannam@154
|
435 * It can be freed with free().
|
cannam@154
|
436 * */
|
cannam@154
|
437 kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
|
cannam@154
|
438 const kiss_fft_state *base, int arch)
|
cannam@154
|
439 {
|
cannam@154
|
440 kiss_fft_state *st=NULL;
|
cannam@154
|
441 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
|
cannam@154
|
442
|
cannam@154
|
443 if ( lenmem==NULL ) {
|
cannam@154
|
444 st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
|
cannam@154
|
445 }else{
|
cannam@154
|
446 if (mem != NULL && *lenmem >= memneeded)
|
cannam@154
|
447 st = (kiss_fft_state*)mem;
|
cannam@154
|
448 *lenmem = memneeded;
|
cannam@154
|
449 }
|
cannam@154
|
450 if (st) {
|
cannam@154
|
451 opus_int16 *bitrev;
|
cannam@154
|
452 kiss_twiddle_cpx *twiddles;
|
cannam@154
|
453
|
cannam@154
|
454 st->nfft=nfft;
|
cannam@154
|
455 #ifdef FIXED_POINT
|
cannam@154
|
456 st->scale_shift = celt_ilog2(st->nfft);
|
cannam@154
|
457 if (st->nfft == 1<<st->scale_shift)
|
cannam@154
|
458 st->scale = Q15ONE;
|
cannam@154
|
459 else
|
cannam@154
|
460 st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
|
cannam@154
|
461 #else
|
cannam@154
|
462 st->scale = 1.f/nfft;
|
cannam@154
|
463 #endif
|
cannam@154
|
464 if (base != NULL)
|
cannam@154
|
465 {
|
cannam@154
|
466 st->twiddles = base->twiddles;
|
cannam@154
|
467 st->shift = 0;
|
cannam@154
|
468 while (st->shift < 32 && nfft<<st->shift != base->nfft)
|
cannam@154
|
469 st->shift++;
|
cannam@154
|
470 if (st->shift>=32)
|
cannam@154
|
471 goto fail;
|
cannam@154
|
472 } else {
|
cannam@154
|
473 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
|
cannam@154
|
474 compute_twiddles(twiddles, nfft);
|
cannam@154
|
475 st->shift = -1;
|
cannam@154
|
476 }
|
cannam@154
|
477 if (!kf_factor(nfft,st->factors))
|
cannam@154
|
478 {
|
cannam@154
|
479 goto fail;
|
cannam@154
|
480 }
|
cannam@154
|
481
|
cannam@154
|
482 /* bitrev */
|
cannam@154
|
483 st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
|
cannam@154
|
484 if (st->bitrev==NULL)
|
cannam@154
|
485 goto fail;
|
cannam@154
|
486 compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
|
cannam@154
|
487
|
cannam@154
|
488 /* Initialize architecture specific fft parameters */
|
cannam@154
|
489 if (opus_fft_alloc_arch(st, arch))
|
cannam@154
|
490 goto fail;
|
cannam@154
|
491 }
|
cannam@154
|
492 return st;
|
cannam@154
|
493 fail:
|
cannam@154
|
494 opus_fft_free(st, arch);
|
cannam@154
|
495 return NULL;
|
cannam@154
|
496 }
|
cannam@154
|
497
|
cannam@154
|
498 kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
|
cannam@154
|
499 {
|
cannam@154
|
500 return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
|
cannam@154
|
501 }
|
cannam@154
|
502
|
cannam@154
|
503 void opus_fft_free_arch_c(kiss_fft_state *st) {
|
cannam@154
|
504 (void)st;
|
cannam@154
|
505 }
|
cannam@154
|
506
|
cannam@154
|
507 void opus_fft_free(const kiss_fft_state *cfg, int arch)
|
cannam@154
|
508 {
|
cannam@154
|
509 if (cfg)
|
cannam@154
|
510 {
|
cannam@154
|
511 opus_fft_free_arch((kiss_fft_state *)cfg, arch);
|
cannam@154
|
512 opus_free((opus_int16*)cfg->bitrev);
|
cannam@154
|
513 if (cfg->shift < 0)
|
cannam@154
|
514 opus_free((kiss_twiddle_cpx*)cfg->twiddles);
|
cannam@154
|
515 opus_free((kiss_fft_state*)cfg);
|
cannam@154
|
516 }
|
cannam@154
|
517 }
|
cannam@154
|
518
|
cannam@154
|
519 #endif /* CUSTOM_MODES */
|
cannam@154
|
520
|
cannam@154
|
521 void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
|
cannam@154
|
522 {
|
cannam@154
|
523 int m2, m;
|
cannam@154
|
524 int p;
|
cannam@154
|
525 int L;
|
cannam@154
|
526 int fstride[MAXFACTORS];
|
cannam@154
|
527 int i;
|
cannam@154
|
528 int shift;
|
cannam@154
|
529
|
cannam@154
|
530 /* st->shift can be -1 */
|
cannam@154
|
531 shift = st->shift>0 ? st->shift : 0;
|
cannam@154
|
532
|
cannam@154
|
533 fstride[0] = 1;
|
cannam@154
|
534 L=0;
|
cannam@154
|
535 do {
|
cannam@154
|
536 p = st->factors[2*L];
|
cannam@154
|
537 m = st->factors[2*L+1];
|
cannam@154
|
538 fstride[L+1] = fstride[L]*p;
|
cannam@154
|
539 L++;
|
cannam@154
|
540 } while(m!=1);
|
cannam@154
|
541 m = st->factors[2*L-1];
|
cannam@154
|
542 for (i=L-1;i>=0;i--)
|
cannam@154
|
543 {
|
cannam@154
|
544 if (i!=0)
|
cannam@154
|
545 m2 = st->factors[2*i-1];
|
cannam@154
|
546 else
|
cannam@154
|
547 m2 = 1;
|
cannam@154
|
548 switch (st->factors[2*i])
|
cannam@154
|
549 {
|
cannam@154
|
550 case 2:
|
cannam@154
|
551 kf_bfly2(fout, m, fstride[i]);
|
cannam@154
|
552 break;
|
cannam@154
|
553 case 4:
|
cannam@154
|
554 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
|
cannam@154
|
555 break;
|
cannam@154
|
556 #ifndef RADIX_TWO_ONLY
|
cannam@154
|
557 case 3:
|
cannam@154
|
558 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
|
cannam@154
|
559 break;
|
cannam@154
|
560 case 5:
|
cannam@154
|
561 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
|
cannam@154
|
562 break;
|
cannam@154
|
563 #endif
|
cannam@154
|
564 }
|
cannam@154
|
565 m = m2;
|
cannam@154
|
566 }
|
cannam@154
|
567 }
|
cannam@154
|
568
|
cannam@154
|
569 void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
|
cannam@154
|
570 {
|
cannam@154
|
571 int i;
|
cannam@154
|
572 opus_val16 scale;
|
cannam@154
|
573 #ifdef FIXED_POINT
|
cannam@154
|
574 /* Allows us to scale with MULT16_32_Q16(), which is faster than
|
cannam@154
|
575 MULT16_32_Q15() on ARM. */
|
cannam@154
|
576 int scale_shift = st->scale_shift-1;
|
cannam@154
|
577 #endif
|
cannam@154
|
578 scale = st->scale;
|
cannam@154
|
579
|
cannam@154
|
580 celt_assert2 (fin != fout, "In-place FFT not supported");
|
cannam@154
|
581 /* Bit-reverse the input */
|
cannam@154
|
582 for (i=0;i<st->nfft;i++)
|
cannam@154
|
583 {
|
cannam@154
|
584 kiss_fft_cpx x = fin[i];
|
cannam@154
|
585 fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
|
cannam@154
|
586 fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
|
cannam@154
|
587 }
|
cannam@154
|
588 opus_fft_impl(st, fout);
|
cannam@154
|
589 }
|
cannam@154
|
590
|
cannam@154
|
591
|
cannam@154
|
592 void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
|
cannam@154
|
593 {
|
cannam@154
|
594 int i;
|
cannam@154
|
595 celt_assert2 (fin != fout, "In-place FFT not supported");
|
cannam@154
|
596 /* Bit-reverse the input */
|
cannam@154
|
597 for (i=0;i<st->nfft;i++)
|
cannam@154
|
598 fout[st->bitrev[i]] = fin[i];
|
cannam@154
|
599 for (i=0;i<st->nfft;i++)
|
cannam@154
|
600 fout[i].i = -fout[i].i;
|
cannam@154
|
601 opus_fft_impl(st, fout);
|
cannam@154
|
602 for (i=0;i<st->nfft;i++)
|
cannam@154
|
603 fout[i].i = -fout[i].i;
|
cannam@154
|
604 }
|