Mercurial > hg > sv-dependency-builds
comparison src/opus-1.3/celt/kiss_fft.c @ 154:4664ac0c1032
Add Opus sources and macOS builds
author | Chris Cannam <cannam@all-day-breakfast.com> |
---|---|
date | Wed, 23 Jan 2019 13:48:08 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
153:84bc3a5ec321 | 154:4664ac0c1032 |
---|---|
1 /*Copyright (c) 2003-2004, Mark Borgerding | |
2 Lots of modifications by Jean-Marc Valin | |
3 Copyright (c) 2005-2007, Xiph.Org Foundation | |
4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO | |
5 | |
6 All rights reserved. | |
7 | |
8 Redistribution and use in source and binary forms, with or without | |
9 modification, are permitted provided that the following conditions are met: | |
10 | |
11 * Redistributions of source code must retain the above copyright notice, | |
12 this list of conditions and the following disclaimer. | |
13 * Redistributions in binary form must reproduce the above copyright notice, | |
14 this list of conditions and the following disclaimer in the | |
15 documentation and/or other materials provided with the distribution. | |
16 | |
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
27 POSSIBILITY OF SUCH DAMAGE.*/ | |
28 | |
29 /* This code is originally from Mark Borgerding's KISS-FFT but has been | |
30 heavily modified to better suit Opus */ | |
31 | |
32 #ifndef SKIP_CONFIG_H | |
33 # ifdef HAVE_CONFIG_H | |
34 # include "config.h" | |
35 # endif | |
36 #endif | |
37 | |
38 #include "_kiss_fft_guts.h" | |
39 #include "arch.h" | |
40 #include "os_support.h" | |
41 #include "mathops.h" | |
42 #include "stack_alloc.h" | |
43 | |
44 /* The guts header contains all the multiplication and addition macros that are defined for | |
45 complex numbers. It also delares the kf_ internal functions. | |
46 */ | |
47 | |
48 static void kf_bfly2( | |
49 kiss_fft_cpx * Fout, | |
50 int m, | |
51 int N | |
52 ) | |
53 { | |
54 kiss_fft_cpx * Fout2; | |
55 int i; | |
56 (void)m; | |
57 #ifdef CUSTOM_MODES | |
58 if (m==1) | |
59 { | |
60 celt_assert(m==1); | |
61 for (i=0;i<N;i++) | |
62 { | |
63 kiss_fft_cpx t; | |
64 Fout2 = Fout + 1; | |
65 t = *Fout2; | |
66 C_SUB( *Fout2 , *Fout , t ); | |
67 C_ADDTO( *Fout , t ); | |
68 Fout += 2; | |
69 } | |
70 } else | |
71 #endif | |
72 { | |
73 opus_val16 tw; | |
74 tw = QCONST16(0.7071067812f, 15); | |
75 /* We know that m==4 here because the radix-2 is just after a radix-4 */ | |
76 celt_assert(m==4); | |
77 for (i=0;i<N;i++) | |
78 { | |
79 kiss_fft_cpx t; | |
80 Fout2 = Fout + 4; | |
81 t = Fout2[0]; | |
82 C_SUB( Fout2[0] , Fout[0] , t ); | |
83 C_ADDTO( Fout[0] , t ); | |
84 | |
85 t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw); | |
86 t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw); | |
87 C_SUB( Fout2[1] , Fout[1] , t ); | |
88 C_ADDTO( Fout[1] , t ); | |
89 | |
90 t.r = Fout2[2].i; | |
91 t.i = -Fout2[2].r; | |
92 C_SUB( Fout2[2] , Fout[2] , t ); | |
93 C_ADDTO( Fout[2] , t ); | |
94 | |
95 t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw); | |
96 t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw); | |
97 C_SUB( Fout2[3] , Fout[3] , t ); | |
98 C_ADDTO( Fout[3] , t ); | |
99 Fout += 8; | |
100 } | |
101 } | |
102 } | |
103 | |
104 static void kf_bfly4( | |
105 kiss_fft_cpx * Fout, | |
106 const size_t fstride, | |
107 const kiss_fft_state *st, | |
108 int m, | |
109 int N, | |
110 int mm | |
111 ) | |
112 { | |
113 int i; | |
114 | |
115 if (m==1) | |
116 { | |
117 /* Degenerate case where all the twiddles are 1. */ | |
118 for (i=0;i<N;i++) | |
119 { | |
120 kiss_fft_cpx scratch0, scratch1; | |
121 | |
122 C_SUB( scratch0 , *Fout, Fout[2] ); | |
123 C_ADDTO(*Fout, Fout[2]); | |
124 C_ADD( scratch1 , Fout[1] , Fout[3] ); | |
125 C_SUB( Fout[2], *Fout, scratch1 ); | |
126 C_ADDTO( *Fout , scratch1 ); | |
127 C_SUB( scratch1 , Fout[1] , Fout[3] ); | |
128 | |
129 Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i); | |
130 Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r); | |
131 Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i); | |
132 Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r); | |
133 Fout+=4; | |
134 } | |
135 } else { | |
136 int j; | |
137 kiss_fft_cpx scratch[6]; | |
138 const kiss_twiddle_cpx *tw1,*tw2,*tw3; | |
139 const int m2=2*m; | |
140 const int m3=3*m; | |
141 kiss_fft_cpx * Fout_beg = Fout; | |
142 for (i=0;i<N;i++) | |
143 { | |
144 Fout = Fout_beg + i*mm; | |
145 tw3 = tw2 = tw1 = st->twiddles; | |
146 /* m is guaranteed to be a multiple of 4. */ | |
147 for (j=0;j<m;j++) | |
148 { | |
149 C_MUL(scratch[0],Fout[m] , *tw1 ); | |
150 C_MUL(scratch[1],Fout[m2] , *tw2 ); | |
151 C_MUL(scratch[2],Fout[m3] , *tw3 ); | |
152 | |
153 C_SUB( scratch[5] , *Fout, scratch[1] ); | |
154 C_ADDTO(*Fout, scratch[1]); | |
155 C_ADD( scratch[3] , scratch[0] , scratch[2] ); | |
156 C_SUB( scratch[4] , scratch[0] , scratch[2] ); | |
157 C_SUB( Fout[m2], *Fout, scratch[3] ); | |
158 tw1 += fstride; | |
159 tw2 += fstride*2; | |
160 tw3 += fstride*3; | |
161 C_ADDTO( *Fout , scratch[3] ); | |
162 | |
163 Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i); | |
164 Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r); | |
165 Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i); | |
166 Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r); | |
167 ++Fout; | |
168 } | |
169 } | |
170 } | |
171 } | |
172 | |
173 | |
174 #ifndef RADIX_TWO_ONLY | |
175 | |
176 static void kf_bfly3( | |
177 kiss_fft_cpx * Fout, | |
178 const size_t fstride, | |
179 const kiss_fft_state *st, | |
180 int m, | |
181 int N, | |
182 int mm | |
183 ) | |
184 { | |
185 int i; | |
186 size_t k; | |
187 const size_t m2 = 2*m; | |
188 const kiss_twiddle_cpx *tw1,*tw2; | |
189 kiss_fft_cpx scratch[5]; | |
190 kiss_twiddle_cpx epi3; | |
191 | |
192 kiss_fft_cpx * Fout_beg = Fout; | |
193 #ifdef FIXED_POINT | |
194 /*epi3.r = -16384;*/ /* Unused */ | |
195 epi3.i = -28378; | |
196 #else | |
197 epi3 = st->twiddles[fstride*m]; | |
198 #endif | |
199 for (i=0;i<N;i++) | |
200 { | |
201 Fout = Fout_beg + i*mm; | |
202 tw1=tw2=st->twiddles; | |
203 /* For non-custom modes, m is guaranteed to be a multiple of 4. */ | |
204 k=m; | |
205 do { | |
206 | |
207 C_MUL(scratch[1],Fout[m] , *tw1); | |
208 C_MUL(scratch[2],Fout[m2] , *tw2); | |
209 | |
210 C_ADD(scratch[3],scratch[1],scratch[2]); | |
211 C_SUB(scratch[0],scratch[1],scratch[2]); | |
212 tw1 += fstride; | |
213 tw2 += fstride*2; | |
214 | |
215 Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r)); | |
216 Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i)); | |
217 | |
218 C_MULBYSCALAR( scratch[0] , epi3.i ); | |
219 | |
220 C_ADDTO(*Fout,scratch[3]); | |
221 | |
222 Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i); | |
223 Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r); | |
224 | |
225 Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i); | |
226 Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r); | |
227 | |
228 ++Fout; | |
229 } while(--k); | |
230 } | |
231 } | |
232 | |
233 | |
234 #ifndef OVERRIDE_kf_bfly5 | |
235 static void kf_bfly5( | |
236 kiss_fft_cpx * Fout, | |
237 const size_t fstride, | |
238 const kiss_fft_state *st, | |
239 int m, | |
240 int N, | |
241 int mm | |
242 ) | |
243 { | |
244 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; | |
245 int i, u; | |
246 kiss_fft_cpx scratch[13]; | |
247 const kiss_twiddle_cpx *tw; | |
248 kiss_twiddle_cpx ya,yb; | |
249 kiss_fft_cpx * Fout_beg = Fout; | |
250 | |
251 #ifdef FIXED_POINT | |
252 ya.r = 10126; | |
253 ya.i = -31164; | |
254 yb.r = -26510; | |
255 yb.i = -19261; | |
256 #else | |
257 ya = st->twiddles[fstride*m]; | |
258 yb = st->twiddles[fstride*2*m]; | |
259 #endif | |
260 tw=st->twiddles; | |
261 | |
262 for (i=0;i<N;i++) | |
263 { | |
264 Fout = Fout_beg + i*mm; | |
265 Fout0=Fout; | |
266 Fout1=Fout0+m; | |
267 Fout2=Fout0+2*m; | |
268 Fout3=Fout0+3*m; | |
269 Fout4=Fout0+4*m; | |
270 | |
271 /* For non-custom modes, m is guaranteed to be a multiple of 4. */ | |
272 for ( u=0; u<m; ++u ) { | |
273 scratch[0] = *Fout0; | |
274 | |
275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); | |
276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); | |
277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); | |
278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); | |
279 | |
280 C_ADD( scratch[7],scratch[1],scratch[4]); | |
281 C_SUB( scratch[10],scratch[1],scratch[4]); | |
282 C_ADD( scratch[8],scratch[2],scratch[3]); | |
283 C_SUB( scratch[9],scratch[2],scratch[3]); | |
284 | |
285 Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r)); | |
286 Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i)); | |
287 | |
288 scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r))); | |
289 scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r))); | |
290 | |
291 scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i)); | |
292 scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i))); | |
293 | |
294 C_SUB(*Fout1,scratch[5],scratch[6]); | |
295 C_ADD(*Fout4,scratch[5],scratch[6]); | |
296 | |
297 scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r))); | |
298 scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r))); | |
299 scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i)); | |
300 scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i)); | |
301 | |
302 C_ADD(*Fout2,scratch[11],scratch[12]); | |
303 C_SUB(*Fout3,scratch[11],scratch[12]); | |
304 | |
305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; | |
306 } | |
307 } | |
308 } | |
309 #endif /* OVERRIDE_kf_bfly5 */ | |
310 | |
311 | |
312 #endif | |
313 | |
314 | |
315 #ifdef CUSTOM_MODES | |
316 | |
317 static | |
318 void compute_bitrev_table( | |
319 int Fout, | |
320 opus_int16 *f, | |
321 const size_t fstride, | |
322 int in_stride, | |
323 opus_int16 * factors, | |
324 const kiss_fft_state *st | |
325 ) | |
326 { | |
327 const int p=*factors++; /* the radix */ | |
328 const int m=*factors++; /* stage's fft length/p */ | |
329 | |
330 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ | |
331 if (m==1) | |
332 { | |
333 int j; | |
334 for (j=0;j<p;j++) | |
335 { | |
336 *f = Fout+j; | |
337 f += fstride*in_stride; | |
338 } | |
339 } else { | |
340 int j; | |
341 for (j=0;j<p;j++) | |
342 { | |
343 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st); | |
344 f += fstride*in_stride; | |
345 Fout += m; | |
346 } | |
347 } | |
348 } | |
349 | |
350 /* facbuf is populated by p1,m1,p2,m2, ... | |
351 where | |
352 p[i] * m[i] = m[i-1] | |
353 m0 = n */ | |
354 static | |
355 int kf_factor(int n,opus_int16 * facbuf) | |
356 { | |
357 int p=4; | |
358 int i; | |
359 int stages=0; | |
360 int nbak = n; | |
361 | |
362 /*factor out powers of 4, powers of 2, then any remaining primes */ | |
363 do { | |
364 while (n % p) { | |
365 switch (p) { | |
366 case 4: p = 2; break; | |
367 case 2: p = 3; break; | |
368 default: p += 2; break; | |
369 } | |
370 if (p>32000 || (opus_int32)p*(opus_int32)p > n) | |
371 p = n; /* no more factors, skip to end */ | |
372 } | |
373 n /= p; | |
374 #ifdef RADIX_TWO_ONLY | |
375 if (p!=2 && p != 4) | |
376 #else | |
377 if (p>5) | |
378 #endif | |
379 { | |
380 return 0; | |
381 } | |
382 facbuf[2*stages] = p; | |
383 if (p==2 && stages > 1) | |
384 { | |
385 facbuf[2*stages] = 4; | |
386 facbuf[2] = 2; | |
387 } | |
388 stages++; | |
389 } while (n > 1); | |
390 n = nbak; | |
391 /* Reverse the order to get the radix 4 at the end, so we can use the | |
392 fast degenerate case. It turns out that reversing the order also | |
393 improves the noise behaviour. */ | |
394 for (i=0;i<stages/2;i++) | |
395 { | |
396 int tmp; | |
397 tmp = facbuf[2*i]; | |
398 facbuf[2*i] = facbuf[2*(stages-i-1)]; | |
399 facbuf[2*(stages-i-1)] = tmp; | |
400 } | |
401 for (i=0;i<stages;i++) | |
402 { | |
403 n /= facbuf[2*i]; | |
404 facbuf[2*i+1] = n; | |
405 } | |
406 return 1; | |
407 } | |
408 | |
409 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft) | |
410 { | |
411 int i; | |
412 #ifdef FIXED_POINT | |
413 for (i=0;i<nfft;++i) { | |
414 opus_val32 phase = -i; | |
415 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft)); | |
416 } | |
417 #else | |
418 for (i=0;i<nfft;++i) { | |
419 const double pi=3.14159265358979323846264338327; | |
420 double phase = ( -2*pi /nfft ) * i; | |
421 kf_cexp(twiddles+i, phase ); | |
422 } | |
423 #endif | |
424 } | |
425 | |
426 int opus_fft_alloc_arch_c(kiss_fft_state *st) { | |
427 (void)st; | |
428 return 0; | |
429 } | |
430 | |
431 /* | |
432 * | |
433 * Allocates all necessary storage space for the fft and ifft. | |
434 * The return value is a contiguous block of memory. As such, | |
435 * It can be freed with free(). | |
436 * */ | |
437 kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, | |
438 const kiss_fft_state *base, int arch) | |
439 { | |
440 kiss_fft_state *st=NULL; | |
441 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/ | |
442 | |
443 if ( lenmem==NULL ) { | |
444 st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded ); | |
445 }else{ | |
446 if (mem != NULL && *lenmem >= memneeded) | |
447 st = (kiss_fft_state*)mem; | |
448 *lenmem = memneeded; | |
449 } | |
450 if (st) { | |
451 opus_int16 *bitrev; | |
452 kiss_twiddle_cpx *twiddles; | |
453 | |
454 st->nfft=nfft; | |
455 #ifdef FIXED_POINT | |
456 st->scale_shift = celt_ilog2(st->nfft); | |
457 if (st->nfft == 1<<st->scale_shift) | |
458 st->scale = Q15ONE; | |
459 else | |
460 st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); | |
461 #else | |
462 st->scale = 1.f/nfft; | |
463 #endif | |
464 if (base != NULL) | |
465 { | |
466 st->twiddles = base->twiddles; | |
467 st->shift = 0; | |
468 while (st->shift < 32 && nfft<<st->shift != base->nfft) | |
469 st->shift++; | |
470 if (st->shift>=32) | |
471 goto fail; | |
472 } else { | |
473 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft); | |
474 compute_twiddles(twiddles, nfft); | |
475 st->shift = -1; | |
476 } | |
477 if (!kf_factor(nfft,st->factors)) | |
478 { | |
479 goto fail; | |
480 } | |
481 | |
482 /* bitrev */ | |
483 st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft); | |
484 if (st->bitrev==NULL) | |
485 goto fail; | |
486 compute_bitrev_table(0, bitrev, 1,1, st->factors,st); | |
487 | |
488 /* Initialize architecture specific fft parameters */ | |
489 if (opus_fft_alloc_arch(st, arch)) | |
490 goto fail; | |
491 } | |
492 return st; | |
493 fail: | |
494 opus_fft_free(st, arch); | |
495 return NULL; | |
496 } | |
497 | |
498 kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch) | |
499 { | |
500 return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch); | |
501 } | |
502 | |
503 void opus_fft_free_arch_c(kiss_fft_state *st) { | |
504 (void)st; | |
505 } | |
506 | |
507 void opus_fft_free(const kiss_fft_state *cfg, int arch) | |
508 { | |
509 if (cfg) | |
510 { | |
511 opus_fft_free_arch((kiss_fft_state *)cfg, arch); | |
512 opus_free((opus_int16*)cfg->bitrev); | |
513 if (cfg->shift < 0) | |
514 opus_free((kiss_twiddle_cpx*)cfg->twiddles); | |
515 opus_free((kiss_fft_state*)cfg); | |
516 } | |
517 } | |
518 | |
519 #endif /* CUSTOM_MODES */ | |
520 | |
521 void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) | |
522 { | |
523 int m2, m; | |
524 int p; | |
525 int L; | |
526 int fstride[MAXFACTORS]; | |
527 int i; | |
528 int shift; | |
529 | |
530 /* st->shift can be -1 */ | |
531 shift = st->shift>0 ? st->shift : 0; | |
532 | |
533 fstride[0] = 1; | |
534 L=0; | |
535 do { | |
536 p = st->factors[2*L]; | |
537 m = st->factors[2*L+1]; | |
538 fstride[L+1] = fstride[L]*p; | |
539 L++; | |
540 } while(m!=1); | |
541 m = st->factors[2*L-1]; | |
542 for (i=L-1;i>=0;i--) | |
543 { | |
544 if (i!=0) | |
545 m2 = st->factors[2*i-1]; | |
546 else | |
547 m2 = 1; | |
548 switch (st->factors[2*i]) | |
549 { | |
550 case 2: | |
551 kf_bfly2(fout, m, fstride[i]); | |
552 break; | |
553 case 4: | |
554 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); | |
555 break; | |
556 #ifndef RADIX_TWO_ONLY | |
557 case 3: | |
558 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); | |
559 break; | |
560 case 5: | |
561 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); | |
562 break; | |
563 #endif | |
564 } | |
565 m = m2; | |
566 } | |
567 } | |
568 | |
569 void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) | |
570 { | |
571 int i; | |
572 opus_val16 scale; | |
573 #ifdef FIXED_POINT | |
574 /* Allows us to scale with MULT16_32_Q16(), which is faster than | |
575 MULT16_32_Q15() on ARM. */ | |
576 int scale_shift = st->scale_shift-1; | |
577 #endif | |
578 scale = st->scale; | |
579 | |
580 celt_assert2 (fin != fout, "In-place FFT not supported"); | |
581 /* Bit-reverse the input */ | |
582 for (i=0;i<st->nfft;i++) | |
583 { | |
584 kiss_fft_cpx x = fin[i]; | |
585 fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); | |
586 fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); | |
587 } | |
588 opus_fft_impl(st, fout); | |
589 } | |
590 | |
591 | |
592 void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) | |
593 { | |
594 int i; | |
595 celt_assert2 (fin != fout, "In-place FFT not supported"); | |
596 /* Bit-reverse the input */ | |
597 for (i=0;i<st->nfft;i++) | |
598 fout[st->bitrev[i]] = fin[i]; | |
599 for (i=0;i<st->nfft;i++) | |
600 fout[i].i = -fout[i].i; | |
601 opus_fft_impl(st, fout); | |
602 for (i=0;i<st->nfft;i++) | |
603 fout[i].i = -fout[i].i; | |
604 } |