Chris@69
|
1 /*Copyright (c) 2013, Xiph.Org Foundation and contributors.
|
Chris@69
|
2
|
Chris@69
|
3 All rights reserved.
|
Chris@69
|
4
|
Chris@69
|
5 Redistribution and use in source and binary forms, with or without
|
Chris@69
|
6 modification, are permitted provided that the following conditions are met:
|
Chris@69
|
7
|
Chris@69
|
8 * Redistributions of source code must retain the above copyright notice,
|
Chris@69
|
9 this list of conditions and the following disclaimer.
|
Chris@69
|
10 * Redistributions in binary form must reproduce the above copyright notice,
|
Chris@69
|
11 this list of conditions and the following disclaimer in the
|
Chris@69
|
12 documentation and/or other materials provided with the distribution.
|
Chris@69
|
13
|
Chris@69
|
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
Chris@69
|
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
Chris@69
|
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
Chris@69
|
17 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
Chris@69
|
18 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
Chris@69
|
19 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
Chris@69
|
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
Chris@69
|
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
Chris@69
|
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
Chris@69
|
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
Chris@69
|
24 POSSIBILITY OF SUCH DAMAGE.*/
|
Chris@69
|
25
|
Chris@69
|
26 #ifndef KISS_FFT_MIPSR1_H
|
Chris@69
|
27 #define KISS_FFT_MIPSR1_H
|
Chris@69
|
28
|
Chris@69
|
29 #if !defined(KISS_FFT_GUTS_H)
|
Chris@69
|
30 #error "This file should only be included from _kiss_fft_guts.h"
|
Chris@69
|
31 #endif
|
Chris@69
|
32
|
Chris@69
|
33 #ifdef FIXED_POINT
|
Chris@69
|
34
|
Chris@69
|
35 #define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d))
|
Chris@69
|
36 #define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d))
|
Chris@69
|
37
|
Chris@69
|
38 #undef S_MUL_ADD
|
Chris@69
|
39 static inline int S_MUL_ADD(int a, int b, int c, int d) {
|
Chris@69
|
40 int m;
|
Chris@69
|
41 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
|
Chris@69
|
42 asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
|
Chris@69
|
43 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
|
Chris@69
|
44 return m;
|
Chris@69
|
45 }
|
Chris@69
|
46
|
Chris@69
|
47 #undef S_MUL_SUB
|
Chris@69
|
48 static inline int S_MUL_SUB(int a, int b, int c, int d) {
|
Chris@69
|
49 int m;
|
Chris@69
|
50 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
|
Chris@69
|
51 asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
|
Chris@69
|
52 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
|
Chris@69
|
53 return m;
|
Chris@69
|
54 }
|
Chris@69
|
55
|
Chris@69
|
56 #undef C_MUL
|
Chris@69
|
57 # define C_MUL(m,a,b) (m=C_MUL_fun(a,b))
|
Chris@69
|
58 static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
|
Chris@69
|
59 kiss_fft_cpx m;
|
Chris@69
|
60
|
Chris@69
|
61 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
|
Chris@69
|
62 asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
|
Chris@69
|
63 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
|
Chris@69
|
64 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
|
Chris@69
|
65 asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
|
Chris@69
|
66 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
|
Chris@69
|
67
|
Chris@69
|
68 return m;
|
Chris@69
|
69 }
|
Chris@69
|
70 #undef C_MULC
|
Chris@69
|
71 # define C_MULC(m,a,b) (m=C_MULC_fun(a,b))
|
Chris@69
|
72 static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
|
Chris@69
|
73 kiss_fft_cpx m;
|
Chris@69
|
74
|
Chris@69
|
75 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
|
Chris@69
|
76 asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
|
Chris@69
|
77 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
|
Chris@69
|
78 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
|
Chris@69
|
79 asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
|
Chris@69
|
80 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
|
Chris@69
|
81
|
Chris@69
|
82 return m;
|
Chris@69
|
83 }
|
Chris@69
|
84
|
Chris@69
|
85 #endif /* FIXED_POINT */
|
Chris@69
|
86
|
Chris@69
|
87 #define OVERRIDE_kf_bfly5
|
Chris@69
|
88 static void kf_bfly5(
|
Chris@69
|
89 kiss_fft_cpx * Fout,
|
Chris@69
|
90 const size_t fstride,
|
Chris@69
|
91 const kiss_fft_state *st,
|
Chris@69
|
92 int m,
|
Chris@69
|
93 int N,
|
Chris@69
|
94 int mm
|
Chris@69
|
95 )
|
Chris@69
|
96 {
|
Chris@69
|
97 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
|
Chris@69
|
98 int i, u;
|
Chris@69
|
99 kiss_fft_cpx scratch[13];
|
Chris@69
|
100
|
Chris@69
|
101 const kiss_twiddle_cpx *tw;
|
Chris@69
|
102 kiss_twiddle_cpx ya,yb;
|
Chris@69
|
103 kiss_fft_cpx * Fout_beg = Fout;
|
Chris@69
|
104
|
Chris@69
|
105 #ifdef FIXED_POINT
|
Chris@69
|
106 ya.r = 10126;
|
Chris@69
|
107 ya.i = -31164;
|
Chris@69
|
108 yb.r = -26510;
|
Chris@69
|
109 yb.i = -19261;
|
Chris@69
|
110 #else
|
Chris@69
|
111 ya = st->twiddles[fstride*m];
|
Chris@69
|
112 yb = st->twiddles[fstride*2*m];
|
Chris@69
|
113 #endif
|
Chris@69
|
114
|
Chris@69
|
115 tw=st->twiddles;
|
Chris@69
|
116
|
Chris@69
|
117 for (i=0;i<N;i++)
|
Chris@69
|
118 {
|
Chris@69
|
119 Fout = Fout_beg + i*mm;
|
Chris@69
|
120 Fout0=Fout;
|
Chris@69
|
121 Fout1=Fout0+m;
|
Chris@69
|
122 Fout2=Fout0+2*m;
|
Chris@69
|
123 Fout3=Fout0+3*m;
|
Chris@69
|
124 Fout4=Fout0+4*m;
|
Chris@69
|
125
|
Chris@69
|
126 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
|
Chris@69
|
127 for ( u=0; u<m; ++u ) {
|
Chris@69
|
128 scratch[0] = *Fout0;
|
Chris@69
|
129
|
Chris@69
|
130
|
Chris@69
|
131 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
|
Chris@69
|
132 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
|
Chris@69
|
133 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
|
Chris@69
|
134 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
|
Chris@69
|
135
|
Chris@69
|
136 C_ADD( scratch[7],scratch[1],scratch[4]);
|
Chris@69
|
137 C_SUB( scratch[10],scratch[1],scratch[4]);
|
Chris@69
|
138 C_ADD( scratch[8],scratch[2],scratch[3]);
|
Chris@69
|
139 C_SUB( scratch[9],scratch[2],scratch[3]);
|
Chris@69
|
140
|
Chris@69
|
141 Fout0->r += scratch[7].r + scratch[8].r;
|
Chris@69
|
142 Fout0->i += scratch[7].i + scratch[8].i;
|
Chris@69
|
143 scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r);
|
Chris@69
|
144 scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r);
|
Chris@69
|
145
|
Chris@69
|
146 scratch[6].r = S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i);
|
Chris@69
|
147 scratch[6].i = -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i);
|
Chris@69
|
148
|
Chris@69
|
149 C_SUB(*Fout1,scratch[5],scratch[6]);
|
Chris@69
|
150 C_ADD(*Fout4,scratch[5],scratch[6]);
|
Chris@69
|
151
|
Chris@69
|
152 scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r);
|
Chris@69
|
153 scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r);
|
Chris@69
|
154
|
Chris@69
|
155 scratch[12].r = S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i);
|
Chris@69
|
156 scratch[12].i = S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i);
|
Chris@69
|
157
|
Chris@69
|
158 C_ADD(*Fout2,scratch[11],scratch[12]);
|
Chris@69
|
159 C_SUB(*Fout3,scratch[11],scratch[12]);
|
Chris@69
|
160
|
Chris@69
|
161 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
|
Chris@69
|
162 }
|
Chris@69
|
163 }
|
Chris@69
|
164 }
|
Chris@69
|
165
|
Chris@69
|
166
|
Chris@69
|
167 #endif /* KISS_FFT_MIPSR1_H */
|