To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t1_6.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (7.58 KB)

1
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:13 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 6 -name t1_6 -include dft/scalar/t.h */
29

    
30
/*
 * This function contains 46 FP additions, 32 FP multiplications,
 * (or, 24 additions, 10 multiplications, 22 fused multiply/add),
 * 31 stack variables, 2 constants, and 24 memory accesses
 */
35
#include "dft/scalar/t.h"
36

    
37
/*
 * t1_6 (FMA variant): in-place size-6 twiddle codelet emitted by genfft's
 * gen_twiddle with "-fma -n 6".  This definition is the one compiled when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined.
 *
 *   ri, ii : real / imaginary arrays; the six elements at offsets
 *            WS(rs, 0) .. WS(rs, 5) are read and then overwritten.
 *   W      : twiddle table; W[0] .. W[9] are consumed per iteration and the
 *            pointer is pre-advanced by mb * 10 before the first iteration.
 *   rs     : stride given to WS() to locate the six elements.
 *   mb, me : the loop runs for m in [mb, me).
 *   ms     : added to both ri and ii after every iteration (ri and ii are
 *            NOT pre-advanced by mb here).
 *
 * Each iteration first combines element k (k = 1..5) with the twiddle pair
 * (W[2k-2], W[2k-1]); element 0 is used unchanged.  The six intermediate
 * values are then merged with additions/subtractions and the constants
 * KP500000000 (0.5) and KP866025403 (0.8660254...).
 *
 * NOTE(review): the formulas quoted in the comments below assume
 * FMA(a, b, c) == a*b + c and FNMS(a, b, c) == c - a*b; confirm against the
 * macro definitions, which live outside this file.
 *
 * This code is machine generated; statement order and temporaries are kept
 * exactly as emitted.
 */
static void t1_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     {
          INT m;
          for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) {
               E T1, TX, T7, TW, Tl, TR, TB, TJ, Ty, TS, TC, TO;
               /* Element 0: taken as is, no twiddle applied. */
               T1 = ri[0];
               TX = ii[0];
               {
                    /* Element 3 with twiddle pair (W[4], W[5]):
                       T7 = W[4]*re + W[5]*im,  TW = W[4]*im - W[5]*re. */
                    E T3, T6, T4, TV, T2, T5;
                    T3 = ri[WS(rs, 3)];
                    T6 = ii[WS(rs, 3)];
                    T2 = W[4];
                    T4 = T2 * T3;
                    TV = T2 * T6;
                    T5 = W[5];
                    T7 = FMA(T5, T6, T4);
                    TW = FNMS(T5, T3, TV);
               }
               {
                    /* Elements 2 (W[2], W[3]) and 5 (W[8], W[9]); their
                       twiddled values are reduced to sums and differences. */
                    E Ta, Td, Tb, TF, Tg, Tj, Th, TH, T9, Tf;
                    Ta = ri[WS(rs, 2)];
                    Td = ii[WS(rs, 2)];
                    T9 = W[2];
                    Tb = T9 * Ta;
                    TF = T9 * Td;
                    Tg = ri[WS(rs, 5)];
                    Tj = ii[WS(rs, 5)];
                    Tf = W[8];
                    Th = Tf * Tg;
                    TH = Tf * Tj;
                    {
                         E Te, TG, Tk, TI, Tc, Ti;
                         Tc = W[3];
                         Te = FMA(Tc, Td, Tb);
                         TG = FNMS(Tc, Ta, TF);
                         Ti = W[9];
                         Tk = FMA(Ti, Tj, Th);
                         TI = FNMS(Ti, Tg, TH);
                         Tl = Te - Tk;
                         TR = TG + TI;
                         TB = Te + Tk;
                         TJ = TG - TI;
                    }
               }
               {
                    /* Elements 4 (W[6], W[7]) and 1 (W[0], W[1]); same
                       pattern as the previous block. */
                    E Tn, Tq, To, TK, Tt, Tw, Tu, TM, Tm, Ts;
                    Tn = ri[WS(rs, 4)];
                    Tq = ii[WS(rs, 4)];
                    Tm = W[6];
                    To = Tm * Tn;
                    TK = Tm * Tq;
                    Tt = ri[WS(rs, 1)];
                    Tw = ii[WS(rs, 1)];
                    Ts = W[0];
                    Tu = Ts * Tt;
                    TM = Ts * Tw;
                    {
                         E Tr, TL, Tx, TN, Tp, Tv;
                         Tp = W[7];
                         Tr = FMA(Tp, Tq, To);
                         TL = FNMS(Tp, Tn, TK);
                         Tv = W[1];
                         Tx = FMA(Tv, Tw, Tu);
                         TN = FNMS(Tv, Tt, TM);
                         Ty = Tr - Tx;
                         TS = TL + TN;
                         TC = Tr + Tx;
                         TO = TL - TN;
                    }
               }
               {
                    /* Real parts of outputs 3, 1 and 5 (built from the
                       "difference" terms). */
                    E TP, T8, Tz, TE;
                    TP = TJ - TO;
                    T8 = T1 - T7;
                    Tz = Tl + Ty;
                    TE = FNMS(KP500000000, Tz, T8);
                    ri[WS(rs, 3)] = T8 + Tz;
                    ri[WS(rs, 1)] = FMA(KP866025403, TP, TE);
                    ri[WS(rs, 5)] = FNMS(KP866025403, TP, TE);
               }
               {
                    /* Imaginary parts of outputs 1, 3 and 5. */
                    E T14, T11, T12, T13;
                    T14 = Ty - Tl;
                    T11 = TX - TW;
                    T12 = TJ + TO;
                    T13 = FNMS(KP500000000, T12, T11);
                    ii[WS(rs, 1)] = FMA(KP866025403, T14, T13);
                    ii[WS(rs, 3)] = T12 + T11;
                    ii[WS(rs, 5)] = FNMS(KP866025403, T14, T13);
               }
               {
                    /* Real parts of outputs 0, 4 and 2 (built from the
                       "sum" terms). */
                    E TT, TA, TD, TQ;
                    TT = TR - TS;
                    TA = T1 + T7;
                    TD = TB + TC;
                    TQ = FNMS(KP500000000, TD, TA);
                    ri[0] = TA + TD;
                    ri[WS(rs, 4)] = FMA(KP866025403, TT, TQ);
                    ri[WS(rs, 2)] = FNMS(KP866025403, TT, TQ);
               }
               {
                    /* Imaginary parts of outputs 0, 4 and 2. */
                    E T10, TU, TY, TZ;
                    T10 = TC - TB;
                    TU = TR + TS;
                    TY = TW + TX;
                    TZ = FNMS(KP500000000, TU, TY);
                    ii[0] = TU + TY;
                    ii[WS(rs, 4)] = FMA(KP866025403, T10, TZ);
                    ii[WS(rs, 2)] = FNMS(KP866025403, T10, TZ);
               }
          }
     }
}
153

    
154
static const tw_instr twinstr[] = {
155
     {TW_FULL, 0, 6},
156
     {TW_NEXT, 1, 0}
157
};
158

    
159
/*
 * Descriptor for the FMA variant of t1_6; its address is passed to
 * X(kdft_dit_register) together with the codelet function.
 * NOTE(review): the meaning of the three trailing zeros is not visible in
 * this file -- check the ct_desc declaration.
 */
static const ct_desc desc = {
     6,                  /* same value as the generator's -n option */
     "t1_6",             /* codelet name */
     twinstr,            /* twiddle instruction list */
     &GENUS,
     {24, 10, 22, 0},    /* 24 additions, 10 multiplications, 22 fused multiply/add */
     0,
     0,
     0
};
160

    
161
/*
 * Registration entry point: hands the t1_6 codelet function and its
 * descriptor to planner p through X(kdft_dit_register).
 */
void X(codelet_t1_6) (planner *p) {
     X(kdft_dit_register) (p, t1_6, &desc);
}
164
#else
165

    
166
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 6 -name t1_6 -include dft/scalar/t.h */
167

    
168
/*
 * This function contains 46 FP additions, 28 FP multiplications,
 * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
 * 23 stack variables, 2 constants, and 24 memory accesses
 */
173
#include "dft/scalar/t.h"
174

    
175
/*
 * t1_6 (non-FMA variant): in-place size-6 twiddle codelet emitted by
 * genfft's gen_twiddle with "-n 6" (no -fma).  This definition is the one
 * compiled when neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is
 * defined.
 *
 *   ri, ii : real / imaginary arrays; the six elements at offsets
 *            WS(rs, 0) .. WS(rs, 5) are read and then overwritten.
 *   W      : twiddle table; W[0] .. W[9] are consumed per iteration and the
 *            pointer is pre-advanced by mb * 10 before the first iteration.
 *   rs     : stride given to WS() to locate the six elements.
 *   mb, me : the loop runs for m in [mb, me).
 *   ms     : added to both ri and ii after every iteration (ri and ii are
 *            NOT pre-advanced by mb here).
 *
 * Each iteration first combines element k (k = 1..5) with the twiddle pair
 * (W[2k-2], W[2k-1]); element 0 is used unchanged.  The results are reduced
 * to sums and differences and then merged using the constants KP500000000
 * (0.5) and KP866025403 (0.8660254...).
 *
 * NOTE(review): the formulas quoted in the comments below assume
 * FMA(a, b, c) == a*b + c and FNMS(a, b, c) == c - a*b; confirm against the
 * macro definitions, which live outside this file.
 *
 * This code is machine generated; statement order and temporaries are kept
 * exactly as emitted.
 */
static void t1_6(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     {
          INT m;
          for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs)) {
               E T7, TS, Tv, TO, Tt, TJ, Tx, TF, Ti, TI, Tw, TC;
               {
                    /* Element 0 (untwiddled) paired with element 3
                       (twiddle pair W[4], W[5]):
                       T6 = W[4]*re + W[5]*im,  TM = W[4]*im - W[5]*re. */
                    E T1, TN, T6, TM;
                    T1 = ri[0];
                    TN = ii[0];
                    {
                         E T3, T5, T2, T4;
                         T3 = ri[WS(rs, 3)];
                         T5 = ii[WS(rs, 3)];
                         T2 = W[4];
                         T4 = W[5];
                         T6 = FMA(T2, T3, T4 * T5);
                         TM = FNMS(T4, T3, T2 * T5);
                    }
                    T7 = T1 - T6;
                    TS = TN - TM;
                    Tv = T1 + T6;
                    TO = TM + TN;
               }
               {
                    /* Element 4 (W[6], W[7]) paired with element 1
                       (W[0], W[1]). */
                    E Tn, TD, Ts, TE;
                    {
                         E Tk, Tm, Tj, Tl;
                         Tk = ri[WS(rs, 4)];
                         Tm = ii[WS(rs, 4)];
                         Tj = W[6];
                         Tl = W[7];
                         Tn = FMA(Tj, Tk, Tl * Tm);
                         TD = FNMS(Tl, Tk, Tj * Tm);
                    }
                    {
                         E Tp, Tr, To, Tq;
                         Tp = ri[WS(rs, 1)];
                         Tr = ii[WS(rs, 1)];
                         To = W[0];
                         Tq = W[1];
                         Ts = FMA(To, Tp, Tq * Tr);
                         TE = FNMS(Tq, Tp, To * Tr);
                    }
                    Tt = Tn - Ts;
                    TJ = TD + TE;
                    Tx = Tn + Ts;
                    TF = TD - TE;
               }
               {
                    /* Element 2 (W[2], W[3]) paired with element 5
                       (W[8], W[9]). */
                    E Tc, TA, Th, TB;
                    {
                         E T9, Tb, T8, Ta;
                         T9 = ri[WS(rs, 2)];
                         Tb = ii[WS(rs, 2)];
                         T8 = W[2];
                         Ta = W[3];
                         Tc = FMA(T8, T9, Ta * Tb);
                         TA = FNMS(Ta, T9, T8 * Tb);
                    }
                    {
                         E Te, Tg, Td, Tf;
                         Te = ri[WS(rs, 5)];
                         Tg = ii[WS(rs, 5)];
                         Td = W[8];
                         Tf = W[9];
                         Th = FMA(Td, Te, Tf * Tg);
                         TB = FNMS(Tf, Te, Td * Tg);
                    }
                    Ti = Tc - Th;
                    TI = TA + TB;
                    Tw = Tc + Th;
                    TC = TA - TB;
               }
               {
                    /* Outputs 1, 3 and 5 (real then imaginary parts), built
                       from the "difference" terms. */
                    E TG, Tu, Tz, TR, TT, TU;
                    TG = KP866025403 * (TC - TF);
                    Tu = Ti + Tt;
                    Tz = FNMS(KP500000000, Tu, T7);
                    ri[WS(rs, 3)] = T7 + Tu;
                    ri[WS(rs, 1)] = Tz + TG;
                    ri[WS(rs, 5)] = Tz - TG;
                    TR = KP866025403 * (Tt - Ti);
                    TT = TC + TF;
                    TU = FNMS(KP500000000, TT, TS);
                    ii[WS(rs, 1)] = TR + TU;
                    ii[WS(rs, 3)] = TT + TS;
                    ii[WS(rs, 5)] = TU - TR;
               }
               {
                    /* Outputs 0, 4 and 2 (real then imaginary parts), built
                       from the "sum" terms. */
                    E TK, Ty, TH, TQ, TL, TP;
                    TK = KP866025403 * (TI - TJ);
                    Ty = Tw + Tx;
                    TH = FNMS(KP500000000, Ty, Tv);
                    ri[0] = Tv + Ty;
                    ri[WS(rs, 4)] = TH + TK;
                    ri[WS(rs, 2)] = TH - TK;
                    TQ = KP866025403 * (Tx - Tw);
                    TL = TI + TJ;
                    TP = FNMS(KP500000000, TL, TO);
                    ii[0] = TL + TO;
                    ii[WS(rs, 4)] = TQ + TP;
                    ii[WS(rs, 2)] = TP - TQ;
               }
          }
     }
}
284

    
285
static const tw_instr twinstr[] = {
286
     {TW_FULL, 0, 6},
287
     {TW_NEXT, 1, 0}
288
};
289

    
290
/*
 * Descriptor for the non-FMA variant of t1_6; its address is passed to
 * X(kdft_dit_register) together with the codelet function.
 * NOTE(review): the meaning of the three trailing zeros is not visible in
 * this file -- check the ct_desc declaration.
 */
static const ct_desc desc = {
     6,                  /* same value as the generator's -n option */
     "t1_6",             /* codelet name */
     twinstr,            /* twiddle instruction list */
     &GENUS,
     {32, 14, 14, 0},    /* 32 additions, 14 multiplications, 14 fused multiply/add */
     0,
     0,
     0
};
291

    
292
/*
 * Registration entry point: hands the t1_6 codelet function and its
 * descriptor to planner p through X(kdft_dit_register).
 */
void X(codelet_t1_6) (planner *p) {
     X(kdft_dit_register) (p, t1_6, &desc);
}
295
#endif