To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t1_7.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (10.4 KB)

/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Thu May 24 08:04:13 EDT 2018 */

/*
 * Project header for DFT codelets.  The types and macros used further down
 * (R, E, INT, stride, planner, DK, FMA, ...) are not defined in this file.
 * NOTE(review): presumably they come from this header or from
 * dft/scalar/t.h -- confirm.
 */
#include "dft/codelet-dft.h"

#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)

/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 7 -name t1_7 -include dft/scalar/t.h */

/*
 * This function contains 72 FP additions, 66 FP multiplications,
 * (or, 18 additions, 12 multiplications, 54 fused multiply/add),
 * 37 stack variables, 6 constants, and 28 memory accesses
 */
/* Header named by the generator's "-include dft/scalar/t.h" option. */
#include "dft/scalar/t.h"

/*
 * t1_7 (FMA-oriented variant): size-7 twiddle codelet working in place on
 * split real / imaginary arrays.  Machine-generated by gen_twiddle; the
 * statement order below is the generator's schedule and is left untouched.
 *
 *   ri, ii   real and imaginary data.  Within one iteration, element k
 *            (k = 0..6) is read and later overwritten at
 *            ri[WS(rs, k)] / ii[WS(rs, k)] (element 0 at index 0).
 *   W        twiddle table; 12 values (six pairs) are consumed per
 *            iteration, starting at W + mb * 12.
 *   rs       stride argument passed to WS() to locate the seven elements.
 *   mb, me   half-open iteration range [mb, me).
 *   ms       increment added to ri and ii after every iteration.
 *
 * Each iteration first combines elements 1..6 with their twiddle pair
 * (W[2k-2], W[2k-1]) and then writes the seven outputs over the inputs.
 *
 * DK, FMA, FNMS, WS and MAKE_VOLATILE_STRIDE are macros defined outside
 * this file.  NOTE(review): the comments below assume
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- confirm against the
 * project headers.
 */
static void t1_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /*
      * Fixed coefficients.  By numeric value: KP974927912 = sin(4*pi/7),
      * KP900968867 = -cos(6*pi/7), and the remaining four are ratios of
      * such terms: KP801937735 ~ sin(2*pi/7)/sin(4*pi/7),
      * KP554958132 ~ sin(6*pi/7)/sin(2*pi/7),
      * KP692021471 ~ cos(2*pi/7)/|cos(6*pi/7)|,
      * KP356895867 ~ |cos(4*pi/7)|/cos(2*pi/7).
      */
     DK(KP974927912, +0.974927912181823607018131682993931217232785801);
     DK(KP900968867, +0.900968867902419126236102319507445051165919162);
     DK(KP801937735, +0.801937735804838252472204639014890102331838324);
     DK(KP554958132, +0.554958132087371191422194871006410481067288862);
     DK(KP692021471, +0.692021471630095869627814897002069140197260599);
     DK(KP356895867, +0.356895867892209443894399510021300583399127187);
     {
          INT m;
          /* One size-7 transform per iteration; data and twiddle pointers advance together. */
          for (m = mb, W = W + (mb * 12); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 12, MAKE_VOLATILE_STRIDE(14, rs)) {
               E T1, T1c, Te, T1h, TR, T19, Tr, T1g, TM, T1a, TE, T1i, TW, T1b;
               /* Element 0 is used as-is (no twiddle is read for it). */
               T1 = ri[0];
               T1c = ii[0];
               {
                    /* Elements 1 and 6: apply W[0..1] and W[10..11], then form sums and differences. */
                    E T3, T6, T4, TN, T9, Tc, Ta, TP, T2, T8;
                    T3 = ri[WS(rs, 1)];
                    T6 = ii[WS(rs, 1)];
                    T2 = W[0];
                    T4 = T2 * T3;
                    TN = T2 * T6;
                    T9 = ri[WS(rs, 6)];
                    Tc = ii[WS(rs, 6)];
                    T8 = W[10];
                    Ta = T8 * T9;
                    TP = T8 * Tc;
                    {
                         E T7, TO, Td, TQ, T5, Tb;
                         T5 = W[1];
                         T7 = FMA(T5, T6, T4);
                         TO = FNMS(T5, T3, TN);
                         Tb = W[11];
                         Td = FMA(Tb, Tc, Ta);
                         TQ = FNMS(Tb, T9, TP);
                         Te = T7 + Td;
                         T1h = Td - T7;
                         TR = TO - TQ;
                         T19 = TO + TQ;
                    }
               }
               {
                    /* Elements 2 and 5: apply W[2..3] and W[8..9], then form sums and differences. */
                    E Tg, Tj, Th, TI, Tm, Tp, Tn, TK, Tf, Tl;
                    Tg = ri[WS(rs, 2)];
                    Tj = ii[WS(rs, 2)];
                    Tf = W[2];
                    Th = Tf * Tg;
                    TI = Tf * Tj;
                    Tm = ri[WS(rs, 5)];
                    Tp = ii[WS(rs, 5)];
                    Tl = W[8];
                    Tn = Tl * Tm;
                    TK = Tl * Tp;
                    {
                         E Tk, TJ, Tq, TL, Ti, To;
                         Ti = W[3];
                         Tk = FMA(Ti, Tj, Th);
                         TJ = FNMS(Ti, Tg, TI);
                         To = W[9];
                         Tq = FMA(To, Tp, Tn);
                         TL = FNMS(To, Tm, TK);
                         Tr = Tk + Tq;
                         T1g = Tq - Tk;
                         TM = TJ - TL;
                         T1a = TJ + TL;
                    }
               }
               {
                    /* Elements 3 and 4: apply W[4..5] and W[6..7], then form sums and differences. */
                    E Tt, Tw, Tu, TS, Tz, TC, TA, TU, Ts, Ty;
                    Tt = ri[WS(rs, 3)];
                    Tw = ii[WS(rs, 3)];
                    Ts = W[4];
                    Tu = Ts * Tt;
                    TS = Ts * Tw;
                    Tz = ri[WS(rs, 4)];
                    TC = ii[WS(rs, 4)];
                    Ty = W[6];
                    TA = Ty * Tz;
                    TU = Ty * TC;
                    {
                         E Tx, TT, TD, TV, Tv, TB;
                         Tv = W[5];
                         Tx = FMA(Tv, Tw, Tu);
                         TT = FNMS(Tv, Tt, TS);
                         TB = W[7];
                         TD = FMA(TB, TC, TA);
                         TV = FNMS(TB, Tz, TU);
                         TE = Tx + TD;
                         T1i = TD - Tx;
                         TW = TT - TV;
                         T1b = TT + TV;
                    }
               }
               /* Output 0: plain sum of element 0 and the six twiddled elements. */
               ri[0] = T1 + Te + Tr + TE;
               ii[0] = T19 + T1a + T1b + T1c;
               {
                    /* Real parts of outputs 1 and 6 (TH -/+ KP974927912 * TY). */
                    E TG, TY, TF, TX, TH;
                    TF = FNMS(KP356895867, Tr, Te);
                    TG = FNMS(KP692021471, TF, TE);
                    TX = FMA(KP554958132, TW, TR);
                    TY = FMA(KP801937735, TX, TM);
                    TH = FNMS(KP900968867, TG, T1);
                    ri[WS(rs, 6)] = FNMS(KP974927912, TY, TH);
                    ri[WS(rs, 1)] = FMA(KP974927912, TY, TH);
               }
               {
                    /* Imaginary parts of outputs 1 and 6. */
                    E T1e, T1k, T1d, T1j, T1f;
                    T1d = FNMS(KP356895867, T1a, T19);
                    T1e = FNMS(KP692021471, T1d, T1b);
                    T1j = FMA(KP554958132, T1i, T1h);
                    T1k = FMA(KP801937735, T1j, T1g);
                    T1f = FNMS(KP900968867, T1e, T1c);
                    ii[WS(rs, 1)] = FMA(KP974927912, T1k, T1f);
                    ii[WS(rs, 6)] = FNMS(KP974927912, T1k, T1f);
               }
               {
                    /* Real parts of outputs 2 and 5. */
                    E T10, T13, TZ, T12, T11;
                    TZ = FNMS(KP356895867, Te, TE);
                    T10 = FNMS(KP692021471, TZ, Tr);
                    T12 = FMA(KP554958132, TM, TW);
                    T13 = FNMS(KP801937735, T12, TR);
                    T11 = FNMS(KP900968867, T10, T1);
                    ri[WS(rs, 5)] = FNMS(KP974927912, T13, T11);
                    ri[WS(rs, 2)] = FMA(KP974927912, T13, T11);
               }
               {
                    /* Imaginary parts of outputs 2 and 5. */
                    E T1m, T1p, T1l, T1o, T1n;
                    T1l = FNMS(KP356895867, T19, T1b);
                    T1m = FNMS(KP692021471, T1l, T1a);
                    T1o = FMA(KP554958132, T1g, T1i);
                    T1p = FNMS(KP801937735, T1o, T1h);
                    T1n = FNMS(KP900968867, T1m, T1c);
                    ii[WS(rs, 2)] = FMA(KP974927912, T1p, T1n);
                    ii[WS(rs, 5)] = FNMS(KP974927912, T1p, T1n);
               }
               {
                    /* Real parts of outputs 3 and 4. */
                    E T15, T18, T14, T17, T16;
                    T14 = FNMS(KP356895867, TE, Tr);
                    T15 = FNMS(KP692021471, T14, Te);
                    T17 = FNMS(KP554958132, TR, TM);
                    T18 = FNMS(KP801937735, T17, TW);
                    T16 = FNMS(KP900968867, T15, T1);
                    ri[WS(rs, 4)] = FNMS(KP974927912, T18, T16);
                    ri[WS(rs, 3)] = FMA(KP974927912, T18, T16);
               }
               {
                    /* Imaginary parts of outputs 3 and 4. */
                    E T1r, T1u, T1q, T1t, T1s;
                    T1q = FNMS(KP356895867, T1b, T1a);
                    T1r = FNMS(KP692021471, T1q, T19);
                    T1t = FNMS(KP554958132, T1h, T1g);
                    T1u = FNMS(KP801937735, T1t, T1i);
                    T1s = FNMS(KP900968867, T1r, T1c);
                    ii[WS(rs, 3)] = FMA(KP974927912, T1u, T1s);
                    ii[WS(rs, 4)] = FNMS(KP974927912, T1u, T1s);
               }
          }
     }
}

static const tw_instr twinstr[] = {
196
     {TW_FULL, 0, 7},
197
     {TW_NEXT, 1, 0}
198
};
199

    
200
/*
 * Descriptor passed to the registration call for t1_7.  The leading 7 and
 * the name string match the generator options (-n 7 -name t1_7); the
 * {18, 12, 54, 0} group repeats the addition / multiplication / fused
 * multiply-add counts quoted in the generator's statistics comment for
 * this variant.
 * NOTE(review): the meaning of the fourth count and of the three trailing
 * zeros is set by ct_desc, which is not visible here -- confirm.
 */
static const ct_desc desc = { 7, "t1_7", twinstr, &GENUS, {18, 12, 54, 0}, 0, 0, 0 };

/*
 * Registration entry point: hands the t1_7 kernel and its descriptor to the
 * planner `p` through kdft_dit_register.
 * NOTE(review): X() is a macro defined outside this file, presumably a
 * symbol-name wrapper -- confirm.
 */
void X(codelet_t1_7) (planner *p) {
     X(kdft_dit_register) (p, t1_7, &desc);
}
#else

/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 7 -name t1_7 -include dft/scalar/t.h */

/*
 * This function contains 72 FP additions, 60 FP multiplications,
 * (or, 36 additions, 24 multiplications, 36 fused multiply/add),
 * 29 stack variables, 6 constants, and 28 memory accesses
 */
/* Header named by the generator's "-include dft/scalar/t.h" option. */
#include "dft/scalar/t.h"

/*
 * t1_7 (variant generated without -fma): size-7 twiddle codelet working in
 * place on split real / imaginary arrays.  Machine-generated by
 * gen_twiddle; the statement order below is the generator's schedule and is
 * left untouched.
 *
 *   ri, ii   real and imaginary data.  Within one iteration, element k
 *            (k = 0..6) is read and later overwritten at
 *            ri[WS(rs, k)] / ii[WS(rs, k)] (element 0 at index 0).
 *   W        twiddle table; 12 values (six pairs) are consumed per
 *            iteration, starting at W + mb * 12.
 *   rs       stride argument passed to WS() to locate the seven elements.
 *   mb, me   half-open iteration range [mb, me).
 *   ms       increment added to ri and ii after every iteration.
 *
 * Each iteration first combines elements 1..6 with their twiddle pair
 * (W[2k-2], W[2k-1]) and then writes the seven outputs over the inputs.
 *
 * DK, FMA, FNMS, FNMA, WS and MAKE_VOLATILE_STRIDE are macros defined
 * outside this file.  NOTE(review): the comments below assume
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b; FNMA is presumably
 * -(a*b) - c -- confirm against the project headers.
 */
static void t1_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /*
      * Fixed coefficients.  By numeric value: KP623489801 = cos(2*pi/7),
      * KP222520933 = -cos(4*pi/7), KP900968867 = -cos(6*pi/7),
      * KP781831482 = sin(2*pi/7), KP974927912 = sin(4*pi/7),
      * KP433883739 = sin(6*pi/7).
      */
     DK(KP222520933, +0.222520933956314404288902564496794759466355569);
     DK(KP900968867, +0.900968867902419126236102319507445051165919162);
     DK(KP623489801, +0.623489801858733530525004884004239810632274731);
     DK(KP433883739, +0.433883739117558120475768332848358754609990728);
     DK(KP781831482, +0.781831482468029808708444526674057750232334519);
     DK(KP974927912, +0.974927912181823607018131682993931217232785801);
     {
          INT m;
          /* One size-7 transform per iteration; data and twiddle pointers advance together. */
          for (m = mb, W = W + (mb * 12); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 12, MAKE_VOLATILE_STRIDE(14, rs)) {
               E T1, TR, Tc, TS, TC, TO, Tn, TT, TI, TP, Ty, TU, TF, TQ;
               /* Element 0 is used as-is (no twiddle is read for it). */
               T1 = ri[0];
               TR = ii[0];
               {
                    /* Elements 1 and 6: apply W[0..1] and W[10..11], then form sums and differences. */
                    E T6, TA, Tb, TB;
                    {
                         E T3, T5, T2, T4;
                         T3 = ri[WS(rs, 1)];
                         T5 = ii[WS(rs, 1)];
                         T2 = W[0];
                         T4 = W[1];
                         T6 = FMA(T2, T3, T4 * T5);
                         TA = FNMS(T4, T3, T2 * T5);
                    }
                    {
                         E T8, Ta, T7, T9;
                         T8 = ri[WS(rs, 6)];
                         Ta = ii[WS(rs, 6)];
                         T7 = W[10];
                         T9 = W[11];
                         Tb = FMA(T7, T8, T9 * Ta);
                         TB = FNMS(T9, T8, T7 * Ta);
                    }
                    Tc = T6 + Tb;
                    TS = Tb - T6;
                    TC = TA - TB;
                    TO = TA + TB;
               }
               {
                    /* Elements 2 and 5: apply W[2..3] and W[8..9], then form sums and differences. */
                    E Th, TG, Tm, TH;
                    {
                         E Te, Tg, Td, Tf;
                         Te = ri[WS(rs, 2)];
                         Tg = ii[WS(rs, 2)];
                         Td = W[2];
                         Tf = W[3];
                         Th = FMA(Td, Te, Tf * Tg);
                         TG = FNMS(Tf, Te, Td * Tg);
                    }
                    {
                         E Tj, Tl, Ti, Tk;
                         Tj = ri[WS(rs, 5)];
                         Tl = ii[WS(rs, 5)];
                         Ti = W[8];
                         Tk = W[9];
                         Tm = FMA(Ti, Tj, Tk * Tl);
                         TH = FNMS(Tk, Tj, Ti * Tl);
                    }
                    Tn = Th + Tm;
                    TT = Tm - Th;
                    TI = TG - TH;
                    TP = TG + TH;
               }
               {
                    /* Elements 3 and 4: apply W[4..5] and W[6..7], then form sums and differences. */
                    E Ts, TD, Tx, TE;
                    {
                         E Tp, Tr, To, Tq;
                         Tp = ri[WS(rs, 3)];
                         Tr = ii[WS(rs, 3)];
                         To = W[4];
                         Tq = W[5];
                         Ts = FMA(To, Tp, Tq * Tr);
                         TD = FNMS(Tq, Tp, To * Tr);
                    }
                    {
                         E Tu, Tw, Tt, Tv;
                         Tu = ri[WS(rs, 4)];
                         Tw = ii[WS(rs, 4)];
                         Tt = W[6];
                         Tv = W[7];
                         Tx = FMA(Tt, Tu, Tv * Tw);
                         TE = FNMS(Tv, Tu, Tt * Tw);
                    }
                    Ty = Ts + Tx;
                    TU = Tx - Ts;
                    TF = TD - TE;
                    TQ = TD + TE;
               }
               /* Output 0: plain sum of element 0 and the six twiddled elements. */
               ri[0] = T1 + Tc + Tn + Ty;
               ii[0] = TO + TP + TQ + TR;
               {
                    /* Outputs 2 and 5: sum and difference of two partial results (Tz, TJ real; TY, TX imaginary). */
                    E TJ, Tz, TX, TY;
                    TJ = FNMS(KP781831482, TF, KP974927912 * TC) - (KP433883739 * TI);
                    Tz = FMA(KP623489801, Ty, T1) + FNMA(KP900968867, Tn, KP222520933 * Tc);
                    ri[WS(rs, 5)] = Tz - TJ;
                    ri[WS(rs, 2)] = Tz + TJ;
                    TX = FNMS(KP781831482, TU, KP974927912 * TS) - (KP433883739 * TT);
                    TY = FMA(KP623489801, TQ, TR) + FNMA(KP900968867, TP, KP222520933 * TO);
                    ii[WS(rs, 2)] = TX + TY;
                    ii[WS(rs, 5)] = TY - TX;
               }
               {
                    /* Outputs 1 and 6: sum and difference of two partial results (TK, TL real; TW, TV imaginary). */
                    E TL, TK, TV, TW;
                    TL = FMA(KP781831482, TC, KP974927912 * TI) + (KP433883739 * TF);
                    TK = FMA(KP623489801, Tc, T1) + FNMA(KP900968867, Ty, KP222520933 * Tn);
                    ri[WS(rs, 6)] = TK - TL;
                    ri[WS(rs, 1)] = TK + TL;
                    TV = FMA(KP781831482, TS, KP974927912 * TT) + (KP433883739 * TU);
                    TW = FMA(KP623489801, TO, TR) + FNMA(KP900968867, TQ, KP222520933 * TP);
                    ii[WS(rs, 1)] = TV + TW;
                    ii[WS(rs, 6)] = TW - TV;
               }
               {
                    /* Outputs 3 and 4: sum and difference of two partial results (TM, TN real; T10, TZ imaginary). */
                    E TN, TM, TZ, T10;
                    TN = FMA(KP433883739, TC, KP974927912 * TF) - (KP781831482 * TI);
                    TM = FMA(KP623489801, Tn, T1) + FNMA(KP222520933, Ty, KP900968867 * Tc);
                    ri[WS(rs, 4)] = TM - TN;
                    ri[WS(rs, 3)] = TM + TN;
                    TZ = FMA(KP433883739, TS, KP974927912 * TU) - (KP781831482 * TT);
                    T10 = FMA(KP623489801, TP, TR) + FNMA(KP222520933, TQ, KP900968867 * TO);
                    ii[WS(rs, 3)] = TZ + T10;
                    ii[WS(rs, 4)] = T10 - TZ;
               }
          }
     }
}

static const tw_instr twinstr[] = {
345
     {TW_FULL, 0, 7},
346
     {TW_NEXT, 1, 0}
347
};
348

    
349
/*
 * Descriptor passed to the registration call for t1_7.  The leading 7 and
 * the name string match the generator options (-n 7 -name t1_7); the
 * {36, 24, 36, 0} group repeats the addition / multiplication / fused
 * multiply-add counts quoted in the generator's statistics comment for
 * this variant.
 * NOTE(review): the meaning of the fourth count and of the three trailing
 * zeros is set by ct_desc, which is not visible here -- confirm.
 */
static const ct_desc desc = { 7, "t1_7", twinstr, &GENUS, {36, 24, 36, 0}, 0, 0, 0 };

/*
 * Registration entry point: hands the t1_7 kernel and its descriptor to the
 * planner `p` through kdft_dit_register.
 * NOTE(review): X() is a macro defined outside this file, presumably a
 * symbol-name wrapper -- confirm.
 */
void X(codelet_t1_7) (planner *p) {
     X(kdft_dit_register) (p, t1_7, &desc);
}
#endif