To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t1_8.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (9.12 KB)

/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Thu May 24 08:04:13 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 8 -name t1_8 -include dft/scalar/t.h */
29

    
30
/*
 * This function contains 66 FP additions, 36 FP multiplications,
 * (or, 44 additions, 14 multiplications, 22 fused multiply/add),
 * 34 stack variables, 1 constants, and 32 memory accesses
 */
35
#include "dft/scalar/t.h"
36

    
37
/*
 * t1_8 (FMA-preferring variant): size-8 twiddle codelet, produced by
 * gen_twiddle with "-fma -n 8".  The arithmetic below is machine generated;
 * statement order and operand grouping must not be changed by hand.
 *
 * For each m in [mb, me) the loop body:
 *   - reads eight (ri, ii) pairs at element offsets 0 and WS(rs, k), k = 1..7;
 *   - combines pair k (k = 1..7) with the coefficient pair W[2k-2], W[2k-1];
 *   - writes eight (ri, ii) results back to the same offsets, in place.
 * After each iteration ri and ii advance by ms and W advances by 14.
 *
 * Parameters:
 *   ri, ii  - data arrays, read and overwritten in place (presumably the real
 *             and imaginary parts of the complex data -- confirm in the
 *             project headers).
 *   W       - coefficient table; 14 values are consumed per iteration and the
 *             table is pre-offset by mb * 14 before the first iteration.
 *   rs      - stride handed to WS() to compute the offset of element k.
 *   mb, me  - half-open iteration range [mb, me).
 *   ms      - per-iteration increment applied to ri and ii.
 *
 * NOTE(review): the inline formulas in the comments below assume
 * FMA(a, b, c) == a * b + c and FNMS(a, b, c) == c - a * b; those macros, as
 * well as DK, WS, E, R, INT and MAKE_VOLATILE_STRIDE, are defined elsewhere
 * in the project -- confirm there.
 */
static void t1_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* The single floating-point constant used by this codelet: 1/sqrt(2). */
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
          INT m;
          for (m = mb, W = W + (mb * 14); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs)) {
               E T1, T1m, T7, T1l, Tk, TS, Te, TQ, TF, T14, TL, T16, T12, T17, Ts;
               E TX, Ty, TZ, TV, T10;
               /* Element 0 is used as is (no coefficient pair is applied). */
               T1 = ri[0];
               T1m = ii[0];
               {
                    /* Element 4 with (W[6], W[7]):
                       T7 = W[6]*ri4 + W[7]*ii4, T1l = W[6]*ii4 - W[7]*ri4. */
                    E T3, T6, T4, T1k, T2, T5;
                    T3 = ri[WS(rs, 4)];
                    T6 = ii[WS(rs, 4)];
                    T2 = W[6];
                    T4 = T2 * T3;
                    T1k = T2 * T6;
                    T5 = W[7];
                    T7 = FMA(T5, T6, T4);
                    T1l = FNMS(T5, T3, T1k);
               }
               {
                    /* Element 6 with (W[10], W[11]). */
                    E Tg, Tj, Th, TR, Tf, Ti;
                    Tg = ri[WS(rs, 6)];
                    Tj = ii[WS(rs, 6)];
                    Tf = W[10];
                    Th = Tf * Tg;
                    TR = Tf * Tj;
                    Ti = W[11];
                    Tk = FMA(Ti, Tj, Th);
                    TS = FNMS(Ti, Tg, TR);
               }
               {
                    /* Element 2 with (W[2], W[3]). */
                    E Ta, Td, Tb, TP, T9, Tc;
                    Ta = ri[WS(rs, 2)];
                    Td = ii[WS(rs, 2)];
                    T9 = W[2];
                    Tb = T9 * Ta;
                    TP = T9 * Td;
                    Tc = W[3];
                    Te = FMA(Tc, Td, Tb);
                    TQ = FNMS(Tc, Ta, TP);
               }
               {
                    /* Elements 7 (W[12], W[13]) and 3 (W[4], W[5]), plus the
                       differences of the two results (T12, T17). */
                    E TB, TE, TC, T13, TH, TK, TI, T15, TA, TG, TD, TJ;
                    TB = ri[WS(rs, 7)];
                    TE = ii[WS(rs, 7)];
                    TA = W[12];
                    TC = TA * TB;
                    T13 = TA * TE;
                    TH = ri[WS(rs, 3)];
                    TK = ii[WS(rs, 3)];
                    TG = W[4];
                    TI = TG * TH;
                    T15 = TG * TK;
                    TD = W[13];
                    TF = FMA(TD, TE, TC);
                    T14 = FNMS(TD, TB, T13);
                    TJ = W[5];
                    TL = FMA(TJ, TK, TI);
                    T16 = FNMS(TJ, TH, T15);
                    T12 = TF - TL;
                    T17 = T14 - T16;
               }
               {
                    /* Elements 1 (W[0], W[1]) and 5 (W[8], W[9]), plus the
                       differences of the two results (TV, T10). */
                    E To, Tr, Tp, TW, Tu, Tx, Tv, TY, Tn, Tt, Tq, Tw;
                    To = ri[WS(rs, 1)];
                    Tr = ii[WS(rs, 1)];
                    Tn = W[0];
                    Tp = Tn * To;
                    TW = Tn * Tr;
                    Tu = ri[WS(rs, 5)];
                    Tx = ii[WS(rs, 5)];
                    Tt = W[8];
                    Tv = Tt * Tu;
                    TY = Tt * Tx;
                    Tq = W[1];
                    Ts = FMA(Tq, Tr, Tp);
                    TX = FNMS(Tq, To, TW);
                    Tw = W[9];
                    Ty = FMA(Tw, Tx, Tv);
                    TZ = FNMS(Tw, Tu, TY);
                    TV = Ts - Ty;
                    T10 = TX - TZ;
               }
               {
                    /* Results for the odd offsets 1, 3, 5, 7: built from the
                       pairwise differences and scaled by KP707106781. */
                    E TU, T1a, T1t, T1v, T19, T1w, T1d, T1u;
                    {
                         E TO, TT, T1r, T1s;
                         TO = T1 - T7;
                         TT = TQ - TS;
                         TU = TO + TT;
                         T1a = TO - TT;
                         T1r = T1m - T1l;
                         T1s = Te - Tk;
                         T1t = T1r - T1s;
                         T1v = T1s + T1r;
                    }
                    {
                         E T11, T18, T1b, T1c;
                         T11 = TV + T10;
                         T18 = T12 - T17;
                         T19 = T11 + T18;
                         T1w = T18 - T11;
                         T1b = T10 - TV;
                         T1c = T12 + T17;
                         T1d = T1b - T1c;
                         T1u = T1b + T1c;
                    }
                    ri[WS(rs, 5)] = FNMS(KP707106781, T19, TU);
                    ii[WS(rs, 5)] = FNMS(KP707106781, T1u, T1t);
                    ri[WS(rs, 1)] = FMA(KP707106781, T19, TU);
                    ii[WS(rs, 1)] = FMA(KP707106781, T1u, T1t);
                    ri[WS(rs, 7)] = FNMS(KP707106781, T1d, T1a);
                    ii[WS(rs, 7)] = FNMS(KP707106781, T1w, T1v);
                    ri[WS(rs, 3)] = FMA(KP707106781, T1d, T1a);
                    ii[WS(rs, 3)] = FMA(KP707106781, T1w, T1v);
               }
               {
                    /* Results for the even offsets 0, 2, 4, 6: built from the
                       pairwise sums; no constant multiplication is needed. */
                    E Tm, T1e, T1o, T1q, TN, T1p, T1h, T1i;
                    {
                         E T8, Tl, T1j, T1n;
                         T8 = T1 + T7;
                         Tl = Te + Tk;
                         Tm = T8 + Tl;
                         T1e = T8 - Tl;
                         T1j = TQ + TS;
                         T1n = T1l + T1m;
                         T1o = T1j + T1n;
                         T1q = T1n - T1j;
                    }
                    {
                         E Tz, TM, T1f, T1g;
                         Tz = Ts + Ty;
                         TM = TF + TL;
                         TN = Tz + TM;
                         T1p = TM - Tz;
                         T1f = TX + TZ;
                         T1g = T14 + T16;
                         T1h = T1f - T1g;
                         T1i = T1f + T1g;
                    }
                    ri[WS(rs, 4)] = Tm - TN;
                    ii[WS(rs, 4)] = T1o - T1i;
                    ri[0] = Tm + TN;
                    ii[0] = T1i + T1o;
                    ri[WS(rs, 6)] = T1e - T1h;
                    ii[WS(rs, 6)] = T1q - T1p;
                    ri[WS(rs, 2)] = T1e + T1h;
                    ii[WS(rs, 2)] = T1p + T1q;
               }
          }
     }
}
191

    
192
static const tw_instr twinstr[] = {
193
     {TW_FULL, 0, 8},
194
     {TW_NEXT, 1, 0}
195
};
196

    
197
/*
 * Codelet descriptor for the FMA variant: the value 8 (the generator was run
 * with -n 8; presumably the radix -- confirm against ct_desc), the codelet
 * name "t1_8", the twiddle instruction table, &GENUS, and the operation
 * counts {44, 14, 22, 0}.  The first three counts equal the additions,
 * multiplications and fused multiply/adds quoted in this branch's
 * operation-count comment.  The three trailing fields are zero.
 */
static const ct_desc desc = { 8, "t1_8", twinstr, &GENUS, {44, 14, 22, 0}, 0, 0, 0 };
198

    
199
/*
 * Registration entry point: hands the t1_8 kernel and its descriptor to the
 * planner `p` through X(kdft_dit_register).  X() is a project macro applied
 * to both this function's name and the callee's name.
 */
void X(codelet_t1_8) (planner *p) {
     X(kdft_dit_register) (p, t1_8, &desc);
}
202
#else
203

    
204
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 8 -name t1_8 -include dft/scalar/t.h */
205

    
206
/*
 * This function contains 66 FP additions, 32 FP multiplications,
 * (or, 52 additions, 18 multiplications, 14 fused multiply/add),
 * 28 stack variables, 1 constants, and 32 memory accesses
 */
211
#include "dft/scalar/t.h"
212

    
213
/*
 * t1_8 (non-FMA-preferring variant): size-8 twiddle codelet, produced by
 * gen_twiddle with "-n 8" (no -fma).  The arithmetic below is machine
 * generated; statement order and operand grouping must not be changed by
 * hand.
 *
 * For each m in [mb, me) the loop body:
 *   - reads eight (ri, ii) pairs at element offsets 0 and WS(rs, k), k = 1..7;
 *   - combines pair k (k = 1..7) with the coefficient pair W[2k-2], W[2k-1];
 *   - writes eight (ri, ii) results back to the same offsets, in place.
 * After each iteration ri and ii advance by ms and W advances by 14.
 *
 * Parameters:
 *   ri, ii  - data arrays, read and overwritten in place (presumably the real
 *             and imaginary parts of the complex data -- confirm in the
 *             project headers).
 *   W       - coefficient table; 14 values are consumed per iteration and the
 *             table is pre-offset by mb * 14 before the first iteration.
 *   rs      - stride handed to WS() to compute the offset of element k.
 *   mb, me  - half-open iteration range [mb, me).
 *   ms      - per-iteration increment applied to ri and ii.
 *
 * NOTE(review): the inline formulas in the comments below assume
 * FMA(a, b, c) == a * b + c and FNMS(a, b, c) == c - a * b; those macros, as
 * well as DK, WS, E, R, INT and MAKE_VOLATILE_STRIDE, are defined elsewhere
 * in the project -- confirm there.
 */
static void t1_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* The single floating-point constant used by this codelet: 1/sqrt(2). */
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
          INT m;
          for (m = mb, W = W + (mb * 14); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs)) {
               E T7, T1e, TH, T19, TF, T13, TR, TU, Ti, T1f, TK, T16, Tu, T12, TM;
               E TP;
               {
                    /* Element 0 (used as is) combined with element 4, which
                       is first combined with (W[6], W[7]):
                       T6 = W[6]*ri4 + W[7]*ii4, T17 = W[6]*ii4 - W[7]*ri4. */
                    E T1, T18, T6, T17;
                    T1 = ri[0];
                    T18 = ii[0];
                    {
                         E T3, T5, T2, T4;
                         T3 = ri[WS(rs, 4)];
                         T5 = ii[WS(rs, 4)];
                         T2 = W[6];
                         T4 = W[7];
                         T6 = FMA(T2, T3, T4 * T5);
                         T17 = FNMS(T4, T3, T2 * T5);
                    }
                    T7 = T1 + T6;
                    T1e = T18 - T17;
                    TH = T1 - T6;
                    T19 = T17 + T18;
               }
               {
                    /* Elements 7 (W[12], W[13]) and 3 (W[4], W[5]); their
                       sums and differences. */
                    E Tz, TS, TE, TT;
                    {
                         E Tw, Ty, Tv, Tx;
                         Tw = ri[WS(rs, 7)];
                         Ty = ii[WS(rs, 7)];
                         Tv = W[12];
                         Tx = W[13];
                         Tz = FMA(Tv, Tw, Tx * Ty);
                         TS = FNMS(Tx, Tw, Tv * Ty);
                    }
                    {
                         E TB, TD, TA, TC;
                         TB = ri[WS(rs, 3)];
                         TD = ii[WS(rs, 3)];
                         TA = W[4];
                         TC = W[5];
                         TE = FMA(TA, TB, TC * TD);
                         TT = FNMS(TC, TB, TA * TD);
                    }
                    TF = Tz + TE;
                    T13 = TS + TT;
                    TR = Tz - TE;
                    TU = TS - TT;
               }
               {
                    /* Elements 2 (W[2], W[3]) and 6 (W[10], W[11]); their
                       sums and differences. */
                    E Tc, TI, Th, TJ;
                    {
                         E T9, Tb, T8, Ta;
                         T9 = ri[WS(rs, 2)];
                         Tb = ii[WS(rs, 2)];
                         T8 = W[2];
                         Ta = W[3];
                         Tc = FMA(T8, T9, Ta * Tb);
                         TI = FNMS(Ta, T9, T8 * Tb);
                    }
                    {
                         E Te, Tg, Td, Tf;
                         Te = ri[WS(rs, 6)];
                         Tg = ii[WS(rs, 6)];
                         Td = W[10];
                         Tf = W[11];
                         Th = FMA(Td, Te, Tf * Tg);
                         TJ = FNMS(Tf, Te, Td * Tg);
                    }
                    Ti = Tc + Th;
                    T1f = Tc - Th;
                    TK = TI - TJ;
                    T16 = TI + TJ;
               }
               {
                    /* Elements 1 (W[0], W[1]) and 5 (W[8], W[9]); their
                       sums and differences. */
                    E To, TN, Tt, TO;
                    {
                         E Tl, Tn, Tk, Tm;
                         Tl = ri[WS(rs, 1)];
                         Tn = ii[WS(rs, 1)];
                         Tk = W[0];
                         Tm = W[1];
                         To = FMA(Tk, Tl, Tm * Tn);
                         TN = FNMS(Tm, Tl, Tk * Tn);
                    }
                    {
                         E Tq, Ts, Tp, Tr;
                         Tq = ri[WS(rs, 5)];
                         Ts = ii[WS(rs, 5)];
                         Tp = W[8];
                         Tr = W[9];
                         Tt = FMA(Tp, Tq, Tr * Ts);
                         TO = FNMS(Tr, Tq, Tp * Ts);
                    }
                    Tu = To + Tt;
                    T12 = TN + TO;
                    TM = To - Tt;
                    TP = TN - TO;
               }
               {
                    /* Final combination and in-place stores.  Offsets 0, 2,
                       4 and 6 come from the pairwise sums; offsets 1, 3, 5
                       and 7 come from the pairwise differences and need the
                       KP707106781 scaling. */
                    E Tj, TG, T1b, T1c;
                    Tj = T7 + Ti;
                    TG = Tu + TF;
                    ri[WS(rs, 4)] = Tj - TG;
                    ri[0] = Tj + TG;
                    {
                         E T15, T1a, T11, T14;
                         T15 = T12 + T13;
                         T1a = T16 + T19;
                         ii[0] = T15 + T1a;
                         ii[WS(rs, 4)] = T1a - T15;
                         T11 = T7 - Ti;
                         T14 = T12 - T13;
                         ri[WS(rs, 6)] = T11 - T14;
                         ri[WS(rs, 2)] = T11 + T14;
                    }
                    T1b = TF - Tu;
                    T1c = T19 - T16;
                    ii[WS(rs, 2)] = T1b + T1c;
                    ii[WS(rs, 6)] = T1c - T1b;
                    {
                         E TX, T1g, T10, T1d, TY, TZ;
                         TX = TH - TK;
                         T1g = T1e - T1f;
                         TY = TP - TM;
                         TZ = TR + TU;
                         T10 = KP707106781 * (TY - TZ);
                         T1d = KP707106781 * (TY + TZ);
                         ri[WS(rs, 7)] = TX - T10;
                         ii[WS(rs, 5)] = T1g - T1d;
                         ri[WS(rs, 3)] = TX + T10;
                         ii[WS(rs, 1)] = T1d + T1g;
                    }
                    {
                         E TL, T1i, TW, T1h, TQ, TV;
                         TL = TH + TK;
                         T1i = T1f + T1e;
                         TQ = TM + TP;
                         TV = TR - TU;
                         TW = KP707106781 * (TQ + TV);
                         T1h = KP707106781 * (TV - TQ);
                         ri[WS(rs, 5)] = TL - TW;
                         ii[WS(rs, 7)] = T1i - T1h;
                         ri[WS(rs, 1)] = TL + TW;
                         ii[WS(rs, 3)] = T1h + T1i;
                    }
               }
          }
     }
}
365

    
366
static const tw_instr twinstr[] = {
367
     {TW_FULL, 0, 8},
368
     {TW_NEXT, 1, 0}
369
};
370

    
371
/*
 * Codelet descriptor for the non-FMA variant: the value 8 (the generator was
 * run with -n 8; presumably the radix -- confirm against ct_desc), the
 * codelet name "t1_8", the twiddle instruction table, &GENUS, and the
 * operation counts {52, 18, 14, 0}.  The first three counts equal the
 * additions, multiplications and fused multiply/adds quoted in this branch's
 * operation-count comment.  The three trailing fields are zero.
 */
static const ct_desc desc = { 8, "t1_8", twinstr, &GENUS, {52, 18, 14, 0}, 0, 0, 0 };
372

    
373
/*
 * Registration entry point: hands the t1_8 kernel and its descriptor to the
 * planner `p` through X(kdft_dit_register).  X() is a project macro applied
 * to both this function's name and the callee's name.
 */
void X(codelet_t1_8) (planner *p) {
     X(kdft_dit_register) (p, t1_8, &desc);
}
376
#endif