To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t1_16.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (19.5 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:15 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 16 -name t1_16 -include dft/scalar/t.h */
29

    
30
/*
31
 * This function contains 174 FP additions, 100 FP multiplications,
32
 * (or, 104 additions, 30 multiplications, 70 fused multiply/add),
33
 * 60 stack variables, 3 constants, and 64 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36

    
37
/*
 * t1_16, FMA variant (compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined): size-16 twiddle codelet emitted by
 * genfft's gen_twiddle with -fma -n 16.  It is handed to kdft_dit_register
 * by codelet_t1_16 further down in this branch.
 *
 * For every m in [mb, me) one loop iteration:
 *   - reads the 16 points ri[WS(rs, k)] / ii[WS(rs, k)], k = 0..15;
 *   - combines each point k >= 1 with the twiddle pair W[2k-2], W[2k-1]
 *     (point 0 is used unmodified);
 *   - runs the add/sub and FMA/FNMS network below and stores the 16 results
 *     back into the same ri/ii slots, i.e. the data is overwritten in place.
 * Before the first iteration W is offset by mb * 30; after each iteration
 * ri and ii advance by ms and W advances by 30 (two values for each of the
 * 15 twiddled points).
 *
 * Parameters, as used in this function:
 *   ri, ii  arrays that are read and then overwritten (presumably the real
 *           and imaginary parts of the data -- naming-based, confirm
 *           against dft/codelet-dft.h)
 *   W       read-only twiddle table, 30 values consumed per iteration
 *   rs      stride passed to WS() to address point k within one transform
 *   mb, me  half-open range of the loop counter m
 *   ms      pointer increment applied to ri and ii between iterations
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file.  FMA(a, b, c) is
 * presumably a * b + c and FNMS(a, b, c) presumably c - a * b -- confirm
 * in the FFTW headers.
 */
static void t1_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     /* Numerically: cos(pi/8), sqrt(2) - 1 (= tan(pi/8)) and 1/sqrt(2). */
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
40
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
41
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
42
     {
43
          INT m;
44
          for (m = mb, W = W + (mb * 30); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 30, MAKE_VOLATILE_STRIDE(32, rs)) {
45
               /* Values that survive from the load blocks into the output blocks. */
               E T8, T3z, T1I, T3o, T1s, T35, T2o, T2r, T1F, T36, T2p, T2w, Tl, T3A, T1N;
46
               E T3k, Tz, T2V, T1T, T1U, T11, T30, T29, T2c, T1e, T31, T2a, T2h, TM, T2W;
47
               E T1W, T21;
48
               /* Point 0 (no twiddle) and point 8 (W[14], W[15]): sums and differences. */
               {
49
                    E T1, T3n, T3, T6, T4, T3l, T2, T7, T3m, T5;
50
                    T1 = ri[0];
51
                    T3n = ii[0];
52
                    T3 = ri[WS(rs, 8)];
53
                    T6 = ii[WS(rs, 8)];
54
                    T2 = W[14];
55
                    T4 = T2 * T3;
56
                    T3l = T2 * T6;
57
                    T5 = W[15];
58
                    T7 = FMA(T5, T6, T4);
59
                    T3m = FNMS(T5, T3, T3l);
60
                    T8 = T1 + T7;
61
                    T3z = T3n - T3m;
62
                    T1I = T1 - T7;
63
                    T3o = T3m + T3n;
64
               }
65
               /* Points 15 (W[28], W[29]) and 7 (W[12], W[13]). */
               {
66
                    E T1h, T1k, T1i, T2k, T1n, T1q, T1o, T2m, T1g, T1m;
67
                    T1h = ri[WS(rs, 15)];
68
                    T1k = ii[WS(rs, 15)];
69
                    T1g = W[28];
70
                    T1i = T1g * T1h;
71
                    T2k = T1g * T1k;
72
                    T1n = ri[WS(rs, 7)];
73
                    T1q = ii[WS(rs, 7)];
74
                    T1m = W[12];
75
                    T1o = T1m * T1n;
76
                    T2m = T1m * T1q;
77
                    {
78
                         E T1l, T2l, T1r, T2n, T1j, T1p;
79
                         T1j = W[29];
80
                         T1l = FMA(T1j, T1k, T1i);
81
                         T2l = FNMS(T1j, T1h, T2k);
82
                         T1p = W[13];
83
                         T1r = FMA(T1p, T1q, T1o);
84
                         T2n = FNMS(T1p, T1n, T2m);
85
                         T1s = T1l + T1r;
86
                         T35 = T2l + T2n;
87
                         T2o = T2l - T2n;
88
                         T2r = T1l - T1r;
89
                    }
90
               }
91
               /* Points 3 (W[4], W[5]) and 11 (W[20], W[21]). */
               {
92
                    E T1u, T1x, T1v, T2s, T1A, T1D, T1B, T2u, T1t, T1z;
93
                    T1u = ri[WS(rs, 3)];
94
                    T1x = ii[WS(rs, 3)];
95
                    T1t = W[4];
96
                    T1v = T1t * T1u;
97
                    T2s = T1t * T1x;
98
                    T1A = ri[WS(rs, 11)];
99
                    T1D = ii[WS(rs, 11)];
100
                    T1z = W[20];
101
                    T1B = T1z * T1A;
102
                    T2u = T1z * T1D;
103
                    {
104
                         E T1y, T2t, T1E, T2v, T1w, T1C;
105
                         T1w = W[5];
106
                         T1y = FMA(T1w, T1x, T1v);
107
                         T2t = FNMS(T1w, T1u, T2s);
108
                         T1C = W[21];
109
                         T1E = FMA(T1C, T1D, T1B);
110
                         T2v = FNMS(T1C, T1A, T2u);
111
                         T1F = T1y + T1E;
112
                         T36 = T2t + T2v;
113
                         T2p = T1y - T1E;
114
                         T2w = T2t - T2v;
115
                    }
116
               }
117
               /* Points 4 (W[6], W[7]) and 12 (W[22], W[23]). */
               {
118
                    E Ta, Td, Tb, T1J, Tg, Tj, Th, T1L, T9, Tf;
119
                    Ta = ri[WS(rs, 4)];
120
                    Td = ii[WS(rs, 4)];
121
                    T9 = W[6];
122
                    Tb = T9 * Ta;
123
                    T1J = T9 * Td;
124
                    Tg = ri[WS(rs, 12)];
125
                    Tj = ii[WS(rs, 12)];
126
                    Tf = W[22];
127
                    Th = Tf * Tg;
128
                    T1L = Tf * Tj;
129
                    {
130
                         E Te, T1K, Tk, T1M, Tc, Ti;
131
                         Tc = W[7];
132
                         Te = FMA(Tc, Td, Tb);
133
                         T1K = FNMS(Tc, Ta, T1J);
134
                         Ti = W[23];
135
                         Tk = FMA(Ti, Tj, Th);
136
                         T1M = FNMS(Ti, Tg, T1L);
137
                         Tl = Te + Tk;
138
                         T3A = Te - Tk;
139
                         T1N = T1K - T1M;
140
                         T3k = T1K + T1M;
141
                    }
142
               }
143
               /* Points 2 (W[2], W[3]) and 10 (W[18], W[19]). */
               {
144
                    E To, Tr, Tp, T1P, Tu, Tx, Tv, T1R, Tn, Tt;
145
                    To = ri[WS(rs, 2)];
146
                    Tr = ii[WS(rs, 2)];
147
                    Tn = W[2];
148
                    Tp = Tn * To;
149
                    T1P = Tn * Tr;
150
                    Tu = ri[WS(rs, 10)];
151
                    Tx = ii[WS(rs, 10)];
152
                    Tt = W[18];
153
                    Tv = Tt * Tu;
154
                    T1R = Tt * Tx;
155
                    {
156
                         E Ts, T1Q, Ty, T1S, Tq, Tw;
157
                         Tq = W[3];
158
                         Ts = FMA(Tq, Tr, Tp);
159
                         T1Q = FNMS(Tq, To, T1P);
160
                         Tw = W[19];
161
                         Ty = FMA(Tw, Tx, Tv);
162
                         T1S = FNMS(Tw, Tu, T1R);
163
                         Tz = Ts + Ty;
164
                         T2V = T1Q + T1S;
165
                         T1T = T1Q - T1S;
166
                         T1U = Ts - Ty;
167
                    }
168
               }
169
               /* Points 1 (W[0], W[1]) and 9 (W[16], W[17]). */
               {
170
                    E TQ, TT, TR, T25, TW, TZ, TX, T27, TP, TV;
171
                    TQ = ri[WS(rs, 1)];
172
                    TT = ii[WS(rs, 1)];
173
                    TP = W[0];
174
                    TR = TP * TQ;
175
                    T25 = TP * TT;
176
                    TW = ri[WS(rs, 9)];
177
                    TZ = ii[WS(rs, 9)];
178
                    TV = W[16];
179
                    TX = TV * TW;
180
                    T27 = TV * TZ;
181
                    {
182
                         E TU, T26, T10, T28, TS, TY;
183
                         TS = W[1];
184
                         TU = FMA(TS, TT, TR);
185
                         T26 = FNMS(TS, TQ, T25);
186
                         TY = W[17];
187
                         T10 = FMA(TY, TZ, TX);
188
                         T28 = FNMS(TY, TW, T27);
189
                         T11 = TU + T10;
190
                         T30 = T26 + T28;
191
                         T29 = T26 - T28;
192
                         T2c = TU - T10;
193
                    }
194
               }
195
               /* Points 5 (W[8], W[9]) and 13 (W[24], W[25]). */
               {
196
                    E T13, T16, T14, T2d, T19, T1c, T1a, T2f, T12, T18;
197
                    T13 = ri[WS(rs, 5)];
198
                    T16 = ii[WS(rs, 5)];
199
                    T12 = W[8];
200
                    T14 = T12 * T13;
201
                    T2d = T12 * T16;
202
                    T19 = ri[WS(rs, 13)];
203
                    T1c = ii[WS(rs, 13)];
204
                    T18 = W[24];
205
                    T1a = T18 * T19;
206
                    T2f = T18 * T1c;
207
                    {
208
                         E T17, T2e, T1d, T2g, T15, T1b;
209
                         T15 = W[9];
210
                         T17 = FMA(T15, T16, T14);
211
                         T2e = FNMS(T15, T13, T2d);
212
                         T1b = W[25];
213
                         T1d = FMA(T1b, T1c, T1a);
214
                         T2g = FNMS(T1b, T19, T2f);
215
                         T1e = T17 + T1d;
216
                         T31 = T2e + T2g;
217
                         T2a = T17 - T1d;
218
                         T2h = T2e - T2g;
219
                    }
220
               }
221
               /* Points 14 (W[26], W[27]) and 6 (W[10], W[11]). */
               {
222
                    E TB, TE, TC, T1X, TH, TK, TI, T1Z, TA, TG;
223
                    TB = ri[WS(rs, 14)];
224
                    TE = ii[WS(rs, 14)];
225
                    TA = W[26];
226
                    TC = TA * TB;
227
                    T1X = TA * TE;
228
                    TH = ri[WS(rs, 6)];
229
                    TK = ii[WS(rs, 6)];
230
                    TG = W[10];
231
                    TI = TG * TH;
232
                    T1Z = TG * TK;
233
                    {
234
                         E TF, T1Y, TL, T20, TD, TJ;
235
                         TD = W[27];
236
                         TF = FMA(TD, TE, TC);
237
                         T1Y = FNMS(TD, TB, T1X);
238
                         TJ = W[11];
239
                         TL = FMA(TJ, TK, TI);
240
                         T20 = FNMS(TJ, TH, T1Z);
241
                         TM = TF + TL;
242
                         T2W = T1Y + T20;
243
                         T1W = TF - TL;
244
                         T21 = T1Y - T20;
245
                    }
246
               }
247
               /* Stores to slots 8, 0, 12 and 4: additions and subtractions only. */
               {
248
                    E TO, T3e, T3q, T3s, T1H, T3r, T3h, T3i;
249
                    {
250
                         E Tm, TN, T3j, T3p;
251
                         Tm = T8 + Tl;
252
                         TN = Tz + TM;
253
                         TO = Tm + TN;
254
                         T3e = Tm - TN;
255
                         T3j = T2V + T2W;
256
                         T3p = T3k + T3o;
257
                         T3q = T3j + T3p;
258
                         T3s = T3p - T3j;
259
                    }
260
                    {
261
                         E T1f, T1G, T3f, T3g;
262
                         T1f = T11 + T1e;
263
                         T1G = T1s + T1F;
264
                         T1H = T1f + T1G;
265
                         T3r = T1G - T1f;
266
                         T3f = T30 + T31;
267
                         T3g = T35 + T36;
268
                         T3h = T3f - T3g;
269
                         T3i = T3f + T3g;
270
                    }
271
                    ri[WS(rs, 8)] = TO - T1H;
272
                    ii[WS(rs, 8)] = T3q - T3i;
273
                    ri[0] = TO + T1H;
274
                    ii[0] = T3i + T3q;
275
                    ri[WS(rs, 12)] = T3e - T3h;
276
                    ii[WS(rs, 12)] = T3s - T3r;
277
                    ri[WS(rs, 4)] = T3e + T3h;
278
                    ii[WS(rs, 4)] = T3r + T3s;
279
               }
280
               /* Stores to slots 10, 2, 14 and 6: each combines two terms via KP707106781. */
               {
281
                    E T2Y, T3a, T3v, T3x, T33, T3b, T38, T3c;
282
                    {
283
                         E T2U, T2X, T3t, T3u;
284
                         T2U = T8 - Tl;
285
                         T2X = T2V - T2W;
286
                         T2Y = T2U + T2X;
287
                         T3a = T2U - T2X;
288
                         T3t = TM - Tz;
289
                         T3u = T3o - T3k;
290
                         T3v = T3t + T3u;
291
                         T3x = T3u - T3t;
292
                    }
293
                    {
294
                         E T2Z, T32, T34, T37;
295
                         T2Z = T11 - T1e;
296
                         T32 = T30 - T31;
297
                         T33 = T2Z + T32;
298
                         T3b = T32 - T2Z;
299
                         T34 = T1s - T1F;
300
                         T37 = T35 - T36;
301
                         T38 = T34 - T37;
302
                         T3c = T34 + T37;
303
                    }
304
                    {
305
                         E T39, T3w, T3d, T3y;
306
                         T39 = T33 + T38;
307
                         ri[WS(rs, 10)] = FNMS(KP707106781, T39, T2Y);
308
                         ri[WS(rs, 2)] = FMA(KP707106781, T39, T2Y);
309
                         T3w = T3b + T3c;
310
                         ii[WS(rs, 2)] = FMA(KP707106781, T3w, T3v);
311
                         ii[WS(rs, 10)] = FNMS(KP707106781, T3w, T3v);
312
                         T3d = T3b - T3c;
313
                         ri[WS(rs, 14)] = FNMS(KP707106781, T3d, T3a);
314
                         ri[WS(rs, 6)] = FMA(KP707106781, T3d, T3a);
315
                         T3y = T38 - T33;
316
                         ii[WS(rs, 6)] = FMA(KP707106781, T3y, T3x);
317
                         ii[WS(rs, 14)] = FNMS(KP707106781, T3y, T3x);
318
                    }
319
               }
320
               /* Stores to the odd slots (11, 3, 7, 15, 9, 1, 13, 5); uses all three constants. */
               {
321
                    E T1O, T3B, T3H, T2E, T23, T3C, T2O, T2S, T2H, T3I, T2j, T2B, T2L, T2R, T2y;
322
                    E T2C;
323
                    {
324
                         E T1V, T22, T2b, T2i;
325
                         T1O = T1I - T1N;
326
                         T3B = T3z - T3A;
327
                         T3H = T3A + T3z;
328
                         T2E = T1I + T1N;
329
                         T1V = T1T - T1U;
330
                         T22 = T1W + T21;
331
                         T23 = T1V - T22;
332
                         T3C = T1V + T22;
333
                         {
334
                              E T2M, T2N, T2F, T2G;
335
                              T2M = T2r + T2w;
336
                              T2N = T2o - T2p;
337
                              T2O = FNMS(KP414213562, T2N, T2M);
338
                              T2S = FMA(KP414213562, T2M, T2N);
339
                              T2F = T1U + T1T;
340
                              T2G = T1W - T21;
341
                              T2H = T2F + T2G;
342
                              T3I = T2G - T2F;
343
                         }
344
                         T2b = T29 + T2a;
345
                         T2i = T2c - T2h;
346
                         T2j = FMA(KP414213562, T2i, T2b);
347
                         T2B = FNMS(KP414213562, T2b, T2i);
348
                         {
349
                              E T2J, T2K, T2q, T2x;
350
                              T2J = T2c + T2h;
351
                              T2K = T29 - T2a;
352
                              T2L = FMA(KP414213562, T2K, T2J);
353
                              T2R = FNMS(KP414213562, T2J, T2K);
354
                              T2q = T2o + T2p;
355
                              T2x = T2r - T2w;
356
                              T2y = FNMS(KP414213562, T2x, T2q);
357
                              T2C = FMA(KP414213562, T2q, T2x);
358
                         }
359
                    }
360
                    {
361
                         E T24, T2z, T3J, T3K;
362
                         T24 = FMA(KP707106781, T23, T1O);
363
                         T2z = T2j - T2y;
364
                         ri[WS(rs, 11)] = FNMS(KP923879532, T2z, T24);
365
                         ri[WS(rs, 3)] = FMA(KP923879532, T2z, T24);
366
                         T3J = FMA(KP707106781, T3I, T3H);
367
                         T3K = T2C - T2B;
368
                         ii[WS(rs, 3)] = FMA(KP923879532, T3K, T3J);
369
                         ii[WS(rs, 11)] = FNMS(KP923879532, T3K, T3J);
370
                    }
371
                    {
372
                         E T2A, T2D, T3L, T3M;
373
                         T2A = FNMS(KP707106781, T23, T1O);
374
                         T2D = T2B + T2C;
375
                         ri[WS(rs, 7)] = FNMS(KP923879532, T2D, T2A);
376
                         ri[WS(rs, 15)] = FMA(KP923879532, T2D, T2A);
377
                         T3L = FNMS(KP707106781, T3I, T3H);
378
                         T3M = T2j + T2y;
379
                         ii[WS(rs, 7)] = FNMS(KP923879532, T3M, T3L);
380
                         ii[WS(rs, 15)] = FMA(KP923879532, T3M, T3L);
381
                    }
382
                    {
383
                         E T2I, T2P, T3D, T3E;
384
                         T2I = FMA(KP707106781, T2H, T2E);
385
                         T2P = T2L + T2O;
386
                         ri[WS(rs, 9)] = FNMS(KP923879532, T2P, T2I);
387
                         ri[WS(rs, 1)] = FMA(KP923879532, T2P, T2I);
388
                         T3D = FMA(KP707106781, T3C, T3B);
389
                         T3E = T2R + T2S;
390
                         ii[WS(rs, 1)] = FMA(KP923879532, T3E, T3D);
391
                         ii[WS(rs, 9)] = FNMS(KP923879532, T3E, T3D);
392
                    }
393
                    {
394
                         E T2Q, T2T, T3F, T3G;
395
                         T2Q = FNMS(KP707106781, T2H, T2E);
396
                         T2T = T2R - T2S;
397
                         ri[WS(rs, 13)] = FNMS(KP923879532, T2T, T2Q);
398
                         ri[WS(rs, 5)] = FMA(KP923879532, T2T, T2Q);
399
                         T3F = FNMS(KP707106781, T3C, T3B);
400
                         T3G = T2O - T2L;
401
                         ii[WS(rs, 5)] = FMA(KP923879532, T3G, T3F);
402
                         ii[WS(rs, 13)] = FNMS(KP923879532, T3G, T3F);
403
                    }
404
               }
405
          }
406
     }
407
}
408

    
409
/*
 * Twiddle instruction table referenced by `desc` below.
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are defined outside this
 * file; presumably this requests the full twiddle set for size 16 (the
 * kernel reads W[0..29] per iteration) -- confirm against the tw_instr
 * definition.
 */
static const tw_instr twinstr[] = {
410
     {TW_FULL, 0, 16},
411
     {TW_NEXT, 1, 0}
412
};
413

    
414
/*
 * Descriptor passed to kdft_dit_register for the FMA variant.  It carries
 * the size (16), the name "t1_16", the twiddle table, &GENUS, and
 * {104, 30, 70, 0}, whose first three entries match the operation counts in
 * the generator comment above (104 additions, 30 multiplications, 70 fused
 * multiply/add).  NOTE(review): the meaning of the fourth count and of the
 * trailing three zeros is set by ct_desc, defined elsewhere -- confirm there.
 */
static const ct_desc desc = { 16, "t1_16", twinstr, &GENUS, {104, 30, 70, 0}, 0, 0, 0 };
415

    
416
/*
 * Registration entry point: passes the t1_16 kernel and its descriptor to
 * kdft_dit_register together with planner p.  X() is a macro defined
 * outside this file that wraps both identifiers.
 */
void X(codelet_t1_16) (planner *p) {
417
     /* Single call; this function does nothing else. */
     X(kdft_dit_register) (p, t1_16, &desc);
418
}
419
#else
420

    
421
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 16 -name t1_16 -include dft/scalar/t.h */
422

    
423
/*
424
 * This function contains 174 FP additions, 84 FP multiplications,
425
 * (or, 136 additions, 46 multiplications, 38 fused multiply/add),
426
 * 52 stack variables, 3 constants, and 64 memory accesses
427
 */
428
#include "dft/scalar/t.h"
429

    
430
/*
 * t1_16, non-FMA variant (compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined): size-16 twiddle codelet emitted by
 * genfft's gen_twiddle with -n 16 and without -fma.  It is handed to
 * kdft_dit_register by codelet_t1_16 further down in this branch.
 *
 * For every m in [mb, me) one loop iteration:
 *   - reads the 16 points ri[WS(rs, k)] / ii[WS(rs, k)], k = 0..15;
 *   - combines each point k >= 1 with the twiddle pair W[2k-2], W[2k-1]
 *     (point 0 is used unmodified);
 *   - runs the butterfly network below and stores the 16 results back into
 *     the same ri/ii slots, i.e. the data is overwritten in place.
 * Before the first iteration W is offset by mb * 30; after each iteration
 * ri and ii advance by ms and W advances by 30 (two values for each of the
 * 15 twiddled points).
 *
 * Parameters, as used in this function:
 *   ri, ii  arrays that are read and then overwritten (presumably the real
 *           and imaginary parts of the data -- naming-based, confirm
 *           against dft/codelet-dft.h)
 *   W       read-only twiddle table, 30 values consumed per iteration
 *   rs      stride passed to WS() to address point k within one transform
 *   mb, me  half-open range of the loop counter m
 *   ms      pointer increment applied to ri and ii between iterations
 *
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file.  FMA(a, b, c) is
 * presumably a * b + c and FNMS(a, b, c) presumably c - a * b -- confirm
 * in the FFTW headers.
 */
static void t1_16(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
431
{
432
     /* Numerically: sin(pi/8), cos(pi/8) and 1/sqrt(2). */
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
433
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
434
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
435
     {
436
          INT m;
437
          for (m = mb, W = W + (mb * 30); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 30, MAKE_VOLATILE_STRIDE(32, rs)) {
438
               /* Values that survive from the load blocks into the output blocks. */
               E T7, T37, T1t, T2U, Ti, T38, T1w, T2R, Tu, T2s, T1C, T2c, TF, T2t, T1H;
439
               E T2d, T1f, T1q, T2B, T2C, T2D, T2E, T1Z, T2j, T24, T2k, TS, T13, T2w, T2x;
440
               E T2y, T2z, T1O, T2g, T1T, T2h;
441
               /* Point 0 (no twiddle) and point 8 (W[14], W[15]): sums and differences. */
               {
442
                    E T1, T2T, T6, T2S;
443
                    T1 = ri[0];
444
                    T2T = ii[0];
445
                    {
446
                         E T3, T5, T2, T4;
447
                         T3 = ri[WS(rs, 8)];
448
                         T5 = ii[WS(rs, 8)];
449
                         T2 = W[14];
450
                         T4 = W[15];
451
                         T6 = FMA(T2, T3, T4 * T5);
452
                         T2S = FNMS(T4, T3, T2 * T5);
453
                    }
454
                    T7 = T1 + T6;
455
                    T37 = T2T - T2S;
456
                    T1t = T1 - T6;
457
                    T2U = T2S + T2T;
458
               }
459
               /* Points 4 (W[6], W[7]) and 12 (W[22], W[23]). */
               {
460
                    E Tc, T1u, Th, T1v;
461
                    {
462
                         E T9, Tb, T8, Ta;
463
                         T9 = ri[WS(rs, 4)];
464
                         Tb = ii[WS(rs, 4)];
465
                         T8 = W[6];
466
                         Ta = W[7];
467
                         Tc = FMA(T8, T9, Ta * Tb);
468
                         T1u = FNMS(Ta, T9, T8 * Tb);
469
                    }
470
                    {
471
                         E Te, Tg, Td, Tf;
472
                         Te = ri[WS(rs, 12)];
473
                         Tg = ii[WS(rs, 12)];
474
                         Td = W[22];
475
                         Tf = W[23];
476
                         Th = FMA(Td, Te, Tf * Tg);
477
                         T1v = FNMS(Tf, Te, Td * Tg);
478
                    }
479
                    Ti = Tc + Th;
480
                    T38 = Tc - Th;
481
                    T1w = T1u - T1v;
482
                    T2R = T1u + T1v;
483
               }
484
               /* Points 2 (W[2], W[3]) and 10 (W[18], W[19]). */
               {
485
                    E To, T1y, Tt, T1z, T1A, T1B;
486
                    {
487
                         E Tl, Tn, Tk, Tm;
488
                         Tl = ri[WS(rs, 2)];
489
                         Tn = ii[WS(rs, 2)];
490
                         Tk = W[2];
491
                         Tm = W[3];
492
                         To = FMA(Tk, Tl, Tm * Tn);
493
                         T1y = FNMS(Tm, Tl, Tk * Tn);
494
                    }
495
                    {
496
                         E Tq, Ts, Tp, Tr;
497
                         Tq = ri[WS(rs, 10)];
498
                         Ts = ii[WS(rs, 10)];
499
                         Tp = W[18];
500
                         Tr = W[19];
501
                         Tt = FMA(Tp, Tq, Tr * Ts);
502
                         T1z = FNMS(Tr, Tq, Tp * Ts);
503
                    }
504
                    Tu = To + Tt;
505
                    T2s = T1y + T1z;
506
                    T1A = T1y - T1z;
507
                    T1B = To - Tt;
508
                    T1C = T1A - T1B;
509
                    T2c = T1B + T1A;
510
               }
511
               /* Points 14 (W[26], W[27]) and 6 (W[10], W[11]). */
               {
512
                    E Tz, T1E, TE, T1F, T1D, T1G;
513
                    {
514
                         E Tw, Ty, Tv, Tx;
515
                         Tw = ri[WS(rs, 14)];
516
                         Ty = ii[WS(rs, 14)];
517
                         Tv = W[26];
518
                         Tx = W[27];
519
                         Tz = FMA(Tv, Tw, Tx * Ty);
520
                         T1E = FNMS(Tx, Tw, Tv * Ty);
521
                    }
522
                    {
523
                         E TB, TD, TA, TC;
524
                         TB = ri[WS(rs, 6)];
525
                         TD = ii[WS(rs, 6)];
526
                         TA = W[10];
527
                         TC = W[11];
528
                         TE = FMA(TA, TB, TC * TD);
529
                         T1F = FNMS(TC, TB, TA * TD);
530
                    }
531
                    TF = Tz + TE;
532
                    T2t = T1E + T1F;
533
                    T1D = Tz - TE;
534
                    T1G = T1E - T1F;
535
                    T1H = T1D + T1G;
536
                    T2d = T1D - T1G;
537
               }
538
               /* Points 15 (W[28], W[29]), 11 (W[20], W[21]), 7 (W[12], W[13]) and 3 (W[4], W[5]). */
               {
539
                    E T19, T20, T1p, T1X, T1e, T21, T1k, T1W;
540
                    {
541
                         E T16, T18, T15, T17;
542
                         T16 = ri[WS(rs, 15)];
543
                         T18 = ii[WS(rs, 15)];
544
                         T15 = W[28];
545
                         T17 = W[29];
546
                         T19 = FMA(T15, T16, T17 * T18);
547
                         T20 = FNMS(T17, T16, T15 * T18);
548
                    }
549
                    {
550
                         E T1m, T1o, T1l, T1n;
551
                         T1m = ri[WS(rs, 11)];
552
                         T1o = ii[WS(rs, 11)];
553
                         T1l = W[20];
554
                         T1n = W[21];
555
                         T1p = FMA(T1l, T1m, T1n * T1o);
556
                         T1X = FNMS(T1n, T1m, T1l * T1o);
557
                    }
558
                    {
559
                         E T1b, T1d, T1a, T1c;
560
                         T1b = ri[WS(rs, 7)];
561
                         T1d = ii[WS(rs, 7)];
562
                         T1a = W[12];
563
                         T1c = W[13];
564
                         T1e = FMA(T1a, T1b, T1c * T1d);
565
                         T21 = FNMS(T1c, T1b, T1a * T1d);
566
                    }
567
                    {
568
                         E T1h, T1j, T1g, T1i;
569
                         T1h = ri[WS(rs, 3)];
570
                         T1j = ii[WS(rs, 3)];
571
                         T1g = W[4];
572
                         T1i = W[5];
573
                         T1k = FMA(T1g, T1h, T1i * T1j);
574
                         T1W = FNMS(T1i, T1h, T1g * T1j);
575
                    }
576
                    T1f = T19 + T1e;
577
                    T1q = T1k + T1p;
578
                    T2B = T1f - T1q;
579
                    T2C = T20 + T21;
580
                    T2D = T1W + T1X;
581
                    T2E = T2C - T2D;
582
                    {
583
                         E T1V, T1Y, T22, T23;
584
                         T1V = T19 - T1e;
585
                         T1Y = T1W - T1X;
586
                         T1Z = T1V - T1Y;
587
                         T2j = T1V + T1Y;
588
                         T22 = T20 - T21;
589
                         T23 = T1k - T1p;
590
                         T24 = T22 + T23;
591
                         T2k = T22 - T23;
592
                    }
593
               }
594
               /* Points 1 (W[0], W[1]), 13 (W[24], W[25]), 9 (W[16], W[17]) and 5 (W[8], W[9]). */
               {
595
                    E TM, T1K, T12, T1R, TR, T1L, TX, T1Q;
596
                    {
597
                         E TJ, TL, TI, TK;
598
                         TJ = ri[WS(rs, 1)];
599
                         TL = ii[WS(rs, 1)];
600
                         TI = W[0];
601
                         TK = W[1];
602
                         TM = FMA(TI, TJ, TK * TL);
603
                         T1K = FNMS(TK, TJ, TI * TL);
604
                    }
605
                    {
606
                         E TZ, T11, TY, T10;
607
                         TZ = ri[WS(rs, 13)];
608
                         T11 = ii[WS(rs, 13)];
609
                         TY = W[24];
610
                         T10 = W[25];
611
                         T12 = FMA(TY, TZ, T10 * T11);
612
                         T1R = FNMS(T10, TZ, TY * T11);
613
                    }
614
                    {
615
                         E TO, TQ, TN, TP;
616
                         TO = ri[WS(rs, 9)];
617
                         TQ = ii[WS(rs, 9)];
618
                         TN = W[16];
619
                         TP = W[17];
620
                         TR = FMA(TN, TO, TP * TQ);
621
                         T1L = FNMS(TP, TO, TN * TQ);
622
                    }
623
                    {
624
                         E TU, TW, TT, TV;
625
                         TU = ri[WS(rs, 5)];
626
                         TW = ii[WS(rs, 5)];
627
                         TT = W[8];
628
                         TV = W[9];
629
                         TX = FMA(TT, TU, TV * TW);
630
                         T1Q = FNMS(TV, TU, TT * TW);
631
                    }
632
                    TS = TM + TR;
633
                    T13 = TX + T12;
634
                    T2w = TS - T13;
635
                    T2x = T1K + T1L;
636
                    T2y = T1Q + T1R;
637
                    T2z = T2x - T2y;
638
                    {
639
                         E T1M, T1N, T1P, T1S;
640
                         T1M = T1K - T1L;
641
                         T1N = TX - T12;
642
                         T1O = T1M + T1N;
643
                         T2g = T1M - T1N;
644
                         T1P = TM - TR;
645
                         T1S = T1Q - T1R;
646
                         T1T = T1P - T1S;
647
                         T2h = T1P + T1S;
648
                    }
649
               }
650
               /* Stores to slots 11, 3, 15 and 7. */
               {
651
                    E T1J, T27, T3g, T3i, T26, T3h, T2a, T3d;
652
                    {
653
                         E T1x, T1I, T3e, T3f;
654
                         T1x = T1t - T1w;
655
                         T1I = KP707106781 * (T1C - T1H);
656
                         T1J = T1x + T1I;
657
                         T27 = T1x - T1I;
658
                         T3e = KP707106781 * (T2d - T2c);
659
                         T3f = T38 + T37;
660
                         T3g = T3e + T3f;
661
                         T3i = T3f - T3e;
662
                    }
663
                    {
664
                         E T1U, T25, T28, T29;
665
                         T1U = FMA(KP923879532, T1O, KP382683432 * T1T);
666
                         T25 = FNMS(KP923879532, T24, KP382683432 * T1Z);
667
                         T26 = T1U + T25;
668
                         T3h = T25 - T1U;
669
                         T28 = FNMS(KP923879532, T1T, KP382683432 * T1O);
670
                         T29 = FMA(KP382683432, T24, KP923879532 * T1Z);
671
                         T2a = T28 - T29;
672
                         T3d = T28 + T29;
673
                    }
674
                    ri[WS(rs, 11)] = T1J - T26;
675
                    ii[WS(rs, 11)] = T3g - T3d;
676
                    ri[WS(rs, 3)] = T1J + T26;
677
                    ii[WS(rs, 3)] = T3d + T3g;
678
                    ri[WS(rs, 15)] = T27 - T2a;
679
                    ii[WS(rs, 15)] = T3i - T3h;
680
                    ri[WS(rs, 7)] = T27 + T2a;
681
                    ii[WS(rs, 7)] = T3h + T3i;
682
               }
683
               /* Stores to slots 10, 2, 14 and 6: only KP707106781 is needed here. */
               {
684
                    E T2v, T2H, T32, T34, T2G, T33, T2K, T2Z;
685
                    {
686
                         E T2r, T2u, T30, T31;
687
                         T2r = T7 - Ti;
688
                         T2u = T2s - T2t;
689
                         T2v = T2r + T2u;
690
                         T2H = T2r - T2u;
691
                         T30 = TF - Tu;
692
                         T31 = T2U - T2R;
693
                         T32 = T30 + T31;
694
                         T34 = T31 - T30;
695
                    }
696
                    {
697
                         E T2A, T2F, T2I, T2J;
698
                         T2A = T2w + T2z;
699
                         T2F = T2B - T2E;
700
                         T2G = KP707106781 * (T2A + T2F);
701
                         T33 = KP707106781 * (T2F - T2A);
702
                         T2I = T2z - T2w;
703
                         T2J = T2B + T2E;
704
                         T2K = KP707106781 * (T2I - T2J);
705
                         T2Z = KP707106781 * (T2I + T2J);
706
                    }
707
                    ri[WS(rs, 10)] = T2v - T2G;
708
                    ii[WS(rs, 10)] = T32 - T2Z;
709
                    ri[WS(rs, 2)] = T2v + T2G;
710
                    ii[WS(rs, 2)] = T2Z + T32;
711
                    ri[WS(rs, 14)] = T2H - T2K;
712
                    ii[WS(rs, 14)] = T34 - T33;
713
                    ri[WS(rs, 6)] = T2H + T2K;
714
                    ii[WS(rs, 6)] = T33 + T34;
715
               }
716
               /* Stores to slots 9, 1, 13 and 5. */
               {
717
                    E T2f, T2n, T3a, T3c, T2m, T3b, T2q, T35;
718
                    {
719
                         E T2b, T2e, T36, T39;
720
                         T2b = T1t + T1w;
721
                         T2e = KP707106781 * (T2c + T2d);
722
                         T2f = T2b + T2e;
723
                         T2n = T2b - T2e;
724
                         T36 = KP707106781 * (T1C + T1H);
725
                         T39 = T37 - T38;
726
                         T3a = T36 + T39;
727
                         T3c = T39 - T36;
728
                    }
729
                    {
730
                         E T2i, T2l, T2o, T2p;
731
                         T2i = FMA(KP382683432, T2g, KP923879532 * T2h);
732
                         T2l = FNMS(KP382683432, T2k, KP923879532 * T2j);
733
                         T2m = T2i + T2l;
734
                         T3b = T2l - T2i;
735
                         T2o = FNMS(KP382683432, T2h, KP923879532 * T2g);
736
                         T2p = FMA(KP923879532, T2k, KP382683432 * T2j);
737
                         T2q = T2o - T2p;
738
                         T35 = T2o + T2p;
739
                    }
740
                    ri[WS(rs, 9)] = T2f - T2m;
741
                    ii[WS(rs, 9)] = T3a - T35;
742
                    ri[WS(rs, 1)] = T2f + T2m;
743
                    ii[WS(rs, 1)] = T35 + T3a;
744
                    ri[WS(rs, 13)] = T2n - T2q;
745
                    ii[WS(rs, 13)] = T3c - T3b;
746
                    ri[WS(rs, 5)] = T2n + T2q;
747
                    ii[WS(rs, 5)] = T3b + T3c;
748
               }
749
               /* Stores to slots 8, 0, 12 and 4: additions and subtractions only. */
               {
750
                    E TH, T2L, T2W, T2Y, T1s, T2X, T2O, T2P;
751
                    {
752
                         E Tj, TG, T2Q, T2V;
753
                         Tj = T7 + Ti;
754
                         TG = Tu + TF;
755
                         TH = Tj + TG;
756
                         T2L = Tj - TG;
757
                         T2Q = T2s + T2t;
758
                         T2V = T2R + T2U;
759
                         T2W = T2Q + T2V;
760
                         T2Y = T2V - T2Q;
761
                    }
762
                    {
763
                         E T14, T1r, T2M, T2N;
764
                         T14 = TS + T13;
765
                         T1r = T1f + T1q;
766
                         T1s = T14 + T1r;
767
                         T2X = T1r - T14;
768
                         T2M = T2x + T2y;
769
                         T2N = T2C + T2D;
770
                         T2O = T2M - T2N;
771
                         T2P = T2M + T2N;
772
                    }
773
                    ri[WS(rs, 8)] = TH - T1s;
774
                    ii[WS(rs, 8)] = T2W - T2P;
775
                    ri[0] = TH + T1s;
776
                    ii[0] = T2P + T2W;
777
                    ri[WS(rs, 12)] = T2L - T2O;
778
                    ii[WS(rs, 12)] = T2Y - T2X;
779
                    ri[WS(rs, 4)] = T2L + T2O;
780
                    ii[WS(rs, 4)] = T2X + T2Y;
781
               }
782
          }
783
     }
784
}
785

    
786
/*
 * Twiddle instruction table referenced by `desc` below (non-FMA branch;
 * same contents as in the FMA branch).
 * NOTE(review): tw_instr, TW_FULL and TW_NEXT are defined outside this
 * file; presumably this requests the full twiddle set for size 16 (the
 * kernel reads W[0..29] per iteration) -- confirm against the tw_instr
 * definition.
 */
static const tw_instr twinstr[] = {
787
     {TW_FULL, 0, 16},
788
     {TW_NEXT, 1, 0}
789
};
790

    
791
/*
 * Descriptor passed to kdft_dit_register for the non-FMA variant.  It
 * carries the size (16), the name "t1_16", the twiddle table, &GENUS, and
 * {136, 46, 38, 0}, whose first three entries match the operation counts in
 * the generator comment above (136 additions, 46 multiplications, 38 fused
 * multiply/add).  NOTE(review): the meaning of the fourth count and of the
 * trailing three zeros is set by ct_desc, defined elsewhere -- confirm there.
 */
static const ct_desc desc = { 16, "t1_16", twinstr, &GENUS, {136, 46, 38, 0}, 0, 0, 0 };
792

    
793
/*
 * Registration entry point (non-FMA branch): passes the t1_16 kernel and
 * its descriptor to kdft_dit_register together with planner p.  X() is a
 * macro defined outside this file that wraps both identifiers.
 */
void X(codelet_t1_16) (planner *p) {
794
     /* Single call; this function does nothing else. */
     X(kdft_dit_register) (p, t1_16, &desc);
795
}
796
#endif