To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t2_20.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (29.3 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:26 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -name t2_20 -include dft/scalar/t.h */
29

    
30
/*
31
 * This function contains 276 FP additions, 198 FP multiplications,
32
 * (or, 136 additions, 58 multiplications, 140 fused multiply/add),
33
 * 95 stack variables, 4 constants, and 80 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36

    
37
/*
 * t2_20 -- variant compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined.  Size-20 twiddle codelet emitted
 * by genfft (see the "Generated by" comment above); it works in place on
 * the ri/ii arrays.
 *
 *   ri, ii : arrays read and then overwritten at offset 0 and at
 *            WS(rs, k) for k = 1..19 (presumably the real and imaginary
 *            parts of the data -- inferred from naming, confirm).
 *   W      : twiddle table; eight values W[0..7] are consumed per
 *            iteration, starting at W + mb * 8.
 *   rs     : stride handed to WS() to address the 20 elements.
 *   mb, me : the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms     : amount added to ri and ii after every iteration.
 *
 * E, R, INT, stride, DK, FMA, FNMS, WS and MAKE_VOLATILE_STRIDE are not
 * defined in this file; they come from the included headers.
 * NOTE(review): FMA(a, b, c) is presumably a*b + c and FNMS(a, b, c)
 * presumably c - a*b -- confirm against the header definitions.
 */
static void t2_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     /* Numeric constants; the values equal sin(2*pi/5), sqrt(5)/4, 1/4
        and (sqrt(5)-1)/2 to the printed precision. */
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
40
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
41
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
42
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
43
     {
44
          INT m;
45
          /* One pass per m; ri/ii step by ms and W steps by 8 each time. */
          for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(40, rs)) {
46
               E T2, Th, Tf, T6, T5, Ti, Tl, T1n, T3, Tt, Tv, T7, T17, T1L, T24;
47
               E Tb, T13, T1P, T21, T1b, T1D, T1A, T1H, T1f, TA, Tw, Tq, Tm, TK, T1S;
48
               E TO, T1p, T1q, T1u, T2n, T2k, T2h, T2d;
49
               /* Load the eight stored values W[0..7] and combine them
                  pairwise into the multipliers used on the inputs below. */
               {
50
                    E Tk, Ta, T1e, T4, T1a, Tj, T12, T1G, T16, T1K, Tg, Tz;
51
                    T2 = W[0];
52
                    Th = W[3];
53
                    Tf = W[2];
54
                    Tg = T2 * Tf;
55
                    Tk = T2 * Th;
56
                    T6 = W[5];
57
                    Ta = T2 * T6;
58
                    T1e = Tf * T6;
59
                    T5 = W[1];
60
                    Ti = FNMS(T5, Th, Tg);
61
                    Tl = FMA(T5, Tf, Tk);
62
                    T1n = FMA(T5, Th, Tg);
63
                    T3 = W[4];
64
                    T4 = T2 * T3;
65
                    T1a = Tf * T3;
66
                    Tj = Ti * T3;
67
                    Tt = W[6];
68
                    T12 = Tf * Tt;
69
                    T1G = T2 * Tt;
70
                    Tv = W[7];
71
                    T16 = Tf * Tv;
72
                    T1K = T2 * Tv;
73
                    T7 = FNMS(T5, T6, T4);
74
                    T17 = FNMS(Th, Tt, T16);
75
                    T1L = FNMS(T5, Tt, T1K);
76
                    T24 = FMA(Th, T3, T1e);
77
                    Tb = FMA(T5, T3, Ta);
78
                    T13 = FMA(Th, Tv, T12);
79
                    T1P = FNMS(Tl, T6, Tj);
80
                    T21 = FNMS(Th, T6, T1a);
81
                    T1b = FMA(Th, T6, T1a);
82
                    T1D = FNMS(T5, T3, Ta);
83
                    T1A = FMA(T5, T6, T4);
84
                    T1H = FMA(T5, Tv, T1G);
85
                    T1f = FNMS(Th, T3, T1e);
86
                    Tz = Ti * Tv;
87
                    TA = FNMS(Tl, Tt, Tz);
88
                    {
89
                         E Tu, Tp, TJ, TN;
90
                         Tu = Ti * Tt;
91
                         Tw = FMA(Tl, Tv, Tu);
92
                         Tp = Ti * T6;
93
                         Tq = FNMS(Tl, T3, Tp);
94
                         Tm = FMA(Tl, T6, Tj);
95
                         TJ = Tm * Tt;
96
                         TN = Tm * Tv;
97
                         TK = FMA(Tq, Tv, TJ);
98
                         T1S = FMA(Tl, T3, Tp);
99
                         TO = FNMS(Tq, Tt, TN);
100
                         {
101
                              E T1o, T2g, T1t, T2c;
102
                              T1o = T1n * T3;
103
                              T2g = T1n * Tv;
104
                              T1t = T1n * T6;
105
                              T2c = T1n * Tt;
106
                              T1p = FNMS(T5, Tf, Tk);
107
                              T1q = FNMS(T1p, T6, T1o);
108
                              T1u = FMA(T1p, T3, T1t);
109
                              T2n = FNMS(T1p, T3, T1t);
110
                              T2k = FMA(T1p, T6, T1o);
111
                              T2h = FNMS(T1p, Tt, T2g);
112
                              T2d = FMA(T1p, Tv, T2c);
113
                         }
114
                    }
115
               }
116
               /* Read all 20 ri/ii inputs, scale every element except
                  index 0 by its multiplier pair, form the sums and
                  differences, then store the 20 results back. */
               {
117
                    E Te, T2C, T4L, T57, TD, T58, T2H, T4H, T11, T2v, T4k, T4v, T2P, T3P, T3C;
118
                    E T3Z, T2r, T2z, T4g, T4z, T3b, T3T, T3u, T43, T20, T2y, T4d, T4y, T34, T3S;
119
                    E T3n, T42, T1y, T2w, T4n, T4w, T2W, T3Q, T3J, T40;
120
                    /* Inputs 0 and 10. */
                    {
121
                         E T1, T4K, T8, T9, Tc, T4I, Td, T4J;
122
                         T1 = ri[0];
123
                         T4K = ii[0];
124
                         T8 = ri[WS(rs, 10)];
125
                         T9 = T7 * T8;
126
                         Tc = ii[WS(rs, 10)];
127
                         T4I = T7 * Tc;
128
                         Td = FMA(Tb, Tc, T9);
129
                         Te = T1 + Td;
130
                         T2C = T1 - Td;
131
                         T4J = FNMS(Tb, T8, T4I);
132
                         T4L = T4J + T4K;
133
                         T57 = T4K - T4J;
134
                    }
135
                    /* Inputs 5 and 15. */
                    {
136
                         E Tn, To, Tr, T2D, Tx, Ty, TB, T2F;
137
                         Tn = ri[WS(rs, 5)];
138
                         To = Tm * Tn;
139
                         Tr = ii[WS(rs, 5)];
140
                         T2D = Tm * Tr;
141
                         Tx = ri[WS(rs, 15)];
142
                         Ty = Tw * Tx;
143
                         TB = ii[WS(rs, 15)];
144
                         T2F = Tw * TB;
145
                         {
146
                              E Ts, TC, T2E, T2G;
147
                              Ts = FMA(Tq, Tr, To);
148
                              TC = FMA(TA, TB, Ty);
149
                              TD = Ts + TC;
150
                              T58 = Ts - TC;
151
                              T2E = FNMS(Tq, Tn, T2D);
152
                              T2G = FNMS(TA, Tx, T2F);
153
                              T2H = T2E - T2G;
154
                              T4H = T2E + T2G;
155
                         }
156
                    }
157
                    /* Inputs 4, 19, 14 and 9. */
                    {
158
                         E TI, T3x, TZ, T2N, TQ, T3z, TV, T2L;
159
                         {
160
                              E TF, TG, TH, T3w;
161
                              TF = ri[WS(rs, 4)];
162
                              TG = Ti * TF;
163
                              TH = ii[WS(rs, 4)];
164
                              T3w = Ti * TH;
165
                              TI = FMA(Tl, TH, TG);
166
                              T3x = FNMS(Tl, TF, T3w);
167
                         }
168
                         {
169
                              E TW, TX, TY, T2M;
170
                              TW = ri[WS(rs, 19)];
171
                              TX = Tt * TW;
172
                              TY = ii[WS(rs, 19)];
173
                              T2M = Tt * TY;
174
                              TZ = FMA(Tv, TY, TX);
175
                              T2N = FNMS(Tv, TW, T2M);
176
                         }
177
                         {
178
                              E TL, TM, TP, T3y;
179
                              TL = ri[WS(rs, 14)];
180
                              TM = TK * TL;
181
                              TP = ii[WS(rs, 14)];
182
                              T3y = TK * TP;
183
                              TQ = FMA(TO, TP, TM);
184
                              T3z = FNMS(TO, TL, T3y);
185
                         }
186
                         {
187
                              E TS, TT, TU, T2K;
188
                              TS = ri[WS(rs, 9)];
189
                              TT = T3 * TS;
190
                              TU = ii[WS(rs, 9)];
191
                              T2K = T3 * TU;
192
                              TV = FMA(T6, TU, TT);
193
                              T2L = FNMS(T6, TS, T2K);
194
                         }
195
                         {
196
                              E TR, T10, T4i, T4j;
197
                              TR = TI + TQ;
198
                              T10 = TV + TZ;
199
                              T11 = TR - T10;
200
                              T2v = TR + T10;
201
                              T4i = T3x + T3z;
202
                              T4j = T2L + T2N;
203
                              T4k = T4i - T4j;
204
                              T4v = T4i + T4j;
205
                         }
206
                         {
207
                              E T2J, T2O, T3A, T3B;
208
                              T2J = TI - TQ;
209
                              T2O = T2L - T2N;
210
                              T2P = T2J - T2O;
211
                              T3P = T2J + T2O;
212
                              T3A = T3x - T3z;
213
                              T3B = TV - TZ;
214
                              T3C = T3A + T3B;
215
                              T3Z = T3A - T3B;
216
                         }
217
                    }
218
                    /* Inputs 12, 7, 2 and 17. */
                    {
219
                         E T26, T3p, T2p, T39, T2a, T3r, T2j, T37;
220
                         {
221
                              E T22, T23, T25, T3o;
222
                              T22 = ri[WS(rs, 12)];
223
                              T23 = T21 * T22;
224
                              T25 = ii[WS(rs, 12)];
225
                              T3o = T21 * T25;
226
                              T26 = FMA(T24, T25, T23);
227
                              T3p = FNMS(T24, T22, T3o);
228
                         }
229
                         {
230
                              E T2l, T2m, T2o, T38;
231
                              T2l = ri[WS(rs, 7)];
232
                              T2m = T2k * T2l;
233
                              T2o = ii[WS(rs, 7)];
234
                              T38 = T2k * T2o;
235
                              T2p = FMA(T2n, T2o, T2m);
236
                              T39 = FNMS(T2n, T2l, T38);
237
                         }
238
                         {
239
                              E T27, T28, T29, T3q;
240
                              T27 = ri[WS(rs, 2)];
241
                              T28 = T1n * T27;
242
                              T29 = ii[WS(rs, 2)];
243
                              T3q = T1n * T29;
244
                              T2a = FMA(T1p, T29, T28);
245
                              T3r = FNMS(T1p, T27, T3q);
246
                         }
247
                         {
248
                              E T2e, T2f, T2i, T36;
249
                              T2e = ri[WS(rs, 17)];
250
                              T2f = T2d * T2e;
251
                              T2i = ii[WS(rs, 17)];
252
                              T36 = T2d * T2i;
253
                              T2j = FMA(T2h, T2i, T2f);
254
                              T37 = FNMS(T2h, T2e, T36);
255
                         }
256
                         {
257
                              E T2b, T2q, T4e, T4f;
258
                              T2b = T26 + T2a;
259
                              T2q = T2j + T2p;
260
                              T2r = T2b - T2q;
261
                              T2z = T2b + T2q;
262
                              T4e = T3p + T3r;
263
                              T4f = T37 + T39;
264
                              T4g = T4e - T4f;
265
                              T4z = T4e + T4f;
266
                         }
267
                         {
268
                              E T35, T3a, T3s, T3t;
269
                              T35 = T26 - T2a;
270
                              T3a = T37 - T39;
271
                              T3b = T35 - T3a;
272
                              T3T = T35 + T3a;
273
                              T3s = T3p - T3r;
274
                              T3t = T2j - T2p;
275
                              T3u = T3s + T3t;
276
                              T43 = T3s - T3t;
277
                         }
278
                    }
279
                    /* Inputs 8, 3, 18 and 13. */
                    {
280
                         E T1F, T3i, T1Y, T32, T1N, T3k, T1U, T30;
281
                         {
282
                              E T1B, T1C, T1E, T3h;
283
                              T1B = ri[WS(rs, 8)];
284
                              T1C = T1A * T1B;
285
                              T1E = ii[WS(rs, 8)];
286
                              T3h = T1A * T1E;
287
                              T1F = FMA(T1D, T1E, T1C);
288
                              T3i = FNMS(T1D, T1B, T3h);
289
                         }
290
                         {
291
                              E T1V, T1W, T1X, T31;
292
                              T1V = ri[WS(rs, 3)];
293
                              T1W = Tf * T1V;
294
                              T1X = ii[WS(rs, 3)];
295
                              T31 = Tf * T1X;
296
                              T1Y = FMA(Th, T1X, T1W);
297
                              T32 = FNMS(Th, T1V, T31);
298
                         }
299
                         {
300
                              E T1I, T1J, T1M, T3j;
301
                              T1I = ri[WS(rs, 18)];
302
                              T1J = T1H * T1I;
303
                              T1M = ii[WS(rs, 18)];
304
                              T3j = T1H * T1M;
305
                              T1N = FMA(T1L, T1M, T1J);
306
                              T3k = FNMS(T1L, T1I, T3j);
307
                         }
308
                         {
309
                              E T1Q, T1R, T1T, T2Z;
310
                              T1Q = ri[WS(rs, 13)];
311
                              T1R = T1P * T1Q;
312
                              T1T = ii[WS(rs, 13)];
313
                              T2Z = T1P * T1T;
314
                              T1U = FMA(T1S, T1T, T1R);
315
                              T30 = FNMS(T1S, T1Q, T2Z);
316
                         }
317
                         {
318
                              E T1O, T1Z, T4b, T4c;
319
                              T1O = T1F + T1N;
320
                              T1Z = T1U + T1Y;
321
                              T20 = T1O - T1Z;
322
                              T2y = T1O + T1Z;
323
                              T4b = T3i + T3k;
324
                              T4c = T30 + T32;
325
                              T4d = T4b - T4c;
326
                              T4y = T4b + T4c;
327
                         }
328
                         {
329
                              E T2Y, T33, T3l, T3m;
330
                              T2Y = T1F - T1N;
331
                              T33 = T30 - T32;
332
                              T34 = T2Y - T33;
333
                              T3S = T2Y + T33;
334
                              T3l = T3i - T3k;
335
                              T3m = T1U - T1Y;
336
                              T3n = T3l + T3m;
337
                              T42 = T3l - T3m;
338
                         }
339
                    }
340
                    /* Inputs 16, 11, 6 and 1. */
                    {
341
                         E T19, T3E, T1w, T2U, T1h, T3G, T1m, T2S;
342
                         {
343
                              E T14, T15, T18, T3D;
344
                              T14 = ri[WS(rs, 16)];
345
                              T15 = T13 * T14;
346
                              T18 = ii[WS(rs, 16)];
347
                              T3D = T13 * T18;
348
                              T19 = FMA(T17, T18, T15);
349
                              T3E = FNMS(T17, T14, T3D);
350
                         }
351
                         {
352
                              E T1r, T1s, T1v, T2T;
353
                              T1r = ri[WS(rs, 11)];
354
                              T1s = T1q * T1r;
355
                              T1v = ii[WS(rs, 11)];
356
                              T2T = T1q * T1v;
357
                              T1w = FMA(T1u, T1v, T1s);
358
                              T2U = FNMS(T1u, T1r, T2T);
359
                         }
360
                         {
361
                              E T1c, T1d, T1g, T3F;
362
                              T1c = ri[WS(rs, 6)];
363
                              T1d = T1b * T1c;
364
                              T1g = ii[WS(rs, 6)];
365
                              T3F = T1b * T1g;
366
                              T1h = FMA(T1f, T1g, T1d);
367
                              T3G = FNMS(T1f, T1c, T3F);
368
                         }
369
                         {
370
                              E T1j, T1k, T1l, T2R;
371
                              T1j = ri[WS(rs, 1)];
372
                              T1k = T2 * T1j;
373
                              T1l = ii[WS(rs, 1)];
374
                              T2R = T2 * T1l;
375
                              T1m = FMA(T5, T1l, T1k);
376
                              T2S = FNMS(T5, T1j, T2R);
377
                         }
378
                         {
379
                              E T1i, T1x, T4l, T4m;
380
                              T1i = T19 + T1h;
381
                              T1x = T1m + T1w;
382
                              T1y = T1i - T1x;
383
                              T2w = T1i + T1x;
384
                              T4l = T3E + T3G;
385
                              T4m = T2S + T2U;
386
                              T4n = T4l - T4m;
387
                              T4w = T4l + T4m;
388
                         }
389
                         {
390
                              E T2Q, T2V, T3H, T3I;
391
                              T2Q = T19 - T1h;
392
                              T2V = T2S - T2U;
393
                              T2W = T2Q - T2V;
394
                              T3Q = T2Q + T2V;
395
                              T3H = T3E - T3G;
396
                              T3I = T1m - T1w;
397
                              T3J = T3H + T3I;
398
                              T40 = T3H - T3I;
399
                         }
400
                    }
401
                    /* Store ri at indices 10, 14, 6, 2, 18. */
                    {
402
                         E T4p, T4r, TE, T2t, T48, T49, T4q, T4a;
403
                         {
404
                              E T4h, T4o, T1z, T2s;
405
                              T4h = T4d - T4g;
406
                              T4o = T4k - T4n;
407
                              T4p = FNMS(KP618033988, T4o, T4h);
408
                              T4r = FMA(KP618033988, T4h, T4o);
409
                              TE = Te - TD;
410
                              T1z = T11 + T1y;
411
                              T2s = T20 + T2r;
412
                              T2t = T1z + T2s;
413
                              T48 = FNMS(KP250000000, T2t, TE);
414
                              T49 = T1z - T2s;
415
                         }
416
                         ri[WS(rs, 10)] = TE + T2t;
417
                         T4q = FMA(KP559016994, T49, T48);
418
                         ri[WS(rs, 14)] = FNMS(KP951056516, T4r, T4q);
419
                         ri[WS(rs, 6)] = FMA(KP951056516, T4r, T4q);
420
                         T4a = FNMS(KP559016994, T49, T48);
421
                         ri[WS(rs, 2)] = FNMS(KP951056516, T4p, T4a);
422
                         ri[WS(rs, 18)] = FMA(KP951056516, T4p, T4a);
423
                    }
424
                    /* Store ii at indices 10, 6, 14, 2, 18. */
                    {
425
                         E T54, T56, T4V, T4Y, T4Z, T50, T55, T51;
426
                         {
427
                              E T52, T53, T4W, T4X;
428
                              T52 = T20 - T2r;
429
                              T53 = T11 - T1y;
430
                              T54 = FNMS(KP618033988, T53, T52);
431
                              T56 = FMA(KP618033988, T52, T53);
432
                              T4V = T4L - T4H;
433
                              T4W = T4k + T4n;
434
                              T4X = T4d + T4g;
435
                              T4Y = T4W + T4X;
436
                              T4Z = FNMS(KP250000000, T4Y, T4V);
437
                              T50 = T4W - T4X;
438
                         }
439
                         ii[WS(rs, 10)] = T4Y + T4V;
440
                         T55 = FMA(KP559016994, T50, T4Z);
441
                         ii[WS(rs, 6)] = FNMS(KP951056516, T56, T55);
442
                         ii[WS(rs, 14)] = FMA(KP951056516, T56, T55);
443
                         T51 = FNMS(KP559016994, T50, T4Z);
444
                         ii[WS(rs, 2)] = FMA(KP951056516, T54, T51);
445
                         ii[WS(rs, 18)] = FNMS(KP951056516, T54, T51);
446
                    }
447
                    /* Store ri at indices 0, 12, 8, 4, 16. */
                    {
448
                         E T4B, T4D, T2u, T2B, T4s, T4t, T4C, T4u;
449
                         {
450
                              E T4x, T4A, T2x, T2A;
451
                              T4x = T4v - T4w;
452
                              T4A = T4y - T4z;
453
                              T4B = FMA(KP618033988, T4A, T4x);
454
                              T4D = FNMS(KP618033988, T4x, T4A);
455
                              T2u = Te + TD;
456
                              T2x = T2v + T2w;
457
                              T2A = T2y + T2z;
458
                              T2B = T2x + T2A;
459
                              T4s = FNMS(KP250000000, T2B, T2u);
460
                              T4t = T2x - T2A;
461
                         }
462
                         ri[0] = T2u + T2B;
463
                         T4C = FNMS(KP559016994, T4t, T4s);
464
                         ri[WS(rs, 12)] = FNMS(KP951056516, T4D, T4C);
465
                         ri[WS(rs, 8)] = FMA(KP951056516, T4D, T4C);
466
                         T4u = FMA(KP559016994, T4t, T4s);
467
                         ri[WS(rs, 4)] = FNMS(KP951056516, T4B, T4u);
468
                         ri[WS(rs, 16)] = FMA(KP951056516, T4B, T4u);
469
                    }
470
                    /* Store ii at indices 0, 8, 12, 4, 16. */
                    {
471
                         E T4S, T4U, T4M, T4G, T4N, T4O, T4T, T4P;
472
                         {
473
                              E T4Q, T4R, T4E, T4F;
474
                              T4Q = T2v - T2w;
475
                              T4R = T2y - T2z;
476
                              T4S = FMA(KP618033988, T4R, T4Q);
477
                              T4U = FNMS(KP618033988, T4Q, T4R);
478
                              T4M = T4H + T4L;
479
                              T4E = T4v + T4w;
480
                              T4F = T4y + T4z;
481
                              T4G = T4E + T4F;
482
                              T4N = FNMS(KP250000000, T4G, T4M);
483
                              T4O = T4E - T4F;
484
                         }
485
                         ii[0] = T4G + T4M;
486
                         T4T = FNMS(KP559016994, T4O, T4N);
487
                         ii[WS(rs, 8)] = FNMS(KP951056516, T4U, T4T);
488
                         ii[WS(rs, 12)] = FMA(KP951056516, T4U, T4T);
489
                         T4P = FMA(KP559016994, T4O, T4N);
490
                         ii[WS(rs, 4)] = FMA(KP951056516, T4S, T4P);
491
                         ii[WS(rs, 16)] = FNMS(KP951056516, T4S, T4P);
492
                    }
493
                    /* Store ri at indices 15, 11, 19, 3, 7. */
                    {
494
                         E T3L, T3N, T2I, T3d, T3e, T3f, T3M, T3g;
495
                         {
496
                              E T3v, T3K, T2X, T3c;
497
                              T3v = T3n - T3u;
498
                              T3K = T3C - T3J;
499
                              T3L = FNMS(KP618033988, T3K, T3v);
500
                              T3N = FMA(KP618033988, T3v, T3K);
501
                              T2I = T2C - T2H;
502
                              T2X = T2P + T2W;
503
                              T3c = T34 + T3b;
504
                              T3d = T2X + T3c;
505
                              T3e = FNMS(KP250000000, T3d, T2I);
506
                              T3f = T2X - T3c;
507
                         }
508
                         ri[WS(rs, 15)] = T2I + T3d;
509
                         T3M = FMA(KP559016994, T3f, T3e);
510
                         ri[WS(rs, 11)] = FMA(KP951056516, T3N, T3M);
511
                         ri[WS(rs, 19)] = FNMS(KP951056516, T3N, T3M);
512
                         T3g = FNMS(KP559016994, T3f, T3e);
513
                         ri[WS(rs, 3)] = FMA(KP951056516, T3L, T3g);
514
                         ri[WS(rs, 7)] = FNMS(KP951056516, T3L, T3g);
515
                    }
516
                    /* Store ii at indices 15, 11, 19, 3, 7. */
                    {
517
                         E T5u, T5w, T5l, T5o, T5p, T5q, T5v, T5r;
518
                         {
519
                              E T5s, T5t, T5m, T5n;
520
                              T5s = T34 - T3b;
521
                              T5t = T2P - T2W;
522
                              T5u = FNMS(KP618033988, T5t, T5s);
523
                              T5w = FMA(KP618033988, T5s, T5t);
524
                              T5l = T58 + T57;
525
                              T5m = T3C + T3J;
526
                              T5n = T3n + T3u;
527
                              T5o = T5m + T5n;
528
                              T5p = FNMS(KP250000000, T5o, T5l);
529
                              T5q = T5m - T5n;
530
                         }
531
                         ii[WS(rs, 15)] = T5o + T5l;
532
                         T5v = FMA(KP559016994, T5q, T5p);
533
                         ii[WS(rs, 11)] = FNMS(KP951056516, T5w, T5v);
534
                         ii[WS(rs, 19)] = FMA(KP951056516, T5w, T5v);
535
                         T5r = FNMS(KP559016994, T5q, T5p);
536
                         ii[WS(rs, 3)] = FNMS(KP951056516, T5u, T5r);
537
                         ii[WS(rs, 7)] = FMA(KP951056516, T5u, T5r);
538
                    }
539
                    /* Store ri at indices 5, 13, 17, 1, 9. */
                    {
540
                         E T45, T47, T3O, T3V, T3W, T3X, T46, T3Y;
541
                         {
542
                              E T41, T44, T3R, T3U;
543
                              T41 = T3Z - T40;
544
                              T44 = T42 - T43;
545
                              T45 = FMA(KP618033988, T44, T41);
546
                              T47 = FNMS(KP618033988, T41, T44);
547
                              T3O = T2C + T2H;
548
                              T3R = T3P + T3Q;
549
                              T3U = T3S + T3T;
550
                              T3V = T3R + T3U;
551
                              T3W = FNMS(KP250000000, T3V, T3O);
552
                              T3X = T3R - T3U;
553
                         }
554
                         ri[WS(rs, 5)] = T3O + T3V;
555
                         T46 = FNMS(KP559016994, T3X, T3W);
556
                         ri[WS(rs, 13)] = FMA(KP951056516, T47, T46);
557
                         ri[WS(rs, 17)] = FNMS(KP951056516, T47, T46);
558
                         T3Y = FMA(KP559016994, T3X, T3W);
559
                         ri[WS(rs, 1)] = FMA(KP951056516, T45, T3Y);
560
                         ri[WS(rs, 9)] = FNMS(KP951056516, T45, T3Y);
561
                    }
562
                    /* Store ii at indices 5, 13, 17, 1, 9. */
                    {
563
                         E T5i, T5k, T59, T5c, T5d, T5e, T5j, T5f;
564
                         {
565
                              E T5g, T5h, T5a, T5b;
566
                              T5g = T3P - T3Q;
567
                              T5h = T3S - T3T;
568
                              T5i = FMA(KP618033988, T5h, T5g);
569
                              T5k = FNMS(KP618033988, T5g, T5h);
570
                              T59 = T57 - T58;
571
                              T5a = T3Z + T40;
572
                              T5b = T42 + T43;
573
                              T5c = T5a + T5b;
574
                              T5d = FNMS(KP250000000, T5c, T59);
575
                              T5e = T5a - T5b;
576
                         }
577
                         ii[WS(rs, 5)] = T5c + T59;
578
                         T5j = FNMS(KP559016994, T5e, T5d);
579
                         ii[WS(rs, 13)] = FNMS(KP951056516, T5k, T5j);
580
                         ii[WS(rs, 17)] = FMA(KP951056516, T5k, T5j);
581
                         T5f = FMA(KP559016994, T5e, T5d);
582
                         ii[WS(rs, 1)] = FNMS(KP951056516, T5i, T5f);
583
                         ii[WS(rs, 9)] = FMA(KP951056516, T5i, T5f);
584
                    }
585
               }
586
          }
587
     }
588
}
589

    
590
/*
 * Twiddle instruction table referenced by the codelet descriptor `desc`:
 * four TW_CEXP entries whose last fields are 1, 3, 9 and 19, followed by
 * a closing TW_NEXT entry.
 * NOTE(review): presumably each TW_CEXP entry yields one complex twiddle
 * (two R values), which would match the eight W[] values t2_20 reads per
 * iteration -- confirm against the tw_instr definition.
 */
static const tw_instr twinstr[] = {
591
     {TW_CEXP, 0, 1},
592
     {TW_CEXP, 0, 3},
593
     {TW_CEXP, 0, 9},
594
     {TW_CEXP, 0, 19},
595
     {TW_NEXT, 1, 0}
596
};
597

    
598
/*
 * Descriptor handed to the planner for this codelet: size 20, name
 * "t2_20", the twinstr table, the genus, and the operation counts
 * {136, 58, 140, 0}, which match the additions / multiplications /
 * fused multiply-add figures quoted in the generator comment for this
 * variant.  NOTE(review): the meaning of the three trailing zeros is not
 * visible here -- see the ct_desc definition.
 */
static const ct_desc desc = { 20, "t2_20", twinstr, &GENUS, {136, 58, 140, 0}, 0, 0, 0 };
599

    
600
/*
 * Registration entry point: passes the t2_20 function and its descriptor
 * to X(kdft_dit_register) on planner p.  X() is a name-wrapping macro
 * defined outside this file.
 */
void X(codelet_t2_20) (planner *p) {
601
     X(kdft_dit_register) (p, t2_20, &desc);
602
}
603
#else
604

    
605
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -name t2_20 -include dft/scalar/t.h */
606

    
607
/*
608
 * This function contains 276 FP additions, 164 FP multiplications,
609
 * (or, 204 additions, 92 multiplications, 72 fused multiply/add),
610
 * 123 stack variables, 4 constants, and 80 memory accesses
611
 */
612
#include "dft/scalar/t.h"
613

    
614
/*
 * t2_20 -- generated size-20 twiddle codelet (non-FMA variant), in place.
 *
 * Parameters:
 *   ri, ii  real and imaginary arrays; elements are addressed as
 *           ri[WS(rs, k)] / ii[WS(rs, k)] for k = 0..19 and are overwritten
 *           with the results.
 *   W       twiddle table; eight reals W[0]..W[7] are consumed per iteration,
 *           starting at offset mb * 8.
 *   rs      stride argument forwarded to WS() for element addressing.
 *   mb, me  half-open iteration range [mb, me).
 *   ms      amount added to ri and ii after each iteration.
 *
 * Per iteration the code (1) loads four twiddle pairs and derives all other
 * needed twiddle pairs from their products, (2) loads the 20 inputs, applying
 * a twiddle pair to every input except index 0, (3) combines them and stores
 * the 20 real and 20 imaginary outputs.
 *
 * NOTE(review): FMA/FNMS, DK, WS, E, R and MAKE_VOLATILE_STRIDE are defined
 * outside this block; FMA(a, b, c) is presumably a*b + c and FNMS(a, b, c)
 * presumably c - a*b -- confirm against the included headers.
 */
static void t2_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* Numeric constants; by value: ~sin(pi/5), ~sin(2*pi/5), 1/4, ~sqrt(5)/4. */
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     {
          INT m;
          /* One pass per m in [mb, me): ri/ii advance by ms, W by 8 reals. */
          for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(40, rs)) {
               E T2, T5, Tg, Ti, Tk, To, T1h, T1f, T6, T3, T8, T14, T1Q, Tc, T1O;
               E T1v, T18, T1t, T1n, T24, T1j, T22, Tq, Tu, T1E, T1G, Tx, Ty, Tz, TJ;
               E T1Z, TB, T1X, T1A, TZ, TL, T1y, TX;
               /*
                * Twiddle preparation.  Loaded pairs: (T2, T5) = W[0..1],
                * (Tg, Ti) = W[2..3], (T3, T6) = W[4..5], (Tx, Ty) = W[6..7].
                * Every other twiddle pair used below is built from products
                * of these values.
                */
               {
                    E T7, T16, Ta, T13, T4, T17, Tb, T12;
                    {
                         E Th, Tn, Tj, Tm;
                         T2 = W[0];
                         T5 = W[1];
                         Tg = W[2];
                         Ti = W[3];
                         Th = T2 * Tg;
                         Tn = T5 * Tg;
                         Tj = T5 * Ti;
                         Tm = T2 * Ti;
                         Tk = Th - Tj;
                         To = Tm + Tn;
                         T1h = Tm - Tn;
                         T1f = Th + Tj;
                         T6 = W[5];
                         T7 = T5 * T6;
                         T16 = Tg * T6;
                         Ta = T2 * T6;
                         T13 = Ti * T6;
                         T3 = W[4];
                         T4 = T2 * T3;
                         T17 = Ti * T3;
                         Tb = T5 * T3;
                         T12 = Tg * T3;
                    }
                    T8 = T4 - T7;
                    T14 = T12 + T13;
                    T1Q = T16 + T17;
                    Tc = Ta + Tb;
                    T1O = T12 - T13;
                    T1v = Ta - Tb;
                    T18 = T16 - T17;
                    T1t = T4 + T7;
                    {
                         E T1l, T1m, T1g, T1i;
                         T1l = T1f * T6;
                         T1m = T1h * T3;
                         T1n = T1l + T1m;
                         T24 = T1l - T1m;
                         T1g = T1f * T3;
                         T1i = T1h * T6;
                         T1j = T1g - T1i;
                         T22 = T1g + T1i;
                         {
                              E Tl, Tp, Ts, Tt;
                              Tl = Tk * T3;
                              Tp = To * T6;
                              Tq = Tl + Tp;
                              Ts = Tk * T6;
                              Tt = To * T3;
                              Tu = Ts - Tt;
                              T1E = Tl - Tp;
                              T1G = Ts + Tt;
                              Tx = W[6];
                              Ty = W[7];
                              Tz = FMA(Tk, Tx, To * Ty);
                              TJ = FMA(Tq, Tx, Tu * Ty);
                              T1Z = FNMS(T1h, Tx, T1f * Ty);
                              TB = FNMS(To, Tx, Tk * Ty);
                              T1X = FMA(T1f, Tx, T1h * Ty);
                              T1A = FNMS(T5, Tx, T2 * Ty);
                              TZ = FNMS(Ti, Tx, Tg * Ty);
                              TL = FNMS(Tu, Tx, Tq * Ty);
                              T1y = FMA(T2, Tx, T5 * Ty);
                              TX = FMA(Tg, Tx, Ti * Ty);
                         }
                    }
               }
               {
                    E TF, T2b, T4A, T4J, T2K, T3r, T4a, T4m, T1N, T28, T29, T3C, T3F, T4o, T3X;
                    E T3Y, T44, T2f, T2g, T2h, T2n, T2s, T4L, T3g, T3h, T4w, T3n, T3o, T3p, T30;
                    E T35, T36, TW, T1r, T1s, T3J, T3M, T4n, T3U, T3V, T43, T2c, T2d, T2e, T2y;
                    E T2D, T4K, T3d, T3e, T4v, T3k, T3l, T3m, T2P, T2U, T2V;
                    /* Inputs 0 (no twiddle), 10, 5 and 15, then their sums/differences. */
                    {
                         E T1, T48, Te, T47, Tw, T2H, TD, T2I, T9, Td;
                         T1 = ri[0];
                         T48 = ii[0];
                         T9 = ri[WS(rs, 10)];
                         Td = ii[WS(rs, 10)];
                         Te = FMA(T8, T9, Tc * Td);
                         T47 = FNMS(Tc, T9, T8 * Td);
                         {
                              E Tr, Tv, TA, TC;
                              Tr = ri[WS(rs, 5)];
                              Tv = ii[WS(rs, 5)];
                              Tw = FMA(Tq, Tr, Tu * Tv);
                              T2H = FNMS(Tu, Tr, Tq * Tv);
                              TA = ri[WS(rs, 15)];
                              TC = ii[WS(rs, 15)];
                              TD = FMA(Tz, TA, TB * TC);
                              T2I = FNMS(TB, TA, Tz * TC);
                         }
                         {
                              E Tf, TE, T4y, T4z;
                              Tf = T1 + Te;
                              TE = Tw + TD;
                              TF = Tf - TE;
                              T2b = Tf + TE;
                              T4y = T48 - T47;
                              T4z = Tw - TD;
                              T4A = T4y - T4z;
                              T4J = T4z + T4y;
                         }
                         {
                              E T2G, T2J, T46, T49;
                              T2G = T1 - Te;
                              T2J = T2H - T2I;
                              T2K = T2G - T2J;
                              T3r = T2G + T2J;
                              T46 = T2H + T2I;
                              T49 = T47 + T48;
                              T4a = T46 + T49;
                              T4m = T49 - T46;
                         }
                    }
                    /* Inputs 8, 18, 17, 7, 13, 3, 12 and 2, then their combinations. */
                    {
                         E T1D, T3A, T2l, T2W, T27, T3E, T2r, T34, T1M, T3B, T2m, T2Z, T1W, T3D, T2q;
                         E T31;
                         {
                              E T1x, T2j, T1C, T2k;
                              {
                                   E T1u, T1w, T1z, T1B;
                                   T1u = ri[WS(rs, 8)];
                                   T1w = ii[WS(rs, 8)];
                                   T1x = FMA(T1t, T1u, T1v * T1w);
                                   T2j = FNMS(T1v, T1u, T1t * T1w);
                                   T1z = ri[WS(rs, 18)];
                                   T1B = ii[WS(rs, 18)];
                                   T1C = FMA(T1y, T1z, T1A * T1B);
                                   T2k = FNMS(T1A, T1z, T1y * T1B);
                              }
                              T1D = T1x + T1C;
                              T3A = T2j + T2k;
                              T2l = T2j - T2k;
                              T2W = T1x - T1C;
                         }
                         {
                              E T21, T32, T26, T33;
                              {
                                   E T1Y, T20, T23, T25;
                                   T1Y = ri[WS(rs, 17)];
                                   T20 = ii[WS(rs, 17)];
                                   T21 = FMA(T1X, T1Y, T1Z * T20);
                                   T32 = FNMS(T1Z, T1Y, T1X * T20);
                                   T23 = ri[WS(rs, 7)];
                                   T25 = ii[WS(rs, 7)];
                                   T26 = FMA(T22, T23, T24 * T25);
                                   T33 = FNMS(T24, T23, T22 * T25);
                              }
                              T27 = T21 + T26;
                              T3E = T32 + T33;
                              T2r = T21 - T26;
                              T34 = T32 - T33;
                         }
                         {
                              E T1I, T2X, T1L, T2Y;
                              {
                                   E T1F, T1H, T1J, T1K;
                                   T1F = ri[WS(rs, 13)];
                                   T1H = ii[WS(rs, 13)];
                                   T1I = FMA(T1E, T1F, T1G * T1H);
                                   T2X = FNMS(T1G, T1F, T1E * T1H);
                                   T1J = ri[WS(rs, 3)];
                                   T1K = ii[WS(rs, 3)];
                                   T1L = FMA(Tg, T1J, Ti * T1K);
                                   T2Y = FNMS(Ti, T1J, Tg * T1K);
                              }
                              T1M = T1I + T1L;
                              T3B = T2X + T2Y;
                              T2m = T1I - T1L;
                              T2Z = T2X - T2Y;
                         }
                         {
                              E T1S, T2o, T1V, T2p;
                              {
                                   E T1P, T1R, T1T, T1U;
                                   T1P = ri[WS(rs, 12)];
                                   T1R = ii[WS(rs, 12)];
                                   T1S = FMA(T1O, T1P, T1Q * T1R);
                                   T2o = FNMS(T1Q, T1P, T1O * T1R);
                                   T1T = ri[WS(rs, 2)];
                                   T1U = ii[WS(rs, 2)];
                                   T1V = FMA(T1f, T1T, T1h * T1U);
                                   T2p = FNMS(T1h, T1T, T1f * T1U);
                              }
                              T1W = T1S + T1V;
                              T3D = T2o + T2p;
                              T2q = T2o - T2p;
                              T31 = T1S - T1V;
                         }
                         T1N = T1D - T1M;
                         T28 = T1W - T27;
                         T29 = T1N + T28;
                         T3C = T3A - T3B;
                         T3F = T3D - T3E;
                         T4o = T3C + T3F;
                         T3X = T3A + T3B;
                         T3Y = T3D + T3E;
                         T44 = T3X + T3Y;
                         T2f = T1D + T1M;
                         T2g = T1W + T27;
                         T2h = T2f + T2g;
                         T2n = T2l + T2m;
                         T2s = T2q + T2r;
                         T4L = T2n + T2s;
                         T3g = T2l - T2m;
                         T3h = T2q - T2r;
                         T4w = T3g + T3h;
                         T3n = T2W + T2Z;
                         T3o = T31 + T34;
                         T3p = T3n + T3o;
                         T30 = T2W - T2Z;
                         T35 = T31 - T34;
                         T36 = T30 + T35;
                    }
                    /* Inputs 4, 14, 1, 11, 9, 19, 16 and 6, then their combinations. */
                    {
                         E TO, T3H, T2w, T2L, T1q, T3L, T2C, T2T, TV, T3I, T2x, T2O, T1b, T3K, T2B;
                         E T2Q;
                         {
                              E TI, T2u, TN, T2v;
                              {
                                   E TG, TH, TK, TM;
                                   TG = ri[WS(rs, 4)];
                                   TH = ii[WS(rs, 4)];
                                   TI = FMA(Tk, TG, To * TH);
                                   T2u = FNMS(To, TG, Tk * TH);
                                   TK = ri[WS(rs, 14)];
                                   TM = ii[WS(rs, 14)];
                                   TN = FMA(TJ, TK, TL * TM);
                                   T2v = FNMS(TL, TK, TJ * TM);
                              }
                              TO = TI + TN;
                              T3H = T2u + T2v;
                              T2w = T2u - T2v;
                              T2L = TI - TN;
                         }
                         {
                              E T1e, T2R, T1p, T2S;
                              {
                                   E T1c, T1d, T1k, T1o;
                                   T1c = ri[WS(rs, 1)];
                                   T1d = ii[WS(rs, 1)];
                                   T1e = FMA(T2, T1c, T5 * T1d);
                                   T2R = FNMS(T5, T1c, T2 * T1d);
                                   T1k = ri[WS(rs, 11)];
                                   T1o = ii[WS(rs, 11)];
                                   T1p = FMA(T1j, T1k, T1n * T1o);
                                   T2S = FNMS(T1n, T1k, T1j * T1o);
                              }
                              T1q = T1e + T1p;
                              T3L = T2R + T2S;
                              T2C = T1e - T1p;
                              T2T = T2R - T2S;
                         }
                         {
                              E TR, T2M, TU, T2N;
                              {
                                   E TP, TQ, TS, TT;
                                   TP = ri[WS(rs, 9)];
                                   TQ = ii[WS(rs, 9)];
                                   TR = FMA(T3, TP, T6 * TQ);
                                   T2M = FNMS(T6, TP, T3 * TQ);
                                   TS = ri[WS(rs, 19)];
                                   TT = ii[WS(rs, 19)];
                                   TU = FMA(Tx, TS, Ty * TT);
                                   T2N = FNMS(Ty, TS, Tx * TT);
                              }
                              TV = TR + TU;
                              T3I = T2M + T2N;
                              T2x = TR - TU;
                              T2O = T2M - T2N;
                         }
                         {
                              E T11, T2z, T1a, T2A;
                              {
                                   E TY, T10, T15, T19;
                                   TY = ri[WS(rs, 16)];
                                   T10 = ii[WS(rs, 16)];
                                   T11 = FMA(TX, TY, TZ * T10);
                                   T2z = FNMS(TZ, TY, TX * T10);
                                   T15 = ri[WS(rs, 6)];
                                   T19 = ii[WS(rs, 6)];
                                   T1a = FMA(T14, T15, T18 * T19);
                                   T2A = FNMS(T18, T15, T14 * T19);
                              }
                              T1b = T11 + T1a;
                              T3K = T2z + T2A;
                              T2B = T2z - T2A;
                              T2Q = T11 - T1a;
                         }
                         TW = TO - TV;
                         T1r = T1b - T1q;
                         T1s = TW + T1r;
                         T3J = T3H - T3I;
                         T3M = T3K - T3L;
                         T4n = T3J + T3M;
                         T3U = T3H + T3I;
                         T3V = T3K + T3L;
                         T43 = T3U + T3V;
                         T2c = TO + TV;
                         T2d = T1b + T1q;
                         T2e = T2c + T2d;
                         T2y = T2w + T2x;
                         T2D = T2B + T2C;
                         T4K = T2y + T2D;
                         T3d = T2w - T2x;
                         T3e = T2B - T2C;
                         T4v = T3d + T3e;
                         T3k = T2L + T2O;
                         T3l = T2Q + T2T;
                         T3m = T3k + T3l;
                         T2P = T2L - T2O;
                         T2U = T2Q - T2T;
                         T2V = T2P + T2U;
                    }
                    /* Real outputs 10, 14, 6, 2 and 18. */
                    {
                         E T3y, T2a, T3x, T3O, T3Q, T3G, T3N, T3P, T3z;
                         T3y = KP559016994 * (T1s - T29);
                         T2a = T1s + T29;
                         T3x = FNMS(KP250000000, T2a, TF);
                         T3G = T3C - T3F;
                         T3N = T3J - T3M;
                         T3O = FNMS(KP587785252, T3N, KP951056516 * T3G);
                         T3Q = FMA(KP951056516, T3N, KP587785252 * T3G);
                         ri[WS(rs, 10)] = TF + T2a;
                         T3P = T3y + T3x;
                         ri[WS(rs, 14)] = T3P - T3Q;
                         ri[WS(rs, 6)] = T3P + T3Q;
                         T3z = T3x - T3y;
                         ri[WS(rs, 2)] = T3z - T3O;
                         ri[WS(rs, 18)] = T3z + T3O;
                    }
                    /* Imaginary outputs 10, 6, 14, 2 and 18. */
                    {
                         E T4r, T4p, T4q, T4l, T4u, T4j, T4k, T4t, T4s;
                         T4r = KP559016994 * (T4n - T4o);
                         T4p = T4n + T4o;
                         T4q = FNMS(KP250000000, T4p, T4m);
                         T4j = T1N - T28;
                         T4k = TW - T1r;
                         T4l = FNMS(KP587785252, T4k, KP951056516 * T4j);
                         T4u = FMA(KP951056516, T4k, KP587785252 * T4j);
                         ii[WS(rs, 10)] = T4p + T4m;
                         T4t = T4r + T4q;
                         ii[WS(rs, 6)] = T4t - T4u;
                         ii[WS(rs, 14)] = T4u + T4t;
                         T4s = T4q - T4r;
                         ii[WS(rs, 2)] = T4l + T4s;
                         ii[WS(rs, 18)] = T4s - T4l;
                    }
                    /* Real outputs 0, 12, 8, 4 and 16. */
                    {
                         E T3R, T2i, T3S, T40, T42, T3W, T3Z, T41, T3T;
                         T3R = KP559016994 * (T2e - T2h);
                         T2i = T2e + T2h;
                         T3S = FNMS(KP250000000, T2i, T2b);
                         T3W = T3U - T3V;
                         T3Z = T3X - T3Y;
                         T40 = FMA(KP951056516, T3W, KP587785252 * T3Z);
                         T42 = FNMS(KP587785252, T3W, KP951056516 * T3Z);
                         ri[0] = T2b + T2i;
                         T41 = T3S - T3R;
                         ri[WS(rs, 12)] = T41 - T42;
                         ri[WS(rs, 8)] = T41 + T42;
                         T3T = T3R + T3S;
                         ri[WS(rs, 4)] = T3T - T40;
                         ri[WS(rs, 16)] = T3T + T40;
                    }
                    /* Imaginary outputs 0, 8, 12, 4 and 16. */
                    {
                         E T4e, T45, T4f, T4d, T4i, T4b, T4c, T4h, T4g;
                         T4e = KP559016994 * (T43 - T44);
                         T45 = T43 + T44;
                         T4f = FNMS(KP250000000, T45, T4a);
                         T4b = T2c - T2d;
                         T4c = T2f - T2g;
                         T4d = FMA(KP951056516, T4b, KP587785252 * T4c);
                         T4i = FNMS(KP587785252, T4b, KP951056516 * T4c);
                         ii[0] = T45 + T4a;
                         T4h = T4f - T4e;
                         ii[WS(rs, 8)] = T4h - T4i;
                         ii[WS(rs, 12)] = T4i + T4h;
                         T4g = T4e + T4f;
                         ii[WS(rs, 4)] = T4d + T4g;
                         ii[WS(rs, 16)] = T4g - T4d;
                    }
                    /* Real outputs 15, 11, 19, 3 and 7. */
                    {
                         E T39, T37, T38, T2F, T3b, T2t, T2E, T3c, T3a;
                         T39 = KP559016994 * (T2V - T36);
                         T37 = T2V + T36;
                         T38 = FNMS(KP250000000, T37, T2K);
                         T2t = T2n - T2s;
                         T2E = T2y - T2D;
                         T2F = FNMS(KP587785252, T2E, KP951056516 * T2t);
                         T3b = FMA(KP951056516, T2E, KP587785252 * T2t);
                         ri[WS(rs, 15)] = T2K + T37;
                         T3c = T39 + T38;
                         ri[WS(rs, 11)] = T3b + T3c;
                         ri[WS(rs, 19)] = T3c - T3b;
                         T3a = T38 - T39;
                         ri[WS(rs, 3)] = T2F + T3a;
                         ri[WS(rs, 7)] = T3a - T2F;
                    }
                    /* Imaginary outputs 15, 11, 19, 3 and 7. */
                    {
                         E T4O, T4M, T4N, T4S, T4U, T4Q, T4R, T4T, T4P;
                         T4O = KP559016994 * (T4K - T4L);
                         T4M = T4K + T4L;
                         T4N = FNMS(KP250000000, T4M, T4J);
                         T4Q = T30 - T35;
                         T4R = T2P - T2U;
                         T4S = FNMS(KP587785252, T4R, KP951056516 * T4Q);
                         T4U = FMA(KP951056516, T4R, KP587785252 * T4Q);
                         ii[WS(rs, 15)] = T4M + T4J;
                         T4T = T4O + T4N;
                         ii[WS(rs, 11)] = T4T - T4U;
                         ii[WS(rs, 19)] = T4U + T4T;
                         T4P = T4N - T4O;
                         ii[WS(rs, 3)] = T4P - T4S;
                         ii[WS(rs, 7)] = T4S + T4P;
                    }
                    /* Real outputs 5, 13, 17, 1 and 9. */
                    {
                         E T3q, T3s, T3t, T3j, T3v, T3f, T3i, T3w, T3u;
                         T3q = KP559016994 * (T3m - T3p);
                         T3s = T3m + T3p;
                         T3t = FNMS(KP250000000, T3s, T3r);
                         T3f = T3d - T3e;
                         T3i = T3g - T3h;
                         T3j = FMA(KP951056516, T3f, KP587785252 * T3i);
                         T3v = FNMS(KP587785252, T3f, KP951056516 * T3i);
                         ri[WS(rs, 5)] = T3r + T3s;
                         T3w = T3t - T3q;
                         ri[WS(rs, 13)] = T3v + T3w;
                         ri[WS(rs, 17)] = T3w - T3v;
                         T3u = T3q + T3t;
                         ri[WS(rs, 1)] = T3j + T3u;
                         ri[WS(rs, 9)] = T3u - T3j;
                    }
                    /* Imaginary outputs 5, 13, 17, 1 and 9. */
                    {
                         E T4x, T4B, T4C, T4G, T4I, T4E, T4F, T4H, T4D;
                         T4x = KP559016994 * (T4v - T4w);
                         T4B = T4v + T4w;
                         T4C = FNMS(KP250000000, T4B, T4A);
                         T4E = T3k - T3l;
                         T4F = T3n - T3o;
                         T4G = FMA(KP951056516, T4E, KP587785252 * T4F);
                         T4I = FNMS(KP587785252, T4E, KP951056516 * T4F);
                         ii[WS(rs, 5)] = T4B + T4A;
                         T4H = T4C - T4x;
                         ii[WS(rs, 13)] = T4H - T4I;
                         ii[WS(rs, 17)] = T4I + T4H;
                         T4D = T4x + T4C;
                         ii[WS(rs, 1)] = T4D - T4G;
                         ii[WS(rs, 9)] = T4G + T4D;
                    }
               }
          }
     }
}
1083

    
1084
static const tw_instr twinstr[] = {
1085
     {TW_CEXP, 0, 1},
1086
     {TW_CEXP, 0, 3},
1087
     {TW_CEXP, 0, 9},
1088
     {TW_CEXP, 0, 19},
1089
     {TW_NEXT, 1, 0}
1090
};
1091

    
1092
/*
 * Descriptor handed to the planner together with the t2_20 codelet
 * (non-FMA variant).  The brace-enclosed triple 204/92/72 matches the
 * "204 additions, 92 multiplications, 72 fused multiply/add" statistics
 * printed by the generator for this variant.
 * NOTE(review): the meaning of the remaining fields comes from the ct_desc
 * declaration, which is not visible here -- confirm before relying on it.
 */
static const ct_desc desc = {
     20,                 /* presumably the transform size handled by t2_20 */
     "t2_20",            /* codelet name */
     twinstr,            /* twiddle instruction table defined above */
     &GENUS,
     {204, 92, 72, 0},   /* operation counts */
     0,
     0,
     0
};
1093

    
1094
/*
 * Registration entry point for the non-FMA variant: passes the t2_20 codelet
 * function and its descriptor to kdft_dit_register for the given planner.
 */
void X(codelet_t2_20) (planner *p) {
     X(kdft_dit_register) (p, t2_20, &desc);
}
1097
#endif