To check out this repository, please run `hg clone` on the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted on GitHub: https://github.com/sonic-visualiser/sv-dependency-builds
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t1_32.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (43.6 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:15 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 32 -name t1_32 -include dft/scalar/t.h */
29

    
30
/*
31
 * This function contains 434 FP additions, 260 FP multiplications,
32
 * (or, 236 additions, 62 multiplications, 198 fused multiply/add),
33
 * 102 stack variables, 7 constants, and 128 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36

    
37
static void t1_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
40
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
41
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);
42
     DK(KP668178637, +0.668178637919298919997757686523080761552472251);
43
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
44
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
45
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
46
     {
47
          INT m;
48
          for (m = mb, W = W + (mb * 62); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) {
49
               E T8, T8x, T3w, T87, Tl, T8y, T3B, T83, Tz, T6F, T3J, T5T, TM, T6G, T3Q;
50
               E T5U, T11, T1e, T6M, T6J, T6K, T6L, T3Z, T5X, T46, T5Y, T1s, T1F, T6O, T6P;
51
               E T6Q, T6R, T4e, T60, T4l, T61, T32, T7b, T78, T7N, T54, T6f, T5r, T6c, T29;
52
               E T70, T6X, T7I, T4v, T68, T4S, T65, T3t, T79, T7e, T7O, T5b, T5s, T5i, T5t;
53
               E T2A, T6Y, T73, T7J, T4C, T4T, T4J, T4U;
54
               {
55
                    E T1, T86, T3, T6, T4, T84, T2, T7, T85, T5;
56
                    T1 = ri[0];
57
                    T86 = ii[0];
58
                    T3 = ri[WS(rs, 16)];
59
                    T6 = ii[WS(rs, 16)];
60
                    T2 = W[30];
61
                    T4 = T2 * T3;
62
                    T84 = T2 * T6;
63
                    T5 = W[31];
64
                    T7 = FMA(T5, T6, T4);
65
                    T85 = FNMS(T5, T3, T84);
66
                    T8 = T1 + T7;
67
                    T8x = T86 - T85;
68
                    T3w = T1 - T7;
69
                    T87 = T85 + T86;
70
               }
71
               {
72
                    E Ta, Td, Tb, T3x, Tg, Tj, Th, T3z, T9, Tf;
73
                    Ta = ri[WS(rs, 8)];
74
                    Td = ii[WS(rs, 8)];
75
                    T9 = W[14];
76
                    Tb = T9 * Ta;
77
                    T3x = T9 * Td;
78
                    Tg = ri[WS(rs, 24)];
79
                    Tj = ii[WS(rs, 24)];
80
                    Tf = W[46];
81
                    Th = Tf * Tg;
82
                    T3z = Tf * Tj;
83
                    {
84
                         E Te, T3y, Tk, T3A, Tc, Ti;
85
                         Tc = W[15];
86
                         Te = FMA(Tc, Td, Tb);
87
                         T3y = FNMS(Tc, Ta, T3x);
88
                         Ti = W[47];
89
                         Tk = FMA(Ti, Tj, Th);
90
                         T3A = FNMS(Ti, Tg, T3z);
91
                         Tl = Te + Tk;
92
                         T8y = Te - Tk;
93
                         T3B = T3y - T3A;
94
                         T83 = T3y + T3A;
95
                    }
96
               }
97
               {
98
                    E Ts, T3F, Ty, T3H, T3D, T3I;
99
                    {
100
                         E To, Tr, Tp, T3E, Tn, Tq;
101
                         To = ri[WS(rs, 4)];
102
                         Tr = ii[WS(rs, 4)];
103
                         Tn = W[6];
104
                         Tp = Tn * To;
105
                         T3E = Tn * Tr;
106
                         Tq = W[7];
107
                         Ts = FMA(Tq, Tr, Tp);
108
                         T3F = FNMS(Tq, To, T3E);
109
                    }
110
                    {
111
                         E Tu, Tx, Tv, T3G, Tt, Tw;
112
                         Tu = ri[WS(rs, 20)];
113
                         Tx = ii[WS(rs, 20)];
114
                         Tt = W[38];
115
                         Tv = Tt * Tu;
116
                         T3G = Tt * Tx;
117
                         Tw = W[39];
118
                         Ty = FMA(Tw, Tx, Tv);
119
                         T3H = FNMS(Tw, Tu, T3G);
120
                    }
121
                    Tz = Ts + Ty;
122
                    T6F = T3F + T3H;
123
                    T3D = Ts - Ty;
124
                    T3I = T3F - T3H;
125
                    T3J = T3D + T3I;
126
                    T5T = T3I - T3D;
127
               }
128
               {
129
                    E TF, T3M, TL, T3O, T3K, T3P;
130
                    {
131
                         E TB, TE, TC, T3L, TA, TD;
132
                         TB = ri[WS(rs, 28)];
133
                         TE = ii[WS(rs, 28)];
134
                         TA = W[54];
135
                         TC = TA * TB;
136
                         T3L = TA * TE;
137
                         TD = W[55];
138
                         TF = FMA(TD, TE, TC);
139
                         T3M = FNMS(TD, TB, T3L);
140
                    }
141
                    {
142
                         E TH, TK, TI, T3N, TG, TJ;
143
                         TH = ri[WS(rs, 12)];
144
                         TK = ii[WS(rs, 12)];
145
                         TG = W[22];
146
                         TI = TG * TH;
147
                         T3N = TG * TK;
148
                         TJ = W[23];
149
                         TL = FMA(TJ, TK, TI);
150
                         T3O = FNMS(TJ, TH, T3N);
151
                    }
152
                    TM = TF + TL;
153
                    T6G = T3M + T3O;
154
                    T3K = TF - TL;
155
                    T3P = T3M - T3O;
156
                    T3Q = T3K - T3P;
157
                    T5U = T3K + T3P;
158
               }
159
               {
160
                    E TU, T3U, T1d, T44, T10, T3W, T17, T42;
161
                    {
162
                         E TQ, TT, TR, T3T, TP, TS;
163
                         TQ = ri[WS(rs, 2)];
164
                         TT = ii[WS(rs, 2)];
165
                         TP = W[2];
166
                         TR = TP * TQ;
167
                         T3T = TP * TT;
168
                         TS = W[3];
169
                         TU = FMA(TS, TT, TR);
170
                         T3U = FNMS(TS, TQ, T3T);
171
                    }
172
                    {
173
                         E T19, T1c, T1a, T43, T18, T1b;
174
                         T19 = ri[WS(rs, 26)];
175
                         T1c = ii[WS(rs, 26)];
176
                         T18 = W[50];
177
                         T1a = T18 * T19;
178
                         T43 = T18 * T1c;
179
                         T1b = W[51];
180
                         T1d = FMA(T1b, T1c, T1a);
181
                         T44 = FNMS(T1b, T19, T43);
182
                    }
183
                    {
184
                         E TW, TZ, TX, T3V, TV, TY;
185
                         TW = ri[WS(rs, 18)];
186
                         TZ = ii[WS(rs, 18)];
187
                         TV = W[34];
188
                         TX = TV * TW;
189
                         T3V = TV * TZ;
190
                         TY = W[35];
191
                         T10 = FMA(TY, TZ, TX);
192
                         T3W = FNMS(TY, TW, T3V);
193
                    }
194
                    {
195
                         E T13, T16, T14, T41, T12, T15;
196
                         T13 = ri[WS(rs, 10)];
197
                         T16 = ii[WS(rs, 10)];
198
                         T12 = W[18];
199
                         T14 = T12 * T13;
200
                         T41 = T12 * T16;
201
                         T15 = W[19];
202
                         T17 = FMA(T15, T16, T14);
203
                         T42 = FNMS(T15, T13, T41);
204
                    }
205
                    T11 = TU + T10;
206
                    T1e = T17 + T1d;
207
                    T6M = T11 - T1e;
208
                    T6J = T3U + T3W;
209
                    T6K = T42 + T44;
210
                    T6L = T6J - T6K;
211
                    {
212
                         E T3X, T3Y, T40, T45;
213
                         T3X = T3U - T3W;
214
                         T3Y = T17 - T1d;
215
                         T3Z = T3X - T3Y;
216
                         T5X = T3X + T3Y;
217
                         T40 = TU - T10;
218
                         T45 = T42 - T44;
219
                         T46 = T40 + T45;
220
                         T5Y = T40 - T45;
221
                    }
222
               }
223
               {
224
                    E T1l, T49, T1E, T4j, T1r, T4b, T1y, T4h;
225
                    {
226
                         E T1h, T1k, T1i, T48, T1g, T1j;
227
                         T1h = ri[WS(rs, 30)];
228
                         T1k = ii[WS(rs, 30)];
229
                         T1g = W[58];
230
                         T1i = T1g * T1h;
231
                         T48 = T1g * T1k;
232
                         T1j = W[59];
233
                         T1l = FMA(T1j, T1k, T1i);
234
                         T49 = FNMS(T1j, T1h, T48);
235
                    }
236
                    {
237
                         E T1A, T1D, T1B, T4i, T1z, T1C;
238
                         T1A = ri[WS(rs, 22)];
239
                         T1D = ii[WS(rs, 22)];
240
                         T1z = W[42];
241
                         T1B = T1z * T1A;
242
                         T4i = T1z * T1D;
243
                         T1C = W[43];
244
                         T1E = FMA(T1C, T1D, T1B);
245
                         T4j = FNMS(T1C, T1A, T4i);
246
                    }
247
                    {
248
                         E T1n, T1q, T1o, T4a, T1m, T1p;
249
                         T1n = ri[WS(rs, 14)];
250
                         T1q = ii[WS(rs, 14)];
251
                         T1m = W[26];
252
                         T1o = T1m * T1n;
253
                         T4a = T1m * T1q;
254
                         T1p = W[27];
255
                         T1r = FMA(T1p, T1q, T1o);
256
                         T4b = FNMS(T1p, T1n, T4a);
257
                    }
258
                    {
259
                         E T1u, T1x, T1v, T4g, T1t, T1w;
260
                         T1u = ri[WS(rs, 6)];
261
                         T1x = ii[WS(rs, 6)];
262
                         T1t = W[10];
263
                         T1v = T1t * T1u;
264
                         T4g = T1t * T1x;
265
                         T1w = W[11];
266
                         T1y = FMA(T1w, T1x, T1v);
267
                         T4h = FNMS(T1w, T1u, T4g);
268
                    }
269
                    T1s = T1l + T1r;
270
                    T1F = T1y + T1E;
271
                    T6O = T1s - T1F;
272
                    T6P = T49 + T4b;
273
                    T6Q = T4h + T4j;
274
                    T6R = T6P - T6Q;
275
                    {
276
                         E T4c, T4d, T4f, T4k;
277
                         T4c = T49 - T4b;
278
                         T4d = T1y - T1E;
279
                         T4e = T4c - T4d;
280
                         T60 = T4c + T4d;
281
                         T4f = T1l - T1r;
282
                         T4k = T4h - T4j;
283
                         T4l = T4f + T4k;
284
                         T61 = T4f - T4k;
285
                    }
286
               }
287
               {
288
                    E T2H, T4Z, T30, T5p, T2N, T51, T2U, T5n;
289
                    {
290
                         E T2D, T2G, T2E, T4Y, T2C, T2F;
291
                         T2D = ri[WS(rs, 31)];
292
                         T2G = ii[WS(rs, 31)];
293
                         T2C = W[60];
294
                         T2E = T2C * T2D;
295
                         T4Y = T2C * T2G;
296
                         T2F = W[61];
297
                         T2H = FMA(T2F, T2G, T2E);
298
                         T4Z = FNMS(T2F, T2D, T4Y);
299
                    }
300
                    {
301
                         E T2W, T2Z, T2X, T5o, T2V, T2Y;
302
                         T2W = ri[WS(rs, 23)];
303
                         T2Z = ii[WS(rs, 23)];
304
                         T2V = W[44];
305
                         T2X = T2V * T2W;
306
                         T5o = T2V * T2Z;
307
                         T2Y = W[45];
308
                         T30 = FMA(T2Y, T2Z, T2X);
309
                         T5p = FNMS(T2Y, T2W, T5o);
310
                    }
311
                    {
312
                         E T2J, T2M, T2K, T50, T2I, T2L;
313
                         T2J = ri[WS(rs, 15)];
314
                         T2M = ii[WS(rs, 15)];
315
                         T2I = W[28];
316
                         T2K = T2I * T2J;
317
                         T50 = T2I * T2M;
318
                         T2L = W[29];
319
                         T2N = FMA(T2L, T2M, T2K);
320
                         T51 = FNMS(T2L, T2J, T50);
321
                    }
322
                    {
323
                         E T2Q, T2T, T2R, T5m, T2P, T2S;
324
                         T2Q = ri[WS(rs, 7)];
325
                         T2T = ii[WS(rs, 7)];
326
                         T2P = W[12];
327
                         T2R = T2P * T2Q;
328
                         T5m = T2P * T2T;
329
                         T2S = W[13];
330
                         T2U = FMA(T2S, T2T, T2R);
331
                         T5n = FNMS(T2S, T2Q, T5m);
332
                    }
333
                    {
334
                         E T2O, T31, T76, T77;
335
                         T2O = T2H + T2N;
336
                         T31 = T2U + T30;
337
                         T32 = T2O + T31;
338
                         T7b = T2O - T31;
339
                         T76 = T4Z + T51;
340
                         T77 = T5n + T5p;
341
                         T78 = T76 - T77;
342
                         T7N = T76 + T77;
343
                    }
344
                    {
345
                         E T52, T53, T5l, T5q;
346
                         T52 = T4Z - T51;
347
                         T53 = T2U - T30;
348
                         T54 = T52 - T53;
349
                         T6f = T52 + T53;
350
                         T5l = T2H - T2N;
351
                         T5q = T5n - T5p;
352
                         T5r = T5l + T5q;
353
                         T6c = T5l - T5q;
354
                    }
355
               }
356
               {
357
                    E T1O, T4q, T27, T4Q, T1U, T4s, T21, T4O;
358
                    {
359
                         E T1K, T1N, T1L, T4p, T1J, T1M;
360
                         T1K = ri[WS(rs, 1)];
361
                         T1N = ii[WS(rs, 1)];
362
                         T1J = W[0];
363
                         T1L = T1J * T1K;
364
                         T4p = T1J * T1N;
365
                         T1M = W[1];
366
                         T1O = FMA(T1M, T1N, T1L);
367
                         T4q = FNMS(T1M, T1K, T4p);
368
                    }
369
                    {
370
                         E T23, T26, T24, T4P, T22, T25;
371
                         T23 = ri[WS(rs, 25)];
372
                         T26 = ii[WS(rs, 25)];
373
                         T22 = W[48];
374
                         T24 = T22 * T23;
375
                         T4P = T22 * T26;
376
                         T25 = W[49];
377
                         T27 = FMA(T25, T26, T24);
378
                         T4Q = FNMS(T25, T23, T4P);
379
                    }
380
                    {
381
                         E T1Q, T1T, T1R, T4r, T1P, T1S;
382
                         T1Q = ri[WS(rs, 17)];
383
                         T1T = ii[WS(rs, 17)];
384
                         T1P = W[32];
385
                         T1R = T1P * T1Q;
386
                         T4r = T1P * T1T;
387
                         T1S = W[33];
388
                         T1U = FMA(T1S, T1T, T1R);
389
                         T4s = FNMS(T1S, T1Q, T4r);
390
                    }
391
                    {
392
                         E T1X, T20, T1Y, T4N, T1W, T1Z;
393
                         T1X = ri[WS(rs, 9)];
394
                         T20 = ii[WS(rs, 9)];
395
                         T1W = W[16];
396
                         T1Y = T1W * T1X;
397
                         T4N = T1W * T20;
398
                         T1Z = W[17];
399
                         T21 = FMA(T1Z, T20, T1Y);
400
                         T4O = FNMS(T1Z, T1X, T4N);
401
                    }
402
                    {
403
                         E T1V, T28, T6V, T6W;
404
                         T1V = T1O + T1U;
405
                         T28 = T21 + T27;
406
                         T29 = T1V + T28;
407
                         T70 = T1V - T28;
408
                         T6V = T4q + T4s;
409
                         T6W = T4O + T4Q;
410
                         T6X = T6V - T6W;
411
                         T7I = T6V + T6W;
412
                    }
413
                    {
414
                         E T4t, T4u, T4M, T4R;
415
                         T4t = T4q - T4s;
416
                         T4u = T21 - T27;
417
                         T4v = T4t - T4u;
418
                         T68 = T4t + T4u;
419
                         T4M = T1O - T1U;
420
                         T4R = T4O - T4Q;
421
                         T4S = T4M + T4R;
422
                         T65 = T4M - T4R;
423
                    }
424
               }
425
               {
426
                    E T38, T56, T3r, T5g, T3e, T58, T3l, T5e;
427
                    {
428
                         E T34, T37, T35, T55, T33, T36;
429
                         T34 = ri[WS(rs, 3)];
430
                         T37 = ii[WS(rs, 3)];
431
                         T33 = W[4];
432
                         T35 = T33 * T34;
433
                         T55 = T33 * T37;
434
                         T36 = W[5];
435
                         T38 = FMA(T36, T37, T35);
436
                         T56 = FNMS(T36, T34, T55);
437
                    }
438
                    {
439
                         E T3n, T3q, T3o, T5f, T3m, T3p;
440
                         T3n = ri[WS(rs, 11)];
441
                         T3q = ii[WS(rs, 11)];
442
                         T3m = W[20];
443
                         T3o = T3m * T3n;
444
                         T5f = T3m * T3q;
445
                         T3p = W[21];
446
                         T3r = FMA(T3p, T3q, T3o);
447
                         T5g = FNMS(T3p, T3n, T5f);
448
                    }
449
                    {
450
                         E T3a, T3d, T3b, T57, T39, T3c;
451
                         T3a = ri[WS(rs, 19)];
452
                         T3d = ii[WS(rs, 19)];
453
                         T39 = W[36];
454
                         T3b = T39 * T3a;
455
                         T57 = T39 * T3d;
456
                         T3c = W[37];
457
                         T3e = FMA(T3c, T3d, T3b);
458
                         T58 = FNMS(T3c, T3a, T57);
459
                    }
460
                    {
461
                         E T3h, T3k, T3i, T5d, T3g, T3j;
462
                         T3h = ri[WS(rs, 27)];
463
                         T3k = ii[WS(rs, 27)];
464
                         T3g = W[52];
465
                         T3i = T3g * T3h;
466
                         T5d = T3g * T3k;
467
                         T3j = W[53];
468
                         T3l = FMA(T3j, T3k, T3i);
469
                         T5e = FNMS(T3j, T3h, T5d);
470
                    }
471
                    {
472
                         E T3f, T3s, T7c, T7d;
473
                         T3f = T38 + T3e;
474
                         T3s = T3l + T3r;
475
                         T3t = T3f + T3s;
476
                         T79 = T3s - T3f;
477
                         T7c = T56 + T58;
478
                         T7d = T5e + T5g;
479
                         T7e = T7c - T7d;
480
                         T7O = T7c + T7d;
481
                    }
482
                    {
483
                         E T59, T5a, T5c, T5h;
484
                         T59 = T56 - T58;
485
                         T5a = T38 - T3e;
486
                         T5b = T59 - T5a;
487
                         T5s = T5a + T59;
488
                         T5c = T3l - T3r;
489
                         T5h = T5e - T5g;
490
                         T5i = T5c + T5h;
491
                         T5t = T5c - T5h;
492
                    }
493
               }
494
               {
495
                    E T2f, T4x, T2y, T4H, T2l, T4z, T2s, T4F;
496
                    {
497
                         E T2b, T2e, T2c, T4w, T2a, T2d;
498
                         T2b = ri[WS(rs, 5)];
499
                         T2e = ii[WS(rs, 5)];
500
                         T2a = W[8];
501
                         T2c = T2a * T2b;
502
                         T4w = T2a * T2e;
503
                         T2d = W[9];
504
                         T2f = FMA(T2d, T2e, T2c);
505
                         T4x = FNMS(T2d, T2b, T4w);
506
                    }
507
                    {
508
                         E T2u, T2x, T2v, T4G, T2t, T2w;
509
                         T2u = ri[WS(rs, 13)];
510
                         T2x = ii[WS(rs, 13)];
511
                         T2t = W[24];
512
                         T2v = T2t * T2u;
513
                         T4G = T2t * T2x;
514
                         T2w = W[25];
515
                         T2y = FMA(T2w, T2x, T2v);
516
                         T4H = FNMS(T2w, T2u, T4G);
517
                    }
518
                    {
519
                         E T2h, T2k, T2i, T4y, T2g, T2j;
520
                         T2h = ri[WS(rs, 21)];
521
                         T2k = ii[WS(rs, 21)];
522
                         T2g = W[40];
523
                         T2i = T2g * T2h;
524
                         T4y = T2g * T2k;
525
                         T2j = W[41];
526
                         T2l = FMA(T2j, T2k, T2i);
527
                         T4z = FNMS(T2j, T2h, T4y);
528
                    }
529
                    {
530
                         E T2o, T2r, T2p, T4E, T2n, T2q;
531
                         T2o = ri[WS(rs, 29)];
532
                         T2r = ii[WS(rs, 29)];
533
                         T2n = W[56];
534
                         T2p = T2n * T2o;
535
                         T4E = T2n * T2r;
536
                         T2q = W[57];
537
                         T2s = FMA(T2q, T2r, T2p);
538
                         T4F = FNMS(T2q, T2o, T4E);
539
                    }
540
                    {
541
                         E T2m, T2z, T71, T72;
542
                         T2m = T2f + T2l;
543
                         T2z = T2s + T2y;
544
                         T2A = T2m + T2z;
545
                         T6Y = T2z - T2m;
546
                         T71 = T4x + T4z;
547
                         T72 = T4F + T4H;
548
                         T73 = T71 - T72;
549
                         T7J = T71 + T72;
550
                    }
551
                    {
552
                         E T4A, T4B, T4D, T4I;
553
                         T4A = T4x - T4z;
554
                         T4B = T2f - T2l;
555
                         T4C = T4A - T4B;
556
                         T4T = T4B + T4A;
557
                         T4D = T2s - T2y;
558
                         T4I = T4F - T4H;
559
                         T4J = T4D + T4I;
560
                         T4U = T4D - T4I;
561
                    }
562
               }
563
               {
564
                    E TO, T7C, T7Z, T80, T89, T8e, T1H, T8d, T3v, T8b, T7L, T7T, T7Q, T7U, T7F;
565
                    E T81;
566
                    {
567
                         E Tm, TN, T7X, T7Y;
568
                         Tm = T8 + Tl;
569
                         TN = Tz + TM;
570
                         TO = Tm + TN;
571
                         T7C = Tm - TN;
572
                         T7X = T7I + T7J;
573
                         T7Y = T7N + T7O;
574
                         T7Z = T7X - T7Y;
575
                         T80 = T7X + T7Y;
576
                    }
577
                    {
578
                         E T82, T88, T1f, T1G;
579
                         T82 = T6F + T6G;
580
                         T88 = T83 + T87;
581
                         T89 = T82 + T88;
582
                         T8e = T88 - T82;
583
                         T1f = T11 + T1e;
584
                         T1G = T1s + T1F;
585
                         T1H = T1f + T1G;
586
                         T8d = T1G - T1f;
587
                    }
588
                    {
589
                         E T2B, T3u, T7H, T7K;
590
                         T2B = T29 + T2A;
591
                         T3u = T32 + T3t;
592
                         T3v = T2B + T3u;
593
                         T8b = T3u - T2B;
594
                         T7H = T29 - T2A;
595
                         T7K = T7I - T7J;
596
                         T7L = T7H + T7K;
597
                         T7T = T7K - T7H;
598
                    }
599
                    {
600
                         E T7M, T7P, T7D, T7E;
601
                         T7M = T32 - T3t;
602
                         T7P = T7N - T7O;
603
                         T7Q = T7M - T7P;
604
                         T7U = T7M + T7P;
605
                         T7D = T6J + T6K;
606
                         T7E = T6P + T6Q;
607
                         T7F = T7D - T7E;
608
                         T81 = T7D + T7E;
609
                    }
610
                    {
611
                         E T1I, T8a, T7W, T8c;
612
                         T1I = TO + T1H;
613
                         ri[WS(rs, 16)] = T1I - T3v;
614
                         ri[0] = T1I + T3v;
615
                         T8a = T81 + T89;
616
                         ii[0] = T80 + T8a;
617
                         ii[WS(rs, 16)] = T8a - T80;
618
                         T7W = TO - T1H;
619
                         ri[WS(rs, 24)] = T7W - T7Z;
620
                         ri[WS(rs, 8)] = T7W + T7Z;
621
                         T8c = T89 - T81;
622
                         ii[WS(rs, 8)] = T8b + T8c;
623
                         ii[WS(rs, 24)] = T8c - T8b;
624
                    }
625
                    {
626
                         E T7G, T7R, T8f, T8g;
627
                         T7G = T7C + T7F;
628
                         T7R = T7L + T7Q;
629
                         ri[WS(rs, 20)] = FNMS(KP707106781, T7R, T7G);
630
                         ri[WS(rs, 4)] = FMA(KP707106781, T7R, T7G);
631
                         T8f = T8d + T8e;
632
                         T8g = T7T + T7U;
633
                         ii[WS(rs, 4)] = FMA(KP707106781, T8g, T8f);
634
                         ii[WS(rs, 20)] = FNMS(KP707106781, T8g, T8f);
635
                    }
636
                    {
637
                         E T7S, T7V, T8h, T8i;
638
                         T7S = T7C - T7F;
639
                         T7V = T7T - T7U;
640
                         ri[WS(rs, 28)] = FNMS(KP707106781, T7V, T7S);
641
                         ri[WS(rs, 12)] = FMA(KP707106781, T7V, T7S);
642
                         T8h = T8e - T8d;
643
                         T8i = T7Q - T7L;
644
                         ii[WS(rs, 12)] = FMA(KP707106781, T8i, T8h);
645
                         ii[WS(rs, 28)] = FNMS(KP707106781, T8i, T8h);
646
                    }
647
               }
648
               {
649
                    E T6I, T7m, T7w, T7A, T8l, T8r, T6T, T8m, T75, T7j, T7p, T8s, T7t, T7z, T7g;
650
                    E T7k;
651
                    {
652
                         E T6E, T6H, T7u, T7v;
653
                         T6E = T8 - Tl;
654
                         T6H = T6F - T6G;
655
                         T6I = T6E - T6H;
656
                         T7m = T6E + T6H;
657
                         T7u = T7b + T7e;
658
                         T7v = T78 + T79;
659
                         T7w = FNMS(KP414213562, T7v, T7u);
660
                         T7A = FMA(KP414213562, T7u, T7v);
661
                    }
662
                    {
663
                         E T8j, T8k, T6N, T6S;
664
                         T8j = TM - Tz;
665
                         T8k = T87 - T83;
666
                         T8l = T8j + T8k;
667
                         T8r = T8k - T8j;
668
                         T6N = T6L - T6M;
669
                         T6S = T6O + T6R;
670
                         T6T = T6N - T6S;
671
                         T8m = T6N + T6S;
672
                    }
673
                    {
674
                         E T6Z, T74, T7n, T7o;
675
                         T6Z = T6X - T6Y;
676
                         T74 = T70 - T73;
677
                         T75 = FMA(KP414213562, T74, T6Z);
678
                         T7j = FNMS(KP414213562, T6Z, T74);
679
                         T7n = T6M + T6L;
680
                         T7o = T6O - T6R;
681
                         T7p = T7n + T7o;
682
                         T8s = T7o - T7n;
683
                    }
684
                    {
685
                         E T7r, T7s, T7a, T7f;
686
                         T7r = T70 + T73;
687
                         T7s = T6X + T6Y;
688
                         T7t = FMA(KP414213562, T7s, T7r);
689
                         T7z = FNMS(KP414213562, T7r, T7s);
690
                         T7a = T78 - T79;
691
                         T7f = T7b - T7e;
692
                         T7g = FNMS(KP414213562, T7f, T7a);
693
                         T7k = FMA(KP414213562, T7a, T7f);
694
                    }
695
                    {
696
                         E T6U, T7h, T8t, T8u;
697
                         T6U = FMA(KP707106781, T6T, T6I);
698
                         T7h = T75 - T7g;
699
                         ri[WS(rs, 22)] = FNMS(KP923879532, T7h, T6U);
700
                         ri[WS(rs, 6)] = FMA(KP923879532, T7h, T6U);
701
                         T8t = FMA(KP707106781, T8s, T8r);
702
                         T8u = T7k - T7j;
703
                         ii[WS(rs, 6)] = FMA(KP923879532, T8u, T8t);
704
                         ii[WS(rs, 22)] = FNMS(KP923879532, T8u, T8t);
705
                    }
706
                    {
707
                         E T7i, T7l, T8v, T8w;
708
                         T7i = FNMS(KP707106781, T6T, T6I);
709
                         T7l = T7j + T7k;
710
                         ri[WS(rs, 14)] = FNMS(KP923879532, T7l, T7i);
711
                         ri[WS(rs, 30)] = FMA(KP923879532, T7l, T7i);
712
                         T8v = FNMS(KP707106781, T8s, T8r);
713
                         T8w = T75 + T7g;
714
                         ii[WS(rs, 14)] = FNMS(KP923879532, T8w, T8v);
715
                         ii[WS(rs, 30)] = FMA(KP923879532, T8w, T8v);
716
                    }
717
                    {
718
                         E T7q, T7x, T8n, T8o;
719
                         T7q = FMA(KP707106781, T7p, T7m);
720
                         T7x = T7t + T7w;
721
                         ri[WS(rs, 18)] = FNMS(KP923879532, T7x, T7q);
722
                         ri[WS(rs, 2)] = FMA(KP923879532, T7x, T7q);
723
                         T8n = FMA(KP707106781, T8m, T8l);
724
                         T8o = T7z + T7A;
725
                         ii[WS(rs, 2)] = FMA(KP923879532, T8o, T8n);
726
                         ii[WS(rs, 18)] = FNMS(KP923879532, T8o, T8n);
727
                    }
728
                    {
729
                         E T7y, T7B, T8p, T8q;
730
                         T7y = FNMS(KP707106781, T7p, T7m);
731
                         T7B = T7z - T7A;
732
                         ri[WS(rs, 26)] = FNMS(KP923879532, T7B, T7y);
733
                         ri[WS(rs, 10)] = FMA(KP923879532, T7B, T7y);
734
                         T8p = FNMS(KP707106781, T8m, T8l);
735
                         T8q = T7w - T7t;
736
                         ii[WS(rs, 10)] = FMA(KP923879532, T8q, T8p);
737
                         ii[WS(rs, 26)] = FNMS(KP923879532, T8q, T8p);
738
                    }
739
               }
740
               {
741
                    E T3S, T5C, T4n, T8C, T8B, T8H, T5F, T8I, T5w, T5Q, T5A, T5M, T4X, T5P, T5z;
742
                    E T5J;
743
                    {
744
                         E T3C, T3R, T5D, T5E;
745
                         T3C = T3w + T3B;
746
                         T3R = T3J + T3Q;
747
                         T3S = FNMS(KP707106781, T3R, T3C);
748
                         T5C = FMA(KP707106781, T3R, T3C);
749
                         {
750
                              E T47, T4m, T8z, T8A;
751
                              T47 = FNMS(KP414213562, T46, T3Z);
752
                              T4m = FMA(KP414213562, T4l, T4e);
753
                              T4n = T47 - T4m;
754
                              T8C = T47 + T4m;
755
                              T8z = T8x - T8y;
756
                              T8A = T5T + T5U;
757
                              T8B = FMA(KP707106781, T8A, T8z);
758
                              T8H = FNMS(KP707106781, T8A, T8z);
759
                         }
760
                         T5D = FMA(KP414213562, T3Z, T46);
761
                         T5E = FNMS(KP414213562, T4e, T4l);
762
                         T5F = T5D + T5E;
763
                         T8I = T5E - T5D;
764
                         {
765
                              E T5k, T5L, T5v, T5K, T5j, T5u;
766
                              T5j = T5b + T5i;
767
                              T5k = FNMS(KP707106781, T5j, T54);
768
                              T5L = FMA(KP707106781, T5j, T54);
769
                              T5u = T5s + T5t;
770
                              T5v = FNMS(KP707106781, T5u, T5r);
771
                              T5K = FMA(KP707106781, T5u, T5r);
772
                              T5w = FNMS(KP668178637, T5v, T5k);
773
                              T5Q = FMA(KP198912367, T5K, T5L);
774
                              T5A = FMA(KP668178637, T5k, T5v);
775
                              T5M = FNMS(KP198912367, T5L, T5K);
776
                         }
777
                         {
778
                              E T4L, T5I, T4W, T5H, T4K, T4V;
779
                              T4K = T4C + T4J;
780
                              T4L = FNMS(KP707106781, T4K, T4v);
781
                              T5I = FMA(KP707106781, T4K, T4v);
782
                              T4V = T4T + T4U;
783
                              T4W = FNMS(KP707106781, T4V, T4S);
784
                              T5H = FMA(KP707106781, T4V, T4S);
785
                              T4X = FMA(KP668178637, T4W, T4L);
786
                              T5P = FNMS(KP198912367, T5H, T5I);
787
                              T5z = FNMS(KP668178637, T4L, T4W);
788
                              T5J = FMA(KP198912367, T5I, T5H);
789
                         }
790
                    }
791
                    {
792
                         E T4o, T5x, T8J, T8K;
793
                         T4o = FMA(KP923879532, T4n, T3S);
794
                         T5x = T4X - T5w;
795
                         ri[WS(rs, 21)] = FNMS(KP831469612, T5x, T4o);
796
                         ri[WS(rs, 5)] = FMA(KP831469612, T5x, T4o);
797
                         T8J = FMA(KP923879532, T8I, T8H);
798
                         T8K = T5A - T5z;
799
                         ii[WS(rs, 5)] = FMA(KP831469612, T8K, T8J);
800
                         ii[WS(rs, 21)] = FNMS(KP831469612, T8K, T8J);
801
                    }
802
                    {
803
                         E T5y, T5B, T8L, T8M;
804
                         T5y = FNMS(KP923879532, T4n, T3S);
805
                         T5B = T5z + T5A;
806
                         ri[WS(rs, 13)] = FNMS(KP831469612, T5B, T5y);
807
                         ri[WS(rs, 29)] = FMA(KP831469612, T5B, T5y);
808
                         T8L = FNMS(KP923879532, T8I, T8H);
809
                         T8M = T4X + T5w;
810
                         ii[WS(rs, 13)] = FNMS(KP831469612, T8M, T8L);
811
                         ii[WS(rs, 29)] = FMA(KP831469612, T8M, T8L);
812
                    }
813
                    {
814
                         E T5G, T5N, T8D, T8E;
815
                         T5G = FMA(KP923879532, T5F, T5C);
816
                         T5N = T5J + T5M;
817
                         ri[WS(rs, 17)] = FNMS(KP980785280, T5N, T5G);
818
                         ri[WS(rs, 1)] = FMA(KP980785280, T5N, T5G);
819
                         T8D = FMA(KP923879532, T8C, T8B);
820
                         T8E = T5P + T5Q;
821
                         ii[WS(rs, 1)] = FMA(KP980785280, T8E, T8D);
822
                         ii[WS(rs, 17)] = FNMS(KP980785280, T8E, T8D);
823
                    }
824
                    {
825
                         E T5O, T5R, T8F, T8G;
826
                         T5O = FNMS(KP923879532, T5F, T5C);
827
                         T5R = T5P - T5Q;
828
                         ri[WS(rs, 25)] = FNMS(KP980785280, T5R, T5O);
829
                         ri[WS(rs, 9)] = FMA(KP980785280, T5R, T5O);
830
                         T8F = FNMS(KP923879532, T8C, T8B);
831
                         T8G = T5M - T5J;
832
                         ii[WS(rs, 9)] = FMA(KP980785280, T8G, T8F);
833
                         ii[WS(rs, 25)] = FNMS(KP980785280, T8G, T8F);
834
                    }
835
               }
836
               {
837
                    E T5W, T6o, T63, T8W, T8P, T8V, T6r, T8Q, T6i, T6C, T6m, T6y, T6b, T6B, T6l;
838
                    E T6v;
839
                    {
840
                         E T5S, T5V, T6p, T6q;
841
                         T5S = T3w - T3B;
842
                         T5V = T5T - T5U;
843
                         T5W = FMA(KP707106781, T5V, T5S);
844
                         T6o = FNMS(KP707106781, T5V, T5S);
845
                         {
846
                              E T5Z, T62, T8N, T8O;
847
                              T5Z = FMA(KP414213562, T5Y, T5X);
848
                              T62 = FNMS(KP414213562, T61, T60);
849
                              T63 = T5Z - T62;
850
                              T8W = T5Z + T62;
851
                              T8N = T8y + T8x;
852
                              T8O = T3Q - T3J;
853
                              T8P = FMA(KP707106781, T8O, T8N);
854
                              T8V = FNMS(KP707106781, T8O, T8N);
855
                         }
856
                         T6p = FNMS(KP414213562, T5X, T5Y);
857
                         T6q = FMA(KP414213562, T60, T61);
858
                         T6r = T6p + T6q;
859
                         T8Q = T6q - T6p;
860
                         {
861
                              E T6e, T6x, T6h, T6w, T6d, T6g;
862
                              T6d = T5i - T5b;
863
                              T6e = FNMS(KP707106781, T6d, T6c);
864
                              T6x = FMA(KP707106781, T6d, T6c);
865
                              T6g = T5s - T5t;
866
                              T6h = FNMS(KP707106781, T6g, T6f);
867
                              T6w = FMA(KP707106781, T6g, T6f);
868
                              T6i = FNMS(KP668178637, T6h, T6e);
869
                              T6C = FMA(KP198912367, T6w, T6x);
870
                              T6m = FMA(KP668178637, T6e, T6h);
871
                              T6y = FNMS(KP198912367, T6x, T6w);
872
                         }
873
                         {
874
                              E T67, T6u, T6a, T6t, T66, T69;
875
                              T66 = T4J - T4C;
876
                              T67 = FNMS(KP707106781, T66, T65);
877
                              T6u = FMA(KP707106781, T66, T65);
878
                              T69 = T4T - T4U;
879
                              T6a = FNMS(KP707106781, T69, T68);
880
                              T6t = FMA(KP707106781, T69, T68);
881
                              T6b = FMA(KP668178637, T6a, T67);
882
                              T6B = FNMS(KP198912367, T6t, T6u);
883
                              T6l = FNMS(KP668178637, T67, T6a);
884
                              T6v = FMA(KP198912367, T6u, T6t);
885
                         }
886
                    }
887
                    {
888
                         E T64, T6j, T8R, T8S;
889
                         T64 = FMA(KP923879532, T63, T5W);
890
                         T6j = T6b + T6i;
891
                         ri[WS(rs, 19)] = FNMS(KP831469612, T6j, T64);
892
                         ri[WS(rs, 3)] = FMA(KP831469612, T6j, T64);
893
                         T8R = FMA(KP923879532, T8Q, T8P);
894
                         T8S = T6l + T6m;
895
                         ii[WS(rs, 3)] = FMA(KP831469612, T8S, T8R);
896
                         ii[WS(rs, 19)] = FNMS(KP831469612, T8S, T8R);
897
                    }
898
                    {
899
                         E T6k, T6n, T8T, T8U;
900
                         T6k = FNMS(KP923879532, T63, T5W);
901
                         T6n = T6l - T6m;
902
                         ri[WS(rs, 27)] = FNMS(KP831469612, T6n, T6k);
903
                         ri[WS(rs, 11)] = FMA(KP831469612, T6n, T6k);
904
                         T8T = FNMS(KP923879532, T8Q, T8P);
905
                         T8U = T6i - T6b;
906
                         ii[WS(rs, 11)] = FMA(KP831469612, T8U, T8T);
907
                         ii[WS(rs, 27)] = FNMS(KP831469612, T8U, T8T);
908
                    }
909
                    {
910
                         E T6s, T6z, T8X, T8Y;
911
                         T6s = FNMS(KP923879532, T6r, T6o);
912
                         T6z = T6v - T6y;
913
                         ri[WS(rs, 23)] = FNMS(KP980785280, T6z, T6s);
914
                         ri[WS(rs, 7)] = FMA(KP980785280, T6z, T6s);
915
                         T8X = FNMS(KP923879532, T8W, T8V);
916
                         T8Y = T6C - T6B;
917
                         ii[WS(rs, 7)] = FMA(KP980785280, T8Y, T8X);
918
                         ii[WS(rs, 23)] = FNMS(KP980785280, T8Y, T8X);
919
                    }
920
                    {
921
                         E T6A, T6D, T8Z, T90;
922
                         T6A = FMA(KP923879532, T6r, T6o);
923
                         T6D = T6B + T6C;
924
                         ri[WS(rs, 15)] = FNMS(KP980785280, T6D, T6A);
925
                         ri[WS(rs, 31)] = FMA(KP980785280, T6D, T6A);
926
                         T8Z = FMA(KP923879532, T8W, T8V);
927
                         T90 = T6v + T6y;
928
                         ii[WS(rs, 15)] = FNMS(KP980785280, T90, T8Z);
929
                         ii[WS(rs, 31)] = FMA(KP980785280, T90, T8Z);
930
                    }
931
               }
932
          }
933
     }
934
}
935

    
936
static const tw_instr twinstr[] = {
937
     {TW_FULL, 0, 32},
938
     {TW_NEXT, 1, 0}
939
};
940

    
941
/*
 * Descriptor for the FMA variant of t1_32; its address is passed to
 * X(kdft_dit_register) by X(codelet_t1_32).  The first three operation
 * counts repeat the figures from the generator's header comment for this
 * variant: 236 additions, 62 multiplications, 198 fused multiply/adds.
 */
static const ct_desc desc = {
     32,                   /* transform size (genfft was invoked with -n 32) */
     "t1_32",              /* codelet name */
     twinstr,              /* twiddle instruction table defined above */
     &GENUS,               /* NOTE(review): GENUS presumably comes from dft/scalar/t.h -- confirm */
     { 236, 62, 198, 0 },  /* adds, muls, fmas; fourth counter is 0 (meaning not visible here) */
     0, 0, 0               /* NOTE(review): trailing ct_desc fields left at 0; see ct_desc declaration */
};
942

    
943
/*
 * Registration entry point for this codelet: forwards planner `p`, the
 * t1_32 kernel function and the address of its descriptor `desc` to
 * X(kdft_dit_register).
 */
void X(codelet_t1_32)(planner *p)
{
     X(kdft_dit_register)(p, t1_32, &desc);
}
946
#else
947

    
948
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 32 -name t1_32 -include dft/scalar/t.h */
949

    
950
/*
951
 * This function contains 434 FP additions, 208 FP multiplications,
952
 * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
953
 * 96 stack variables, 7 constants, and 128 memory accesses
954
 */
955
#include "dft/scalar/t.h"
956

    
957
static void t1_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
958
{
959
     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
960
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
961
     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
962
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
963
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
964
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
965
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
966
     {
967
          INT m;
968
          for (m = mb, W = W + (mb * 62); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 62, MAKE_VOLATILE_STRIDE(64, rs)) {
969
               E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T59, T41;
970
               E T56, T2B, T67, T6e, T6O, T4b, T5d, T4s, T5g, TG, T7l, T5I, T73, T3a, T4U;
971
               E T3f, T4V, T14, T5N, T5M, T6E, T3m, T4Y, T3r, T4Z, T1r, T5P, T5S, T6F, T3x;
972
               E T51, T3C, T52, T2d, T5Z, T64, T6K, T3V, T57, T44, T5a, T2Y, T6f, T6a, T6P;
973
               E T4m, T5h, T4v, T5e;
974
               {
975
                    E T1, T76, T6, T75, Tc, T32, Th, T33;
976
                    T1 = ri[0];
977
                    T76 = ii[0];
978
                    {
979
                         E T3, T5, T2, T4;
980
                         T3 = ri[WS(rs, 16)];
981
                         T5 = ii[WS(rs, 16)];
982
                         T2 = W[30];
983
                         T4 = W[31];
984
                         T6 = FMA(T2, T3, T4 * T5);
985
                         T75 = FNMS(T4, T3, T2 * T5);
986
                    }
987
                    {
988
                         E T9, Tb, T8, Ta;
989
                         T9 = ri[WS(rs, 8)];
990
                         Tb = ii[WS(rs, 8)];
991
                         T8 = W[14];
992
                         Ta = W[15];
993
                         Tc = FMA(T8, T9, Ta * Tb);
994
                         T32 = FNMS(Ta, T9, T8 * Tb);
995
                    }
996
                    {
997
                         E Te, Tg, Td, Tf;
998
                         Te = ri[WS(rs, 24)];
999
                         Tg = ii[WS(rs, 24)];
1000
                         Td = W[46];
1001
                         Tf = W[47];
1002
                         Th = FMA(Td, Te, Tf * Tg);
1003
                         T33 = FNMS(Tf, Te, Td * Tg);
1004
                    }
1005
                    {
1006
                         E T7, Ti, T7A, T7B;
1007
                         T7 = T1 + T6;
1008
                         Ti = Tc + Th;
1009
                         Tj = T7 + Ti;
1010
                         T5F = T7 - Ti;
1011
                         T7A = T76 - T75;
1012
                         T7B = Tc - Th;
1013
                         T7C = T7A - T7B;
1014
                         T7Q = T7B + T7A;
1015
                    }
1016
                    {
1017
                         E T31, T34, T74, T77;
1018
                         T31 = T1 - T6;
1019
                         T34 = T32 - T33;
1020
                         T35 = T31 - T34;
1021
                         T4T = T31 + T34;
1022
                         T74 = T32 + T33;
1023
                         T77 = T75 + T76;
1024
                         T78 = T74 + T77;
1025
                         T7m = T77 - T74;
1026
                    }
1027
               }
1028
               {
1029
                    E T1y, T3G, T1O, T3Z, T1D, T3H, T1J, T3Y;
1030
                    {
1031
                         E T1v, T1x, T1u, T1w;
1032
                         T1v = ri[WS(rs, 1)];
1033
                         T1x = ii[WS(rs, 1)];
1034
                         T1u = W[0];
1035
                         T1w = W[1];
1036
                         T1y = FMA(T1u, T1v, T1w * T1x);
1037
                         T3G = FNMS(T1w, T1v, T1u * T1x);
1038
                    }
1039
                    {
1040
                         E T1L, T1N, T1K, T1M;
1041
                         T1L = ri[WS(rs, 25)];
1042
                         T1N = ii[WS(rs, 25)];
1043
                         T1K = W[48];
1044
                         T1M = W[49];
1045
                         T1O = FMA(T1K, T1L, T1M * T1N);
1046
                         T3Z = FNMS(T1M, T1L, T1K * T1N);
1047
                    }
1048
                    {
1049
                         E T1A, T1C, T1z, T1B;
1050
                         T1A = ri[WS(rs, 17)];
1051
                         T1C = ii[WS(rs, 17)];
1052
                         T1z = W[32];
1053
                         T1B = W[33];
1054
                         T1D = FMA(T1z, T1A, T1B * T1C);
1055
                         T3H = FNMS(T1B, T1A, T1z * T1C);
1056
                    }
1057
                    {
1058
                         E T1G, T1I, T1F, T1H;
1059
                         T1G = ri[WS(rs, 9)];
1060
                         T1I = ii[WS(rs, 9)];
1061
                         T1F = W[16];
1062
                         T1H = W[17];
1063
                         T1J = FMA(T1F, T1G, T1H * T1I);
1064
                         T3Y = FNMS(T1H, T1G, T1F * T1I);
1065
                    }
1066
                    {
1067
                         E T1E, T1P, T5W, T5X;
1068
                         T1E = T1y + T1D;
1069
                         T1P = T1J + T1O;
1070
                         T1Q = T1E + T1P;
1071
                         T61 = T1E - T1P;
1072
                         T5W = T3G + T3H;
1073
                         T5X = T3Y + T3Z;
1074
                         T5Y = T5W - T5X;
1075
                         T6J = T5W + T5X;
1076
                    }
1077
                    {
1078
                         E T3I, T3J, T3X, T40;
1079
                         T3I = T3G - T3H;
1080
                         T3J = T1J - T1O;
1081
                         T3K = T3I + T3J;
1082
                         T59 = T3I - T3J;
1083
                         T3X = T1y - T1D;
1084
                         T40 = T3Y - T3Z;
1085
                         T41 = T3X - T40;
1086
                         T56 = T3X + T40;
1087
                    }
1088
               }
1089
               {
1090
                    E T2j, T4o, T2z, T49, T2o, T4p, T2u, T48;
1091
                    {
1092
                         E T2g, T2i, T2f, T2h;
1093
                         T2g = ri[WS(rs, 31)];
1094
                         T2i = ii[WS(rs, 31)];
1095
                         T2f = W[60];
1096
                         T2h = W[61];
1097
                         T2j = FMA(T2f, T2g, T2h * T2i);
1098
                         T4o = FNMS(T2h, T2g, T2f * T2i);
1099
                    }
1100
                    {
1101
                         E T2w, T2y, T2v, T2x;
1102
                         T2w = ri[WS(rs, 23)];
1103
                         T2y = ii[WS(rs, 23)];
1104
                         T2v = W[44];
1105
                         T2x = W[45];
1106
                         T2z = FMA(T2v, T2w, T2x * T2y);
1107
                         T49 = FNMS(T2x, T2w, T2v * T2y);
1108
                    }
1109
                    {
1110
                         E T2l, T2n, T2k, T2m;
1111
                         T2l = ri[WS(rs, 15)];
1112
                         T2n = ii[WS(rs, 15)];
1113
                         T2k = W[28];
1114
                         T2m = W[29];
1115
                         T2o = FMA(T2k, T2l, T2m * T2n);
1116
                         T4p = FNMS(T2m, T2l, T2k * T2n);
1117
                    }
1118
                    {
1119
                         E T2r, T2t, T2q, T2s;
1120
                         T2r = ri[WS(rs, 7)];
1121
                         T2t = ii[WS(rs, 7)];
1122
                         T2q = W[12];
1123
                         T2s = W[13];
1124
                         T2u = FMA(T2q, T2r, T2s * T2t);
1125
                         T48 = FNMS(T2s, T2r, T2q * T2t);
1126
                    }
1127
                    {
1128
                         E T2p, T2A, T6c, T6d;
1129
                         T2p = T2j + T2o;
1130
                         T2A = T2u + T2z;
1131
                         T2B = T2p + T2A;
1132
                         T67 = T2p - T2A;
1133
                         T6c = T4o + T4p;
1134
                         T6d = T48 + T49;
1135
                         T6e = T6c - T6d;
1136
                         T6O = T6c + T6d;
1137
                    }
1138
                    {
1139
                         E T47, T4a, T4q, T4r;
1140
                         T47 = T2j - T2o;
1141
                         T4a = T48 - T49;
1142
                         T4b = T47 - T4a;
1143
                         T5d = T47 + T4a;
1144
                         T4q = T4o - T4p;
1145
                         T4r = T2u - T2z;
1146
                         T4s = T4q + T4r;
1147
                         T5g = T4q - T4r;
1148
                    }
1149
               }
1150
               {
1151
                    E To, T36, TE, T3d, Tt, T37, Tz, T3c;
1152
                    {
1153
                         E Tl, Tn, Tk, Tm;
1154
                         Tl = ri[WS(rs, 4)];
1155
                         Tn = ii[WS(rs, 4)];
1156
                         Tk = W[6];
1157
                         Tm = W[7];
1158
                         To = FMA(Tk, Tl, Tm * Tn);
1159
                         T36 = FNMS(Tm, Tl, Tk * Tn);
1160
                    }
1161
                    {
1162
                         E TB, TD, TA, TC;
1163
                         TB = ri[WS(rs, 12)];
1164
                         TD = ii[WS(rs, 12)];
1165
                         TA = W[22];
1166
                         TC = W[23];
1167
                         TE = FMA(TA, TB, TC * TD);
1168
                         T3d = FNMS(TC, TB, TA * TD);
1169
                    }
1170
                    {
1171
                         E Tq, Ts, Tp, Tr;
1172
                         Tq = ri[WS(rs, 20)];
1173
                         Ts = ii[WS(rs, 20)];
1174
                         Tp = W[38];
1175
                         Tr = W[39];
1176
                         Tt = FMA(Tp, Tq, Tr * Ts);
1177
                         T37 = FNMS(Tr, Tq, Tp * Ts);
1178
                    }
1179
                    {
1180
                         E Tw, Ty, Tv, Tx;
1181
                         Tw = ri[WS(rs, 28)];
1182
                         Ty = ii[WS(rs, 28)];
1183
                         Tv = W[54];
1184
                         Tx = W[55];
1185
                         Tz = FMA(Tv, Tw, Tx * Ty);
1186
                         T3c = FNMS(Tx, Tw, Tv * Ty);
1187
                    }
1188
                    {
1189
                         E Tu, TF, T5G, T5H;
1190
                         Tu = To + Tt;
1191
                         TF = Tz + TE;
1192
                         TG = Tu + TF;
1193
                         T7l = TF - Tu;
1194
                         T5G = T36 + T37;
1195
                         T5H = T3c + T3d;
1196
                         T5I = T5G - T5H;
1197
                         T73 = T5G + T5H;
1198
                    }
1199
                    {
1200
                         E T38, T39, T3b, T3e;
1201
                         T38 = T36 - T37;
1202
                         T39 = To - Tt;
1203
                         T3a = T38 - T39;
1204
                         T4U = T39 + T38;
1205
                         T3b = Tz - TE;
1206
                         T3e = T3c - T3d;
1207
                         T3f = T3b + T3e;
1208
                         T4V = T3b - T3e;
1209
                    }
1210
               }
1211
               {
1212
                    E TM, T3i, T12, T3p, TR, T3j, TX, T3o;
1213
                    {
1214
                         E TJ, TL, TI, TK;
1215
                         TJ = ri[WS(rs, 2)];
1216
                         TL = ii[WS(rs, 2)];
1217
                         TI = W[2];
1218
                         TK = W[3];
1219
                         TM = FMA(TI, TJ, TK * TL);
1220
                         T3i = FNMS(TK, TJ, TI * TL);
1221
                    }
1222
                    {
1223
                         E TZ, T11, TY, T10;
1224
                         TZ = ri[WS(rs, 26)];
1225
                         T11 = ii[WS(rs, 26)];
1226
                         TY = W[50];
1227
                         T10 = W[51];
1228
                         T12 = FMA(TY, TZ, T10 * T11);
1229
                         T3p = FNMS(T10, TZ, TY * T11);
1230
                    }
1231
                    {
1232
                         E TO, TQ, TN, TP;
1233
                         TO = ri[WS(rs, 18)];
1234
                         TQ = ii[WS(rs, 18)];
1235
                         TN = W[34];
1236
                         TP = W[35];
1237
                         TR = FMA(TN, TO, TP * TQ);
1238
                         T3j = FNMS(TP, TO, TN * TQ);
1239
                    }
1240
                    {
1241
                         E TU, TW, TT, TV;
1242
                         TU = ri[WS(rs, 10)];
1243
                         TW = ii[WS(rs, 10)];
1244
                         TT = W[18];
1245
                         TV = W[19];
1246
                         TX = FMA(TT, TU, TV * TW);
1247
                         T3o = FNMS(TV, TU, TT * TW);
1248
                    }
1249
                    {
1250
                         E TS, T13, T5K, T5L;
1251
                         TS = TM + TR;
1252
                         T13 = TX + T12;
1253
                         T14 = TS + T13;
1254
                         T5N = TS - T13;
1255
                         T5K = T3i + T3j;
1256
                         T5L = T3o + T3p;
1257
                         T5M = T5K - T5L;
1258
                         T6E = T5K + T5L;
1259
                    }
1260
                    {
1261
                         E T3k, T3l, T3n, T3q;
1262
                         T3k = T3i - T3j;
1263
                         T3l = TX - T12;
1264
                         T3m = T3k + T3l;
1265
                         T4Y = T3k - T3l;
1266
                         T3n = TM - TR;
1267
                         T3q = T3o - T3p;
1268
                         T3r = T3n - T3q;
1269
                         T4Z = T3n + T3q;
1270
                    }
1271
               }
1272
               {
1273
                    E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z;
1274
                    {
1275
                         E T16, T18, T15, T17;
1276
                         T16 = ri[WS(rs, 30)];
1277
                         T18 = ii[WS(rs, 30)];
1278
                         T15 = W[58];
1279
                         T17 = W[59];
1280
                         T19 = FMA(T15, T16, T17 * T18);
1281
                         T3t = FNMS(T17, T16, T15 * T18);
1282
                    }
1283
                    {
1284
                         E T1m, T1o, T1l, T1n;
1285
                         T1m = ri[WS(rs, 22)];
1286
                         T1o = ii[WS(rs, 22)];
1287
                         T1l = W[42];
1288
                         T1n = W[43];
1289
                         T1p = FMA(T1l, T1m, T1n * T1o);
1290
                         T3A = FNMS(T1n, T1m, T1l * T1o);
1291
                    }
1292
                    {
1293
                         E T1b, T1d, T1a, T1c;
1294
                         T1b = ri[WS(rs, 14)];
1295
                         T1d = ii[WS(rs, 14)];
1296
                         T1a = W[26];
1297
                         T1c = W[27];
1298
                         T1e = FMA(T1a, T1b, T1c * T1d);
1299
                         T3u = FNMS(T1c, T1b, T1a * T1d);
1300
                    }
1301
                    {
1302
                         E T1h, T1j, T1g, T1i;
1303
                         T1h = ri[WS(rs, 6)];
1304
                         T1j = ii[WS(rs, 6)];
1305
                         T1g = W[10];
1306
                         T1i = W[11];
1307
                         T1k = FMA(T1g, T1h, T1i * T1j);
1308
                         T3z = FNMS(T1i, T1h, T1g * T1j);
1309
                    }
1310
                    {
1311
                         E T1f, T1q, T5Q, T5R;
1312
                         T1f = T19 + T1e;
1313
                         T1q = T1k + T1p;
1314
                         T1r = T1f + T1q;
1315
                         T5P = T1f - T1q;
1316
                         T5Q = T3t + T3u;
1317
                         T5R = T3z + T3A;
1318
                         T5S = T5Q - T5R;
1319
                         T6F = T5Q + T5R;
1320
                    }
1321
                    {
1322
                         E T3v, T3w, T3y, T3B;
1323
                         T3v = T3t - T3u;
1324
                         T3w = T1k - T1p;
1325
                         T3x = T3v + T3w;
1326
                         T51 = T3v - T3w;
1327
                         T3y = T19 - T1e;
1328
                         T3B = T3z - T3A;
1329
                         T3C = T3y - T3B;
1330
                         T52 = T3y + T3B;
1331
                    }
1332
               }
1333
               {
1334
                    E T1V, T3R, T20, T3S, T3Q, T3T, T26, T3M, T2b, T3N, T3L, T3O;
1335
                    {
1336
                         E T1S, T1U, T1R, T1T;
1337
                         T1S = ri[WS(rs, 5)];
1338
                         T1U = ii[WS(rs, 5)];
1339
                         T1R = W[8];
1340
                         T1T = W[9];
1341
                         T1V = FMA(T1R, T1S, T1T * T1U);
1342
                         T3R = FNMS(T1T, T1S, T1R * T1U);
1343
                    }
1344
                    {
1345
                         E T1X, T1Z, T1W, T1Y;
1346
                         T1X = ri[WS(rs, 21)];
1347
                         T1Z = ii[WS(rs, 21)];
1348
                         T1W = W[40];
1349
                         T1Y = W[41];
1350
                         T20 = FMA(T1W, T1X, T1Y * T1Z);
1351
                         T3S = FNMS(T1Y, T1X, T1W * T1Z);
1352
                    }
1353
                    T3Q = T1V - T20;
1354
                    T3T = T3R - T3S;
1355
                    {
1356
                         E T23, T25, T22, T24;
1357
                         T23 = ri[WS(rs, 29)];
1358
                         T25 = ii[WS(rs, 29)];
1359
                         T22 = W[56];
1360
                         T24 = W[57];
1361
                         T26 = FMA(T22, T23, T24 * T25);
1362
                         T3M = FNMS(T24, T23, T22 * T25);
1363
                    }
1364
                    {
1365
                         E T28, T2a, T27, T29;
1366
                         T28 = ri[WS(rs, 13)];
1367
                         T2a = ii[WS(rs, 13)];
1368
                         T27 = W[24];
1369
                         T29 = W[25];
1370
                         T2b = FMA(T27, T28, T29 * T2a);
1371
                         T3N = FNMS(T29, T28, T27 * T2a);
1372
                    }
1373
                    T3L = T26 - T2b;
1374
                    T3O = T3M - T3N;
1375
                    {
1376
                         E T21, T2c, T62, T63;
1377
                         T21 = T1V + T20;
1378
                         T2c = T26 + T2b;
1379
                         T2d = T21 + T2c;
1380
                         T5Z = T2c - T21;
1381
                         T62 = T3R + T3S;
1382
                         T63 = T3M + T3N;
1383
                         T64 = T62 - T63;
1384
                         T6K = T62 + T63;
1385
                    }
1386
                    {
1387
                         E T3P, T3U, T42, T43;
1388
                         T3P = T3L - T3O;
1389
                         T3U = T3Q + T3T;
1390
                         T3V = KP707106781 * (T3P - T3U);
1391
                         T57 = KP707106781 * (T3U + T3P);
1392
                         T42 = T3T - T3Q;
1393
                         T43 = T3L + T3O;
1394
                         T44 = KP707106781 * (T42 - T43);
1395
                         T5a = KP707106781 * (T42 + T43);
1396
                    }
1397
               }
1398
               {
1399
                    E T2G, T4c, T2L, T4d, T4e, T4f, T2R, T4i, T2W, T4j, T4h, T4k;
1400
                    {
1401
                         E T2D, T2F, T2C, T2E;
1402
                         T2D = ri[WS(rs, 3)];
1403
                         T2F = ii[WS(rs, 3)];
1404
                         T2C = W[4];
1405
                         T2E = W[5];
1406
                         T2G = FMA(T2C, T2D, T2E * T2F);
1407
                         T4c = FNMS(T2E, T2D, T2C * T2F);
1408
                    }
1409
                    {
1410
                         E T2I, T2K, T2H, T2J;
1411
                         T2I = ri[WS(rs, 19)];
1412
                         T2K = ii[WS(rs, 19)];
1413
                         T2H = W[36];
1414
                         T2J = W[37];
1415
                         T2L = FMA(T2H, T2I, T2J * T2K);
1416
                         T4d = FNMS(T2J, T2I, T2H * T2K);
1417
                    }
1418
                    T4e = T4c - T4d;
1419
                    T4f = T2G - T2L;
1420
                    {
1421
                         E T2O, T2Q, T2N, T2P;
1422
                         T2O = ri[WS(rs, 27)];
1423
                         T2Q = ii[WS(rs, 27)];
1424
                         T2N = W[52];
1425
                         T2P = W[53];
1426
                         T2R = FMA(T2N, T2O, T2P * T2Q);
1427
                         T4i = FNMS(T2P, T2O, T2N * T2Q);
1428
                    }
1429
                    {
1430
                         E T2T, T2V, T2S, T2U;
1431
                         T2T = ri[WS(rs, 11)];
1432
                         T2V = ii[WS(rs, 11)];
1433
                         T2S = W[20];
1434
                         T2U = W[21];
1435
                         T2W = FMA(T2S, T2T, T2U * T2V);
1436
                         T4j = FNMS(T2U, T2T, T2S * T2V);
1437
                    }
1438
                    T4h = T2R - T2W;
1439
                    T4k = T4i - T4j;
1440
                    {
1441
                         E T2M, T2X, T68, T69;
1442
                         T2M = T2G + T2L;
1443
                         T2X = T2R + T2W;
1444
                         T2Y = T2M + T2X;
1445
                         T6f = T2X - T2M;
1446
                         T68 = T4c + T4d;
1447
                         T69 = T4i + T4j;
1448
                         T6a = T68 - T69;
1449
                         T6P = T68 + T69;
1450
                    }
1451
                    {
1452
                         E T4g, T4l, T4t, T4u;
1453
                         T4g = T4e - T4f;
1454
                         T4l = T4h + T4k;
1455
                         T4m = KP707106781 * (T4g - T4l);
1456
                         T5h = KP707106781 * (T4g + T4l);
1457
                         T4t = T4h - T4k;
1458
                         T4u = T4f + T4e;
1459
                         T4v = KP707106781 * (T4t - T4u);
1460
                         T5e = KP707106781 * (T4u + T4t);
1461
                    }
1462
               }
1463
               {
1464
                    E T1t, T6X, T7a, T7c, T30, T7b, T70, T71;
1465
                    {
1466
                         E TH, T1s, T72, T79;
1467
                         TH = Tj + TG;
1468
                         T1s = T14 + T1r;
1469
                         T1t = TH + T1s;
1470
                         T6X = TH - T1s;
1471
                         T72 = T6E + T6F;
1472
                         T79 = T73 + T78;
1473
                         T7a = T72 + T79;
1474
                         T7c = T79 - T72;
1475
                    }
1476
                    {
1477
                         E T2e, T2Z, T6Y, T6Z;
1478
                         T2e = T1Q + T2d;
1479
                         T2Z = T2B + T2Y;
1480
                         T30 = T2e + T2Z;
1481
                         T7b = T2Z - T2e;
1482
                         T6Y = T6J + T6K;
1483
                         T6Z = T6O + T6P;
1484
                         T70 = T6Y - T6Z;
1485
                         T71 = T6Y + T6Z;
1486
                    }
1487
                    ri[WS(rs, 16)] = T1t - T30;
1488
                    ii[WS(rs, 16)] = T7a - T71;
1489
                    ri[0] = T1t + T30;
1490
                    ii[0] = T71 + T7a;
1491
                    ri[WS(rs, 24)] = T6X - T70;
1492
                    ii[WS(rs, 24)] = T7c - T7b;
1493
                    ri[WS(rs, 8)] = T6X + T70;
1494
                    ii[WS(rs, 8)] = T7b + T7c;
1495
               }
1496
               {
1497
                    E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V;
1498
                    {
1499
                         E T6D, T6G, T7e, T7f;
1500
                         T6D = Tj - TG;
1501
                         T6G = T6E - T6F;
1502
                         T6H = T6D + T6G;
1503
                         T6T = T6D - T6G;
1504
                         T7e = T1r - T14;
1505
                         T7f = T78 - T73;
1506
                         T7g = T7e + T7f;
1507
                         T7i = T7f - T7e;
1508
                    }
1509
                    {
1510
                         E T6I, T6L, T6N, T6Q;
1511
                         T6I = T1Q - T2d;
1512
                         T6L = T6J - T6K;
1513
                         T6M = T6I + T6L;
1514
                         T6U = T6L - T6I;
1515
                         T6N = T2B - T2Y;
1516
                         T6Q = T6O - T6P;
1517
                         T6R = T6N - T6Q;
1518
                         T6V = T6N + T6Q;
1519
                    }
1520
                    {
1521
                         E T6S, T7d, T6W, T7h;
1522
                         T6S = KP707106781 * (T6M + T6R);
1523
                         ri[WS(rs, 20)] = T6H - T6S;
1524
                         ri[WS(rs, 4)] = T6H + T6S;
1525
                         T7d = KP707106781 * (T6U + T6V);
1526
                         ii[WS(rs, 4)] = T7d + T7g;
1527
                         ii[WS(rs, 20)] = T7g - T7d;
1528
                         T6W = KP707106781 * (T6U - T6V);
1529
                         ri[WS(rs, 28)] = T6T - T6W;
1530
                         ri[WS(rs, 12)] = T6T + T6W;
1531
                         T7h = KP707106781 * (T6R - T6M);
1532
                         ii[WS(rs, 12)] = T7h + T7i;
1533
                         ii[WS(rs, 28)] = T7i - T7h;
1534
                    }
1535
               }
1536
               {
1537
                    E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h;
1538
                    E T6l;
1539
                    {
1540
                         E T5O, T5T, T60, T65;
1541
                         T5J = T5F - T5I;
1542
                         T7n = T7l + T7m;
1543
                         T7t = T7m - T7l;
1544
                         T6n = T5F + T5I;
1545
                         T5O = T5M - T5N;
1546
                         T5T = T5P + T5S;
1547
                         T5U = KP707106781 * (T5O - T5T);
1548
                         T7k = KP707106781 * (T5O + T5T);
1549
                         {
1550
                              E T6v, T6w, T6o, T6p;
1551
                              T6v = T67 + T6a;
1552
                              T6w = T6e + T6f;
1553
                              T6x = FNMS(KP382683432, T6w, KP923879532 * T6v);
1554
                              T6B = FMA(KP923879532, T6w, KP382683432 * T6v);
1555
                              T6o = T5N + T5M;
1556
                              T6p = T5P - T5S;
1557
                              T6q = KP707106781 * (T6o + T6p);
1558
                              T7s = KP707106781 * (T6p - T6o);
1559
                         }
1560
                         T60 = T5Y - T5Z;
1561
                         T65 = T61 - T64;
1562
                         T66 = FMA(KP923879532, T60, KP382683432 * T65);
1563
                         T6k = FNMS(KP923879532, T65, KP382683432 * T60);
1564
                         {
1565
                              E T6s, T6t, T6b, T6g;
1566
                              T6s = T5Y + T5Z;
1567
                              T6t = T61 + T64;
1568
                              T6u = FMA(KP382683432, T6s, KP923879532 * T6t);
1569
                              T6A = FNMS(KP382683432, T6t, KP923879532 * T6s);
1570
                              T6b = T67 - T6a;
1571
                              T6g = T6e - T6f;
1572
                              T6h = FNMS(KP923879532, T6g, KP382683432 * T6b);
1573
                              T6l = FMA(KP382683432, T6g, KP923879532 * T6b);
1574
                         }
1575
                    }
1576
                    {
1577
                         E T5V, T6i, T7r, T7u;
1578
                         T5V = T5J + T5U;
1579
                         T6i = T66 + T6h;
1580
                         ri[WS(rs, 22)] = T5V - T6i;
1581
                         ri[WS(rs, 6)] = T5V + T6i;
1582
                         T7r = T6k + T6l;
1583
                         T7u = T7s + T7t;
1584
                         ii[WS(rs, 6)] = T7r + T7u;
1585
                         ii[WS(rs, 22)] = T7u - T7r;
1586
                    }
1587
                    {
1588
                         E T6j, T6m, T7v, T7w;
1589
                         T6j = T5J - T5U;
1590
                         T6m = T6k - T6l;
1591
                         ri[WS(rs, 30)] = T6j - T6m;
1592
                         ri[WS(rs, 14)] = T6j + T6m;
1593
                         T7v = T6h - T66;
1594
                         T7w = T7t - T7s;
1595
                         ii[WS(rs, 14)] = T7v + T7w;
1596
                         ii[WS(rs, 30)] = T7w - T7v;
1597
                    }
1598
                    {
1599
                         E T6r, T6y, T7j, T7o;
1600
                         T6r = T6n + T6q;
1601
                         T6y = T6u + T6x;
1602
                         ri[WS(rs, 18)] = T6r - T6y;
1603
                         ri[WS(rs, 2)] = T6r + T6y;
1604
                         T7j = T6A + T6B;
1605
                         T7o = T7k + T7n;
1606
                         ii[WS(rs, 2)] = T7j + T7o;
1607
                         ii[WS(rs, 18)] = T7o - T7j;
1608
                    }
1609
                    {
1610
                         E T6z, T6C, T7p, T7q;
1611
                         T6z = T6n - T6q;
1612
                         T6C = T6A - T6B;
1613
                         ri[WS(rs, 26)] = T6z - T6C;
1614
                         ri[WS(rs, 10)] = T6z + T6C;
1615
                         T7p = T6x - T6u;
1616
                         T7q = T7n - T7k;
1617
                         ii[WS(rs, 10)] = T7p + T7q;
1618
                         ii[WS(rs, 26)] = T7q - T7p;
1619
                    }
1620
               }
1621
               {
1622
                    E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x;
1623
                    E T4B, T3g, T7P;
1624
                    T3g = KP707106781 * (T3a - T3f);
1625
                    T3h = T35 - T3g;
1626
                    T4D = T35 + T3g;
1627
                    T7P = KP707106781 * (T4V - T4U);
1628
                    T7R = T7P + T7Q;
1629
                    T7X = T7Q - T7P;
1630
                    {
1631
                         E T3s, T3D, T4L, T4M;
1632
                         T3s = FNMS(KP923879532, T3r, KP382683432 * T3m);
1633
                         T3D = FMA(KP382683432, T3x, KP923879532 * T3C);
1634
                         T3E = T3s - T3D;
1635
                         T7O = T3s + T3D;
1636
                         T4L = T4b + T4m;
1637
                         T4M = T4s + T4v;
1638
                         T4N = FNMS(KP555570233, T4M, KP831469612 * T4L);
1639
                         T4R = FMA(KP831469612, T4M, KP555570233 * T4L);
1640
                    }
1641
                    {
1642
                         E T3W, T45, T4E, T4F;
1643
                         T3W = T3K - T3V;
1644
                         T45 = T41 - T44;
1645
                         T46 = FMA(KP980785280, T3W, KP195090322 * T45);
1646
                         T4A = FNMS(KP980785280, T45, KP195090322 * T3W);
1647
                         T4E = FMA(KP923879532, T3m, KP382683432 * T3r);
1648
                         T4F = FNMS(KP923879532, T3x, KP382683432 * T3C);
1649
                         T4G = T4E + T4F;
1650
                         T7W = T4F - T4E;
1651
                    }
1652
                    {
1653
                         E T4I, T4J, T4n, T4w;
1654
                         T4I = T3K + T3V;
1655
                         T4J = T41 + T44;
1656
                         T4K = FMA(KP555570233, T4I, KP831469612 * T4J);
1657
                         T4Q = FNMS(KP555570233, T4J, KP831469612 * T4I);
1658
                         T4n = T4b - T4m;
1659
                         T4w = T4s - T4v;
1660
                         T4x = FNMS(KP980785280, T4w, KP195090322 * T4n);
1661
                         T4B = FMA(KP195090322, T4w, KP980785280 * T4n);
1662
                    }
1663
                    {
1664
                         E T3F, T4y, T7V, T7Y;
1665
                         T3F = T3h + T3E;
1666
                         T4y = T46 + T4x;
1667
                         ri[WS(rs, 23)] = T3F - T4y;
1668
                         ri[WS(rs, 7)] = T3F + T4y;
1669
                         T7V = T4A + T4B;
1670
                         T7Y = T7W + T7X;
1671
                         ii[WS(rs, 7)] = T7V + T7Y;
1672
                         ii[WS(rs, 23)] = T7Y - T7V;
1673
                    }
1674
                    {
1675
                         E T4z, T4C, T7Z, T80;
1676
                         T4z = T3h - T3E;
1677
                         T4C = T4A - T4B;
1678
                         ri[WS(rs, 31)] = T4z - T4C;
1679
                         ri[WS(rs, 15)] = T4z + T4C;
1680
                         T7Z = T4x - T46;
1681
                         T80 = T7X - T7W;
1682
                         ii[WS(rs, 15)] = T7Z + T80;
1683
                         ii[WS(rs, 31)] = T80 - T7Z;
1684
                    }
1685
                    {
1686
                         E T4H, T4O, T7N, T7S;
1687
                         T4H = T4D + T4G;
1688
                         T4O = T4K + T4N;
1689
                         ri[WS(rs, 19)] = T4H - T4O;
1690
                         ri[WS(rs, 3)] = T4H + T4O;
1691
                         T7N = T4Q + T4R;
1692
                         T7S = T7O + T7R;
1693
                         ii[WS(rs, 3)] = T7N + T7S;
1694
                         ii[WS(rs, 19)] = T7S - T7N;
1695
                    }
1696
                    {
1697
                         E T4P, T4S, T7T, T7U;
1698
                         T4P = T4D - T4G;
1699
                         T4S = T4Q - T4R;
1700
                         ri[WS(rs, 27)] = T4P - T4S;
1701
                         ri[WS(rs, 11)] = T4P + T4S;
1702
                         T7T = T4N - T4K;
1703
                         T7U = T7R - T7O;
1704
                         ii[WS(rs, 11)] = T7T + T7U;
1705
                         ii[WS(rs, 27)] = T7U - T7T;
1706
                    }
1707
               }
1708
               {
1709
                    E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j;
1710
                    E T5n, T4W, T7z;
1711
                    T4W = KP707106781 * (T4U + T4V);
1712
                    T4X = T4T - T4W;
1713
                    T5p = T4T + T4W;
1714
                    T7z = KP707106781 * (T3a + T3f);
1715
                    T7D = T7z + T7C;
1716
                    T7J = T7C - T7z;
1717
                    {
1718
                         E T50, T53, T5x, T5y;
1719
                         T50 = FNMS(KP382683432, T4Z, KP923879532 * T4Y);
1720
                         T53 = FMA(KP923879532, T51, KP382683432 * T52);
1721
                         T54 = T50 - T53;
1722
                         T7y = T50 + T53;
1723
                         T5x = T5d + T5e;
1724
                         T5y = T5g + T5h;
1725
                         T5z = FNMS(KP195090322, T5y, KP980785280 * T5x);
1726
                         T5D = FMA(KP195090322, T5x, KP980785280 * T5y);
1727
                    }
1728
                    {
1729
                         E T58, T5b, T5q, T5r;
1730
                         T58 = T56 - T57;
1731
                         T5b = T59 - T5a;
1732
                         T5c = FMA(KP555570233, T58, KP831469612 * T5b);
1733
                         T5m = FNMS(KP831469612, T58, KP555570233 * T5b);
1734
                         T5q = FMA(KP382683432, T4Y, KP923879532 * T4Z);
1735
                         T5r = FNMS(KP382683432, T51, KP923879532 * T52);
1736
                         T5s = T5q + T5r;
1737
                         T7I = T5r - T5q;
1738
                    }
1739
                    {
1740
                         E T5u, T5v, T5f, T5i;
1741
                         T5u = T56 + T57;
1742
                         T5v = T59 + T5a;
1743
                         T5w = FMA(KP980785280, T5u, KP195090322 * T5v);
1744
                         T5C = FNMS(KP195090322, T5u, KP980785280 * T5v);
1745
                         T5f = T5d - T5e;
1746
                         T5i = T5g - T5h;
1747
                         T5j = FNMS(KP831469612, T5i, KP555570233 * T5f);
1748
                         T5n = FMA(KP831469612, T5f, KP555570233 * T5i);
1749
                    }
1750
                    {
1751
                         E T55, T5k, T7H, T7K;
1752
                         T55 = T4X + T54;
1753
                         T5k = T5c + T5j;
1754
                         ri[WS(rs, 21)] = T55 - T5k;
1755
                         ri[WS(rs, 5)] = T55 + T5k;
1756
                         T7H = T5m + T5n;
1757
                         T7K = T7I + T7J;
1758
                         ii[WS(rs, 5)] = T7H + T7K;
1759
                         ii[WS(rs, 21)] = T7K - T7H;
1760
                    }
1761
                    {
1762
                         E T5l, T5o, T7L, T7M;
1763
                         T5l = T4X - T54;
1764
                         T5o = T5m - T5n;
1765
                         ri[WS(rs, 29)] = T5l - T5o;
1766
                         ri[WS(rs, 13)] = T5l + T5o;
1767
                         T7L = T5j - T5c;
1768
                         T7M = T7J - T7I;
1769
                         ii[WS(rs, 13)] = T7L + T7M;
1770
                         ii[WS(rs, 29)] = T7M - T7L;
1771
                    }
1772
                    {
1773
                         E T5t, T5A, T7x, T7E;
1774
                         T5t = T5p + T5s;
1775
                         T5A = T5w + T5z;
1776
                         ri[WS(rs, 17)] = T5t - T5A;
1777
                         ri[WS(rs, 1)] = T5t + T5A;
1778
                         T7x = T5C + T5D;
1779
                         T7E = T7y + T7D;
1780
                         ii[WS(rs, 1)] = T7x + T7E;
1781
                         ii[WS(rs, 17)] = T7E - T7x;
1782
                    }
1783
                    {
1784
                         E T5B, T5E, T7F, T7G;
1785
                         T5B = T5p - T5s;
1786
                         T5E = T5C - T5D;
1787
                         ri[WS(rs, 25)] = T5B - T5E;
1788
                         ri[WS(rs, 9)] = T5B + T5E;
1789
                         T7F = T5z - T5w;
1790
                         T7G = T7D - T7y;
1791
                         ii[WS(rs, 9)] = T7F + T7G;
1792
                         ii[WS(rs, 25)] = T7G - T7F;
1793
                    }
1794
               }
1795
          }
1796
     }
1797
}
1798

    
1799
/*
 * Twiddle-instruction table for this codelet; it is handed to the planner
 * through the `twinstr` field of `desc` below.
 * NOTE(review): presumably TW_FULL asks for the full set of twiddle factors
 * for radix 32 and TW_NEXT terminates the list -- confirm against the
 * tw_instr definition pulled in by dft/codelet-dft.h.
 */
static const tw_instr twinstr[] = {
1800
     {TW_FULL, 0, 32},
1801
     {TW_NEXT, 1, 0}
1802
};
1803

    
1804
/*
 * Descriptor registered together with t1_32: it carries the value 32, the
 * codelet name "t1_32", the twiddle-instruction table above and &GENUS.
 * NOTE(review): {340, 114, 94, 0} looks like the operation counts
 * (additions, multiplications, fused multiply/adds, other) and the trailing
 * zeros like unused/default fields -- confirm against the ct_desc declaration.
 */
static const ct_desc desc = { 32, "t1_32", twinstr, &GENUS, {340, 114, 94, 0}, 0, 0, 0 };
1805

    
1806
/*
 * Registration entry point for this codelet: passes the t1_32 function and
 * its descriptor `desc` to X(kdft_dit_register) for planner `p`.
 */
void X(codelet_t1_32) (planner *p) {
1807
     X(kdft_dit_register) (p, t1_32, &desc);
1808
}
1809
#endif