To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t2_32.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (51.1 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:20 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -name t2_32 -include dft/scalar/t.h */
29

    
30
/*
31
 * This function contains 488 FP additions, 350 FP multiplications,
32
 * (or, 236 additions, 98 multiplications, 252 fused multiply/add),
33
 * 164 stack variables, 7 constants, and 128 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36

    
37
static void t2_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
40
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
41
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);
42
     DK(KP668178637, +0.668178637919298919997757686523080761552472251);
43
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
44
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
45
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
46
     {
47
          INT m;
48
          for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) {
49
               E T2, T8, T3, T6, Te, Ti, T5, T7, TJ, Tb, TM, Tc, Ts, T23, T1w;
50
               E T19, TA, TE, T1s, T1N, T1o, T1C, T1F, T1K, T15, T11, T2F, T31, T2J, T34;
51
               E T3f, T3z, T3j, T3C, Tw, T3M, T3Q, T1z, T2s, T2w, T1d, T3n, T3r, T26, T2T;
52
               E T2X, Th, TR, TP, Td, Tj, TW, Tn, TS, T1U, T2b, T29, T1R, T1V, T2g;
53
               E T1Z, T2c;
54
               {
55
                    E Tz, T1n, T10, TD, T1r, T14, T9, T1Q, Tv, T1c;
56
                    {
57
                         E T4, T18, Ta, Tr;
58
                         T2 = W[0];
59
                         T8 = W[4];
60
                         T3 = W[2];
61
                         T6 = W[3];
62
                         T4 = T2 * T3;
63
                         T18 = T3 * T8;
64
                         Ta = T2 * T6;
65
                         Tr = T2 * T8;
66
                         Te = W[6];
67
                         Tz = T3 * Te;
68
                         T1n = T8 * Te;
69
                         T10 = T2 * Te;
70
                         Ti = W[7];
71
                         TD = T3 * Ti;
72
                         T1r = T8 * Ti;
73
                         T14 = T2 * Ti;
74
                         T5 = W[1];
75
                         T7 = FMA(T5, T6, T4);
76
                         TJ = FNMS(T5, T6, T4);
77
                         T9 = T7 * T8;
78
                         T1Q = TJ * T8;
79
                         Tb = FNMS(T5, T3, Ta);
80
                         TM = FMA(T5, T3, Ta);
81
                         Tc = W[5];
82
                         Tv = T2 * Tc;
83
                         T1c = T3 * Tc;
84
                         Ts = FMA(T5, Tc, Tr);
85
                         T23 = FMA(T6, Tc, T18);
86
                         T1w = FNMS(T5, Tc, Tr);
87
                         T19 = FNMS(T6, Tc, T18);
88
                    }
89
                    TA = FMA(T6, Ti, Tz);
90
                    TE = FNMS(T6, Te, TD);
91
                    T1s = FNMS(Tc, Te, T1r);
92
                    T1N = FMA(T6, Te, TD);
93
                    T1o = FMA(Tc, Ti, T1n);
94
                    T1C = FMA(T5, Ti, T10);
95
                    T1F = FNMS(T5, Te, T14);
96
                    T1K = FNMS(T6, Ti, Tz);
97
                    T15 = FMA(T5, Te, T14);
98
                    T11 = FNMS(T5, Ti, T10);
99
                    {
100
                         E T2E, T2I, T2S, T2W;
101
                         T2E = T7 * Te;
102
                         T2F = FMA(Tb, Ti, T2E);
103
                         T31 = FNMS(Tb, Ti, T2E);
104
                         T2I = T7 * Ti;
105
                         T2J = FNMS(Tb, Te, T2I);
106
                         T34 = FMA(Tb, Te, T2I);
107
                         {
108
                              E T3e, T3i, T3L, T3P;
109
                              T3e = TJ * Te;
110
                              T3f = FNMS(TM, Ti, T3e);
111
                              T3z = FMA(TM, Ti, T3e);
112
                              T3i = TJ * Ti;
113
                              T3j = FMA(TM, Te, T3i);
114
                              T3C = FNMS(TM, Te, T3i);
115
                              T3L = Ts * Te;
116
                              T3P = Ts * Ti;
117
                              Tw = FNMS(T5, T8, Tv);
118
                              T3M = FMA(Tw, Ti, T3L);
119
                              T3Q = FNMS(Tw, Te, T3P);
120
                         }
121
                         {
122
                              E T2r, T2v, T3m, T3q;
123
                              T2r = T1w * Te;
124
                              T2v = T1w * Ti;
125
                              T1z = FMA(T5, T8, Tv);
126
                              T2s = FMA(T1z, Ti, T2r);
127
                              T2w = FNMS(T1z, Te, T2v);
128
                              T3m = T19 * Te;
129
                              T3q = T19 * Ti;
130
                              T1d = FMA(T6, T8, T1c);
131
                              T3n = FMA(T1d, Ti, T3m);
132
                              T3r = FNMS(T1d, Te, T3q);
133
                         }
134
                         T2S = T23 * Te;
135
                         T2W = T23 * Ti;
136
                         T26 = FNMS(T6, T8, T1c);
137
                         T2T = FMA(T26, Ti, T2S);
138
                         T2X = FNMS(T26, Te, T2W);
139
                         {
140
                              E TQ, TV, Tf, Tm, Tg;
141
                              Tg = T7 * Tc;
142
                              Th = FMA(Tb, T8, Tg);
143
                              TR = FNMS(Tb, T8, Tg);
144
                              TP = FMA(Tb, Tc, T9);
145
                              TQ = TP * Te;
146
                              TV = TP * Ti;
147
                              Td = FNMS(Tb, Tc, T9);
148
                              Tf = Td * Te;
149
                              Tm = Td * Ti;
150
                              Tj = FMA(Th, Ti, Tf);
151
                              TW = FNMS(TR, Te, TV);
152
                              Tn = FNMS(Th, Te, Tm);
153
                              TS = FMA(TR, Ti, TQ);
154
                         }
155
                         {
156
                              E T2a, T2f, T1S, T1Y, T1T;
157
                              T1T = TJ * Tc;
158
                              T1U = FMA(TM, T8, T1T);
159
                              T2b = FNMS(TM, T8, T1T);
160
                              T29 = FMA(TM, Tc, T1Q);
161
                              T2a = T29 * Te;
162
                              T2f = T29 * Ti;
163
                              T1R = FNMS(TM, Tc, T1Q);
164
                              T1S = T1R * Te;
165
                              T1Y = T1R * Ti;
166
                              T1V = FMA(T1U, Ti, T1S);
167
                              T2g = FNMS(T2b, Te, T2f);
168
                              T1Z = FNMS(T1U, Te, T1Y);
169
                              T2c = FMA(T2b, Ti, T2a);
170
                         }
171
                    }
172
               }
173
               {
174
                    E Tq, T46, T8H, T97, TH, T98, T4b, T8D, TZ, T7f, T4j, T6t, T1g, T7g, T4q;
175
                    E T6u, T1v, T1I, T7m, T7j, T7k, T7l, T4z, T6x, T4G, T6y, T22, T2j, T7o, T7p;
176
                    E T7q, T7r, T4O, T6A, T4V, T6B, T3G, T7L, T7I, T8n, T5E, T6P, T61, T6M, T2N;
177
                    E T7A, T7x, T8i, T55, T6I, T5s, T6F, T43, T7J, T7O, T8o, T5L, T62, T5S, T63;
178
                    E T3c, T7y, T7D, T8j, T5c, T5t, T5j, T5u;
179
                    {
180
                         E T1, T8G, Tk, Tl, To, T8E, Tp, T8F;
181
                         T1 = ri[0];
182
                         T8G = ii[0];
183
                         Tk = ri[WS(rs, 16)];
184
                         Tl = Tj * Tk;
185
                         To = ii[WS(rs, 16)];
186
                         T8E = Tj * To;
187
                         Tp = FMA(Tn, To, Tl);
188
                         Tq = T1 + Tp;
189
                         T46 = T1 - Tp;
190
                         T8F = FNMS(Tn, Tk, T8E);
191
                         T8H = T8F + T8G;
192
                         T97 = T8G - T8F;
193
                    }
194
                    {
195
                         E Tt, Tu, Tx, T47, TB, TC, TF, T49;
196
                         Tt = ri[WS(rs, 8)];
197
                         Tu = Ts * Tt;
198
                         Tx = ii[WS(rs, 8)];
199
                         T47 = Ts * Tx;
200
                         TB = ri[WS(rs, 24)];
201
                         TC = TA * TB;
202
                         TF = ii[WS(rs, 24)];
203
                         T49 = TA * TF;
204
                         {
205
                              E Ty, TG, T48, T4a;
206
                              Ty = FMA(Tw, Tx, Tu);
207
                              TG = FMA(TE, TF, TC);
208
                              TH = Ty + TG;
209
                              T98 = Ty - TG;
210
                              T48 = FNMS(Tw, Tt, T47);
211
                              T4a = FNMS(TE, TB, T49);
212
                              T4b = T48 - T4a;
213
                              T8D = T48 + T4a;
214
                         }
215
                    }
216
                    {
217
                         E TO, T4f, TY, T4h, T4d, T4i;
218
                         {
219
                              E TK, TL, TN, T4e;
220
                              TK = ri[WS(rs, 4)];
221
                              TL = TJ * TK;
222
                              TN = ii[WS(rs, 4)];
223
                              T4e = TJ * TN;
224
                              TO = FMA(TM, TN, TL);
225
                              T4f = FNMS(TM, TK, T4e);
226
                         }
227
                         {
228
                              E TT, TU, TX, T4g;
229
                              TT = ri[WS(rs, 20)];
230
                              TU = TS * TT;
231
                              TX = ii[WS(rs, 20)];
232
                              T4g = TS * TX;
233
                              TY = FMA(TW, TX, TU);
234
                              T4h = FNMS(TW, TT, T4g);
235
                         }
236
                         TZ = TO + TY;
237
                         T7f = T4f + T4h;
238
                         T4d = TO - TY;
239
                         T4i = T4f - T4h;
240
                         T4j = T4d + T4i;
241
                         T6t = T4i - T4d;
242
                    }
243
                    {
244
                         E T17, T4m, T1f, T4o, T4k, T4p;
245
                         {
246
                              E T12, T13, T16, T4l;
247
                              T12 = ri[WS(rs, 28)];
248
                              T13 = T11 * T12;
249
                              T16 = ii[WS(rs, 28)];
250
                              T4l = T11 * T16;
251
                              T17 = FMA(T15, T16, T13);
252
                              T4m = FNMS(T15, T12, T4l);
253
                         }
254
                         {
255
                              E T1a, T1b, T1e, T4n;
256
                              T1a = ri[WS(rs, 12)];
257
                              T1b = T19 * T1a;
258
                              T1e = ii[WS(rs, 12)];
259
                              T4n = T19 * T1e;
260
                              T1f = FMA(T1d, T1e, T1b);
261
                              T4o = FNMS(T1d, T1a, T4n);
262
                         }
263
                         T1g = T17 + T1f;
264
                         T7g = T4m + T4o;
265
                         T4k = T17 - T1f;
266
                         T4p = T4m - T4o;
267
                         T4q = T4k - T4p;
268
                         T6u = T4k + T4p;
269
                    }
270
                    {
271
                         E T1m, T4u, T1H, T4E, T1u, T4w, T1B, T4C;
272
                         {
273
                              E T1j, T1k, T1l, T4t;
274
                              T1j = ri[WS(rs, 2)];
275
                              T1k = T7 * T1j;
276
                              T1l = ii[WS(rs, 2)];
277
                              T4t = T7 * T1l;
278
                              T1m = FMA(Tb, T1l, T1k);
279
                              T4u = FNMS(Tb, T1j, T4t);
280
                         }
281
                         {
282
                              E T1D, T1E, T1G, T4D;
283
                              T1D = ri[WS(rs, 26)];
284
                              T1E = T1C * T1D;
285
                              T1G = ii[WS(rs, 26)];
286
                              T4D = T1C * T1G;
287
                              T1H = FMA(T1F, T1G, T1E);
288
                              T4E = FNMS(T1F, T1D, T4D);
289
                         }
290
                         {
291
                              E T1p, T1q, T1t, T4v;
292
                              T1p = ri[WS(rs, 18)];
293
                              T1q = T1o * T1p;
294
                              T1t = ii[WS(rs, 18)];
295
                              T4v = T1o * T1t;
296
                              T1u = FMA(T1s, T1t, T1q);
297
                              T4w = FNMS(T1s, T1p, T4v);
298
                         }
299
                         {
300
                              E T1x, T1y, T1A, T4B;
301
                              T1x = ri[WS(rs, 10)];
302
                              T1y = T1w * T1x;
303
                              T1A = ii[WS(rs, 10)];
304
                              T4B = T1w * T1A;
305
                              T1B = FMA(T1z, T1A, T1y);
306
                              T4C = FNMS(T1z, T1x, T4B);
307
                         }
308
                         T1v = T1m + T1u;
309
                         T1I = T1B + T1H;
310
                         T7m = T1v - T1I;
311
                         T7j = T4u + T4w;
312
                         T7k = T4C + T4E;
313
                         T7l = T7j - T7k;
314
                         {
315
                              E T4x, T4y, T4A, T4F;
316
                              T4x = T4u - T4w;
317
                              T4y = T1B - T1H;
318
                              T4z = T4x - T4y;
319
                              T6x = T4x + T4y;
320
                              T4A = T1m - T1u;
321
                              T4F = T4C - T4E;
322
                              T4G = T4A + T4F;
323
                              T6y = T4A - T4F;
324
                         }
325
                    }
326
                    {
327
                         E T1P, T4J, T2i, T4T, T21, T4L, T28, T4R;
328
                         {
329
                              E T1L, T1M, T1O, T4I;
330
                              T1L = ri[WS(rs, 30)];
331
                              T1M = T1K * T1L;
332
                              T1O = ii[WS(rs, 30)];
333
                              T4I = T1K * T1O;
334
                              T1P = FMA(T1N, T1O, T1M);
335
                              T4J = FNMS(T1N, T1L, T4I);
336
                         }
337
                         {
338
                              E T2d, T2e, T2h, T4S;
339
                              T2d = ri[WS(rs, 22)];
340
                              T2e = T2c * T2d;
341
                              T2h = ii[WS(rs, 22)];
342
                              T4S = T2c * T2h;
343
                              T2i = FMA(T2g, T2h, T2e);
344
                              T4T = FNMS(T2g, T2d, T4S);
345
                         }
346
                         {
347
                              E T1W, T1X, T20, T4K;
348
                              T1W = ri[WS(rs, 14)];
349
                              T1X = T1V * T1W;
350
                              T20 = ii[WS(rs, 14)];
351
                              T4K = T1V * T20;
352
                              T21 = FMA(T1Z, T20, T1X);
353
                              T4L = FNMS(T1Z, T1W, T4K);
354
                         }
355
                         {
356
                              E T24, T25, T27, T4Q;
357
                              T24 = ri[WS(rs, 6)];
358
                              T25 = T23 * T24;
359
                              T27 = ii[WS(rs, 6)];
360
                              T4Q = T23 * T27;
361
                              T28 = FMA(T26, T27, T25);
362
                              T4R = FNMS(T26, T24, T4Q);
363
                         }
364
                         T22 = T1P + T21;
365
                         T2j = T28 + T2i;
366
                         T7o = T22 - T2j;
367
                         T7p = T4J + T4L;
368
                         T7q = T4R + T4T;
369
                         T7r = T7p - T7q;
370
                         {
371
                              E T4M, T4N, T4P, T4U;
372
                              T4M = T4J - T4L;
373
                              T4N = T28 - T2i;
374
                              T4O = T4M - T4N;
375
                              T6A = T4M + T4N;
376
                              T4P = T1P - T21;
377
                              T4U = T4R - T4T;
378
                              T4V = T4P + T4U;
379
                              T6B = T4P - T4U;
380
                         }
381
                    }
382
                    {
383
                         E T3l, T5z, T3E, T5Z, T3t, T5B, T3y, T5X;
384
                         {
385
                              E T3g, T3h, T3k, T5y;
386
                              T3g = ri[WS(rs, 31)];
387
                              T3h = T3f * T3g;
388
                              T3k = ii[WS(rs, 31)];
389
                              T5y = T3f * T3k;
390
                              T3l = FMA(T3j, T3k, T3h);
391
                              T5z = FNMS(T3j, T3g, T5y);
392
                         }
393
                         {
394
                              E T3A, T3B, T3D, T5Y;
395
                              T3A = ri[WS(rs, 23)];
396
                              T3B = T3z * T3A;
397
                              T3D = ii[WS(rs, 23)];
398
                              T5Y = T3z * T3D;
399
                              T3E = FMA(T3C, T3D, T3B);
400
                              T5Z = FNMS(T3C, T3A, T5Y);
401
                         }
402
                         {
403
                              E T3o, T3p, T3s, T5A;
404
                              T3o = ri[WS(rs, 15)];
405
                              T3p = T3n * T3o;
406
                              T3s = ii[WS(rs, 15)];
407
                              T5A = T3n * T3s;
408
                              T3t = FMA(T3r, T3s, T3p);
409
                              T5B = FNMS(T3r, T3o, T5A);
410
                         }
411
                         {
412
                              E T3v, T3w, T3x, T5W;
413
                              T3v = ri[WS(rs, 7)];
414
                              T3w = TP * T3v;
415
                              T3x = ii[WS(rs, 7)];
416
                              T5W = TP * T3x;
417
                              T3y = FMA(TR, T3x, T3w);
418
                              T5X = FNMS(TR, T3v, T5W);
419
                         }
420
                         {
421
                              E T3u, T3F, T7G, T7H;
422
                              T3u = T3l + T3t;
423
                              T3F = T3y + T3E;
424
                              T3G = T3u + T3F;
425
                              T7L = T3u - T3F;
426
                              T7G = T5z + T5B;
427
                              T7H = T5X + T5Z;
428
                              T7I = T7G - T7H;
429
                              T8n = T7G + T7H;
430
                         }
431
                         {
432
                              E T5C, T5D, T5V, T60;
433
                              T5C = T5z - T5B;
434
                              T5D = T3y - T3E;
435
                              T5E = T5C - T5D;
436
                              T6P = T5C + T5D;
437
                              T5V = T3l - T3t;
438
                              T60 = T5X - T5Z;
439
                              T61 = T5V + T60;
440
                              T6M = T5V - T60;
441
                         }
442
                    }
443
                    {
444
                         E T2q, T50, T2L, T5q, T2y, T52, T2D, T5o;
445
                         {
446
                              E T2n, T2o, T2p, T4Z;
447
                              T2n = ri[WS(rs, 1)];
448
                              T2o = T2 * T2n;
449
                              T2p = ii[WS(rs, 1)];
450
                              T4Z = T2 * T2p;
451
                              T2q = FMA(T5, T2p, T2o);
452
                              T50 = FNMS(T5, T2n, T4Z);
453
                         }
454
                         {
455
                              E T2G, T2H, T2K, T5p;
456
                              T2G = ri[WS(rs, 25)];
457
                              T2H = T2F * T2G;
458
                              T2K = ii[WS(rs, 25)];
459
                              T5p = T2F * T2K;
460
                              T2L = FMA(T2J, T2K, T2H);
461
                              T5q = FNMS(T2J, T2G, T5p);
462
                         }
463
                         {
464
                              E T2t, T2u, T2x, T51;
465
                              T2t = ri[WS(rs, 17)];
466
                              T2u = T2s * T2t;
467
                              T2x = ii[WS(rs, 17)];
468
                              T51 = T2s * T2x;
469
                              T2y = FMA(T2w, T2x, T2u);
470
                              T52 = FNMS(T2w, T2t, T51);
471
                         }
472
                         {
473
                              E T2A, T2B, T2C, T5n;
474
                              T2A = ri[WS(rs, 9)];
475
                              T2B = T8 * T2A;
476
                              T2C = ii[WS(rs, 9)];
477
                              T5n = T8 * T2C;
478
                              T2D = FMA(Tc, T2C, T2B);
479
                              T5o = FNMS(Tc, T2A, T5n);
480
                         }
481
                         {
482
                              E T2z, T2M, T7v, T7w;
483
                              T2z = T2q + T2y;
484
                              T2M = T2D + T2L;
485
                              T2N = T2z + T2M;
486
                              T7A = T2z - T2M;
487
                              T7v = T50 + T52;
488
                              T7w = T5o + T5q;
489
                              T7x = T7v - T7w;
490
                              T8i = T7v + T7w;
491
                         }
492
                         {
493
                              E T53, T54, T5m, T5r;
494
                              T53 = T50 - T52;
495
                              T54 = T2D - T2L;
496
                              T55 = T53 - T54;
497
                              T6I = T53 + T54;
498
                              T5m = T2q - T2y;
499
                              T5r = T5o - T5q;
500
                              T5s = T5m + T5r;
501
                              T6F = T5m - T5r;
502
                         }
503
                    }
504
                    {
505
                         E T3K, T5G, T41, T5Q, T3S, T5I, T3X, T5O;
506
                         {
507
                              E T3H, T3I, T3J, T5F;
508
                              T3H = ri[WS(rs, 3)];
509
                              T3I = T3 * T3H;
510
                              T3J = ii[WS(rs, 3)];
511
                              T5F = T3 * T3J;
512
                              T3K = FMA(T6, T3J, T3I);
513
                              T5G = FNMS(T6, T3H, T5F);
514
                         }
515
                         {
516
                              E T3Y, T3Z, T40, T5P;
517
                              T3Y = ri[WS(rs, 11)];
518
                              T3Z = Td * T3Y;
519
                              T40 = ii[WS(rs, 11)];
520
                              T5P = Td * T40;
521
                              T41 = FMA(Th, T40, T3Z);
522
                              T5Q = FNMS(Th, T3Y, T5P);
523
                         }
524
                         {
525
                              E T3N, T3O, T3R, T5H;
526
                              T3N = ri[WS(rs, 19)];
527
                              T3O = T3M * T3N;
528
                              T3R = ii[WS(rs, 19)];
529
                              T5H = T3M * T3R;
530
                              T3S = FMA(T3Q, T3R, T3O);
531
                              T5I = FNMS(T3Q, T3N, T5H);
532
                         }
533
                         {
534
                              E T3U, T3V, T3W, T5N;
535
                              T3U = ri[WS(rs, 27)];
536
                              T3V = Te * T3U;
537
                              T3W = ii[WS(rs, 27)];
538
                              T5N = Te * T3W;
539
                              T3X = FMA(Ti, T3W, T3V);
540
                              T5O = FNMS(Ti, T3U, T5N);
541
                         }
542
                         {
543
                              E T3T, T42, T7M, T7N;
544
                              T3T = T3K + T3S;
545
                              T42 = T3X + T41;
546
                              T43 = T3T + T42;
547
                              T7J = T42 - T3T;
548
                              T7M = T5G + T5I;
549
                              T7N = T5O + T5Q;
550
                              T7O = T7M - T7N;
551
                              T8o = T7M + T7N;
552
                         }
553
                         {
554
                              E T5J, T5K, T5M, T5R;
555
                              T5J = T5G - T5I;
556
                              T5K = T3K - T3S;
557
                              T5L = T5J - T5K;
558
                              T62 = T5K + T5J;
559
                              T5M = T3X - T41;
560
                              T5R = T5O - T5Q;
561
                              T5S = T5M + T5R;
562
                              T63 = T5M - T5R;
563
                         }
564
                    }
565
                    {
566
                         E T2R, T57, T3a, T5h, T2Z, T59, T36, T5f;
567
                         {
568
                              E T2O, T2P, T2Q, T56;
569
                              T2O = ri[WS(rs, 5)];
570
                              T2P = T29 * T2O;
571
                              T2Q = ii[WS(rs, 5)];
572
                              T56 = T29 * T2Q;
573
                              T2R = FMA(T2b, T2Q, T2P);
574
                              T57 = FNMS(T2b, T2O, T56);
575
                         }
576
                         {
577
                              E T37, T38, T39, T5g;
578
                              T37 = ri[WS(rs, 13)];
579
                              T38 = T1R * T37;
580
                              T39 = ii[WS(rs, 13)];
581
                              T5g = T1R * T39;
582
                              T3a = FMA(T1U, T39, T38);
583
                              T5h = FNMS(T1U, T37, T5g);
584
                         }
585
                         {
586
                              E T2U, T2V, T2Y, T58;
587
                              T2U = ri[WS(rs, 21)];
588
                              T2V = T2T * T2U;
589
                              T2Y = ii[WS(rs, 21)];
590
                              T58 = T2T * T2Y;
591
                              T2Z = FMA(T2X, T2Y, T2V);
592
                              T59 = FNMS(T2X, T2U, T58);
593
                         }
594
                         {
595
                              E T32, T33, T35, T5e;
596
                              T32 = ri[WS(rs, 29)];
597
                              T33 = T31 * T32;
598
                              T35 = ii[WS(rs, 29)];
599
                              T5e = T31 * T35;
600
                              T36 = FMA(T34, T35, T33);
601
                              T5f = FNMS(T34, T32, T5e);
602
                         }
603
                         {
604
                              E T30, T3b, T7B, T7C;
605
                              T30 = T2R + T2Z;
606
                              T3b = T36 + T3a;
607
                              T3c = T30 + T3b;
608
                              T7y = T3b - T30;
609
                              T7B = T57 + T59;
610
                              T7C = T5f + T5h;
611
                              T7D = T7B - T7C;
612
                              T8j = T7B + T7C;
613
                         }
614
                         {
615
                              E T5a, T5b, T5d, T5i;
616
                              T5a = T57 - T59;
617
                              T5b = T2R - T2Z;
618
                              T5c = T5a - T5b;
619
                              T5t = T5b + T5a;
620
                              T5d = T36 - T3a;
621
                              T5i = T5f - T5h;
622
                              T5j = T5d + T5i;
623
                              T5u = T5d - T5i;
624
                         }
625
                    }
626
                    {
627
                         E T1i, T8c, T8z, T8A, T8J, T8O, T2l, T8N, T45, T8L, T8l, T8t, T8q, T8u, T8f;
628
                         E T8B;
629
                         {
630
                              E TI, T1h, T8x, T8y;
631
                              TI = Tq + TH;
632
                              T1h = TZ + T1g;
633
                              T1i = TI + T1h;
634
                              T8c = TI - T1h;
635
                              T8x = T8i + T8j;
636
                              T8y = T8n + T8o;
637
                              T8z = T8x - T8y;
638
                              T8A = T8x + T8y;
639
                         }
640
                         {
641
                              E T8C, T8I, T1J, T2k;
642
                              T8C = T7f + T7g;
643
                              T8I = T8D + T8H;
644
                              T8J = T8C + T8I;
645
                              T8O = T8I - T8C;
646
                              T1J = T1v + T1I;
647
                              T2k = T22 + T2j;
648
                              T2l = T1J + T2k;
649
                              T8N = T2k - T1J;
650
                         }
651
                         {
652
                              E T3d, T44, T8h, T8k;
653
                              T3d = T2N + T3c;
654
                              T44 = T3G + T43;
655
                              T45 = T3d + T44;
656
                              T8L = T44 - T3d;
657
                              T8h = T2N - T3c;
658
                              T8k = T8i - T8j;
659
                              T8l = T8h + T8k;
660
                              T8t = T8k - T8h;
661
                         }
662
                         {
663
                              E T8m, T8p, T8d, T8e;
664
                              T8m = T3G - T43;
665
                              T8p = T8n - T8o;
666
                              T8q = T8m - T8p;
667
                              T8u = T8m + T8p;
668
                              T8d = T7j + T7k;
669
                              T8e = T7p + T7q;
670
                              T8f = T8d - T8e;
671
                              T8B = T8d + T8e;
672
                         }
673
                         {
674
                              E T2m, T8K, T8w, T8M;
675
                              T2m = T1i + T2l;
676
                              ri[WS(rs, 16)] = T2m - T45;
677
                              ri[0] = T2m + T45;
678
                              T8K = T8B + T8J;
679
                              ii[0] = T8A + T8K;
680
                              ii[WS(rs, 16)] = T8K - T8A;
681
                              T8w = T1i - T2l;
682
                              ri[WS(rs, 24)] = T8w - T8z;
683
                              ri[WS(rs, 8)] = T8w + T8z;
684
                              T8M = T8J - T8B;
685
                              ii[WS(rs, 8)] = T8L + T8M;
686
                              ii[WS(rs, 24)] = T8M - T8L;
687
                         }
688
                         {
689
                              E T8g, T8r, T8P, T8Q;
690
                              T8g = T8c + T8f;
691
                              T8r = T8l + T8q;
692
                              ri[WS(rs, 20)] = FNMS(KP707106781, T8r, T8g);
693
                              ri[WS(rs, 4)] = FMA(KP707106781, T8r, T8g);
694
                              T8P = T8N + T8O;
695
                              T8Q = T8t + T8u;
696
                              ii[WS(rs, 4)] = FMA(KP707106781, T8Q, T8P);
697
                              ii[WS(rs, 20)] = FNMS(KP707106781, T8Q, T8P);
698
                         }
699
                         {
700
                              E T8s, T8v, T8R, T8S;
701
                              T8s = T8c - T8f;
702
                              T8v = T8t - T8u;
703
                              ri[WS(rs, 28)] = FNMS(KP707106781, T8v, T8s);
704
                              ri[WS(rs, 12)] = FMA(KP707106781, T8v, T8s);
705
                              T8R = T8O - T8N;
706
                              T8S = T8q - T8l;
707
                              ii[WS(rs, 12)] = FMA(KP707106781, T8S, T8R);
708
                              ii[WS(rs, 28)] = FNMS(KP707106781, T8S, T8R);
709
                         }
710
                    }
711
                    {
712
                         E T7i, T7W, T86, T8a, T8V, T91, T7t, T8W, T7F, T7T, T7Z, T92, T83, T89, T7Q;
713
                         E T7U;
714
                         {
715
                              E T7e, T7h, T84, T85;
716
                              T7e = Tq - TH;
717
                              T7h = T7f - T7g;
718
                              T7i = T7e - T7h;
719
                              T7W = T7e + T7h;
720
                              T84 = T7L + T7O;
721
                              T85 = T7I + T7J;
722
                              T86 = FNMS(KP414213562, T85, T84);
723
                              T8a = FMA(KP414213562, T84, T85);
724
                         }
725
                         {
726
                              E T8T, T8U, T7n, T7s;
727
                              T8T = T1g - TZ;
728
                              T8U = T8H - T8D;
729
                              T8V = T8T + T8U;
730
                              T91 = T8U - T8T;
731
                              T7n = T7l - T7m;
732
                              T7s = T7o + T7r;
733
                              T7t = T7n - T7s;
734
                              T8W = T7n + T7s;
735
                         }
736
                         {
737
                              E T7z, T7E, T7X, T7Y;
738
                              T7z = T7x - T7y;
739
                              T7E = T7A - T7D;
740
                              T7F = FMA(KP414213562, T7E, T7z);
741
                              T7T = FNMS(KP414213562, T7z, T7E);
742
                              T7X = T7m + T7l;
743
                              T7Y = T7o - T7r;
744
                              T7Z = T7X + T7Y;
745
                              T92 = T7Y - T7X;
746
                         }
747
                         {
748
                              E T81, T82, T7K, T7P;
749
                              T81 = T7A + T7D;
750
                              T82 = T7x + T7y;
751
                              T83 = FMA(KP414213562, T82, T81);
752
                              T89 = FNMS(KP414213562, T81, T82);
753
                              T7K = T7I - T7J;
754
                              T7P = T7L - T7O;
755
                              T7Q = FNMS(KP414213562, T7P, T7K);
756
                              T7U = FMA(KP414213562, T7K, T7P);
757
                         }
758
                         {
759
                              E T7u, T7R, T93, T94;
760
                              T7u = FMA(KP707106781, T7t, T7i);
761
                              T7R = T7F - T7Q;
762
                              ri[WS(rs, 22)] = FNMS(KP923879532, T7R, T7u);
763
                              ri[WS(rs, 6)] = FMA(KP923879532, T7R, T7u);
764
                              T93 = FMA(KP707106781, T92, T91);
765
                              T94 = T7U - T7T;
766
                              ii[WS(rs, 6)] = FMA(KP923879532, T94, T93);
767
                              ii[WS(rs, 22)] = FNMS(KP923879532, T94, T93);
768
                         }
769
                         {
770
                              E T7S, T7V, T95, T96;
771
                              T7S = FNMS(KP707106781, T7t, T7i);
772
                              T7V = T7T + T7U;
773
                              ri[WS(rs, 14)] = FNMS(KP923879532, T7V, T7S);
774
                              ri[WS(rs, 30)] = FMA(KP923879532, T7V, T7S);
775
                              T95 = FNMS(KP707106781, T92, T91);
776
                              T96 = T7F + T7Q;
777
                              ii[WS(rs, 14)] = FNMS(KP923879532, T96, T95);
778
                              ii[WS(rs, 30)] = FMA(KP923879532, T96, T95);
779
                         }
780
                         {
781
                              E T80, T87, T8X, T8Y;
782
                              T80 = FMA(KP707106781, T7Z, T7W);
783
                              T87 = T83 + T86;
784
                              ri[WS(rs, 18)] = FNMS(KP923879532, T87, T80);
785
                              ri[WS(rs, 2)] = FMA(KP923879532, T87, T80);
786
                              T8X = FMA(KP707106781, T8W, T8V);
787
                              T8Y = T89 + T8a;
788
                              ii[WS(rs, 2)] = FMA(KP923879532, T8Y, T8X);
789
                              ii[WS(rs, 18)] = FNMS(KP923879532, T8Y, T8X);
790
                         }
791
                         {
792
                              E T88, T8b, T8Z, T90;
793
                              T88 = FNMS(KP707106781, T7Z, T7W);
794
                              T8b = T89 - T8a;
795
                              ri[WS(rs, 26)] = FNMS(KP923879532, T8b, T88);
796
                              ri[WS(rs, 10)] = FMA(KP923879532, T8b, T88);
797
                              T8Z = FNMS(KP707106781, T8W, T8V);
798
                              T90 = T86 - T83;
799
                              ii[WS(rs, 10)] = FMA(KP923879532, T90, T8Z);
800
                              ii[WS(rs, 26)] = FNMS(KP923879532, T90, T8Z);
801
                         }
802
                    }
803
                    {
804
                         E T4s, T6c, T4X, T9c, T9b, T9h, T6f, T9i, T66, T6q, T6a, T6m, T5x, T6p, T69;
805
                         E T6j;
806
                         {
807
                              E T4c, T4r, T6d, T6e;
808
                              T4c = T46 + T4b;
809
                              T4r = T4j + T4q;
810
                              T4s = FNMS(KP707106781, T4r, T4c);
811
                              T6c = FMA(KP707106781, T4r, T4c);
812
                              {
813
                                   E T4H, T4W, T99, T9a;
814
                                   T4H = FNMS(KP414213562, T4G, T4z);
815
                                   T4W = FMA(KP414213562, T4V, T4O);
816
                                   T4X = T4H - T4W;
817
                                   T9c = T4H + T4W;
818
                                   T99 = T97 - T98;
819
                                   T9a = T6t + T6u;
820
                                   T9b = FMA(KP707106781, T9a, T99);
821
                                   T9h = FNMS(KP707106781, T9a, T99);
822
                              }
823
                              T6d = FMA(KP414213562, T4z, T4G);
824
                              T6e = FNMS(KP414213562, T4O, T4V);
825
                              T6f = T6d + T6e;
826
                              T9i = T6e - T6d;
827
                              {
828
                                   E T5U, T6l, T65, T6k, T5T, T64;
829
                                   T5T = T5L + T5S;
830
                                   T5U = FNMS(KP707106781, T5T, T5E);
831
                                   T6l = FMA(KP707106781, T5T, T5E);
832
                                   T64 = T62 + T63;
833
                                   T65 = FNMS(KP707106781, T64, T61);
834
                                   T6k = FMA(KP707106781, T64, T61);
835
                                   T66 = FNMS(KP668178637, T65, T5U);
836
                                   T6q = FMA(KP198912367, T6k, T6l);
837
                                   T6a = FMA(KP668178637, T5U, T65);
838
                                   T6m = FNMS(KP198912367, T6l, T6k);
839
                              }
840
                              {
841
                                   E T5l, T6i, T5w, T6h, T5k, T5v;
842
                                   T5k = T5c + T5j;
843
                                   T5l = FNMS(KP707106781, T5k, T55);
844
                                   T6i = FMA(KP707106781, T5k, T55);
845
                                   T5v = T5t + T5u;
846
                                   T5w = FNMS(KP707106781, T5v, T5s);
847
                                   T6h = FMA(KP707106781, T5v, T5s);
848
                                   T5x = FMA(KP668178637, T5w, T5l);
849
                                   T6p = FNMS(KP198912367, T6h, T6i);
850
                                   T69 = FNMS(KP668178637, T5l, T5w);
851
                                   T6j = FMA(KP198912367, T6i, T6h);
852
                              }
853
                         }
854
                         {
855
                              E T4Y, T67, T9j, T9k;
856
                              T4Y = FMA(KP923879532, T4X, T4s);
857
                              T67 = T5x - T66;
858
                              ri[WS(rs, 21)] = FNMS(KP831469612, T67, T4Y);
859
                              ri[WS(rs, 5)] = FMA(KP831469612, T67, T4Y);
860
                              T9j = FMA(KP923879532, T9i, T9h);
861
                              T9k = T6a - T69;
862
                              ii[WS(rs, 5)] = FMA(KP831469612, T9k, T9j);
863
                              ii[WS(rs, 21)] = FNMS(KP831469612, T9k, T9j);
864
                         }
865
                         {
866
                              E T68, T6b, T9l, T9m;
867
                              T68 = FNMS(KP923879532, T4X, T4s);
868
                              T6b = T69 + T6a;
869
                              ri[WS(rs, 13)] = FNMS(KP831469612, T6b, T68);
870
                              ri[WS(rs, 29)] = FMA(KP831469612, T6b, T68);
871
                              T9l = FNMS(KP923879532, T9i, T9h);
872
                              T9m = T5x + T66;
873
                              ii[WS(rs, 13)] = FNMS(KP831469612, T9m, T9l);
874
                              ii[WS(rs, 29)] = FMA(KP831469612, T9m, T9l);
875
                         }
876
                         {
877
                              E T6g, T6n, T9d, T9e;
878
                              T6g = FMA(KP923879532, T6f, T6c);
879
                              T6n = T6j + T6m;
880
                              ri[WS(rs, 17)] = FNMS(KP980785280, T6n, T6g);
881
                              ri[WS(rs, 1)] = FMA(KP980785280, T6n, T6g);
882
                              T9d = FMA(KP923879532, T9c, T9b);
883
                              T9e = T6p + T6q;
884
                              ii[WS(rs, 1)] = FMA(KP980785280, T9e, T9d);
885
                              ii[WS(rs, 17)] = FNMS(KP980785280, T9e, T9d);
886
                         }
887
                         {
888
                              E T6o, T6r, T9f, T9g;
889
                              T6o = FNMS(KP923879532, T6f, T6c);
890
                              T6r = T6p - T6q;
891
                              ri[WS(rs, 25)] = FNMS(KP980785280, T6r, T6o);
892
                              ri[WS(rs, 9)] = FMA(KP980785280, T6r, T6o);
893
                              T9f = FNMS(KP923879532, T9c, T9b);
894
                              T9g = T6m - T6j;
895
                              ii[WS(rs, 9)] = FMA(KP980785280, T9g, T9f);
896
                              ii[WS(rs, 25)] = FNMS(KP980785280, T9g, T9f);
897
                         }
898
                    }
899
                    {
900
                         E T6w, T6Y, T6D, T9w, T9p, T9v, T71, T9q, T6S, T7c, T6W, T78, T6L, T7b, T6V;
901
                         E T75;
902
                         {
903
                              E T6s, T6v, T6Z, T70;
904
                              T6s = T46 - T4b;
905
                              T6v = T6t - T6u;
906
                              T6w = FMA(KP707106781, T6v, T6s);
907
                              T6Y = FNMS(KP707106781, T6v, T6s);
908
                              {
909
                                   E T6z, T6C, T9n, T9o;
910
                                   T6z = FMA(KP414213562, T6y, T6x);
911
                                   T6C = FNMS(KP414213562, T6B, T6A);
912
                                   T6D = T6z - T6C;
913
                                   T9w = T6z + T6C;
914
                                   T9n = T98 + T97;
915
                                   T9o = T4q - T4j;
916
                                   T9p = FMA(KP707106781, T9o, T9n);
917
                                   T9v = FNMS(KP707106781, T9o, T9n);
918
                              }
919
                              T6Z = FNMS(KP414213562, T6x, T6y);
920
                              T70 = FMA(KP414213562, T6A, T6B);
921
                              T71 = T6Z + T70;
922
                              T9q = T70 - T6Z;
923
                              {
924
                                   E T6O, T77, T6R, T76, T6N, T6Q;
925
                                   T6N = T5S - T5L;
926
                                   T6O = FNMS(KP707106781, T6N, T6M);
927
                                   T77 = FMA(KP707106781, T6N, T6M);
928
                                   T6Q = T62 - T63;
929
                                   T6R = FNMS(KP707106781, T6Q, T6P);
930
                                   T76 = FMA(KP707106781, T6Q, T6P);
931
                                   T6S = FNMS(KP668178637, T6R, T6O);
932
                                   T7c = FMA(KP198912367, T76, T77);
933
                                   T6W = FMA(KP668178637, T6O, T6R);
934
                                   T78 = FNMS(KP198912367, T77, T76);
935
                              }
936
                              {
937
                                   E T6H, T74, T6K, T73, T6G, T6J;
938
                                   T6G = T5j - T5c;
939
                                   T6H = FNMS(KP707106781, T6G, T6F);
940
                                   T74 = FMA(KP707106781, T6G, T6F);
941
                                   T6J = T5t - T5u;
942
                                   T6K = FNMS(KP707106781, T6J, T6I);
943
                                   T73 = FMA(KP707106781, T6J, T6I);
944
                                   T6L = FMA(KP668178637, T6K, T6H);
945
                                   T7b = FNMS(KP198912367, T73, T74);
946
                                   T6V = FNMS(KP668178637, T6H, T6K);
947
                                   T75 = FMA(KP198912367, T74, T73);
948
                              }
949
                         }
950
                         {
951
                              E T6E, T6T, T9r, T9s;
952
                              T6E = FMA(KP923879532, T6D, T6w);
953
                              T6T = T6L + T6S;
954
                              ri[WS(rs, 19)] = FNMS(KP831469612, T6T, T6E);
955
                              ri[WS(rs, 3)] = FMA(KP831469612, T6T, T6E);
956
                              T9r = FMA(KP923879532, T9q, T9p);
957
                              T9s = T6V + T6W;
958
                              ii[WS(rs, 3)] = FMA(KP831469612, T9s, T9r);
959
                              ii[WS(rs, 19)] = FNMS(KP831469612, T9s, T9r);
960
                         }
961
                         {
962
                              E T6U, T6X, T9t, T9u;
963
                              T6U = FNMS(KP923879532, T6D, T6w);
964
                              T6X = T6V - T6W;
965
                              ri[WS(rs, 27)] = FNMS(KP831469612, T6X, T6U);
966
                              ri[WS(rs, 11)] = FMA(KP831469612, T6X, T6U);
967
                              T9t = FNMS(KP923879532, T9q, T9p);
968
                              T9u = T6S - T6L;
969
                              ii[WS(rs, 11)] = FMA(KP831469612, T9u, T9t);
970
                              ii[WS(rs, 27)] = FNMS(KP831469612, T9u, T9t);
971
                         }
972
                         {
973
                              E T72, T79, T9x, T9y;
974
                              T72 = FNMS(KP923879532, T71, T6Y);
975
                              T79 = T75 - T78;
976
                              ri[WS(rs, 23)] = FNMS(KP980785280, T79, T72);
977
                              ri[WS(rs, 7)] = FMA(KP980785280, T79, T72);
978
                              T9x = FNMS(KP923879532, T9w, T9v);
979
                              T9y = T7c - T7b;
980
                              ii[WS(rs, 7)] = FMA(KP980785280, T9y, T9x);
981
                              ii[WS(rs, 23)] = FNMS(KP980785280, T9y, T9x);
982
                         }
983
                         {
984
                              E T7a, T7d, T9z, T9A;
985
                              T7a = FMA(KP923879532, T71, T6Y);
986
                              T7d = T7b + T7c;
987
                              ri[WS(rs, 15)] = FNMS(KP980785280, T7d, T7a);
988
                              ri[WS(rs, 31)] = FMA(KP980785280, T7d, T7a);
989
                              T9z = FMA(KP923879532, T9w, T9v);
990
                              T9A = T75 + T78;
991
                              ii[WS(rs, 15)] = FNMS(KP980785280, T9A, T9z);
992
                              ii[WS(rs, 31)] = FMA(KP980785280, T9A, T9z);
993
                         }
994
                    }
995
               }
996
          }
997
     }
998
}
999

    
1000
static const tw_instr twinstr[] = {
1001
     {TW_CEXP, 0, 1},
1002
     {TW_CEXP, 0, 3},
1003
     {TW_CEXP, 0, 9},
1004
     {TW_CEXP, 0, 27},
1005
     {TW_NEXT, 1, 0}
1006
};
1007

    
1008
/*
 * Descriptor for the FMA variant of t2_32; X(codelet_t2_32) passes its
 * address to X(kdft_dit_register).
 *
 * NOTE(review): the per-field notes below are inferred from the values and
 * from the generator comments in this file, not from the declaration of
 * ct_desc -- confirm against the header that declares it.
 */
static const ct_desc desc = {
     32,                 /* matches the generator flag "-n 32" */
     "t2_32",            /* matches the generator flag "-name t2_32" */
     twinstr,            /* twiddle instruction table defined above */
     &GENUS,             /* GENUS is not defined in the visible part of this file */
     {236, 98, 252, 0},  /* matches the header comment: 236 additions, 98
                            multiplications, 252 fused multiply/add; the
                            trailing 0 is presumably an "other" count */
     0, 0, 0             /* remaining fields are left at zero */
};
1009

    
1010
/*
 * Registration entry point for the FMA variant of the t2_32 codelet.
 *
 * p: planner to register with; it is forwarded unchanged.
 *
 * The function only calls X(kdft_dit_register) with the kernel function
 * t2_32 and the address of its descriptor `desc`; it returns nothing.
 */
void X(codelet_t2_32) (planner *p) {
     X(kdft_dit_register) (p, t2_32, &desc);
}
1013
#else
1014

    
1015
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -name t2_32 -include dft/scalar/t.h */
1016

    
1017
/*
 * This function contains 488 FP additions, 280 FP multiplications,
 * (or, 376 additions, 168 multiplications, 112 fused multiply/add),
 * 158 stack variables, 7 constants, and 128 memory accesses
 */
1022
#include "dft/scalar/t.h"
1023

    
1024
static void t2_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
1025
{
1026
     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
1027
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
1028
     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
1029
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
1030
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
1031
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
1032
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
1033
     {
1034
          INT m;
1035
          for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(64, rs)) {
1036
               E T2, T5, T3, T6, T8, TM, TO, Td, T9, Te, Th, Tl, TD, TH, T1y;
1037
               E T1H, T15, T1A, T11, T1F, T1n, T1p, T2q, T2I, T2u, T2K, T2V, T3b, T2Z, T3d;
1038
               E Tu, Ty, T3l, T3n, T1t, T1v, T2f, T2h, T1a, T1e, T32, T34, T1W, T1Y, T2C;
1039
               E T2E, Tg, TR, Tk, TS, Tm, TV, To, TT, T1M, T21, T1P, T22, T1Q, T25;
1040
               E T1S, T23;
1041
               {
1042
                    E Ts, T1d, Tx, T18, Tt, T1c, Tw, T19, TB, T14, TG, TZ, TC, T13, TF;
1043
                    E T10;
1044
                    {
1045
                         E T4, Tc, T7, Tb;
1046
                         T2 = W[0];
1047
                         T5 = W[1];
1048
                         T3 = W[2];
1049
                         T6 = W[3];
1050
                         T4 = T2 * T3;
1051
                         Tc = T5 * T3;
1052
                         T7 = T5 * T6;
1053
                         Tb = T2 * T6;
1054
                         T8 = T4 + T7;
1055
                         TM = T4 - T7;
1056
                         TO = Tb + Tc;
1057
                         Td = Tb - Tc;
1058
                         T9 = W[4];
1059
                         Ts = T2 * T9;
1060
                         T1d = T6 * T9;
1061
                         Tx = T5 * T9;
1062
                         T18 = T3 * T9;
1063
                         Te = W[5];
1064
                         Tt = T5 * Te;
1065
                         T1c = T3 * Te;
1066
                         Tw = T2 * Te;
1067
                         T19 = T6 * Te;
1068
                         Th = W[6];
1069
                         TB = T3 * Th;
1070
                         T14 = T5 * Th;
1071
                         TG = T6 * Th;
1072
                         TZ = T2 * Th;
1073
                         Tl = W[7];
1074
                         TC = T6 * Tl;
1075
                         T13 = T2 * Tl;
1076
                         TF = T3 * Tl;
1077
                         T10 = T5 * Tl;
1078
                    }
1079
                    TD = TB + TC;
1080
                    TH = TF - TG;
1081
                    T1y = TZ + T10;
1082
                    T1H = TF + TG;
1083
                    T15 = T13 + T14;
1084
                    T1A = T13 - T14;
1085
                    T11 = TZ - T10;
1086
                    T1F = TB - TC;
1087
                    T1n = FMA(T9, Th, Te * Tl);
1088
                    T1p = FNMS(Te, Th, T9 * Tl);
1089
                    {
1090
                         E T2o, T2p, T2s, T2t;
1091
                         T2o = T8 * Th;
1092
                         T2p = Td * Tl;
1093
                         T2q = T2o + T2p;
1094
                         T2I = T2o - T2p;
1095
                         T2s = T8 * Tl;
1096
                         T2t = Td * Th;
1097
                         T2u = T2s - T2t;
1098
                         T2K = T2s + T2t;
1099
                    }
1100
                    {
1101
                         E T2T, T2U, T2X, T2Y;
1102
                         T2T = TM * Th;
1103
                         T2U = TO * Tl;
1104
                         T2V = T2T - T2U;
1105
                         T3b = T2T + T2U;
1106
                         T2X = TM * Tl;
1107
                         T2Y = TO * Th;
1108
                         T2Z = T2X + T2Y;
1109
                         T3d = T2X - T2Y;
1110
                         Tu = Ts + Tt;
1111
                         Ty = Tw - Tx;
1112
                         T3l = FMA(Tu, Th, Ty * Tl);
1113
                         T3n = FNMS(Ty, Th, Tu * Tl);
1114
                    }
1115
                    T1t = Ts - Tt;
1116
                    T1v = Tw + Tx;
1117
                    T2f = FMA(T1t, Th, T1v * Tl);
1118
                    T2h = FNMS(T1v, Th, T1t * Tl);
1119
                    T1a = T18 - T19;
1120
                    T1e = T1c + T1d;
1121
                    T32 = FMA(T1a, Th, T1e * Tl);
1122
                    T34 = FNMS(T1e, Th, T1a * Tl);
1123
                    T1W = T18 + T19;
1124
                    T1Y = T1c - T1d;
1125
                    T2C = FMA(T1W, Th, T1Y * Tl);
1126
                    T2E = FNMS(T1Y, Th, T1W * Tl);
1127
                    {
1128
                         E Ta, Tf, Ti, Tj;
1129
                         Ta = T8 * T9;
1130
                         Tf = Td * Te;
1131
                         Tg = Ta - Tf;
1132
                         TR = Ta + Tf;
1133
                         Ti = T8 * Te;
1134
                         Tj = Td * T9;
1135
                         Tk = Ti + Tj;
1136
                         TS = Ti - Tj;
1137
                    }
1138
                    Tm = FMA(Tg, Th, Tk * Tl);
1139
                    TV = FNMS(TS, Th, TR * Tl);
1140
                    To = FNMS(Tk, Th, Tg * Tl);
1141
                    TT = FMA(TR, Th, TS * Tl);
1142
                    {
1143
                         E T1K, T1L, T1N, T1O;
1144
                         T1K = TM * T9;
1145
                         T1L = TO * Te;
1146
                         T1M = T1K - T1L;
1147
                         T21 = T1K + T1L;
1148
                         T1N = TM * Te;
1149
                         T1O = TO * T9;
1150
                         T1P = T1N + T1O;
1151
                         T22 = T1N - T1O;
1152
                    }
1153
                    T1Q = FMA(T1M, Th, T1P * Tl);
1154
                    T25 = FNMS(T22, Th, T21 * Tl);
1155
                    T1S = FNMS(T1P, Th, T1M * Tl);
1156
                    T23 = FMA(T21, Th, T22 * Tl);
1157
               }
1158
               {
1159
                    E TL, T6f, T8c, T8q, T3F, T5t, T7I, T7W, T2y, T6B, T6y, T7j, T4k, T5J, T4B;
1160
                    E T5G, T3h, T6H, T6O, T7o, T4L, T5N, T52, T5Q, T1i, T7V, T6i, T7D, T3K, T5u;
1161
                    E T3P, T5v, T1E, T6n, T6m, T7e, T3W, T5y, T41, T5z, T29, T6p, T6s, T7f, T47;
1162
                    E T5B, T4c, T5C, T2R, T6z, T6E, T7k, T4v, T5H, T4E, T5K, T3y, T6P, T6K, T7p;
1163
                    E T4W, T5R, T55, T5O;
1164
                    {
1165
                         E T1, T7G, Tq, T7F, TA, T3C, TJ, T3D, Tn, Tp;
1166
                         T1 = ri[0];
1167
                         T7G = ii[0];
1168
                         Tn = ri[WS(rs, 16)];
1169
                         Tp = ii[WS(rs, 16)];
1170
                         Tq = FMA(Tm, Tn, To * Tp);
1171
                         T7F = FNMS(To, Tn, Tm * Tp);
1172
                         {
1173
                              E Tv, Tz, TE, TI;
1174
                              Tv = ri[WS(rs, 8)];
1175
                              Tz = ii[WS(rs, 8)];
1176
                              TA = FMA(Tu, Tv, Ty * Tz);
1177
                              T3C = FNMS(Ty, Tv, Tu * Tz);
1178
                              TE = ri[WS(rs, 24)];
1179
                              TI = ii[WS(rs, 24)];
1180
                              TJ = FMA(TD, TE, TH * TI);
1181
                              T3D = FNMS(TH, TE, TD * TI);
1182
                         }
1183
                         {
1184
                              E Tr, TK, T8a, T8b;
1185
                              Tr = T1 + Tq;
1186
                              TK = TA + TJ;
1187
                              TL = Tr + TK;
1188
                              T6f = Tr - TK;
1189
                              T8a = T7G - T7F;
1190
                              T8b = TA - TJ;
1191
                              T8c = T8a - T8b;
1192
                              T8q = T8b + T8a;
1193
                         }
1194
                         {
1195
                              E T3B, T3E, T7E, T7H;
1196
                              T3B = T1 - Tq;
1197
                              T3E = T3C - T3D;
1198
                              T3F = T3B - T3E;
1199
                              T5t = T3B + T3E;
1200
                              T7E = T3C + T3D;
1201
                              T7H = T7F + T7G;
1202
                              T7I = T7E + T7H;
1203
                              T7W = T7H - T7E;
1204
                         }
1205
                    }
1206
                    {
1207
                         E T2e, T4g, T2w, T4z, T2j, T4h, T2n, T4y;
1208
                         {
1209
                              E T2c, T2d, T2r, T2v;
1210
                              T2c = ri[WS(rs, 1)];
1211
                              T2d = ii[WS(rs, 1)];
1212
                              T2e = FMA(T2, T2c, T5 * T2d);
1213
                              T4g = FNMS(T5, T2c, T2 * T2d);
1214
                              T2r = ri[WS(rs, 25)];
1215
                              T2v = ii[WS(rs, 25)];
1216
                              T2w = FMA(T2q, T2r, T2u * T2v);
1217
                              T4z = FNMS(T2u, T2r, T2q * T2v);
1218
                         }
1219
                         {
1220
                              E T2g, T2i, T2l, T2m;
1221
                              T2g = ri[WS(rs, 17)];
1222
                              T2i = ii[WS(rs, 17)];
1223
                              T2j = FMA(T2f, T2g, T2h * T2i);
1224
                              T4h = FNMS(T2h, T2g, T2f * T2i);
1225
                              T2l = ri[WS(rs, 9)];
1226
                              T2m = ii[WS(rs, 9)];
1227
                              T2n = FMA(T9, T2l, Te * T2m);
1228
                              T4y = FNMS(Te, T2l, T9 * T2m);
1229
                         }
1230
                         {
1231
                              E T2k, T2x, T6w, T6x;
1232
                              T2k = T2e + T2j;
1233
                              T2x = T2n + T2w;
1234
                              T2y = T2k + T2x;
1235
                              T6B = T2k - T2x;
1236
                              T6w = T4g + T4h;
1237
                              T6x = T4y + T4z;
1238
                              T6y = T6w - T6x;
1239
                              T7j = T6w + T6x;
1240
                         }
1241
                         {
1242
                              E T4i, T4j, T4x, T4A;
1243
                              T4i = T4g - T4h;
1244
                              T4j = T2n - T2w;
1245
                              T4k = T4i + T4j;
1246
                              T5J = T4i - T4j;
1247
                              T4x = T2e - T2j;
1248
                              T4A = T4y - T4z;
1249
                              T4B = T4x - T4A;
1250
                              T5G = T4x + T4A;
1251
                         }
1252
                    }
1253
                    {
1254
                         E T31, T4Y, T3f, T4J, T36, T4Z, T3a, T4I;
1255
                         {
1256
                              E T2W, T30, T3c, T3e;
1257
                              T2W = ri[WS(rs, 31)];
1258
                              T30 = ii[WS(rs, 31)];
1259
                              T31 = FMA(T2V, T2W, T2Z * T30);
1260
                              T4Y = FNMS(T2Z, T2W, T2V * T30);
1261
                              T3c = ri[WS(rs, 23)];
1262
                              T3e = ii[WS(rs, 23)];
1263
                              T3f = FMA(T3b, T3c, T3d * T3e);
1264
                              T4J = FNMS(T3d, T3c, T3b * T3e);
1265
                         }
1266
                         {
1267
                              E T33, T35, T38, T39;
1268
                              T33 = ri[WS(rs, 15)];
1269
                              T35 = ii[WS(rs, 15)];
1270
                              T36 = FMA(T32, T33, T34 * T35);
1271
                              T4Z = FNMS(T34, T33, T32 * T35);
1272
                              T38 = ri[WS(rs, 7)];
1273
                              T39 = ii[WS(rs, 7)];
1274
                              T3a = FMA(TR, T38, TS * T39);
1275
                              T4I = FNMS(TS, T38, TR * T39);
1276
                         }
1277
                         {
1278
                              E T37, T3g, T6M, T6N;
1279
                              T37 = T31 + T36;
1280
                              T3g = T3a + T3f;
1281
                              T3h = T37 + T3g;
1282
                              T6H = T37 - T3g;
1283
                              T6M = T4Y + T4Z;
1284
                              T6N = T4I + T4J;
1285
                              T6O = T6M - T6N;
1286
                              T7o = T6M + T6N;
1287
                         }
1288
                         {
1289
                              E T4H, T4K, T50, T51;
1290
                              T4H = T31 - T36;
1291
                              T4K = T4I - T4J;
1292
                              T4L = T4H - T4K;
1293
                              T5N = T4H + T4K;
1294
                              T50 = T4Y - T4Z;
1295
                              T51 = T3a - T3f;
1296
                              T52 = T50 + T51;
1297
                              T5Q = T50 - T51;
1298
                         }
1299
                    }
1300
                    {
1301
                         E TQ, T3G, T1g, T3N, TX, T3H, T17, T3M;
1302
                         {
1303
                              E TN, TP, T1b, T1f;
1304
                              TN = ri[WS(rs, 4)];
1305
                              TP = ii[WS(rs, 4)];
1306
                              TQ = FMA(TM, TN, TO * TP);
1307
                              T3G = FNMS(TO, TN, TM * TP);
1308
                              T1b = ri[WS(rs, 12)];
1309
                              T1f = ii[WS(rs, 12)];
1310
                              T1g = FMA(T1a, T1b, T1e * T1f);
1311
                              T3N = FNMS(T1e, T1b, T1a * T1f);
1312
                         }
1313
                         {
1314
                              E TU, TW, T12, T16;
1315
                              TU = ri[WS(rs, 20)];
1316
                              TW = ii[WS(rs, 20)];
1317
                              TX = FMA(TT, TU, TV * TW);
1318
                              T3H = FNMS(TV, TU, TT * TW);
1319
                              T12 = ri[WS(rs, 28)];
1320
                              T16 = ii[WS(rs, 28)];
1321
                              T17 = FMA(T11, T12, T15 * T16);
1322
                              T3M = FNMS(T15, T12, T11 * T16);
1323
                         }
1324
                         {
1325
                              E TY, T1h, T6g, T6h;
1326
                              TY = TQ + TX;
1327
                              T1h = T17 + T1g;
1328
                              T1i = TY + T1h;
1329
                              T7V = T1h - TY;
1330
                              T6g = T3G + T3H;
1331
                              T6h = T3M + T3N;
1332
                              T6i = T6g - T6h;
1333
                              T7D = T6g + T6h;
1334
                         }
1335
                         {
1336
                              E T3I, T3J, T3L, T3O;
1337
                              T3I = T3G - T3H;
1338
                              T3J = TQ - TX;
1339
                              T3K = T3I - T3J;
1340
                              T5u = T3J + T3I;
1341
                              T3L = T17 - T1g;
1342
                              T3O = T3M - T3N;
1343
                              T3P = T3L + T3O;
1344
                              T5v = T3L - T3O;
1345
                         }
1346
                    }
1347
                    {
1348
                         E T1m, T3S, T1C, T3Z, T1r, T3T, T1x, T3Y;
1349
                         {
1350
                              E T1k, T1l, T1z, T1B;
1351
                              T1k = ri[WS(rs, 2)];
1352
                              T1l = ii[WS(rs, 2)];
1353
                              T1m = FMA(T8, T1k, Td * T1l);
1354
                              T3S = FNMS(Td, T1k, T8 * T1l);
1355
                              T1z = ri[WS(rs, 26)];
1356
                              T1B = ii[WS(rs, 26)];
1357
                              T1C = FMA(T1y, T1z, T1A * T1B);
1358
                              T3Z = FNMS(T1A, T1z, T1y * T1B);
1359
                         }
1360
                         {
1361
                              E T1o, T1q, T1u, T1w;
1362
                              T1o = ri[WS(rs, 18)];
1363
                              T1q = ii[WS(rs, 18)];
1364
                              T1r = FMA(T1n, T1o, T1p * T1q);
1365
                              T3T = FNMS(T1p, T1o, T1n * T1q);
1366
                              T1u = ri[WS(rs, 10)];
1367
                              T1w = ii[WS(rs, 10)];
1368
                              T1x = FMA(T1t, T1u, T1v * T1w);
1369
                              T3Y = FNMS(T1v, T1u, T1t * T1w);
1370
                         }
1371
                         {
1372
                              E T1s, T1D, T6k, T6l;
1373
                              T1s = T1m + T1r;
1374
                              T1D = T1x + T1C;
1375
                              T1E = T1s + T1D;
1376
                              T6n = T1s - T1D;
1377
                              T6k = T3S + T3T;
1378
                              T6l = T3Y + T3Z;
1379
                              T6m = T6k - T6l;
1380
                              T7e = T6k + T6l;
1381
                         }
1382
                         {
1383
                              E T3U, T3V, T3X, T40;
1384
                              T3U = T3S - T3T;
1385
                              T3V = T1x - T1C;
1386
                              T3W = T3U + T3V;
1387
                              T5y = T3U - T3V;
1388
                              T3X = T1m - T1r;
1389
                              T40 = T3Y - T3Z;
1390
                              T41 = T3X - T40;
1391
                              T5z = T3X + T40;
1392
                         }
1393
                    }
1394
                    {
1395
                         E T1J, T43, T27, T4a, T1U, T44, T20, T49;
1396
                         {
1397
                              E T1G, T1I, T24, T26;
1398
                              T1G = ri[WS(rs, 30)];
1399
                              T1I = ii[WS(rs, 30)];
1400
                              T1J = FMA(T1F, T1G, T1H * T1I);
1401
                              T43 = FNMS(T1H, T1G, T1F * T1I);
1402
                              T24 = ri[WS(rs, 22)];
1403
                              T26 = ii[WS(rs, 22)];
1404
                              T27 = FMA(T23, T24, T25 * T26);
1405
                              T4a = FNMS(T25, T24, T23 * T26);
1406
                         }
1407
                         {
1408
                              E T1R, T1T, T1X, T1Z;
1409
                              T1R = ri[WS(rs, 14)];
1410
                              T1T = ii[WS(rs, 14)];
1411
                              T1U = FMA(T1Q, T1R, T1S * T1T);
1412
                              T44 = FNMS(T1S, T1R, T1Q * T1T);
1413
                              T1X = ri[WS(rs, 6)];
1414
                              T1Z = ii[WS(rs, 6)];
1415
                              T20 = FMA(T1W, T1X, T1Y * T1Z);
1416
                              T49 = FNMS(T1Y, T1X, T1W * T1Z);
1417
                         }
1418
                         {
1419
                              E T1V, T28, T6q, T6r;
1420
                              T1V = T1J + T1U;
1421
                              T28 = T20 + T27;
1422
                              T29 = T1V + T28;
1423
                              T6p = T1V - T28;
1424
                              T6q = T43 + T44;
1425
                              T6r = T49 + T4a;
1426
                              T6s = T6q - T6r;
1427
                              T7f = T6q + T6r;
1428
                         }
1429
                         {
1430
                              E T45, T46, T48, T4b;
1431
                              T45 = T43 - T44;
1432
                              T46 = T20 - T27;
1433
                              T47 = T45 + T46;
1434
                              T5B = T45 - T46;
1435
                              T48 = T1J - T1U;
1436
                              T4b = T49 - T4a;
1437
                              T4c = T48 - T4b;
1438
                              T5C = T48 + T4b;
1439
                         }
1440
                    }
1441
                    {
1442
                         E T2B, T4r, T2G, T4s, T4q, T4t, T2M, T4m, T2P, T4n, T4l, T4o;
1443
                         {
1444
                              E T2z, T2A, T2D, T2F;
1445
                              T2z = ri[WS(rs, 5)];
1446
                              T2A = ii[WS(rs, 5)];
1447
                              T2B = FMA(T21, T2z, T22 * T2A);
1448
                              T4r = FNMS(T22, T2z, T21 * T2A);
1449
                              T2D = ri[WS(rs, 21)];
1450
                              T2F = ii[WS(rs, 21)];
1451
                              T2G = FMA(T2C, T2D, T2E * T2F);
1452
                              T4s = FNMS(T2E, T2D, T2C * T2F);
1453
                         }
1454
                         T4q = T2B - T2G;
1455
                         T4t = T4r - T4s;
1456
                         {
1457
                              E T2J, T2L, T2N, T2O;
1458
                              T2J = ri[WS(rs, 29)];
1459
                              T2L = ii[WS(rs, 29)];
1460
                              T2M = FMA(T2I, T2J, T2K * T2L);
1461
                              T4m = FNMS(T2K, T2J, T2I * T2L);
1462
                              T2N = ri[WS(rs, 13)];
1463
                              T2O = ii[WS(rs, 13)];
1464
                              T2P = FMA(T1M, T2N, T1P * T2O);
1465
                              T4n = FNMS(T1P, T2N, T1M * T2O);
1466
                         }
1467
                         T4l = T2M - T2P;
1468
                         T4o = T4m - T4n;
1469
                         {
1470
                              E T2H, T2Q, T6C, T6D;
1471
                              T2H = T2B + T2G;
1472
                              T2Q = T2M + T2P;
1473
                              T2R = T2H + T2Q;
1474
                              T6z = T2Q - T2H;
1475
                              T6C = T4r + T4s;
1476
                              T6D = T4m + T4n;
1477
                              T6E = T6C - T6D;
1478
                              T7k = T6C + T6D;
1479
                         }
1480
                         {
1481
                              E T4p, T4u, T4C, T4D;
1482
                              T4p = T4l - T4o;
1483
                              T4u = T4q + T4t;
1484
                              T4v = KP707106781 * (T4p - T4u);
1485
                              T5H = KP707106781 * (T4u + T4p);
1486
                              T4C = T4t - T4q;
1487
                              T4D = T4l + T4o;
1488
                              T4E = KP707106781 * (T4C - T4D);
1489
                              T5K = KP707106781 * (T4C + T4D);
1490
                         }
1491
                    }
1492
                    {
1493
                         E T3k, T4M, T3p, T4N, T4O, T4P, T3t, T4S, T3w, T4T, T4R, T4U;
1494
                         {
1495
                              E T3i, T3j, T3m, T3o;
1496
                              T3i = ri[WS(rs, 3)];
1497
                              T3j = ii[WS(rs, 3)];
1498
                              T3k = FMA(T3, T3i, T6 * T3j);
1499
                              T4M = FNMS(T6, T3i, T3 * T3j);
1500
                              T3m = ri[WS(rs, 19)];
1501
                              T3o = ii[WS(rs, 19)];
1502
                              T3p = FMA(T3l, T3m, T3n * T3o);
1503
                              T4N = FNMS(T3n, T3m, T3l * T3o);
1504
                         }
1505
                         T4O = T4M - T4N;
1506
                         T4P = T3k - T3p;
1507
                         {
1508
                              E T3r, T3s, T3u, T3v;
1509
                              T3r = ri[WS(rs, 27)];
1510
                              T3s = ii[WS(rs, 27)];
1511
                              T3t = FMA(Th, T3r, Tl * T3s);
1512
                              T4S = FNMS(Tl, T3r, Th * T3s);
1513
                              T3u = ri[WS(rs, 11)];
1514
                              T3v = ii[WS(rs, 11)];
1515
                              T3w = FMA(Tg, T3u, Tk * T3v);
1516
                              T4T = FNMS(Tk, T3u, Tg * T3v);
1517
                         }
1518
                         T4R = T3t - T3w;
1519
                         T4U = T4S - T4T;
1520
                         {
1521
                              E T3q, T3x, T6I, T6J;
1522
                              T3q = T3k + T3p;
1523
                              T3x = T3t + T3w;
1524
                              T3y = T3q + T3x;
1525
                              T6P = T3x - T3q;
1526
                              T6I = T4M + T4N;
1527
                              T6J = T4S + T4T;
1528
                              T6K = T6I - T6J;
1529
                              T7p = T6I + T6J;
1530
                         }
1531
                         {
1532
                              E T4Q, T4V, T53, T54;
1533
                              T4Q = T4O - T4P;
1534
                              T4V = T4R + T4U;
1535
                              T4W = KP707106781 * (T4Q - T4V);
1536
                              T5R = KP707106781 * (T4Q + T4V);
1537
                              T53 = T4R - T4U;
1538
                              T54 = T4P + T4O;
1539
                              T55 = KP707106781 * (T53 - T54);
1540
                              T5O = KP707106781 * (T54 + T53);
1541
                         }
1542
                    }
1543
                    {
1544
                         E T2b, T7x, T7K, T7M, T3A, T7L, T7A, T7B;
1545
                         {
1546
                              E T1j, T2a, T7C, T7J;
1547
                              T1j = TL + T1i;
1548
                              T2a = T1E + T29;
1549
                              T2b = T1j + T2a;
1550
                              T7x = T1j - T2a;
1551
                              T7C = T7e + T7f;
1552
                              T7J = T7D + T7I;
1553
                              T7K = T7C + T7J;
1554
                              T7M = T7J - T7C;
1555
                         }
1556
                         {
1557
                              E T2S, T3z, T7y, T7z;
1558
                              T2S = T2y + T2R;
1559
                              T3z = T3h + T3y;
1560
                              T3A = T2S + T3z;
1561
                              T7L = T3z - T2S;
1562
                              T7y = T7j + T7k;
1563
                              T7z = T7o + T7p;
1564
                              T7A = T7y - T7z;
1565
                              T7B = T7y + T7z;
1566
                         }
1567
                         ri[WS(rs, 16)] = T2b - T3A;
1568
                         ii[WS(rs, 16)] = T7K - T7B;
1569
                         ri[0] = T2b + T3A;
1570
                         ii[0] = T7B + T7K;
1571
                         ri[WS(rs, 24)] = T7x - T7A;
1572
                         ii[WS(rs, 24)] = T7M - T7L;
1573
                         ri[WS(rs, 8)] = T7x + T7A;
1574
                         ii[WS(rs, 8)] = T7L + T7M;
1575
                    }
1576
                    {
1577
                         E T7h, T7t, T7Q, T7S, T7m, T7u, T7r, T7v;
1578
                         {
1579
                              E T7d, T7g, T7O, T7P;
1580
                              T7d = TL - T1i;
1581
                              T7g = T7e - T7f;
1582
                              T7h = T7d + T7g;
1583
                              T7t = T7d - T7g;
1584
                              T7O = T29 - T1E;
1585
                              T7P = T7I - T7D;
1586
                              T7Q = T7O + T7P;
1587
                              T7S = T7P - T7O;
1588
                         }
1589
                         {
1590
                              E T7i, T7l, T7n, T7q;
1591
                              T7i = T2y - T2R;
1592
                              T7l = T7j - T7k;
1593
                              T7m = T7i + T7l;
1594
                              T7u = T7l - T7i;
1595
                              T7n = T3h - T3y;
1596
                              T7q = T7o - T7p;
1597
                              T7r = T7n - T7q;
1598
                              T7v = T7n + T7q;
1599
                         }
1600
                         {
1601
                              E T7s, T7N, T7w, T7R;
1602
                              T7s = KP707106781 * (T7m + T7r);
1603
                              ri[WS(rs, 20)] = T7h - T7s;
1604
                              ri[WS(rs, 4)] = T7h + T7s;
1605
                              T7N = KP707106781 * (T7u + T7v);
1606
                              ii[WS(rs, 4)] = T7N + T7Q;
1607
                              ii[WS(rs, 20)] = T7Q - T7N;
1608
                              T7w = KP707106781 * (T7u - T7v);
1609
                              ri[WS(rs, 28)] = T7t - T7w;
1610
                              ri[WS(rs, 12)] = T7t + T7w;
1611
                              T7R = KP707106781 * (T7r - T7m);
1612
                              ii[WS(rs, 12)] = T7R + T7S;
1613
                              ii[WS(rs, 28)] = T7S - T7R;
1614
                         }
1615
                    }
1616
                    {
1617
                         E T6j, T7X, T83, T6X, T6u, T7U, T77, T7b, T70, T82, T6G, T6U, T74, T7a, T6R;
1618
                         E T6V;
1619
                         {
1620
                              E T6o, T6t, T6A, T6F;
1621
                              T6j = T6f - T6i;
1622
                              T7X = T7V + T7W;
1623
                              T83 = T7W - T7V;
1624
                              T6X = T6f + T6i;
1625
                              T6o = T6m - T6n;
1626
                              T6t = T6p + T6s;
1627
                              T6u = KP707106781 * (T6o - T6t);
1628
                              T7U = KP707106781 * (T6o + T6t);
1629
                              {
1630
                                   E T75, T76, T6Y, T6Z;
1631
                                   T75 = T6H + T6K;
1632
                                   T76 = T6O + T6P;
1633
                                   T77 = FNMS(KP382683432, T76, KP923879532 * T75);
1634
                                   T7b = FMA(KP923879532, T76, KP382683432 * T75);
1635
                                   T6Y = T6n + T6m;
1636
                                   T6Z = T6p - T6s;
1637
                                   T70 = KP707106781 * (T6Y + T6Z);
1638
                                   T82 = KP707106781 * (T6Z - T6Y);
1639
                              }
1640
                              T6A = T6y - T6z;
1641
                              T6F = T6B - T6E;
1642
                              T6G = FMA(KP923879532, T6A, KP382683432 * T6F);
1643
                              T6U = FNMS(KP923879532, T6F, KP382683432 * T6A);
1644
                              {
1645
                                   E T72, T73, T6L, T6Q;
1646
                                   T72 = T6y + T6z;
1647
                                   T73 = T6B + T6E;
1648
                                   T74 = FMA(KP382683432, T72, KP923879532 * T73);
1649
                                   T7a = FNMS(KP382683432, T73, KP923879532 * T72);
1650
                                   T6L = T6H - T6K;
1651
                                   T6Q = T6O - T6P;
1652
                                   T6R = FNMS(KP923879532, T6Q, KP382683432 * T6L);
1653
                                   T6V = FMA(KP382683432, T6Q, KP923879532 * T6L);
1654
                              }
1655
                         }
1656
                         {
1657
                              E T6v, T6S, T81, T84;
1658
                              T6v = T6j + T6u;
1659
                              T6S = T6G + T6R;
1660
                              ri[WS(rs, 22)] = T6v - T6S;
1661
                              ri[WS(rs, 6)] = T6v + T6S;
1662
                              T81 = T6U + T6V;
1663
                              T84 = T82 + T83;
1664
                              ii[WS(rs, 6)] = T81 + T84;
1665
                              ii[WS(rs, 22)] = T84 - T81;
1666
                         }
1667
                         {
1668
                              E T6T, T6W, T85, T86;
1669
                              T6T = T6j - T6u;
1670
                              T6W = T6U - T6V;
1671
                              ri[WS(rs, 30)] = T6T - T6W;
1672
                              ri[WS(rs, 14)] = T6T + T6W;
1673
                              T85 = T6R - T6G;
1674
                              T86 = T83 - T82;
1675
                              ii[WS(rs, 14)] = T85 + T86;
1676
                              ii[WS(rs, 30)] = T86 - T85;
1677
                         }
1678
                         {
1679
                              E T71, T78, T7T, T7Y;
1680
                              T71 = T6X + T70;
1681
                              T78 = T74 + T77;
1682
                              ri[WS(rs, 18)] = T71 - T78;
1683
                              ri[WS(rs, 2)] = T71 + T78;
1684
                              T7T = T7a + T7b;
1685
                              T7Y = T7U + T7X;
1686
                              ii[WS(rs, 2)] = T7T + T7Y;
1687
                              ii[WS(rs, 18)] = T7Y - T7T;
1688
                         }
1689
                         {
1690
                              E T79, T7c, T7Z, T80;
1691
                              T79 = T6X - T70;
1692
                              T7c = T7a - T7b;
1693
                              ri[WS(rs, 26)] = T79 - T7c;
1694
                              ri[WS(rs, 10)] = T79 + T7c;
1695
                              T7Z = T77 - T74;
1696
                              T80 = T7X - T7U;
1697
                              ii[WS(rs, 10)] = T7Z + T80;
1698
                              ii[WS(rs, 26)] = T80 - T7Z;
1699
                         }
1700
                    }
1701
                    {
1702
                         E T3R, T5d, T8r, T8x, T4e, T8o, T5n, T5r, T4G, T5a, T5g, T8w, T5k, T5q, T57;
1703
                         E T5b, T3Q, T8p;
1704
                         T3Q = KP707106781 * (T3K - T3P);
1705
                         T3R = T3F - T3Q;
1706
                         T5d = T3F + T3Q;
1707
                         T8p = KP707106781 * (T5v - T5u);
1708
                         T8r = T8p + T8q;
1709
                         T8x = T8q - T8p;
1710
                         {
1711
                              E T42, T4d, T5l, T5m;
1712
                              T42 = FNMS(KP923879532, T41, KP382683432 * T3W);
1713
                              T4d = FMA(KP382683432, T47, KP923879532 * T4c);
1714
                              T4e = T42 - T4d;
1715
                              T8o = T42 + T4d;
1716
                              T5l = T4L + T4W;
1717
                              T5m = T52 + T55;
1718
                              T5n = FNMS(KP555570233, T5m, KP831469612 * T5l);
1719
                              T5r = FMA(KP831469612, T5m, KP555570233 * T5l);
1720
                         }
1721
                         {
1722
                              E T4w, T4F, T5e, T5f;
1723
                              T4w = T4k - T4v;
1724
                              T4F = T4B - T4E;
1725
                              T4G = FMA(KP980785280, T4w, KP195090322 * T4F);
1726
                              T5a = FNMS(KP980785280, T4F, KP195090322 * T4w);
1727
                              T5e = FMA(KP923879532, T3W, KP382683432 * T41);
1728
                              T5f = FNMS(KP923879532, T47, KP382683432 * T4c);
1729
                              T5g = T5e + T5f;
1730
                              T8w = T5f - T5e;
1731
                         }
1732
                         {
1733
                              E T5i, T5j, T4X, T56;
1734
                              T5i = T4k + T4v;
1735
                              T5j = T4B + T4E;
1736
                              T5k = FMA(KP555570233, T5i, KP831469612 * T5j);
1737
                              T5q = FNMS(KP555570233, T5j, KP831469612 * T5i);
1738
                              T4X = T4L - T4W;
1739
                              T56 = T52 - T55;
1740
                              T57 = FNMS(KP980785280, T56, KP195090322 * T4X);
1741
                              T5b = FMA(KP195090322, T56, KP980785280 * T4X);
1742
                         }
1743
                         {
1744
                              E T4f, T58, T8v, T8y;
1745
                              T4f = T3R + T4e;
1746
                              T58 = T4G + T57;
1747
                              ri[WS(rs, 23)] = T4f - T58;
1748
                              ri[WS(rs, 7)] = T4f + T58;
1749
                              T8v = T5a + T5b;
1750
                              T8y = T8w + T8x;
1751
                              ii[WS(rs, 7)] = T8v + T8y;
1752
                              ii[WS(rs, 23)] = T8y - T8v;
1753
                         }
1754
                         {
1755
                              E T59, T5c, T8z, T8A;
1756
                              T59 = T3R - T4e;
1757
                              T5c = T5a - T5b;
1758
                              ri[WS(rs, 31)] = T59 - T5c;
1759
                              ri[WS(rs, 15)] = T59 + T5c;
1760
                              T8z = T57 - T4G;
1761
                              T8A = T8x - T8w;
1762
                              ii[WS(rs, 15)] = T8z + T8A;
1763
                              ii[WS(rs, 31)] = T8A - T8z;
1764
                         }
1765
                         {
1766
                              E T5h, T5o, T8n, T8s;
1767
                              T5h = T5d + T5g;
1768
                              T5o = T5k + T5n;
1769
                              ri[WS(rs, 19)] = T5h - T5o;
1770
                              ri[WS(rs, 3)] = T5h + T5o;
1771
                              T8n = T5q + T5r;
1772
                              T8s = T8o + T8r;
1773
                              ii[WS(rs, 3)] = T8n + T8s;
1774
                              ii[WS(rs, 19)] = T8s - T8n;
1775
                         }
1776
                         {
1777
                              E T5p, T5s, T8t, T8u;
1778
                              T5p = T5d - T5g;
1779
                              T5s = T5q - T5r;
1780
                              ri[WS(rs, 27)] = T5p - T5s;
1781
                              ri[WS(rs, 11)] = T5p + T5s;
1782
                              T8t = T5n - T5k;
1783
                              T8u = T8r - T8o;
1784
                              ii[WS(rs, 11)] = T8t + T8u;
1785
                              ii[WS(rs, 27)] = T8u - T8t;
1786
                         }
1787
                    }
1788
                    {
1789
                         E T5x, T5Z, T8d, T8j, T5E, T88, T69, T6d, T5M, T5W, T62, T8i, T66, T6c, T5T;
1790
                         E T5X, T5w, T89;
1791
                         T5w = KP707106781 * (T5u + T5v);
1792
                         T5x = T5t - T5w;
1793
                         T5Z = T5t + T5w;
1794
                         T89 = KP707106781 * (T3K + T3P);
1795
                         T8d = T89 + T8c;
1796
                         T8j = T8c - T89;
1797
                         {
1798
                              E T5A, T5D, T67, T68;
1799
                              T5A = FNMS(KP382683432, T5z, KP923879532 * T5y);
1800
                              T5D = FMA(KP923879532, T5B, KP382683432 * T5C);
1801
                              T5E = T5A - T5D;
1802
                              T88 = T5A + T5D;
1803
                              T67 = T5N + T5O;
1804
                              T68 = T5Q + T5R;
1805
                              T69 = FNMS(KP195090322, T68, KP980785280 * T67);
1806
                              T6d = FMA(KP195090322, T67, KP980785280 * T68);
1807
                         }
1808
                         {
1809
                              E T5I, T5L, T60, T61;
1810
                              T5I = T5G - T5H;
1811
                              T5L = T5J - T5K;
1812
                              T5M = FMA(KP555570233, T5I, KP831469612 * T5L);
1813
                              T5W = FNMS(KP831469612, T5I, KP555570233 * T5L);
1814
                              T60 = FMA(KP382683432, T5y, KP923879532 * T5z);
1815
                              T61 = FNMS(KP382683432, T5B, KP923879532 * T5C);
1816
                              T62 = T60 + T61;
1817
                              T8i = T61 - T60;
1818
                         }
1819
                         {
1820
                              E T64, T65, T5P, T5S;
1821
                              T64 = T5G + T5H;
1822
                              T65 = T5J + T5K;
1823
                              T66 = FMA(KP980785280, T64, KP195090322 * T65);
1824
                              T6c = FNMS(KP195090322, T64, KP980785280 * T65);
1825
                              T5P = T5N - T5O;
1826
                              T5S = T5Q - T5R;
1827
                              T5T = FNMS(KP831469612, T5S, KP555570233 * T5P);
1828
                              T5X = FMA(KP831469612, T5P, KP555570233 * T5S);
1829
                         }
1830
                         {
1831
                              E T5F, T5U, T8h, T8k;
1832
                              T5F = T5x + T5E;
1833
                              T5U = T5M + T5T;
1834
                              ri[WS(rs, 21)] = T5F - T5U;
1835
                              ri[WS(rs, 5)] = T5F + T5U;
1836
                              T8h = T5W + T5X;
1837
                              T8k = T8i + T8j;
1838
                              ii[WS(rs, 5)] = T8h + T8k;
1839
                              ii[WS(rs, 21)] = T8k - T8h;
1840
                         }
1841
                         {
1842
                              E T5V, T5Y, T8l, T8m;
1843
                              T5V = T5x - T5E;
1844
                              T5Y = T5W - T5X;
1845
                              ri[WS(rs, 29)] = T5V - T5Y;
1846
                              ri[WS(rs, 13)] = T5V + T5Y;
1847
                              T8l = T5T - T5M;
1848
                              T8m = T8j - T8i;
1849
                              ii[WS(rs, 13)] = T8l + T8m;
1850
                              ii[WS(rs, 29)] = T8m - T8l;
1851
                         }
1852
                         {
1853
                              E T63, T6a, T87, T8e;
1854
                              T63 = T5Z + T62;
1855
                              T6a = T66 + T69;
1856
                              ri[WS(rs, 17)] = T63 - T6a;
1857
                              ri[WS(rs, 1)] = T63 + T6a;
1858
                              T87 = T6c + T6d;
1859
                              T8e = T88 + T8d;
1860
                              ii[WS(rs, 1)] = T87 + T8e;
1861
                              ii[WS(rs, 17)] = T8e - T87;
1862
                         }
1863
                         {
1864
                              E T6b, T6e, T8f, T8g;
1865
                              T6b = T5Z - T62;
1866
                              T6e = T6c - T6d;
1867
                              ri[WS(rs, 25)] = T6b - T6e;
1868
                              ri[WS(rs, 9)] = T6b + T6e;
1869
                              T8f = T69 - T66;
1870
                              T8g = T8d - T88;
1871
                              ii[WS(rs, 9)] = T8f + T8g;
1872
                              ii[WS(rs, 25)] = T8g - T8f;
1873
                         }
1874
                    }
1875
               }
1876
          }
1877
     }
1878
}
1879

    
1880
static const tw_instr twinstr[] = {
1881
     {TW_CEXP, 0, 1},
1882
     {TW_CEXP, 0, 3},
1883
     {TW_CEXP, 0, 9},
1884
     {TW_CEXP, 0, 27},
1885
     {TW_NEXT, 1, 0}
1886
};
1887

    
1888
/*
 * Descriptor handed to X(kdft_dit_register) together with t2_32.  In order
 * it carries: the value 32, the name string "t2_32", the twiddle
 * instruction table `twinstr`, the address of GENUS, the four-number group
 * {376, 168, 112, 0}, and three trailing zeros.
 *
 * NOTE(review): the four-number group is presumably the operation count
 * (additions, multiplications, fused multiply-adds, other) and the three
 * zeros presumably stride constraints; confirm against the ct_desc
 * declaration.
 */
static const ct_desc desc = { 32, "t2_32", twinstr, &GENUS, {376, 168, 112, 0}, 0, 0, 0 };
1889

    
1890
/*
 * Registration entry point for this codelet: passes the t2_32 kernel and
 * its descriptor `desc` to X(kdft_dit_register) for the planner `p`.
 * Returns nothing; any effect on the planner is whatever
 * X(kdft_dit_register) does with these arguments.
 */
void X(codelet_t2_32) (planner *p) {
     X(kdft_dit_register) (p, t2_32, &desc);
}
1893
#endif