To check out this repository, please hg clone the following URL, or open it in EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / n1_32.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (33.3 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:11 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 32 -name n1_32 -include dft/scalar/n.h */
29

    
30
/*
31
 * This function contains 372 FP additions, 136 FP multiplications,
32
 * (or, 236 additions, 0 multiplications, 136 fused multiply/add),
33
 * 100 stack variables, 7 constants, and 128 memory accesses
34
 */
35
#include "dft/scalar/n.h"
36

    
37
/*
 * n1_32 -- straight-line kernel emitted by genfft (gen_notw, -n 32, -fma).
 *
 * This is generated code: regenerate it rather than editing it by hand.
 *
 * Each pass of the loop loads the 32 elements ri[WS(is, k)] and
 * ii[WS(is, k)] for k = 0..31, combines them using only additions,
 * subtractions and the FMA/FNMS macros together with the seven constants
 * declared below, and stores 32 results to ro[WS(os, k)] and io[WS(os, k)].
 *
 * Parameters:
 *   ri, ii    input arrays (read only); presumably the real and imaginary
 *             parts of the input -- confirm against dft/codelet-dft.h.
 *   ro, io    output arrays; every index WS(os, 0..31) is written once
 *             per iteration.
 *   is, os    input and output strides, only ever used through WS().
 *   v         number of loop iterations to run.
 *   ivs, ovs  amounts added to the input and output pointers after each
 *             iteration.
 *
 * FMA, FNMS, DK, WS, E and MAKE_VOLATILE_STRIDE are defined outside this
 * file.  Presumably FMA(a, b, c) is a*b + c and FNMS(a, b, c) is c - a*b;
 * confirm in the included headers before relying on that.
 */
static void n1_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* numerically cos(pi/16) */
     DK(KP198912367, +0.198912367379658006911597622644676228597850501); /* numerically tan(pi/16) */
     DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* numerically cos(3*pi/16) */
     DK(KP668178637, +0.668178637919298919997757686523080761552472251); /* numerically tan(3*pi/16) */
     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
     DK(KP414213562, +0.414213562373095048801688724209698078569671875); /* numerically tan(pi/8) */
     {
          INT i;
          /* One iteration per transform; all four pointers advance together. */
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) {
               /* Intermediates shared between the load stages and the store stages. */
               E T7, T4r, T4Z, T18, T1z, T3t, T3T, T2T, Te, T1f, T50, T4s, T2W, T3u, T1G;
               E T3U, Tm, T1n, T1O, T2Z, T3y, T3X, T4w, T53, Tt, T1u, T1V, T2Y, T3B, T3W;
               E T4z, T52, T2t, T3L, T3O, T2K, TR, TY, T5F, T5G, T5H, T5I, T4R, T5k, T2E;
               E T3M, T4W, T5j, T2N, T3P, T22, T3E, T3H, T2j, TC, TJ, T5A, T5B, T5C, T5D;
               E T4G, T5h, T2d, T3F, T4L, T5g, T2m, T3I;
               /* Load inputs 0, 16, 8, 24 and form their sums and differences. */
               {
                    E T3, T1x, T14, T2R, T6, T2S, T17, T1y;
                    {
                         E T1, T2, T12, T13;
                         T1 = ri[0];
                         T2 = ri[WS(is, 16)];
                         T3 = T1 + T2;
                         T1x = T1 - T2;
                         T12 = ii[0];
                         T13 = ii[WS(is, 16)];
                         T14 = T12 + T13;
                         T2R = T12 - T13;
                    }
                    {
                         E T4, T5, T15, T16;
                         T4 = ri[WS(is, 8)];
                         T5 = ri[WS(is, 24)];
                         T6 = T4 + T5;
                         T2S = T4 - T5;
                         T15 = ii[WS(is, 8)];
                         T16 = ii[WS(is, 24)];
                         T17 = T15 + T16;
                         T1y = T15 - T16;
                    }
                    T7 = T3 + T6;
                    T4r = T3 - T6;
                    T4Z = T14 - T17;
                    T18 = T14 + T17;
                    T1z = T1x + T1y;
                    T3t = T1x - T1y;
                    T3T = T2S + T2R;
                    T2T = T2R - T2S;
               }
               /* Load inputs 4, 20, 28, 12. */
               {
                    E Ta, T1A, T1b, T1B, Td, T1D, T1e, T1E;
                    {
                         E T8, T9, T19, T1a;
                         T8 = ri[WS(is, 4)];
                         T9 = ri[WS(is, 20)];
                         Ta = T8 + T9;
                         T1A = T8 - T9;
                         T19 = ii[WS(is, 4)];
                         T1a = ii[WS(is, 20)];
                         T1b = T19 + T1a;
                         T1B = T19 - T1a;
                    }
                    {
                         E Tb, Tc, T1c, T1d;
                         Tb = ri[WS(is, 28)];
                         Tc = ri[WS(is, 12)];
                         Td = Tb + Tc;
                         T1D = Tb - Tc;
                         T1c = ii[WS(is, 28)];
                         T1d = ii[WS(is, 12)];
                         T1e = T1c + T1d;
                         T1E = T1c - T1d;
                    }
                    Te = Ta + Td;
                    T1f = T1b + T1e;
                    T50 = Td - Ta;
                    T4s = T1b - T1e;
                    {
                         E T2U, T2V, T1C, T1F;
                         T2U = T1B - T1A;
                         T2V = T1D + T1E;
                         T2W = T2U + T2V;
                         T3u = T2U - T2V;
                         T1C = T1A + T1B;
                         T1F = T1D - T1E;
                         T1G = T1C + T1F;
                         T3U = T1F - T1C;
                    }
               }
               /* Load inputs 2, 18, 10, 26. */
               {
                    E Ti, T1L, T1j, T1I, Tl, T1J, T1m, T1M, T1K, T1N;
                    {
                         E Tg, Th, T1h, T1i;
                         Tg = ri[WS(is, 2)];
                         Th = ri[WS(is, 18)];
                         Ti = Tg + Th;
                         T1L = Tg - Th;
                         T1h = ii[WS(is, 2)];
                         T1i = ii[WS(is, 18)];
                         T1j = T1h + T1i;
                         T1I = T1h - T1i;
                    }
                    {
                         E Tj, Tk, T1k, T1l;
                         Tj = ri[WS(is, 10)];
                         Tk = ri[WS(is, 26)];
                         Tl = Tj + Tk;
                         T1J = Tj - Tk;
                         T1k = ii[WS(is, 10)];
                         T1l = ii[WS(is, 26)];
                         T1m = T1k + T1l;
                         T1M = T1k - T1l;
                    }
                    Tm = Ti + Tl;
                    T1n = T1j + T1m;
                    T1K = T1I - T1J;
                    T1N = T1L + T1M;
                    T1O = FNMS(KP414213562, T1N, T1K);
                    T2Z = FMA(KP414213562, T1K, T1N);
                    {
                         E T3w, T3x, T4u, T4v;
                         T3w = T1J + T1I;
                         T3x = T1L - T1M;
                         T3y = FMA(KP414213562, T3x, T3w);
                         T3X = FNMS(KP414213562, T3w, T3x);
                         T4u = T1j - T1m;
                         T4v = Ti - Tl;
                         T4w = T4u - T4v;
                         T53 = T4v + T4u;
                    }
               }
               /* Load inputs 30, 14, 6, 22. */
               {
                    E Tp, T1S, T1q, T1P, Ts, T1Q, T1t, T1T, T1R, T1U;
                    {
                         E Tn, To, T1o, T1p;
                         Tn = ri[WS(is, 30)];
                         To = ri[WS(is, 14)];
                         Tp = Tn + To;
                         T1S = Tn - To;
                         T1o = ii[WS(is, 30)];
                         T1p = ii[WS(is, 14)];
                         T1q = T1o + T1p;
                         T1P = T1o - T1p;
                    }
                    {
                         E Tq, Tr, T1r, T1s;
                         Tq = ri[WS(is, 6)];
                         Tr = ri[WS(is, 22)];
                         Ts = Tq + Tr;
                         T1Q = Tq - Tr;
                         T1r = ii[WS(is, 6)];
                         T1s = ii[WS(is, 22)];
                         T1t = T1r + T1s;
                         T1T = T1r - T1s;
                    }
                    Tt = Tp + Ts;
                    T1u = T1q + T1t;
                    T1R = T1P - T1Q;
                    T1U = T1S + T1T;
                    T1V = FMA(KP414213562, T1U, T1R);
                    T2Y = FNMS(KP414213562, T1R, T1U);
                    {
                         E T3z, T3A, T4x, T4y;
                         T3z = T1Q + T1P;
                         T3A = T1S - T1T;
                         T3B = FNMS(KP414213562, T3A, T3z);
                         T3W = FMA(KP414213562, T3z, T3A);
                         T4x = Tp - Ts;
                         T4y = T1q - T1t;
                         T4z = T4x + T4y;
                         T52 = T4x - T4y;
                    }
               }
               /* Load inputs 31, 15, 7, 23, 3, 19, 27, 11. */
               {
                    E TN, T2G, T2r, T4N, TQ, T2s, T2J, T4O, TU, T2x, T2w, T4T, TX, T2z, T2C;
                    E T4U;
                    {
                         E TL, TM, T2p, T2q;
                         TL = ri[WS(is, 31)];
                         TM = ri[WS(is, 15)];
                         TN = TL + TM;
                         T2G = TL - TM;
                         T2p = ii[WS(is, 31)];
                         T2q = ii[WS(is, 15)];
                         T2r = T2p - T2q;
                         T4N = T2p + T2q;
                    }
                    {
                         E TO, TP, T2H, T2I;
                         TO = ri[WS(is, 7)];
                         TP = ri[WS(is, 23)];
                         TQ = TO + TP;
                         T2s = TO - TP;
                         T2H = ii[WS(is, 7)];
                         T2I = ii[WS(is, 23)];
                         T2J = T2H - T2I;
                         T4O = T2H + T2I;
                    }
                    {
                         E TS, TT, T2u, T2v;
                         TS = ri[WS(is, 3)];
                         TT = ri[WS(is, 19)];
                         TU = TS + TT;
                         T2x = TS - TT;
                         T2u = ii[WS(is, 3)];
                         T2v = ii[WS(is, 19)];
                         T2w = T2u - T2v;
                         T4T = T2u + T2v;
                    }
                    {
                         E TV, TW, T2A, T2B;
                         TV = ri[WS(is, 27)];
                         TW = ri[WS(is, 11)];
                         TX = TV + TW;
                         T2z = TV - TW;
                         T2A = ii[WS(is, 27)];
                         T2B = ii[WS(is, 11)];
                         T2C = T2A - T2B;
                         T4U = T2A + T2B;
                    }
                    T2t = T2r - T2s;
                    T3L = T2G - T2J;
                    T3O = T2s + T2r;
                    T2K = T2G + T2J;
                    TR = TN + TQ;
                    TY = TU + TX;
                    T5F = TR - TY;
                    {
                         E T4P, T4Q, T2y, T2D;
                         T5G = T4N + T4O;
                         T5H = T4T + T4U;
                         T5I = T5G - T5H;
                         T4P = T4N - T4O;
                         T4Q = TX - TU;
                         T4R = T4P - T4Q;
                         T5k = T4Q + T4P;
                         T2y = T2w - T2x;
                         T2D = T2z + T2C;
                         T2E = T2y + T2D;
                         T3M = T2D - T2y;
                         {
                              E T4S, T4V, T2L, T2M;
                              T4S = TN - TQ;
                              T4V = T4T - T4U;
                              T4W = T4S - T4V;
                              T5j = T4S + T4V;
                              T2L = T2x + T2w;
                              T2M = T2z - T2C;
                              T2N = T2L + T2M;
                              T3P = T2L - T2M;
                         }
                    }
               }
               /* Load inputs 1, 17, 9, 25, 5, 21, 29, 13. */
               {
                    E Ty, T2f, T20, T4C, TB, T21, T2i, T4D, TF, T26, T25, T4I, TI, T28, T2b;
                    E T4J;
                    {
                         E Tw, Tx, T1Y, T1Z;
                         Tw = ri[WS(is, 1)];
                         Tx = ri[WS(is, 17)];
                         Ty = Tw + Tx;
                         T2f = Tw - Tx;
                         T1Y = ii[WS(is, 1)];
                         T1Z = ii[WS(is, 17)];
                         T20 = T1Y - T1Z;
                         T4C = T1Y + T1Z;
                    }
                    {
                         E Tz, TA, T2g, T2h;
                         Tz = ri[WS(is, 9)];
                         TA = ri[WS(is, 25)];
                         TB = Tz + TA;
                         T21 = Tz - TA;
                         T2g = ii[WS(is, 9)];
                         T2h = ii[WS(is, 25)];
                         T2i = T2g - T2h;
                         T4D = T2g + T2h;
                    }
                    {
                         E TD, TE, T23, T24;
                         TD = ri[WS(is, 5)];
                         TE = ri[WS(is, 21)];
                         TF = TD + TE;
                         T26 = TD - TE;
                         T23 = ii[WS(is, 5)];
                         T24 = ii[WS(is, 21)];
                         T25 = T23 - T24;
                         T4I = T23 + T24;
                    }
                    {
                         E TG, TH, T29, T2a;
                         TG = ri[WS(is, 29)];
                         TH = ri[WS(is, 13)];
                         TI = TG + TH;
                         T28 = TG - TH;
                         T29 = ii[WS(is, 29)];
                         T2a = ii[WS(is, 13)];
                         T2b = T29 - T2a;
                         T4J = T29 + T2a;
                    }
                    T22 = T20 - T21;
                    T3E = T2f - T2i;
                    T3H = T21 + T20;
                    T2j = T2f + T2i;
                    TC = Ty + TB;
                    TJ = TF + TI;
                    T5A = TC - TJ;
                    {
                         E T4E, T4F, T27, T2c;
                         T5B = T4C + T4D;
                         T5C = T4I + T4J;
                         T5D = T5B - T5C;
                         T4E = T4C - T4D;
                         T4F = TI - TF;
                         T4G = T4E - T4F;
                         T5h = T4F + T4E;
                         T27 = T25 - T26;
                         T2c = T28 + T2b;
                         T2d = T27 + T2c;
                         T3F = T2c - T27;
                         {
                              E T4H, T4K, T2k, T2l;
                              T4H = Ty - TB;
                              T4K = T4I - T4J;
                              T4L = T4H - T4K;
                              T5g = T4H + T4K;
                              T2k = T26 + T25;
                              T2l = T28 - T2b;
                              T2m = T2k + T2l;
                              T3I = T2k - T2l;
                         }
                    }
               }
               /* Store outputs 22, 6, 14, 30. */
               {
                    E T4B, T5b, T5a, T5c, T4Y, T56, T55, T57;
                    {
                         E T4t, T4A, T58, T59;
                         T4t = T4r - T4s;
                         T4A = T4w - T4z;
                         T4B = FMA(KP707106781, T4A, T4t);
                         T5b = FNMS(KP707106781, T4A, T4t);
                         T58 = FMA(KP414213562, T4R, T4W);
                         T59 = FNMS(KP414213562, T4G, T4L);
                         T5a = T58 - T59;
                         T5c = T59 + T58;
                    }
                    {
                         E T4M, T4X, T51, T54;
                         T4M = FMA(KP414213562, T4L, T4G);
                         T4X = FNMS(KP414213562, T4W, T4R);
                         T4Y = T4M - T4X;
                         T56 = T4M + T4X;
                         T51 = T4Z - T50;
                         T54 = T52 - T53;
                         T55 = FNMS(KP707106781, T54, T51);
                         T57 = FMA(KP707106781, T54, T51);
                    }
                    ro[WS(os, 22)] = FNMS(KP923879532, T4Y, T4B);
                    io[WS(os, 22)] = FNMS(KP923879532, T5a, T57);
                    ro[WS(os, 6)] = FMA(KP923879532, T4Y, T4B);
                    io[WS(os, 6)] = FMA(KP923879532, T5a, T57);
                    io[WS(os, 14)] = FNMS(KP923879532, T56, T55);
                    ro[WS(os, 14)] = FNMS(KP923879532, T5c, T5b);
                    io[WS(os, 30)] = FMA(KP923879532, T56, T55);
                    ro[WS(os, 30)] = FMA(KP923879532, T5c, T5b);
               }
               /* Store outputs 18, 2, 26, 10. */
               {
                    E T5f, T5r, T5u, T5w, T5m, T5q, T5p, T5v;
                    {
                         E T5d, T5e, T5s, T5t;
                         T5d = T4r + T4s;
                         T5e = T53 + T52;
                         T5f = FMA(KP707106781, T5e, T5d);
                         T5r = FNMS(KP707106781, T5e, T5d);
                         T5s = FNMS(KP414213562, T5g, T5h);
                         T5t = FMA(KP414213562, T5j, T5k);
                         T5u = T5s - T5t;
                         T5w = T5s + T5t;
                    }
                    {
                         E T5i, T5l, T5n, T5o;
                         T5i = FMA(KP414213562, T5h, T5g);
                         T5l = FNMS(KP414213562, T5k, T5j);
                         T5m = T5i + T5l;
                         T5q = T5l - T5i;
                         T5n = T50 + T4Z;
                         T5o = T4w + T4z;
                         T5p = FNMS(KP707106781, T5o, T5n);
                         T5v = FMA(KP707106781, T5o, T5n);
                    }
                    ro[WS(os, 18)] = FNMS(KP923879532, T5m, T5f);
                    io[WS(os, 18)] = FNMS(KP923879532, T5w, T5v);
                    ro[WS(os, 2)] = FMA(KP923879532, T5m, T5f);
                    io[WS(os, 2)] = FMA(KP923879532, T5w, T5v);
                    io[WS(os, 26)] = FNMS(KP923879532, T5q, T5p);
                    ro[WS(os, 26)] = FNMS(KP923879532, T5u, T5r);
                    io[WS(os, 10)] = FMA(KP923879532, T5q, T5p);
                    ro[WS(os, 10)] = FMA(KP923879532, T5u, T5r);
               }
               /* Store outputs 20, 4, 28, 12. */
               {
                    E T5z, T5P, T5S, T5U, T5K, T5O, T5N, T5T;
                    {
                         E T5x, T5y, T5Q, T5R;
                         T5x = T7 - Te;
                         T5y = T1n - T1u;
                         T5z = T5x + T5y;
                         T5P = T5x - T5y;
                         T5Q = T5D - T5A;
                         T5R = T5F + T5I;
                         T5S = T5Q - T5R;
                         T5U = T5Q + T5R;
                    }
                    {
                         E T5E, T5J, T5L, T5M;
                         T5E = T5A + T5D;
                         T5J = T5F - T5I;
                         T5K = T5E + T5J;
                         T5O = T5J - T5E;
                         T5L = T18 - T1f;
                         T5M = Tt - Tm;
                         T5N = T5L - T5M;
                         T5T = T5M + T5L;
                    }
                    ro[WS(os, 20)] = FNMS(KP707106781, T5K, T5z);
                    io[WS(os, 20)] = FNMS(KP707106781, T5U, T5T);
                    ro[WS(os, 4)] = FMA(KP707106781, T5K, T5z);
                    io[WS(os, 4)] = FMA(KP707106781, T5U, T5T);
                    io[WS(os, 28)] = FNMS(KP707106781, T5O, T5N);
                    ro[WS(os, 28)] = FNMS(KP707106781, T5S, T5P);
                    io[WS(os, 12)] = FMA(KP707106781, T5O, T5N);
                    ro[WS(os, 12)] = FMA(KP707106781, T5S, T5P);
               }
               /* Store outputs 16, 0, 8, 24 (plain sums and differences, no constants). */
               {
                    E Tv, T5V, T5Y, T60, T10, T11, T1w, T5Z;
                    {
                         E Tf, Tu, T5W, T5X;
                         Tf = T7 + Te;
                         Tu = Tm + Tt;
                         Tv = Tf + Tu;
                         T5V = Tf - Tu;
                         T5W = T5B + T5C;
                         T5X = T5G + T5H;
                         T5Y = T5W - T5X;
                         T60 = T5W + T5X;
                    }
                    {
                         E TK, TZ, T1g, T1v;
                         TK = TC + TJ;
                         TZ = TR + TY;
                         T10 = TK + TZ;
                         T11 = TZ - TK;
                         T1g = T18 + T1f;
                         T1v = T1n + T1u;
                         T1w = T1g - T1v;
                         T5Z = T1g + T1v;
                    }
                    ro[WS(os, 16)] = Tv - T10;
                    io[WS(os, 16)] = T5Z - T60;
                    ro[0] = Tv + T10;
                    io[0] = T5Z + T60;
                    io[WS(os, 8)] = T11 + T1w;
                    ro[WS(os, 8)] = T5V + T5Y;
                    io[WS(os, 24)] = T1w - T11;
                    ro[WS(os, 24)] = T5V - T5Y;
               }
               /* Store outputs 21, 5, 13, 29. */
               {
                    E T1X, T37, T31, T33, T2o, T35, T2P, T34;
                    {
                         E T1H, T1W, T2X, T30;
                         T1H = FNMS(KP707106781, T1G, T1z);
                         T1W = T1O - T1V;
                         T1X = FMA(KP923879532, T1W, T1H);
                         T37 = FNMS(KP923879532, T1W, T1H);
                         T2X = FNMS(KP707106781, T2W, T2T);
                         T30 = T2Y - T2Z;
                         T31 = FNMS(KP923879532, T30, T2X);
                         T33 = FMA(KP923879532, T30, T2X);
                    }
                    {
                         E T2e, T2n, T2F, T2O;
                         T2e = FNMS(KP707106781, T2d, T22);
                         T2n = FNMS(KP707106781, T2m, T2j);
                         T2o = FMA(KP668178637, T2n, T2e);
                         T35 = FNMS(KP668178637, T2e, T2n);
                         T2F = FNMS(KP707106781, T2E, T2t);
                         T2O = FNMS(KP707106781, T2N, T2K);
                         T2P = FNMS(KP668178637, T2O, T2F);
                         T34 = FMA(KP668178637, T2F, T2O);
                    }
                    {
                         E T2Q, T36, T32, T38;
                         T2Q = T2o - T2P;
                         ro[WS(os, 21)] = FNMS(KP831469612, T2Q, T1X);
                         ro[WS(os, 5)] = FMA(KP831469612, T2Q, T1X);
                         T36 = T34 - T35;
                         io[WS(os, 21)] = FNMS(KP831469612, T36, T33);
                         io[WS(os, 5)] = FMA(KP831469612, T36, T33);
                         T32 = T2o + T2P;
                         io[WS(os, 13)] = FNMS(KP831469612, T32, T31);
                         io[WS(os, 29)] = FMA(KP831469612, T32, T31);
                         T38 = T35 + T34;
                         ro[WS(os, 13)] = FNMS(KP831469612, T38, T37);
                         ro[WS(os, 29)] = FMA(KP831469612, T38, T37);
                    }
               }
               /* Store outputs 19, 3, 27, 11. */
               {
                    E T3D, T41, T3Z, T45, T3K, T42, T3R, T43;
                    {
                         E T3v, T3C, T3V, T3Y;
                         T3v = FMA(KP707106781, T3u, T3t);
                         T3C = T3y - T3B;
                         T3D = FMA(KP923879532, T3C, T3v);
                         T41 = FNMS(KP923879532, T3C, T3v);
                         T3V = FMA(KP707106781, T3U, T3T);
                         T3Y = T3W - T3X;
                         T3Z = FNMS(KP923879532, T3Y, T3V);
                         T45 = FMA(KP923879532, T3Y, T3V);
                    }
                    {
                         E T3G, T3J, T3N, T3Q;
                         T3G = FNMS(KP707106781, T3F, T3E);
                         T3J = FNMS(KP707106781, T3I, T3H);
                         T3K = FMA(KP668178637, T3J, T3G);
                         T42 = FNMS(KP668178637, T3G, T3J);
                         T3N = FNMS(KP707106781, T3M, T3L);
                         T3Q = FNMS(KP707106781, T3P, T3O);
                         T3R = FNMS(KP668178637, T3Q, T3N);
                         T43 = FMA(KP668178637, T3N, T3Q);
                    }
                    {
                         E T3S, T46, T40, T44;
                         T3S = T3K + T3R;
                         ro[WS(os, 19)] = FNMS(KP831469612, T3S, T3D);
                         ro[WS(os, 3)] = FMA(KP831469612, T3S, T3D);
                         T46 = T42 + T43;
                         io[WS(os, 19)] = FNMS(KP831469612, T46, T45);
                         io[WS(os, 3)] = FMA(KP831469612, T46, T45);
                         T40 = T3R - T3K;
                         io[WS(os, 27)] = FNMS(KP831469612, T40, T3Z);
                         io[WS(os, 11)] = FMA(KP831469612, T40, T3Z);
                         T44 = T42 - T43;
                         ro[WS(os, 27)] = FNMS(KP831469612, T44, T41);
                         ro[WS(os, 11)] = FMA(KP831469612, T44, T41);
                    }
               }
               /* Store outputs 23, 7, 15, 31. */
               {
                    E T49, T4p, T4j, T4l, T4c, T4n, T4f, T4m;
                    {
                         E T47, T48, T4h, T4i;
                         T47 = FNMS(KP707106781, T3u, T3t);
                         T48 = T3X + T3W;
                         T49 = FNMS(KP923879532, T48, T47);
                         T4p = FMA(KP923879532, T48, T47);
                         T4h = FNMS(KP707106781, T3U, T3T);
                         T4i = T3y + T3B;
                         T4j = FMA(KP923879532, T4i, T4h);
                         T4l = FNMS(KP923879532, T4i, T4h);
                    }
                    {
                         E T4a, T4b, T4d, T4e;
                         T4a = FMA(KP707106781, T3I, T3H);
                         T4b = FMA(KP707106781, T3F, T3E);
                         T4c = FMA(KP198912367, T4b, T4a);
                         T4n = FNMS(KP198912367, T4a, T4b);
                         T4d = FMA(KP707106781, T3P, T3O);
                         T4e = FMA(KP707106781, T3M, T3L);
                         T4f = FNMS(KP198912367, T4e, T4d);
                         T4m = FMA(KP198912367, T4d, T4e);
                    }
                    {
                         E T4g, T4o, T4k, T4q;
                         T4g = T4c - T4f;
                         ro[WS(os, 23)] = FNMS(KP980785280, T4g, T49);
                         ro[WS(os, 7)] = FMA(KP980785280, T4g, T49);
                         T4o = T4m - T4n;
                         io[WS(os, 23)] = FNMS(KP980785280, T4o, T4l);
                         io[WS(os, 7)] = FMA(KP980785280, T4o, T4l);
                         T4k = T4c + T4f;
                         io[WS(os, 15)] = FNMS(KP980785280, T4k, T4j);
                         io[WS(os, 31)] = FMA(KP980785280, T4k, T4j);
                         T4q = T4n + T4m;
                         ro[WS(os, 15)] = FNMS(KP980785280, T4q, T4p);
                         ro[WS(os, 31)] = FMA(KP980785280, T4q, T4p);
                    }
               }
               /* Store outputs 17, 1, 25, 9. */
               {
                    E T3b, T3n, T3l, T3r, T3e, T3o, T3h, T3p;
                    {
                         E T39, T3a, T3j, T3k;
                         T39 = FMA(KP707106781, T1G, T1z);
                         T3a = T2Z + T2Y;
                         T3b = FMA(KP923879532, T3a, T39);
                         T3n = FNMS(KP923879532, T3a, T39);
                         T3j = FMA(KP707106781, T2W, T2T);
                         T3k = T1O + T1V;
                         T3l = FNMS(KP923879532, T3k, T3j);
                         T3r = FMA(KP923879532, T3k, T3j);
                    }
                    {
                         E T3c, T3d, T3f, T3g;
                         T3c = FMA(KP707106781, T2m, T2j);
                         T3d = FMA(KP707106781, T2d, T22);
                         T3e = FMA(KP198912367, T3d, T3c);
                         T3o = FNMS(KP198912367, T3c, T3d);
                         T3f = FMA(KP707106781, T2N, T2K);
                         T3g = FMA(KP707106781, T2E, T2t);
                         T3h = FNMS(KP198912367, T3g, T3f);
                         T3p = FMA(KP198912367, T3f, T3g);
                    }
                    {
                         E T3i, T3s, T3m, T3q;
                         T3i = T3e + T3h;
                         ro[WS(os, 17)] = FNMS(KP980785280, T3i, T3b);
                         ro[WS(os, 1)] = FMA(KP980785280, T3i, T3b);
                         T3s = T3o + T3p;
                         io[WS(os, 17)] = FNMS(KP980785280, T3s, T3r);
                         io[WS(os, 1)] = FMA(KP980785280, T3s, T3r);
                         T3m = T3h - T3e;
                         io[WS(os, 25)] = FNMS(KP980785280, T3m, T3l);
                         io[WS(os, 9)] = FMA(KP980785280, T3m, T3l);
                         T3q = T3o - T3p;
                         ro[WS(os, 25)] = FNMS(KP980785280, T3q, T3n);
                         ro[WS(os, 9)] = FMA(KP980785280, T3q, T3n);
                    }
               }
          }
     }
}
666

    
667
/* Descriptor passed to X(kdft_register) together with n1_32 (FMA variant).
 * The inner triple 236 / 0 / 136 matches the "236 additions, 0
 * multiplications, 136 fused multiply/add" figures quoted in this variant's
 * header comment.  The leading 32 is presumably the transform size and
 * "n1_32" the codelet name; the meaning of the remaining fields is defined
 * by kdft_desc elsewhere -- NOTE(review): confirm against that declaration. */
static const kdft_desc desc = { 32, "n1_32", {236, 0, 136, 0}, &GENUS, 0, 0, 0, 0 };
668

    
669
/*
 * Entry point for this codelet: passes the n1_32 kernel and its descriptor
 * to X(kdft_register) for planner p.
 */
void X(codelet_n1_32) (planner *p) {
     X(kdft_register) (p, n1_32, &desc);
}
672

    
673
#else
674

    
675
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 32 -name n1_32 -include dft/scalar/n.h */
676

    
677
/*
 * This function contains 372 FP additions, 84 FP multiplications,
 * (or, 340 additions, 52 multiplications, 32 fused multiply/add),
 * 100 stack variables, 7 constants, and 128 memory accesses
 */
682
#include "dft/scalar/n.h"
683

    
684
/*
 * n1_32 -- generated kernel (non-FMA variant) that is registered below with
 * a descriptor of size 32.
 *
 * For each of the v iterations it reads 32 real samples ri[WS(is, k)] and
 * 32 imaginary samples ii[WS(is, k)], k = 0..31, and stores 32 real and 32
 * imaginary results to ro[WS(os, k)] and io[WS(os, k)].  Within one
 * iteration every input is loaded before the first output is stored.
 *
 *   ri, ii    real / imaginary input arrays (only read)
 *   ro, io    real / imaginary output arrays (only written)
 *   is, os    input / output strides, used through WS()
 *   v         number of iterations of the loop body
 *   ivs, ovs  amounts added to ri, ii / ro, io after each iteration
 *
 * R, E, stride, INT, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE come from
 * the project headers and are not visible in this file.
 *
 * The body is machine generated; the arithmetic below must not be reordered
 * or edited by hand.
 */
static void n1_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     /* cos(3*pi/16) and sin(3*pi/16) */
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
     /* sin(pi/16) and cos(pi/16) */
     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
     /* cos(pi/8) and sin(pi/8) */
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
     /* sqrt(1/2) */
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
          INT i;
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(128, is), MAKE_VOLATILE_STRIDE(128, os)) {
               E T7, T4r, T4Z, T18, T1z, T3t, T3T, T2T, Te, T1f, T50, T4s, T2W, T3u, T1G;
               E T3U, Tm, T1n, T1O, T2Z, T3y, T3X, T4w, T53, Tt, T1u, T1V, T2Y, T3B, T3W;
               E T4z, T52, T2t, T3L, T3O, T2K, TR, TY, T5F, T5G, T5H, T5I, T4R, T5j, T2E;
               E T3P, T4W, T5k, T2N, T3M, T22, T3E, T3H, T2j, TC, TJ, T5A, T5B, T5C, T5D;
               E T4G, T5g, T2d, T3F, T4L, T5h, T2m, T3I;
               /* Inputs 0, 16, 8, 24. */
               {
                    E T3, T1x, T14, T2S, T6, T2R, T17, T1y;
                    {
                         E T1, T2, T12, T13;
                         T1 = ri[0];
                         T2 = ri[WS(is, 16)];
                         T3 = T1 + T2;
                         T1x = T1 - T2;
                         T12 = ii[0];
                         T13 = ii[WS(is, 16)];
                         T14 = T12 + T13;
                         T2S = T12 - T13;
                    }
                    {
                         E T4, T5, T15, T16;
                         T4 = ri[WS(is, 8)];
                         T5 = ri[WS(is, 24)];
                         T6 = T4 + T5;
                         T2R = T4 - T5;
                         T15 = ii[WS(is, 8)];
                         T16 = ii[WS(is, 24)];
                         T17 = T15 + T16;
                         T1y = T15 - T16;
                    }
                    T7 = T3 + T6;
                    T4r = T3 - T6;
                    T4Z = T14 - T17;
                    T18 = T14 + T17;
                    T1z = T1x - T1y;
                    T3t = T1x + T1y;
                    T3T = T2S - T2R;
                    T2T = T2R + T2S;
               }
               /* Inputs 4, 20, 28, 12. */
               {
                    E Ta, T1B, T1b, T1A, Td, T1D, T1e, T1E;
                    {
                         E T8, T9, T19, T1a;
                         T8 = ri[WS(is, 4)];
                         T9 = ri[WS(is, 20)];
                         Ta = T8 + T9;
                         T1B = T8 - T9;
                         T19 = ii[WS(is, 4)];
                         T1a = ii[WS(is, 20)];
                         T1b = T19 + T1a;
                         T1A = T19 - T1a;
                    }
                    {
                         E Tb, Tc, T1c, T1d;
                         Tb = ri[WS(is, 28)];
                         Tc = ri[WS(is, 12)];
                         Td = Tb + Tc;
                         T1D = Tb - Tc;
                         T1c = ii[WS(is, 28)];
                         T1d = ii[WS(is, 12)];
                         T1e = T1c + T1d;
                         T1E = T1c - T1d;
                    }
                    Te = Ta + Td;
                    T1f = T1b + T1e;
                    T50 = Td - Ta;
                    T4s = T1b - T1e;
                    {
                         E T2U, T2V, T1C, T1F;
                         T2U = T1D - T1E;
                         T2V = T1B + T1A;
                         T2W = KP707106781 * (T2U - T2V);
                         T3u = KP707106781 * (T2V + T2U);
                         T1C = T1A - T1B;
                         T1F = T1D + T1E;
                         T1G = KP707106781 * (T1C - T1F);
                         T3U = KP707106781 * (T1C + T1F);
                    }
               }
               /* Inputs 2, 18, 10, 26. */
               {
                    E Ti, T1L, T1j, T1J, Tl, T1I, T1m, T1M, T1K, T1N;
                    {
                         E Tg, Th, T1h, T1i;
                         Tg = ri[WS(is, 2)];
                         Th = ri[WS(is, 18)];
                         Ti = Tg + Th;
                         T1L = Tg - Th;
                         T1h = ii[WS(is, 2)];
                         T1i = ii[WS(is, 18)];
                         T1j = T1h + T1i;
                         T1J = T1h - T1i;
                    }
                    {
                         E Tj, Tk, T1k, T1l;
                         Tj = ri[WS(is, 10)];
                         Tk = ri[WS(is, 26)];
                         Tl = Tj + Tk;
                         T1I = Tj - Tk;
                         T1k = ii[WS(is, 10)];
                         T1l = ii[WS(is, 26)];
                         T1m = T1k + T1l;
                         T1M = T1k - T1l;
                    }
                    Tm = Ti + Tl;
                    T1n = T1j + T1m;
                    T1K = T1I + T1J;
                    T1N = T1L - T1M;
                    T1O = FNMS(KP923879532, T1N, KP382683432 * T1K);
                    T2Z = FMA(KP923879532, T1K, KP382683432 * T1N);
                    {
                         E T3w, T3x, T4u, T4v;
                         T3w = T1J - T1I;
                         T3x = T1L + T1M;
                         T3y = FNMS(KP382683432, T3x, KP923879532 * T3w);
                         T3X = FMA(KP382683432, T3w, KP923879532 * T3x);
                         T4u = T1j - T1m;
                         T4v = Ti - Tl;
                         T4w = T4u - T4v;
                         T53 = T4v + T4u;
                    }
               }
               /* Inputs 30, 14, 6, 22. */
               {
                    E Tp, T1S, T1q, T1Q, Ts, T1P, T1t, T1T, T1R, T1U;
                    {
                         E Tn, To, T1o, T1p;
                         Tn = ri[WS(is, 30)];
                         To = ri[WS(is, 14)];
                         Tp = Tn + To;
                         T1S = Tn - To;
                         T1o = ii[WS(is, 30)];
                         T1p = ii[WS(is, 14)];
                         T1q = T1o + T1p;
                         T1Q = T1o - T1p;
                    }
                    {
                         E Tq, Tr, T1r, T1s;
                         Tq = ri[WS(is, 6)];
                         Tr = ri[WS(is, 22)];
                         Ts = Tq + Tr;
                         T1P = Tq - Tr;
                         T1r = ii[WS(is, 6)];
                         T1s = ii[WS(is, 22)];
                         T1t = T1r + T1s;
                         T1T = T1r - T1s;
                    }
                    Tt = Tp + Ts;
                    T1u = T1q + T1t;
                    T1R = T1P + T1Q;
                    T1U = T1S - T1T;
                    T1V = FMA(KP382683432, T1R, KP923879532 * T1U);
                    T2Y = FNMS(KP923879532, T1R, KP382683432 * T1U);
                    {
                         E T3z, T3A, T4x, T4y;
                         T3z = T1Q - T1P;
                         T3A = T1S + T1T;
                         T3B = FMA(KP923879532, T3z, KP382683432 * T3A);
                         T3W = FNMS(KP382683432, T3z, KP923879532 * T3A);
                         T4x = Tp - Ts;
                         T4y = T1q - T1t;
                         T4z = T4x + T4y;
                         T52 = T4x - T4y;
                    }
               }
               /* Inputs 31, 15, 7, 23, 3, 19, 27, 11. */
               {
                    E TN, T2p, T2J, T4S, TQ, T2G, T2s, T4T, TU, T2x, T2w, T4O, TX, T2z, T2C;
                    E T4P;
                    {
                         E TL, TM, T2H, T2I;
                         TL = ri[WS(is, 31)];
                         TM = ri[WS(is, 15)];
                         TN = TL + TM;
                         T2p = TL - TM;
                         T2H = ii[WS(is, 31)];
                         T2I = ii[WS(is, 15)];
                         T2J = T2H - T2I;
                         T4S = T2H + T2I;
                    }
                    {
                         E TO, TP, T2q, T2r;
                         TO = ri[WS(is, 7)];
                         TP = ri[WS(is, 23)];
                         TQ = TO + TP;
                         T2G = TO - TP;
                         T2q = ii[WS(is, 7)];
                         T2r = ii[WS(is, 23)];
                         T2s = T2q - T2r;
                         T4T = T2q + T2r;
                    }
                    {
                         E TS, TT, T2u, T2v;
                         TS = ri[WS(is, 3)];
                         TT = ri[WS(is, 19)];
                         TU = TS + TT;
                         T2x = TS - TT;
                         T2u = ii[WS(is, 3)];
                         T2v = ii[WS(is, 19)];
                         T2w = T2u - T2v;
                         T4O = T2u + T2v;
                    }
                    {
                         E TV, TW, T2A, T2B;
                         TV = ri[WS(is, 27)];
                         TW = ri[WS(is, 11)];
                         TX = TV + TW;
                         T2z = TV - TW;
                         T2A = ii[WS(is, 27)];
                         T2B = ii[WS(is, 11)];
                         T2C = T2A - T2B;
                         T4P = T2A + T2B;
                    }
                    T2t = T2p - T2s;
                    T3L = T2p + T2s;
                    T3O = T2J - T2G;
                    T2K = T2G + T2J;
                    TR = TN + TQ;
                    TY = TU + TX;
                    T5F = TR - TY;
                    {
                         E T4N, T4Q, T2y, T2D;
                         T5G = T4S + T4T;
                         T5H = T4O + T4P;
                         T5I = T5G - T5H;
                         T4N = TN - TQ;
                         T4Q = T4O - T4P;
                         T4R = T4N - T4Q;
                         T5j = T4N + T4Q;
                         T2y = T2w - T2x;
                         T2D = T2z + T2C;
                         T2E = KP707106781 * (T2y - T2D);
                         T3P = KP707106781 * (T2y + T2D);
                         {
                              E T4U, T4V, T2L, T2M;
                              T4U = T4S - T4T;
                              T4V = TX - TU;
                              T4W = T4U - T4V;
                              T5k = T4V + T4U;
                              T2L = T2z - T2C;
                              T2M = T2x + T2w;
                              T2N = KP707106781 * (T2L - T2M);
                              T3M = KP707106781 * (T2M + T2L);
                         }
                    }
               }
               /* Inputs 1, 17, 9, 25, 5, 21, 29, 13. */
               {
                    E Ty, T2f, T21, T4C, TB, T1Y, T2i, T4D, TF, T28, T2b, T4I, TI, T23, T26;
                    E T4J;
                    {
                         E Tw, Tx, T1Z, T20;
                         Tw = ri[WS(is, 1)];
                         Tx = ri[WS(is, 17)];
                         Ty = Tw + Tx;
                         T2f = Tw - Tx;
                         T1Z = ii[WS(is, 1)];
                         T20 = ii[WS(is, 17)];
                         T21 = T1Z - T20;
                         T4C = T1Z + T20;
                    }
                    {
                         E Tz, TA, T2g, T2h;
                         Tz = ri[WS(is, 9)];
                         TA = ri[WS(is, 25)];
                         TB = Tz + TA;
                         T1Y = Tz - TA;
                         T2g = ii[WS(is, 9)];
                         T2h = ii[WS(is, 25)];
                         T2i = T2g - T2h;
                         T4D = T2g + T2h;
                    }
                    {
                         E TD, TE, T29, T2a;
                         TD = ri[WS(is, 5)];
                         TE = ri[WS(is, 21)];
                         TF = TD + TE;
                         T28 = TD - TE;
                         T29 = ii[WS(is, 5)];
                         T2a = ii[WS(is, 21)];
                         T2b = T29 - T2a;
                         T4I = T29 + T2a;
                    }
                    {
                         E TG, TH, T24, T25;
                         TG = ri[WS(is, 29)];
                         TH = ri[WS(is, 13)];
                         TI = TG + TH;
                         T23 = TG - TH;
                         T24 = ii[WS(is, 29)];
                         T25 = ii[WS(is, 13)];
                         T26 = T24 - T25;
                         T4J = T24 + T25;
                    }
                    T22 = T1Y + T21;
                    T3E = T2f + T2i;
                    T3H = T21 - T1Y;
                    T2j = T2f - T2i;
                    TC = Ty + TB;
                    TJ = TF + TI;
                    T5A = TC - TJ;
                    {
                         E T4E, T4F, T27, T2c;
                         T5B = T4C + T4D;
                         T5C = T4I + T4J;
                         T5D = T5B - T5C;
                         T4E = T4C - T4D;
                         T4F = TI - TF;
                         T4G = T4E - T4F;
                         T5g = T4F + T4E;
                         T27 = T23 - T26;
                         T2c = T28 + T2b;
                         T2d = KP707106781 * (T27 - T2c);
                         T3F = KP707106781 * (T2c + T27);
                         {
                              E T4H, T4K, T2k, T2l;
                              T4H = Ty - TB;
                              T4K = T4I - T4J;
                              T4L = T4H - T4K;
                              T5h = T4H + T4K;
                              T2k = T2b - T28;
                              T2l = T23 + T26;
                              T2m = KP707106781 * (T2k - T2l);
                              T3I = KP707106781 * (T2k + T2l);
                         }
                    }
               }
               /* Outputs 22, 6, 30, 14. */
               {
                    E T4B, T57, T5a, T5c, T4Y, T56, T55, T5b;
                    {
                         E T4t, T4A, T58, T59;
                         T4t = T4r - T4s;
                         T4A = KP707106781 * (T4w - T4z);
                         T4B = T4t + T4A;
                         T57 = T4t - T4A;
                         T58 = FNMS(KP923879532, T4L, KP382683432 * T4G);
                         T59 = FMA(KP382683432, T4W, KP923879532 * T4R);
                         T5a = T58 - T59;
                         T5c = T58 + T59;
                    }
                    {
                         E T4M, T4X, T51, T54;
                         T4M = FMA(KP923879532, T4G, KP382683432 * T4L);
                         T4X = FNMS(KP923879532, T4W, KP382683432 * T4R);
                         T4Y = T4M + T4X;
                         T56 = T4X - T4M;
                         T51 = T4Z - T50;
                         T54 = KP707106781 * (T52 - T53);
                         T55 = T51 - T54;
                         T5b = T51 + T54;
                    }
                    ro[WS(os, 22)] = T4B - T4Y;
                    io[WS(os, 22)] = T5b - T5c;
                    ro[WS(os, 6)] = T4B + T4Y;
                    io[WS(os, 6)] = T5b + T5c;
                    io[WS(os, 30)] = T55 - T56;
                    ro[WS(os, 30)] = T57 - T5a;
                    io[WS(os, 14)] = T55 + T56;
                    ro[WS(os, 14)] = T57 + T5a;
               }
               /* Outputs 18, 2, 26, 10. */
               {
                    E T5f, T5r, T5u, T5w, T5m, T5q, T5p, T5v;
                    {
                         E T5d, T5e, T5s, T5t;
                         T5d = T4r + T4s;
                         T5e = KP707106781 * (T53 + T52);
                         T5f = T5d + T5e;
                         T5r = T5d - T5e;
                         T5s = FNMS(KP382683432, T5h, KP923879532 * T5g);
                         T5t = FMA(KP923879532, T5k, KP382683432 * T5j);
                         T5u = T5s - T5t;
                         T5w = T5s + T5t;
                    }
                    {
                         E T5i, T5l, T5n, T5o;
                         T5i = FMA(KP382683432, T5g, KP923879532 * T5h);
                         T5l = FNMS(KP382683432, T5k, KP923879532 * T5j);
                         T5m = T5i + T5l;
                         T5q = T5l - T5i;
                         T5n = T50 + T4Z;
                         T5o = KP707106781 * (T4w + T4z);
                         T5p = T5n - T5o;
                         T5v = T5n + T5o;
                    }
                    ro[WS(os, 18)] = T5f - T5m;
                    io[WS(os, 18)] = T5v - T5w;
                    ro[WS(os, 2)] = T5f + T5m;
                    io[WS(os, 2)] = T5v + T5w;
                    io[WS(os, 26)] = T5p - T5q;
                    ro[WS(os, 26)] = T5r - T5u;
                    io[WS(os, 10)] = T5p + T5q;
                    ro[WS(os, 10)] = T5r + T5u;
               }
               /* Outputs 20, 4, 28, 12. */
               {
                    E T5z, T5P, T5S, T5U, T5K, T5O, T5N, T5T;
                    {
                         E T5x, T5y, T5Q, T5R;
                         T5x = T7 - Te;
                         T5y = T1n - T1u;
                         T5z = T5x + T5y;
                         T5P = T5x - T5y;
                         T5Q = T5D - T5A;
                         T5R = T5F + T5I;
                         T5S = KP707106781 * (T5Q - T5R);
                         T5U = KP707106781 * (T5Q + T5R);
                    }
                    {
                         E T5E, T5J, T5L, T5M;
                         T5E = T5A + T5D;
                         T5J = T5F - T5I;
                         T5K = KP707106781 * (T5E + T5J);
                         T5O = KP707106781 * (T5J - T5E);
                         T5L = T18 - T1f;
                         T5M = Tt - Tm;
                         T5N = T5L - T5M;
                         T5T = T5M + T5L;
                    }
                    ro[WS(os, 20)] = T5z - T5K;
                    io[WS(os, 20)] = T5T - T5U;
                    ro[WS(os, 4)] = T5z + T5K;
                    io[WS(os, 4)] = T5T + T5U;
                    io[WS(os, 28)] = T5N - T5O;
                    ro[WS(os, 28)] = T5P - T5S;
                    io[WS(os, 12)] = T5N + T5O;
                    ro[WS(os, 12)] = T5P + T5S;
               }
               /* Outputs 16, 0, 8, 24. */
               {
                    E Tv, T5V, T5Y, T60, T10, T11, T1w, T5Z;
                    {
                         E Tf, Tu, T5W, T5X;
                         Tf = T7 + Te;
                         Tu = Tm + Tt;
                         Tv = Tf + Tu;
                         T5V = Tf - Tu;
                         T5W = T5B + T5C;
                         T5X = T5G + T5H;
                         T5Y = T5W - T5X;
                         T60 = T5W + T5X;
                    }
                    {
                         E TK, TZ, T1g, T1v;
                         TK = TC + TJ;
                         TZ = TR + TY;
                         T10 = TK + TZ;
                         T11 = TZ - TK;
                         T1g = T18 + T1f;
                         T1v = T1n + T1u;
                         T1w = T1g - T1v;
                         T5Z = T1g + T1v;
                    }
                    ro[WS(os, 16)] = Tv - T10;
                    io[WS(os, 16)] = T5Z - T60;
                    ro[0] = Tv + T10;
                    io[0] = T5Z + T60;
                    io[WS(os, 8)] = T11 + T1w;
                    ro[WS(os, 8)] = T5V + T5Y;
                    io[WS(os, 24)] = T1w - T11;
                    ro[WS(os, 24)] = T5V - T5Y;
               }
               /* Outputs 23, 7, 31, 15. */
               {
                    E T1X, T33, T31, T37, T2o, T34, T2P, T35;
                    {
                         E T1H, T1W, T2X, T30;
                         T1H = T1z - T1G;
                         T1W = T1O - T1V;
                         T1X = T1H + T1W;
                         T33 = T1H - T1W;
                         T2X = T2T - T2W;
                         T30 = T2Y - T2Z;
                         T31 = T2X - T30;
                         T37 = T2X + T30;
                    }
                    {
                         E T2e, T2n, T2F, T2O;
                         T2e = T22 - T2d;
                         T2n = T2j - T2m;
                         T2o = FMA(KP980785280, T2e, KP195090322 * T2n);
                         T34 = FNMS(KP980785280, T2n, KP195090322 * T2e);
                         T2F = T2t - T2E;
                         T2O = T2K - T2N;
                         T2P = FNMS(KP980785280, T2O, KP195090322 * T2F);
                         T35 = FMA(KP195090322, T2O, KP980785280 * T2F);
                    }
                    {
                         E T2Q, T38, T32, T36;
                         T2Q = T2o + T2P;
                         ro[WS(os, 23)] = T1X - T2Q;
                         ro[WS(os, 7)] = T1X + T2Q;
                         T38 = T34 + T35;
                         io[WS(os, 23)] = T37 - T38;
                         io[WS(os, 7)] = T37 + T38;
                         T32 = T2P - T2o;
                         io[WS(os, 31)] = T31 - T32;
                         io[WS(os, 15)] = T31 + T32;
                         T36 = T34 - T35;
                         ro[WS(os, 31)] = T33 - T36;
                         ro[WS(os, 15)] = T33 + T36;
                    }
               }
               /* Outputs 21, 5, 29, 13. */
               {
                    E T3D, T41, T3Z, T45, T3K, T42, T3R, T43;
                    {
                         E T3v, T3C, T3V, T3Y;
                         T3v = T3t - T3u;
                         T3C = T3y - T3B;
                         T3D = T3v + T3C;
                         T41 = T3v - T3C;
                         T3V = T3T - T3U;
                         T3Y = T3W - T3X;
                         T3Z = T3V - T3Y;
                         T45 = T3V + T3Y;
                    }
                    {
                         E T3G, T3J, T3N, T3Q;
                         T3G = T3E - T3F;
                         T3J = T3H - T3I;
                         T3K = FMA(KP555570233, T3G, KP831469612 * T3J);
                         T42 = FNMS(KP831469612, T3G, KP555570233 * T3J);
                         T3N = T3L - T3M;
                         T3Q = T3O - T3P;
                         T3R = FNMS(KP831469612, T3Q, KP555570233 * T3N);
                         T43 = FMA(KP831469612, T3N, KP555570233 * T3Q);
                    }
                    {
                         E T3S, T46, T40, T44;
                         T3S = T3K + T3R;
                         ro[WS(os, 21)] = T3D - T3S;
                         ro[WS(os, 5)] = T3D + T3S;
                         T46 = T42 + T43;
                         io[WS(os, 21)] = T45 - T46;
                         io[WS(os, 5)] = T45 + T46;
                         T40 = T3R - T3K;
                         io[WS(os, 29)] = T3Z - T40;
                         io[WS(os, 13)] = T3Z + T40;
                         T44 = T42 - T43;
                         ro[WS(os, 29)] = T41 - T44;
                         ro[WS(os, 13)] = T41 + T44;
                    }
               }
               /* Outputs 17, 1, 25, 9. */
               {
                    E T49, T4l, T4j, T4p, T4c, T4m, T4f, T4n;
                    {
                         E T47, T48, T4h, T4i;
                         T47 = T3t + T3u;
                         T48 = T3X + T3W;
                         T49 = T47 + T48;
                         T4l = T47 - T48;
                         T4h = T3T + T3U;
                         T4i = T3y + T3B;
                         T4j = T4h - T4i;
                         T4p = T4h + T4i;
                    }
                    {
                         E T4a, T4b, T4d, T4e;
                         T4a = T3E + T3F;
                         T4b = T3H + T3I;
                         T4c = FMA(KP980785280, T4a, KP195090322 * T4b);
                         T4m = FNMS(KP195090322, T4a, KP980785280 * T4b);
                         T4d = T3L + T3M;
                         T4e = T3O + T3P;
                         T4f = FNMS(KP195090322, T4e, KP980785280 * T4d);
                         T4n = FMA(KP195090322, T4d, KP980785280 * T4e);
                    }
                    {
                         E T4g, T4q, T4k, T4o;
                         T4g = T4c + T4f;
                         ro[WS(os, 17)] = T49 - T4g;
                         ro[WS(os, 1)] = T49 + T4g;
                         T4q = T4m + T4n;
                         io[WS(os, 17)] = T4p - T4q;
                         io[WS(os, 1)] = T4p + T4q;
                         T4k = T4f - T4c;
                         io[WS(os, 25)] = T4j - T4k;
                         io[WS(os, 9)] = T4j + T4k;
                         T4o = T4m - T4n;
                         ro[WS(os, 25)] = T4l - T4o;
                         ro[WS(os, 9)] = T4l + T4o;
                    }
               }
               /* Outputs 19, 3, 27, 11. */
               {
                    E T3b, T3n, T3l, T3r, T3e, T3o, T3h, T3p;
                    {
                         E T39, T3a, T3j, T3k;
                         T39 = T1z + T1G;
                         T3a = T2Z + T2Y;
                         T3b = T39 + T3a;
                         T3n = T39 - T3a;
                         T3j = T2T + T2W;
                         T3k = T1O + T1V;
                         T3l = T3j - T3k;
                         T3r = T3j + T3k;
                    }
                    {
                         E T3c, T3d, T3f, T3g;
                         T3c = T22 + T2d;
                         T3d = T2j + T2m;
                         T3e = FMA(KP555570233, T3c, KP831469612 * T3d);
                         T3o = FNMS(KP555570233, T3d, KP831469612 * T3c);
                         T3f = T2t + T2E;
                         T3g = T2K + T2N;
                         T3h = FNMS(KP555570233, T3g, KP831469612 * T3f);
                         T3p = FMA(KP831469612, T3g, KP555570233 * T3f);
                    }
                    {
                         E T3i, T3s, T3m, T3q;
                         T3i = T3e + T3h;
                         ro[WS(os, 19)] = T3b - T3i;
                         ro[WS(os, 3)] = T3b + T3i;
                         T3s = T3o + T3p;
                         io[WS(os, 19)] = T3r - T3s;
                         io[WS(os, 3)] = T3r + T3s;
                         T3m = T3h - T3e;
                         io[WS(os, 27)] = T3l - T3m;
                         io[WS(os, 11)] = T3l + T3m;
                         T3q = T3o - T3p;
                         ro[WS(os, 27)] = T3n - T3q;
                         ro[WS(os, 11)] = T3n + T3q;
                    }
               }
          }
     }
}
1313

    
1314
/* Descriptor passed to X(kdft_register) together with n1_32 (non-FMA
 * variant).  The inner triple 340 / 52 / 32 matches the "340 additions, 52
 * multiplications, 32 fused multiply/add" figures quoted in this variant's
 * header comment.  The leading 32 is presumably the transform size and
 * "n1_32" the codelet name; the meaning of the remaining fields is defined
 * by kdft_desc elsewhere -- NOTE(review): confirm against that declaration. */
static const kdft_desc desc = { 32, "n1_32", {340, 52, 32, 0}, &GENUS, 0, 0, 0, 0 };
1315

    
1316
/*
 * Entry point for this codelet: passes the n1_32 kernel and its descriptor
 * to X(kdft_register) for planner p.
 */
void X(codelet_n1_32) (planner *p) {
     X(kdft_register) (p, n1_32, &desc);
}
1319

    
1320
#endif