To check out this repository, please run hg clone on the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / n1_20.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (19.8 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:12 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 20 -name n1_20 -include dft/scalar/n.h */
29

    
30
/*
31
 * This function contains 208 FP additions, 72 FP multiplications,
32
 * (or, 136 additions, 0 multiplications, 72 fused multiply/add),
33
 * 81 stack variables, 4 constants, and 80 memory accesses
34
 */
35
#include "dft/scalar/n.h"
36

    
37
/*
 * n1_20 -- size-20 DFT kernel, FMA-oriented variant (selected when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined).  The body is
 * machine generated by genfft's gen_notw (see the "Generated by" comment
 * that precedes this function); regenerate it rather than hand-editing the
 * arithmetic.
 *
 * Parameters:
 *   ri, ii   input arrays, only read.  NOTE(review): presumably the real and
 *            imaginary parts of the input -- confirm against codelet-dft.h.
 *   ro, io   output arrays, only written.  NOTE(review): presumably the real
 *            and imaginary parts of the output -- confirm.
 *   is, os   strides passed to WS() to locate element k of the input /
 *            output (element 0 is accessed directly as x[0]).
 *   v        number of transforms to perform; the loop counts i from v down
 *            to 1, so v <= 0 performs no work.
 *   ivs, ovs added to the input / output pointers after every transform.
 *
 * Each loop iteration loads elements 0..19 of ri and of ii, and stores
 * elements 0..19 of ro and of io.  All 40 loads are issued (in the first
 * three inner blocks) before the first store.
 *
 * DK, E, R, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined by the
 * included project headers, not in this file.  NOTE(review): FMA(a, b, c)
 * and FNMS(a, b, c) are presumably a*b + c and c - a*b -- confirm.
 */
static void n1_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
     DK(KP618033988, +0.618033988749894848204586834365638117720309180); /* numerically (sqrt(5)-1)/2 */
     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
     {
          INT i;
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(80, is), MAKE_VOLATILE_STRIDE(80, os)) {
               E T7, T2N, T3b, TD, TP, T1R, T2f, T1d, Tt, TA, TB, T2w, T2z, T2P, T35;
               E T36, T3d, TH, TI, TJ, T15, T1a, T1b, T1s, T1x, T1T, T29, T2a, T2h, T1h;
               E T1i, T1j, Te, Tl, Tm, T2D, T2G, T2O, T32, T33, T3c, TE, TF, TG, TU;
               E TZ, T10, T1D, T1I, T1S, T26, T27, T2g, T1e, T1f, T1g;
               /* Load inputs 0, 10, 5, 15 and form their sums and differences. */
               {
                    E T3, T1N, TN, T2L, T6, TO, T1Q, T2M;
                    {
                         E T1, T2, TL, TM;
                         T1 = ri[0];
                         T2 = ri[WS(is, 10)];
                         T3 = T1 + T2;
                         T1N = T1 - T2;
                         TL = ii[0];
                         TM = ii[WS(is, 10)];
                         TN = TL - TM;
                         T2L = TL + TM;
                    }
                    {
                         E T4, T5, T1O, T1P;
                         T4 = ri[WS(is, 5)];
                         T5 = ri[WS(is, 15)];
                         T6 = T4 + T5;
                         TO = T4 - T5;
                         T1O = ii[WS(is, 5)];
                         T1P = ii[WS(is, 15)];
                         T1Q = T1O - T1P;
                         T2M = T1O + T1P;
                    }
                    T7 = T3 - T6;
                    T2N = T2L - T2M;
                    T3b = T2L + T2M;
                    TD = T3 + T6;
                    TP = TN - TO;
                    T1R = T1N - T1Q;
                    T2f = T1N + T1Q;
                    T1d = TO + TN;
               }
               /* Load inputs 8, 18, 13, 3, 12, 2, 17, 7 and combine them. */
               {
                    E Tp, T1o, T13, T2u, Ts, T14, T1r, T2v, Tw, T1t, T18, T2x, Tz, T19, T1w;
                    E T2y;
                    {
                         E Tn, To, T11, T12;
                         Tn = ri[WS(is, 8)];
                         To = ri[WS(is, 18)];
                         Tp = Tn + To;
                         T1o = Tn - To;
                         T11 = ii[WS(is, 8)];
                         T12 = ii[WS(is, 18)];
                         T13 = T11 - T12;
                         T2u = T11 + T12;
                    }
                    {
                         E Tq, Tr, T1p, T1q;
                         Tq = ri[WS(is, 13)];
                         Tr = ri[WS(is, 3)];
                         Ts = Tq + Tr;
                         T14 = Tq - Tr;
                         T1p = ii[WS(is, 13)];
                         T1q = ii[WS(is, 3)];
                         T1r = T1p - T1q;
                         T2v = T1p + T1q;
                    }
                    {
                         E Tu, Tv, T16, T17;
                         Tu = ri[WS(is, 12)];
                         Tv = ri[WS(is, 2)];
                         Tw = Tu + Tv;
                         T1t = Tu - Tv;
                         T16 = ii[WS(is, 12)];
                         T17 = ii[WS(is, 2)];
                         T18 = T16 - T17;
                         T2x = T16 + T17;
                    }
                    {
                         E Tx, Ty, T1u, T1v;
                         Tx = ri[WS(is, 17)];
                         Ty = ri[WS(is, 7)];
                         Tz = Tx + Ty;
                         T19 = Tx - Ty;
                         T1u = ii[WS(is, 17)];
                         T1v = ii[WS(is, 7)];
                         T1w = T1u - T1v;
                         T2y = T1u + T1v;
                    }
                    Tt = Tp - Ts;
                    TA = Tw - Tz;
                    TB = Tt + TA;
                    T2w = T2u - T2v;
                    T2z = T2x - T2y;
                    T2P = T2w + T2z;
                    T35 = T2u + T2v;
                    T36 = T2x + T2y;
                    T3d = T35 + T36;
                    TH = Tp + Ts;
                    TI = Tw + Tz;
                    TJ = TH + TI;
                    T15 = T13 - T14;
                    T1a = T18 - T19;
                    T1b = T15 + T1a;
                    T1s = T1o - T1r;
                    T1x = T1t - T1w;
                    T1T = T1s + T1x;
                    T29 = T1o + T1r;
                    T2a = T1t + T1w;
                    T2h = T29 + T2a;
                    T1h = T14 + T13;
                    T1i = T19 + T18;
                    T1j = T1h + T1i;
               }
               /* Load inputs 4, 14, 9, 19, 16, 6, 1, 11 and combine them. */
               {
                    E Ta, T1z, TS, T2B, Td, TT, T1C, T2C, Th, T1E, TX, T2E, Tk, TY, T1H;
                    E T2F;
                    {
                         E T8, T9, TQ, TR;
                         T8 = ri[WS(is, 4)];
                         T9 = ri[WS(is, 14)];
                         Ta = T8 + T9;
                         T1z = T8 - T9;
                         TQ = ii[WS(is, 4)];
                         TR = ii[WS(is, 14)];
                         TS = TQ - TR;
                         T2B = TQ + TR;
                    }
                    {
                         E Tb, Tc, T1A, T1B;
                         Tb = ri[WS(is, 9)];
                         Tc = ri[WS(is, 19)];
                         Td = Tb + Tc;
                         TT = Tb - Tc;
                         T1A = ii[WS(is, 9)];
                         T1B = ii[WS(is, 19)];
                         T1C = T1A - T1B;
                         T2C = T1A + T1B;
                    }
                    {
                         E Tf, Tg, TV, TW;
                         Tf = ri[WS(is, 16)];
                         Tg = ri[WS(is, 6)];
                         Th = Tf + Tg;
                         T1E = Tf - Tg;
                         TV = ii[WS(is, 16)];
                         TW = ii[WS(is, 6)];
                         TX = TV - TW;
                         T2E = TV + TW;
                    }
                    {
                         E Ti, Tj, T1F, T1G;
                         Ti = ri[WS(is, 1)];
                         Tj = ri[WS(is, 11)];
                         Tk = Ti + Tj;
                         TY = Ti - Tj;
                         T1F = ii[WS(is, 1)];
                         T1G = ii[WS(is, 11)];
                         T1H = T1F - T1G;
                         T2F = T1F + T1G;
                    }
                    Te = Ta - Td;
                    Tl = Th - Tk;
                    Tm = Te + Tl;
                    T2D = T2B - T2C;
                    T2G = T2E - T2F;
                    T2O = T2D + T2G;
                    T32 = T2B + T2C;
                    T33 = T2E + T2F;
                    T3c = T32 + T33;
                    TE = Ta + Td;
                    TF = Th + Tk;
                    TG = TE + TF;
                    TU = TS - TT;
                    TZ = TX - TY;
                    T10 = TU + TZ;
                    T1D = T1z - T1C;
                    T1I = T1E - T1H;
                    T1S = T1D + T1I;
                    T26 = T1z + T1C;
                    T27 = T1E + T1H;
                    T2g = T26 + T27;
                    T1e = TT + TS;
                    T1f = TY + TX;
                    T1g = T1e + T1f;
               }
               /* Store ro[10], ro[14], ro[6], ro[2], ro[18]. */
               {
                    E T2s, TC, T2r, T2I, T2K, T2A, T2H, T2J, T2t;
                    T2s = Tm - TB;
                    TC = Tm + TB;
                    T2r = FNMS(KP250000000, TC, T7);
                    T2A = T2w - T2z;
                    T2H = T2D - T2G;
                    T2I = FNMS(KP618033988, T2H, T2A);
                    T2K = FMA(KP618033988, T2A, T2H);
                    ro[WS(os, 10)] = T7 + TC;
                    T2J = FMA(KP559016994, T2s, T2r);
                    ro[WS(os, 14)] = FNMS(KP951056516, T2K, T2J);
                    ro[WS(os, 6)] = FMA(KP951056516, T2K, T2J);
                    T2t = FNMS(KP559016994, T2s, T2r);
                    ro[WS(os, 2)] = FNMS(KP951056516, T2I, T2t);
                    ro[WS(os, 18)] = FMA(KP951056516, T2I, T2t);
               }
               /* Store io[10], io[6], io[14], io[2], io[18]. */
               {
                    E T2S, T2Q, T2R, T2W, T2Y, T2U, T2V, T2X, T2T;
                    T2S = T2O - T2P;
                    T2Q = T2O + T2P;
                    T2R = FNMS(KP250000000, T2Q, T2N);
                    T2U = Tt - TA;
                    T2V = Te - Tl;
                    T2W = FNMS(KP618033988, T2V, T2U);
                    T2Y = FMA(KP618033988, T2U, T2V);
                    io[WS(os, 10)] = T2N + T2Q;
                    T2X = FMA(KP559016994, T2S, T2R);
                    io[WS(os, 6)] = FNMS(KP951056516, T2Y, T2X);
                    io[WS(os, 14)] = FMA(KP951056516, T2Y, T2X);
                    T2T = FNMS(KP559016994, T2S, T2R);
                    io[WS(os, 2)] = FMA(KP951056516, T2W, T2T);
                    io[WS(os, 18)] = FNMS(KP951056516, T2W, T2T);
               }
               /* Store ro[0], ro[12], ro[8], ro[4], ro[16]. */
               {
                    E T30, TK, T2Z, T38, T3a, T34, T37, T39, T31;
                    T30 = TG - TJ;
                    TK = TG + TJ;
                    T2Z = FNMS(KP250000000, TK, TD);
                    T34 = T32 - T33;
                    T37 = T35 - T36;
                    T38 = FMA(KP618033988, T37, T34);
                    T3a = FNMS(KP618033988, T34, T37);
                    ro[0] = TD + TK;
                    T39 = FNMS(KP559016994, T30, T2Z);
                    ro[WS(os, 12)] = FNMS(KP951056516, T3a, T39);
                    ro[WS(os, 8)] = FMA(KP951056516, T3a, T39);
                    T31 = FMA(KP559016994, T30, T2Z);
                    ro[WS(os, 4)] = FNMS(KP951056516, T38, T31);
                    ro[WS(os, 16)] = FMA(KP951056516, T38, T31);
               }
               /* Store io[0], io[8], io[12], io[4], io[16]. */
               {
                    E T3g, T3e, T3f, T3k, T3m, T3i, T3j, T3l, T3h;
                    T3g = T3c - T3d;
                    T3e = T3c + T3d;
                    T3f = FNMS(KP250000000, T3e, T3b);
                    T3i = TE - TF;
                    T3j = TH - TI;
                    T3k = FMA(KP618033988, T3j, T3i);
                    T3m = FNMS(KP618033988, T3i, T3j);
                    io[0] = T3b + T3e;
                    T3l = FNMS(KP559016994, T3g, T3f);
                    io[WS(os, 8)] = FNMS(KP951056516, T3m, T3l);
                    io[WS(os, 12)] = FMA(KP951056516, T3m, T3l);
                    T3h = FMA(KP559016994, T3g, T3f);
                    io[WS(os, 4)] = FMA(KP951056516, T3k, T3h);
                    io[WS(os, 16)] = FNMS(KP951056516, T3k, T3h);
               }
               /* Store io[5], io[13], io[17], io[1], io[9]. */
               {
                    E T24, T1c, T23, T2c, T2e, T28, T2b, T2d, T25;
                    T24 = T10 - T1b;
                    T1c = T10 + T1b;
                    T23 = FNMS(KP250000000, T1c, TP);
                    T28 = T26 - T27;
                    T2b = T29 - T2a;
                    T2c = FMA(KP618033988, T2b, T28);
                    T2e = FNMS(KP618033988, T28, T2b);
                    io[WS(os, 5)] = TP + T1c;
                    T2d = FNMS(KP559016994, T24, T23);
                    io[WS(os, 13)] = FNMS(KP951056516, T2e, T2d);
                    io[WS(os, 17)] = FMA(KP951056516, T2e, T2d);
                    T25 = FMA(KP559016994, T24, T23);
                    io[WS(os, 1)] = FNMS(KP951056516, T2c, T25);
                    io[WS(os, 9)] = FMA(KP951056516, T2c, T25);
               }
               /* Store ro[5], ro[13], ro[17], ro[1], ro[9]. */
               {
                    E T2k, T2i, T2j, T2o, T2q, T2m, T2n, T2p, T2l;
                    T2k = T2g - T2h;
                    T2i = T2g + T2h;
                    T2j = FNMS(KP250000000, T2i, T2f);
                    T2m = TU - TZ;
                    T2n = T15 - T1a;
                    T2o = FMA(KP618033988, T2n, T2m);
                    T2q = FNMS(KP618033988, T2m, T2n);
                    ro[WS(os, 5)] = T2f + T2i;
                    T2p = FNMS(KP559016994, T2k, T2j);
                    ro[WS(os, 13)] = FMA(KP951056516, T2q, T2p);
                    ro[WS(os, 17)] = FNMS(KP951056516, T2q, T2p);
                    T2l = FMA(KP559016994, T2k, T2j);
                    ro[WS(os, 1)] = FMA(KP951056516, T2o, T2l);
                    ro[WS(os, 9)] = FNMS(KP951056516, T2o, T2l);
               }
               /* Store io[15], io[11], io[19], io[3], io[7]. */
               {
                    E T1m, T1k, T1l, T1K, T1M, T1y, T1J, T1L, T1n;
                    T1m = T1g - T1j;
                    T1k = T1g + T1j;
                    T1l = FNMS(KP250000000, T1k, T1d);
                    T1y = T1s - T1x;
                    T1J = T1D - T1I;
                    T1K = FNMS(KP618033988, T1J, T1y);
                    T1M = FMA(KP618033988, T1y, T1J);
                    io[WS(os, 15)] = T1d + T1k;
                    T1L = FMA(KP559016994, T1m, T1l);
                    io[WS(os, 11)] = FNMS(KP951056516, T1M, T1L);
                    io[WS(os, 19)] = FMA(KP951056516, T1M, T1L);
                    T1n = FNMS(KP559016994, T1m, T1l);
                    io[WS(os, 3)] = FNMS(KP951056516, T1K, T1n);
                    io[WS(os, 7)] = FMA(KP951056516, T1K, T1n);
               }
               /* Store ro[15], ro[11], ro[19], ro[3], ro[7]. */
               {
                    E T1W, T1U, T1V, T20, T22, T1Y, T1Z, T21, T1X;
                    T1W = T1S - T1T;
                    T1U = T1S + T1T;
                    T1V = FNMS(KP250000000, T1U, T1R);
                    T1Y = T1h - T1i;
                    T1Z = T1e - T1f;
                    T20 = FNMS(KP618033988, T1Z, T1Y);
                    T22 = FMA(KP618033988, T1Y, T1Z);
                    ro[WS(os, 15)] = T1R + T1U;
                    T21 = FMA(KP559016994, T1W, T1V);
                    ro[WS(os, 11)] = FMA(KP951056516, T22, T21);
                    ro[WS(os, 19)] = FNMS(KP951056516, T22, T21);
                    T1X = FNMS(KP559016994, T1W, T1V);
                    ro[WS(os, 3)] = FMA(KP951056516, T20, T1X);
                    ro[WS(os, 7)] = FNMS(KP951056516, T20, T1X);
               }
          }
     }
}
366

    
367
/*
 * Descriptor passed to X(kdft_register) together with n1_20.  It carries the
 * value 20, the codelet name "n1_20", and an operation-count group
 * {136, 0, 72, 0} whose first three entries match the additions /
 * multiplications / fused multiply-add counts quoted in the generated
 * comment for this FMA variant.  GENUS is defined outside this file.
 * NOTE(review): the meaning of the fourth count and of the four trailing
 * zeros is not visible here -- confirm against the kdft_desc declaration.
 */
static const kdft_desc desc = { 20, "n1_20", {136, 0, 72, 0}, &GENUS, 0, 0, 0, 0 };
368

    
369
/*
 * Registration entry point for this codelet: hands the n1_20 kernel and its
 * descriptor to planner p by calling X(kdft_register).  X() is a project
 * macro defined outside this file.
 */
void X(codelet_n1_20) (planner *p) {
     X(kdft_register) (p, n1_20, &desc);
}
372

    
373
#else
374

    
375
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 20 -name n1_20 -include dft/scalar/n.h */
376

    
377
/*
378
 * This function contains 208 FP additions, 48 FP multiplications,
379
 * (or, 184 additions, 24 multiplications, 24 fused multiply/add),
380
 * 81 stack variables, 4 constants, and 80 memory accesses
381
 */
382
#include "dft/scalar/n.h"
383

    
384
/*
 * n1_20 -- size-20 DFT kernel, non-FMA variant (compiled when neither
 * ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined).  The body is
 * machine generated by genfft's gen_notw (see the "Generated by" comment
 * that precedes this function); regenerate it rather than hand-editing the
 * arithmetic.
 *
 * Parameters:
 *   ri, ii   input arrays, only read.  NOTE(review): presumably the real and
 *            imaginary parts of the input -- confirm against codelet-dft.h.
 *   ro, io   output arrays, only written.  NOTE(review): presumably the real
 *            and imaginary parts of the output -- confirm.
 *   is, os   strides passed to WS() to locate element k of the input /
 *            output (element 0 is accessed directly as x[0]).
 *   v        number of transforms to perform; the loop counts i from v down
 *            to 1, so v <= 0 performs no work.
 *   ivs, ovs added to the input / output pointers after every transform.
 *
 * Each loop iteration loads elements 0..19 of ri and of ii, and stores
 * elements 0..19 of ro and of io.  All 40 loads are issued (in the first
 * three inner blocks) before the first store.
 *
 * DK, E, R, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined by the
 * included project headers, not in this file.  NOTE(review): FMA(a, b, c)
 * and FNMS(a, b, c) are presumably a*b + c and c - a*b -- confirm.
 */
static void n1_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
     {
          INT i;
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(80, is), MAKE_VOLATILE_STRIDE(80, os)) {
               E T7, T2Q, T3h, TD, TP, T1U, T2l, T1d, Tt, TA, TB, T2w, T2z, T2S, T35;
               E T36, T3f, TH, TI, TJ, T15, T1a, T1b, T1s, T1x, T1W, T29, T2a, T2j, T1h;
               E T1i, T1j, Te, Tl, Tm, T2D, T2G, T2R, T32, T33, T3e, TE, TF, TG, TU;
               E TZ, T10, T1D, T1I, T1V, T26, T27, T2i, T1e, T1f, T1g;
               /* Load inputs 0, 10, 5, 15 and form their sums and differences. */
               {
                    E T3, T1Q, TN, T2O, T6, TO, T1T, T2P;
                    {
                         E T1, T2, TL, TM;
                         T1 = ri[0];
                         T2 = ri[WS(is, 10)];
                         T3 = T1 + T2;
                         T1Q = T1 - T2;
                         TL = ii[0];
                         TM = ii[WS(is, 10)];
                         TN = TL - TM;
                         T2O = TL + TM;
                    }
                    {
                         E T4, T5, T1R, T1S;
                         T4 = ri[WS(is, 5)];
                         T5 = ri[WS(is, 15)];
                         T6 = T4 + T5;
                         TO = T4 - T5;
                         T1R = ii[WS(is, 5)];
                         T1S = ii[WS(is, 15)];
                         T1T = T1R - T1S;
                         T2P = T1R + T1S;
                    }
                    T7 = T3 - T6;
                    T2Q = T2O - T2P;
                    T3h = T2O + T2P;
                    TD = T3 + T6;
                    TP = TN - TO;
                    T1U = T1Q - T1T;
                    T2l = T1Q + T1T;
                    T1d = TO + TN;
               }
               /* Load inputs 8, 18, 13, 3, 12, 2, 17, 7 and combine them. */
               {
                    E Tp, T1o, T13, T2u, Ts, T14, T1r, T2v, Tw, T1t, T18, T2x, Tz, T19, T1w;
                    E T2y;
                    {
                         E Tn, To, T11, T12;
                         Tn = ri[WS(is, 8)];
                         To = ri[WS(is, 18)];
                         Tp = Tn + To;
                         T1o = Tn - To;
                         T11 = ii[WS(is, 8)];
                         T12 = ii[WS(is, 18)];
                         T13 = T11 - T12;
                         T2u = T11 + T12;
                    }
                    {
                         E Tq, Tr, T1p, T1q;
                         Tq = ri[WS(is, 13)];
                         Tr = ri[WS(is, 3)];
                         Ts = Tq + Tr;
                         T14 = Tq - Tr;
                         T1p = ii[WS(is, 13)];
                         T1q = ii[WS(is, 3)];
                         T1r = T1p - T1q;
                         T2v = T1p + T1q;
                    }
                    {
                         E Tu, Tv, T16, T17;
                         Tu = ri[WS(is, 12)];
                         Tv = ri[WS(is, 2)];
                         Tw = Tu + Tv;
                         T1t = Tu - Tv;
                         T16 = ii[WS(is, 12)];
                         T17 = ii[WS(is, 2)];
                         T18 = T16 - T17;
                         T2x = T16 + T17;
                    }
                    {
                         E Tx, Ty, T1u, T1v;
                         Tx = ri[WS(is, 17)];
                         Ty = ri[WS(is, 7)];
                         Tz = Tx + Ty;
                         T19 = Tx - Ty;
                         T1u = ii[WS(is, 17)];
                         T1v = ii[WS(is, 7)];
                         T1w = T1u - T1v;
                         T2y = T1u + T1v;
                    }
                    Tt = Tp - Ts;
                    TA = Tw - Tz;
                    TB = Tt + TA;
                    T2w = T2u - T2v;
                    T2z = T2x - T2y;
                    T2S = T2w + T2z;
                    T35 = T2u + T2v;
                    T36 = T2x + T2y;
                    T3f = T35 + T36;
                    TH = Tp + Ts;
                    TI = Tw + Tz;
                    TJ = TH + TI;
                    T15 = T13 - T14;
                    T1a = T18 - T19;
                    T1b = T15 + T1a;
                    T1s = T1o - T1r;
                    T1x = T1t - T1w;
                    T1W = T1s + T1x;
                    T29 = T1o + T1r;
                    T2a = T1t + T1w;
                    T2j = T29 + T2a;
                    T1h = T14 + T13;
                    T1i = T19 + T18;
                    T1j = T1h + T1i;
               }
               /* Load inputs 4, 14, 9, 19, 16, 6, 1, 11 and combine them. */
               {
                    E Ta, T1z, TS, T2B, Td, TT, T1C, T2C, Th, T1E, TX, T2E, Tk, TY, T1H;
                    E T2F;
                    {
                         E T8, T9, TQ, TR;
                         T8 = ri[WS(is, 4)];
                         T9 = ri[WS(is, 14)];
                         Ta = T8 + T9;
                         T1z = T8 - T9;
                         TQ = ii[WS(is, 4)];
                         TR = ii[WS(is, 14)];
                         TS = TQ - TR;
                         T2B = TQ + TR;
                    }
                    {
                         E Tb, Tc, T1A, T1B;
                         Tb = ri[WS(is, 9)];
                         Tc = ri[WS(is, 19)];
                         Td = Tb + Tc;
                         TT = Tb - Tc;
                         T1A = ii[WS(is, 9)];
                         T1B = ii[WS(is, 19)];
                         T1C = T1A - T1B;
                         T2C = T1A + T1B;
                    }
                    {
                         E Tf, Tg, TV, TW;
                         Tf = ri[WS(is, 16)];
                         Tg = ri[WS(is, 6)];
                         Th = Tf + Tg;
                         T1E = Tf - Tg;
                         TV = ii[WS(is, 16)];
                         TW = ii[WS(is, 6)];
                         TX = TV - TW;
                         T2E = TV + TW;
                    }
                    {
                         E Ti, Tj, T1F, T1G;
                         Ti = ri[WS(is, 1)];
                         Tj = ri[WS(is, 11)];
                         Tk = Ti + Tj;
                         TY = Ti - Tj;
                         T1F = ii[WS(is, 1)];
                         T1G = ii[WS(is, 11)];
                         T1H = T1F - T1G;
                         T2F = T1F + T1G;
                    }
                    Te = Ta - Td;
                    Tl = Th - Tk;
                    Tm = Te + Tl;
                    T2D = T2B - T2C;
                    T2G = T2E - T2F;
                    T2R = T2D + T2G;
                    T32 = T2B + T2C;
                    T33 = T2E + T2F;
                    T3e = T32 + T33;
                    TE = Ta + Td;
                    TF = Th + Tk;
                    TG = TE + TF;
                    TU = TS - TT;
                    TZ = TX - TY;
                    T10 = TU + TZ;
                    T1D = T1z - T1C;
                    T1I = T1E - T1H;
                    T1V = T1D + T1I;
                    T26 = T1z + T1C;
                    T27 = T1E + T1H;
                    T2i = T26 + T27;
                    T1e = TT + TS;
                    T1f = TY + TX;
                    T1g = T1e + T1f;
               }
               /* Store ro[10], ro[14], ro[6], ro[2], ro[18]. */
               {
                    E T2s, TC, T2r, T2I, T2K, T2A, T2H, T2J, T2t;
                    T2s = KP559016994 * (Tm - TB);
                    TC = Tm + TB;
                    T2r = FNMS(KP250000000, TC, T7);
                    T2A = T2w - T2z;
                    T2H = T2D - T2G;
                    T2I = FNMS(KP587785252, T2H, KP951056516 * T2A);
                    T2K = FMA(KP951056516, T2H, KP587785252 * T2A);
                    ro[WS(os, 10)] = T7 + TC;
                    T2J = T2s + T2r;
                    ro[WS(os, 14)] = T2J - T2K;
                    ro[WS(os, 6)] = T2J + T2K;
                    T2t = T2r - T2s;
                    ro[WS(os, 2)] = T2t - T2I;
                    ro[WS(os, 18)] = T2t + T2I;
               }
               /* Store io[10], io[6], io[14], io[2], io[18]. */
               {
                    E T2V, T2T, T2U, T2N, T2Y, T2L, T2M, T2X, T2W;
                    T2V = KP559016994 * (T2R - T2S);
                    T2T = T2R + T2S;
                    T2U = FNMS(KP250000000, T2T, T2Q);
                    T2L = Tt - TA;
                    T2M = Te - Tl;
                    T2N = FNMS(KP587785252, T2M, KP951056516 * T2L);
                    T2Y = FMA(KP951056516, T2M, KP587785252 * T2L);
                    io[WS(os, 10)] = T2Q + T2T;
                    T2X = T2V + T2U;
                    io[WS(os, 6)] = T2X - T2Y;
                    io[WS(os, 14)] = T2Y + T2X;
                    T2W = T2U - T2V;
                    io[WS(os, 2)] = T2N + T2W;
                    io[WS(os, 18)] = T2W - T2N;
               }
               /* Store ro[0], ro[12], ro[8], ro[4], ro[16]. */
               {
                    E T2Z, TK, T30, T38, T3a, T34, T37, T39, T31;
                    T2Z = KP559016994 * (TG - TJ);
                    TK = TG + TJ;
                    T30 = FNMS(KP250000000, TK, TD);
                    T34 = T32 - T33;
                    T37 = T35 - T36;
                    T38 = FMA(KP951056516, T34, KP587785252 * T37);
                    T3a = FNMS(KP587785252, T34, KP951056516 * T37);
                    ro[0] = TD + TK;
                    T39 = T30 - T2Z;
                    ro[WS(os, 12)] = T39 - T3a;
                    ro[WS(os, 8)] = T39 + T3a;
                    T31 = T2Z + T30;
                    ro[WS(os, 4)] = T31 - T38;
                    ro[WS(os, 16)] = T31 + T38;
               }
               /* Store io[0], io[8], io[12], io[4], io[16]. */
               {
                    E T3g, T3i, T3j, T3d, T3m, T3b, T3c, T3l, T3k;
                    T3g = KP559016994 * (T3e - T3f);
                    T3i = T3e + T3f;
                    T3j = FNMS(KP250000000, T3i, T3h);
                    T3b = TE - TF;
                    T3c = TH - TI;
                    T3d = FMA(KP951056516, T3b, KP587785252 * T3c);
                    T3m = FNMS(KP587785252, T3b, KP951056516 * T3c);
                    io[0] = T3h + T3i;
                    T3l = T3j - T3g;
                    io[WS(os, 8)] = T3l - T3m;
                    io[WS(os, 12)] = T3m + T3l;
                    T3k = T3g + T3j;
                    io[WS(os, 4)] = T3d + T3k;
                    io[WS(os, 16)] = T3k - T3d;
               }
               /* Store io[5], io[13], io[17], io[1], io[9]. */
               {
                    E T23, T1c, T24, T2c, T2e, T28, T2b, T2d, T25;
                    T23 = KP559016994 * (T10 - T1b);
                    T1c = T10 + T1b;
                    T24 = FNMS(KP250000000, T1c, TP);
                    T28 = T26 - T27;
                    T2b = T29 - T2a;
                    T2c = FMA(KP951056516, T28, KP587785252 * T2b);
                    T2e = FNMS(KP587785252, T28, KP951056516 * T2b);
                    io[WS(os, 5)] = TP + T1c;
                    T2d = T24 - T23;
                    io[WS(os, 13)] = T2d - T2e;
                    io[WS(os, 17)] = T2d + T2e;
                    T25 = T23 + T24;
                    io[WS(os, 1)] = T25 - T2c;
                    io[WS(os, 9)] = T25 + T2c;
               }
               /* Store ro[5], ro[13], ro[17], ro[1], ro[9]. */
               {
                    E T2k, T2m, T2n, T2h, T2p, T2f, T2g, T2q, T2o;
                    T2k = KP559016994 * (T2i - T2j);
                    T2m = T2i + T2j;
                    T2n = FNMS(KP250000000, T2m, T2l);
                    T2f = TU - TZ;
                    T2g = T15 - T1a;
                    T2h = FMA(KP951056516, T2f, KP587785252 * T2g);
                    T2p = FNMS(KP587785252, T2f, KP951056516 * T2g);
                    ro[WS(os, 5)] = T2l + T2m;
                    T2q = T2n - T2k;
                    ro[WS(os, 13)] = T2p + T2q;
                    ro[WS(os, 17)] = T2q - T2p;
                    T2o = T2k + T2n;
                    ro[WS(os, 1)] = T2h + T2o;
                    ro[WS(os, 9)] = T2o - T2h;
               }
               /* Store io[15], io[11], io[19], io[3], io[7]. */
               {
                    E T1m, T1k, T1l, T1K, T1M, T1y, T1J, T1L, T1n;
                    T1m = KP559016994 * (T1g - T1j);
                    T1k = T1g + T1j;
                    T1l = FNMS(KP250000000, T1k, T1d);
                    T1y = T1s - T1x;
                    T1J = T1D - T1I;
                    T1K = FNMS(KP587785252, T1J, KP951056516 * T1y);
                    T1M = FMA(KP951056516, T1J, KP587785252 * T1y);
                    io[WS(os, 15)] = T1d + T1k;
                    T1L = T1m + T1l;
                    io[WS(os, 11)] = T1L - T1M;
                    io[WS(os, 19)] = T1L + T1M;
                    T1n = T1l - T1m;
                    io[WS(os, 3)] = T1n - T1K;
                    io[WS(os, 7)] = T1n + T1K;
               }
               /* Store ro[15], ro[11], ro[19], ro[3], ro[7]. */
               {
                    E T1Z, T1X, T1Y, T1P, T21, T1N, T1O, T22, T20;
                    T1Z = KP559016994 * (T1V - T1W);
                    T1X = T1V + T1W;
                    T1Y = FNMS(KP250000000, T1X, T1U);
                    T1N = T1h - T1i;
                    T1O = T1e - T1f;
                    T1P = FNMS(KP587785252, T1O, KP951056516 * T1N);
                    T21 = FMA(KP951056516, T1O, KP587785252 * T1N);
                    ro[WS(os, 15)] = T1U + T1X;
                    T22 = T1Z + T1Y;
                    ro[WS(os, 11)] = T21 + T22;
                    ro[WS(os, 19)] = T22 - T21;
                    T20 = T1Y - T1Z;
                    ro[WS(os, 3)] = T1P + T20;
                    ro[WS(os, 7)] = T20 - T1P;
               }
          }
     }
}
713

    
714
/*
 * Descriptor passed to X(kdft_register) together with n1_20.  It carries the
 * value 20, the codelet name "n1_20", and an operation-count group
 * {184, 24, 24, 0} whose first three entries match the additions /
 * multiplications / fused multiply-add counts quoted in the generated
 * comment for this non-FMA variant.  GENUS is defined outside this file.
 * NOTE(review): the meaning of the fourth count and of the four trailing
 * zeros is not visible here -- confirm against the kdft_desc declaration.
 */
static const kdft_desc desc = { 20, "n1_20", {184, 24, 24, 0}, &GENUS, 0, 0, 0, 0 };
715

    
716
/*
 * Registration entry point for this codelet: hands the n1_20 kernel and its
 * descriptor to planner p by calling X(kdft_register).  X() is a project
 * macro defined outside this file.
 */
void X(codelet_n1_20) (planner *p) {
     X(kdft_register) (p, n1_20, &desc);
}
719

    
720
#endif