To check out this repository, please `hg clone` the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds.
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t1_15.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (21.3 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:14 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 15 -name t1_15 -include dft/scalar/t.h */
29

    
30
/*
31
 * This function contains 184 FP additions, 140 FP multiplications,
32
 * (or, 72 additions, 28 multiplications, 112 fused multiply/add),
33
 * 51 stack variables, 6 constants, and 60 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36

    
37
/*
 * t1_15 (FMA variant): in-place size-15 twiddle codelet.
 *
 * This code is machine-generated (genfft gen_twiddle, -n 15); the statement
 * order and temporaries are reproduced exactly.  Stray line-number lines
 * left behind by a web-page extraction have been removed so that the block
 * is valid C again; no arithmetic was changed.
 *
 * Parameters:
 *   ri, ii  real and imaginary parts of the data; both read and written.
 *   W       twiddle table; 28 values (14 pairs) are consumed per iteration.
 *   rs      stride handed to WS(rs, k) to address point k of a transform.
 *   mb, me  half-open range [mb, me) of iterations to run.
 *   ms      pointer step applied to ri and ii after every iteration.
 *
 * Per iteration the loop
 *   1. loads point 0 from ri[0]/ii[0] and points k = 1..14 from
 *      ri/ii[WS(rs, k)], scaling point k by the pair (W[2k-2], W[2k-1]);
 *   2. combines the points in five groups of three
 *      (0,5,10), (9,14,4), (3,13,8), (12,7,2), (6,1,11)
 *      using the constants 1/2 and sqrt(3)/2;
 *   3. combines the group results five at a time using the constants
 *      1/4, sqrt(5)/4, (sqrt(5)-1)/2 and sin(2*pi/5), and stores all 15
 *      results back to the locations the inputs were read from.
 *
 * NOTE(review): the formulas in the comments below assume
 * FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b; both macros, as well
 * as E, DK, WS and MAKE_VOLATILE_STRIDE, are defined outside this file --
 * confirm against the project headers.
 */
static void t1_15(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* sin(2*pi/5) */
     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* sqrt(5)/4 */
     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
     DK(KP618033988, +0.618033988749894848204586834365638117720309180); /* (sqrt(5)-1)/2 */
     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
     {
          INT m;
          /* W is first offset by mb * 28, then advanced by 28 per iteration,
             in step with ri and ii which advance by ms. */
          for (m = mb, W = W + (mb * 28); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 28, MAKE_VOLATILE_STRIDE(30, rs)) {
               E T1, T3j, T1G, T3u, Te, T1B, T3i, T3t, T1y, T2i, T2a, T2M, T37, T2V, Tz;
               E T2e, T1O, T2t, T39, T2X, TT, T2f, T1V, T2z, T3a, T2Y, T1e, T2h, T23, T2G;
               E T36, T2U;
               /* Group (0, 5, 10): point 0 is used as-is, points 5 and 10 are
                  scaled by (W[8], W[9]) and (W[18], W[19]). */
               {
                    E T7, T1D, Td, T1F;
                    T1 = ri[0];
                    T3j = ii[0];
                    {
                         E T3, T6, T4, T1C, T2, T5;
                         T3 = ri[WS(rs, 5)];
                         T6 = ii[WS(rs, 5)];
                         T2 = W[8];
                         T4 = T2 * T3;
                         T1C = T2 * T6;
                         T5 = W[9];
                         /* re = W[8]*re5 + W[9]*im5, im = W[8]*im5 - W[9]*re5
                            (under the FMA/FNMS assumption in the header). */
                         T7 = FMA(T5, T6, T4);
                         T1D = FNMS(T5, T3, T1C);
                    }
                    {
                         E T9, Tc, Ta, T1E, T8, Tb;
                         T9 = ri[WS(rs, 10)];
                         Tc = ii[WS(rs, 10)];
                         T8 = W[18];
                         Ta = T8 * T9;
                         T1E = T8 * Tc;
                         Tb = W[19];
                         Td = FMA(Tb, Tc, Ta);
                         T1F = FNMS(Tb, T9, T1E);
                    }
                    T1G = T1D - T1F;
                    T3u = Td - T7;
                    Te = T7 + Td;
                    T1B = FNMS(KP500000000, Te, T1);
                    T3i = T1D + T1F;
                    T3t = FNMS(KP500000000, T3i, T3j);
               }
               /* Group (9, 14, 4): twiddles W[16..17], W[26..27], W[6..7]. */
               {
                    E T1k, T2I, T1w, T28, T1q, T26;
                    {
                         E T1g, T1j, T1h, T2H, T1f, T1i;
                         T1g = ri[WS(rs, 9)];
                         T1j = ii[WS(rs, 9)];
                         T1f = W[16];
                         T1h = T1f * T1g;
                         T2H = T1f * T1j;
                         T1i = W[17];
                         T1k = FMA(T1i, T1j, T1h);
                         T2I = FNMS(T1i, T1g, T2H);
                    }
                    {
                         E T1s, T1v, T1t, T27, T1r, T1u;
                         T1s = ri[WS(rs, 4)];
                         T1v = ii[WS(rs, 4)];
                         T1r = W[6];
                         T1t = T1r * T1s;
                         T27 = T1r * T1v;
                         T1u = W[7];
                         T1w = FMA(T1u, T1v, T1t);
                         T28 = FNMS(T1u, T1s, T27);
                    }
                    {
                         E T1m, T1p, T1n, T25, T1l, T1o;
                         T1m = ri[WS(rs, 14)];
                         T1p = ii[WS(rs, 14)];
                         T1l = W[26];
                         T1n = T1l * T1m;
                         T25 = T1l * T1p;
                         T1o = W[27];
                         T1q = FMA(T1o, T1p, T1n);
                         T26 = FNMS(T1o, T1m, T25);
                    }
                    {
                         E T29, T1x, T24, T2L, T2J, T2K;
                         T29 = T26 - T28;
                         T1x = T1q + T1w;
                         T24 = FNMS(KP500000000, T1x, T1k);
                         T1y = T1k + T1x;
                         T2i = FMA(KP866025403, T29, T24);
                         T2a = FNMS(KP866025403, T29, T24);
                         T2L = T1w - T1q;
                         T2J = T26 + T28;
                         T2K = FNMS(KP500000000, T2J, T2I);
                         T2M = FMA(KP866025403, T2L, T2K);
                         T37 = T2I + T2J;
                         T2V = FNMS(KP866025403, T2L, T2K);
                    }
               }
               /* Group (3, 13, 8): twiddles W[4..5], W[24..25], W[14..15]. */
               {
                    E Tl, T2p, Tx, T1M, Tr, T1K;
                    {
                         E Th, Tk, Ti, T2o, Tg, Tj;
                         Th = ri[WS(rs, 3)];
                         Tk = ii[WS(rs, 3)];
                         Tg = W[4];
                         Ti = Tg * Th;
                         T2o = Tg * Tk;
                         Tj = W[5];
                         Tl = FMA(Tj, Tk, Ti);
                         T2p = FNMS(Tj, Th, T2o);
                    }
                    {
                         E Tt, Tw, Tu, T1L, Ts, Tv;
                         Tt = ri[WS(rs, 13)];
                         Tw = ii[WS(rs, 13)];
                         Ts = W[24];
                         Tu = Ts * Tt;
                         T1L = Ts * Tw;
                         Tv = W[25];
                         Tx = FMA(Tv, Tw, Tu);
                         T1M = FNMS(Tv, Tt, T1L);
                    }
                    {
                         E Tn, Tq, To, T1J, Tm, Tp;
                         Tn = ri[WS(rs, 8)];
                         Tq = ii[WS(rs, 8)];
                         Tm = W[14];
                         To = Tm * Tn;
                         T1J = Tm * Tq;
                         Tp = W[15];
                         Tr = FMA(Tp, Tq, To);
                         T1K = FNMS(Tp, Tn, T1J);
                    }
                    {
                         E T1N, Ty, T1I, T2s, T2q, T2r;
                         T1N = T1K - T1M;
                         Ty = Tr + Tx;
                         T1I = FNMS(KP500000000, Ty, Tl);
                         Tz = Tl + Ty;
                         T2e = FMA(KP866025403, T1N, T1I);
                         T1O = FNMS(KP866025403, T1N, T1I);
                         T2s = Tx - Tr;
                         T2q = T1K + T1M;
                         T2r = FNMS(KP500000000, T2q, T2p);
                         T2t = FMA(KP866025403, T2s, T2r);
                         T39 = T2p + T2q;
                         T2X = FNMS(KP866025403, T2s, T2r);
                    }
               }
               /* Group (12, 7, 2): twiddles W[22..23], W[12..13], W[2..3]. */
               {
                    E TF, T2v, TR, T1T, TL, T1R;
                    {
                         E TB, TE, TC, T2u, TA, TD;
                         TB = ri[WS(rs, 12)];
                         TE = ii[WS(rs, 12)];
                         TA = W[22];
                         TC = TA * TB;
                         T2u = TA * TE;
                         TD = W[23];
                         TF = FMA(TD, TE, TC);
                         T2v = FNMS(TD, TB, T2u);
                    }
                    {
                         E TN, TQ, TO, T1S, TM, TP;
                         TN = ri[WS(rs, 7)];
                         TQ = ii[WS(rs, 7)];
                         TM = W[12];
                         TO = TM * TN;
                         T1S = TM * TQ;
                         TP = W[13];
                         TR = FMA(TP, TQ, TO);
                         T1T = FNMS(TP, TN, T1S);
                    }
                    {
                         E TH, TK, TI, T1Q, TG, TJ;
                         TH = ri[WS(rs, 2)];
                         TK = ii[WS(rs, 2)];
                         TG = W[2];
                         TI = TG * TH;
                         T1Q = TG * TK;
                         TJ = W[3];
                         TL = FMA(TJ, TK, TI);
                         T1R = FNMS(TJ, TH, T1Q);
                    }
                    {
                         E T1U, TS, T1P, T2y, T2w, T2x;
                         T1U = T1R - T1T;
                         TS = TL + TR;
                         T1P = FNMS(KP500000000, TS, TF);
                         TT = TF + TS;
                         T2f = FMA(KP866025403, T1U, T1P);
                         T1V = FNMS(KP866025403, T1U, T1P);
                         T2y = TR - TL;
                         T2w = T1R + T1T;
                         T2x = FNMS(KP500000000, T2w, T2v);
                         T2z = FMA(KP866025403, T2y, T2x);
                         T3a = T2v + T2w;
                         T2Y = FNMS(KP866025403, T2y, T2x);
                    }
               }
               /* Group (6, 1, 11): twiddles W[10..11], W[0..1], W[20..21]. */
               {
                    E T10, T2C, T1c, T21, T16, T1Z;
                    {
                         E TW, TZ, TX, T2B, TV, TY;
                         TW = ri[WS(rs, 6)];
                         TZ = ii[WS(rs, 6)];
                         TV = W[10];
                         TX = TV * TW;
                         T2B = TV * TZ;
                         TY = W[11];
                         T10 = FMA(TY, TZ, TX);
                         T2C = FNMS(TY, TW, T2B);
                    }
                    {
                         E T18, T1b, T19, T20, T17, T1a;
                         T18 = ri[WS(rs, 1)];
                         T1b = ii[WS(rs, 1)];
                         T17 = W[0];
                         T19 = T17 * T18;
                         T20 = T17 * T1b;
                         T1a = W[1];
                         T1c = FMA(T1a, T1b, T19);
                         T21 = FNMS(T1a, T18, T20);
                    }
                    {
                         E T12, T15, T13, T1Y, T11, T14;
                         T12 = ri[WS(rs, 11)];
                         T15 = ii[WS(rs, 11)];
                         T11 = W[20];
                         T13 = T11 * T12;
                         T1Y = T11 * T15;
                         T14 = W[21];
                         T16 = FMA(T14, T15, T13);
                         T1Z = FNMS(T14, T12, T1Y);
                    }
                    {
                         E T22, T1d, T1X, T2F, T2D, T2E;
                         T22 = T1Z - T21;
                         T1d = T16 + T1c;
                         T1X = FNMS(KP500000000, T1d, T10);
                         T1e = T10 + T1d;
                         T2h = FMA(KP866025403, T22, T1X);
                         T23 = FNMS(KP866025403, T22, T1X);
                         T2F = T1c - T16;
                         T2D = T1Z + T21;
                         T2E = FNMS(KP500000000, T2D, T2C);
                         T2G = FMA(KP866025403, T2F, T2E);
                         T36 = T2C + T2D;
                         T2U = FNMS(KP866025403, T2F, T2E);
                    }
               }
               /* Real outputs 0, 9, 6, 12, 3 (built from the group sums). */
               {
                    E T3c, T3e, Tf, T1A, T33, T34, T3d, T35;
                    {
                         E T38, T3b, TU, T1z;
                         T38 = T36 - T37;
                         T3b = T39 - T3a;
                         T3c = FNMS(KP618033988, T3b, T38);
                         T3e = FMA(KP618033988, T38, T3b);
                         Tf = T1 + Te;
                         TU = Tz + TT;
                         T1z = T1e + T1y;
                         T1A = TU + T1z;
                         T33 = FNMS(KP250000000, T1A, Tf);
                         T34 = TU - T1z;
                    }
                    ri[0] = Tf + T1A;
                    T3d = FMA(KP559016994, T34, T33);
                    ri[WS(rs, 9)] = FNMS(KP951056516, T3e, T3d);
                    ri[WS(rs, 6)] = FMA(KP951056516, T3e, T3d);
                    T35 = FNMS(KP559016994, T34, T33);
                    ri[WS(rs, 12)] = FNMS(KP951056516, T3c, T35);
                    ri[WS(rs, 3)] = FMA(KP951056516, T3c, T35);
               }
               /* Imaginary outputs 0, 6, 9, 3, 12. */
               {
                    E T3q, T3s, T3k, T3h, T3l, T3m, T3r, T3n;
                    {
                         E T3o, T3p, T3f, T3g;
                         T3o = T1e - T1y;
                         T3p = Tz - TT;
                         T3q = FNMS(KP618033988, T3p, T3o);
                         T3s = FMA(KP618033988, T3o, T3p);
                         T3k = T3i + T3j;
                         T3f = T39 + T3a;
                         T3g = T36 + T37;
                         T3h = T3f + T3g;
                         T3l = FNMS(KP250000000, T3h, T3k);
                         T3m = T3f - T3g;
                    }
                    ii[0] = T3h + T3k;
                    T3r = FMA(KP559016994, T3m, T3l);
                    ii[WS(rs, 6)] = FNMS(KP951056516, T3s, T3r);
                    ii[WS(rs, 9)] = FMA(KP951056516, T3s, T3r);
                    T3n = FNMS(KP559016994, T3m, T3l);
                    ii[WS(rs, 3)] = FNMS(KP951056516, T3q, T3n);
                    ii[WS(rs, 12)] = FMA(KP951056516, T3q, T3n);
               }
               /* Real outputs 5, 14, 11, 2, 8. */
               {
                    E T30, T32, T1H, T2c, T2R, T2S, T31, T2T;
                    {
                         E T2W, T2Z, T1W, T2b;
                         T2W = T2U - T2V;
                         T2Z = T2X - T2Y;
                         T30 = FNMS(KP618033988, T2Z, T2W);
                         T32 = FMA(KP618033988, T2W, T2Z);
                         T1H = FNMS(KP866025403, T1G, T1B);
                         T1W = T1O + T1V;
                         T2b = T23 + T2a;
                         T2c = T1W + T2b;
                         T2R = FNMS(KP250000000, T2c, T1H);
                         T2S = T1W - T2b;
                    }
                    ri[WS(rs, 5)] = T1H + T2c;
                    T31 = FMA(KP559016994, T2S, T2R);
                    ri[WS(rs, 14)] = FNMS(KP951056516, T32, T31);
                    ri[WS(rs, 11)] = FMA(KP951056516, T32, T31);
                    T2T = FNMS(KP559016994, T2S, T2R);
                    ri[WS(rs, 2)] = FNMS(KP951056516, T30, T2T);
                    ri[WS(rs, 8)] = FMA(KP951056516, T30, T2T);
               }
               /* Imaginary outputs 5, 11, 14, 2, 8. */
               {
                    E T3Q, T3S, T3H, T3K, T3L, T3M, T3R, T3N;
                    {
                         E T3O, T3P, T3I, T3J;
                         T3O = T23 - T2a;
                         T3P = T1O - T1V;
                         T3Q = FNMS(KP618033988, T3P, T3O);
                         T3S = FMA(KP618033988, T3O, T3P);
                         T3H = FNMS(KP866025403, T3u, T3t);
                         T3I = T2X + T2Y;
                         T3J = T2U + T2V;
                         T3K = T3I + T3J;
                         T3L = FNMS(KP250000000, T3K, T3H);
                         T3M = T3I - T3J;
                    }
                    ii[WS(rs, 5)] = T3K + T3H;
                    T3R = FMA(KP559016994, T3M, T3L);
                    ii[WS(rs, 11)] = FNMS(KP951056516, T3S, T3R);
                    ii[WS(rs, 14)] = FMA(KP951056516, T3S, T3R);
                    T3N = FNMS(KP559016994, T3M, T3L);
                    ii[WS(rs, 2)] = FMA(KP951056516, T3Q, T3N);
                    ii[WS(rs, 8)] = FNMS(KP951056516, T3Q, T3N);
               }
               /* Imaginary outputs 10, 7, 13, 1, 4. */
               {
                    E T3E, T3G, T3v, T3y, T3z, T3A, T3F, T3B;
                    {
                         E T3C, T3D, T3w, T3x;
                         T3C = T2e - T2f;
                         T3D = T2h - T2i;
                         T3E = FMA(KP618033988, T3D, T3C);
                         T3G = FNMS(KP618033988, T3C, T3D);
                         T3v = FMA(KP866025403, T3u, T3t);
                         T3w = T2t + T2z;
                         T3x = T2G + T2M;
                         T3y = T3w + T3x;
                         T3z = FNMS(KP250000000, T3y, T3v);
                         T3A = T3w - T3x;
                    }
                    ii[WS(rs, 10)] = T3y + T3v;
                    T3F = FNMS(KP559016994, T3A, T3z);
                    ii[WS(rs, 7)] = FMA(KP951056516, T3G, T3F);
                    ii[WS(rs, 13)] = FNMS(KP951056516, T3G, T3F);
                    T3B = FMA(KP559016994, T3A, T3z);
                    ii[WS(rs, 1)] = FNMS(KP951056516, T3E, T3B);
                    ii[WS(rs, 4)] = FMA(KP951056516, T3E, T3B);
               }
               /* Real outputs 10, 7, 13, 4, 1. */
               {
                    E T2O, T2Q, T2d, T2k, T2l, T2m, T2P, T2n;
                    {
                         E T2A, T2N, T2g, T2j;
                         T2A = T2t - T2z;
                         T2N = T2G - T2M;
                         T2O = FMA(KP618033988, T2N, T2A);
                         T2Q = FNMS(KP618033988, T2A, T2N);
                         T2d = FMA(KP866025403, T1G, T1B);
                         T2g = T2e + T2f;
                         T2j = T2h + T2i;
                         T2k = T2g + T2j;
                         T2l = FNMS(KP250000000, T2k, T2d);
                         T2m = T2g - T2j;
                    }
                    ri[WS(rs, 10)] = T2d + T2k;
                    T2P = FNMS(KP559016994, T2m, T2l);
                    ri[WS(rs, 7)] = FNMS(KP951056516, T2Q, T2P);
                    ri[WS(rs, 13)] = FMA(KP951056516, T2Q, T2P);
                    T2n = FMA(KP559016994, T2m, T2l);
                    ri[WS(rs, 4)] = FNMS(KP951056516, T2O, T2n);
                    ri[WS(rs, 1)] = FMA(KP951056516, T2O, T2n);
               }
          }
     }
}
429

    
430
static const tw_instr twinstr[] = {
431
     {TW_FULL, 0, 15},
432
     {TW_NEXT, 1, 0}
433
};
434

    
435
/*
 * Descriptor handed to the planner together with t1_15: the value 15, the
 * codelet name "t1_15", the twiddle table, the address of GENUS, and the
 * counts {72, 28, 112, 0}, which match the 72 additions, 28 multiplications
 * and 112 fused multiply/adds quoted in the generator comment above the
 * function.
 * NOTE(review): the meaning of the three trailing zeros is not visible
 * here -- confirm against the ct_desc definition.
 */
static const ct_desc desc = { 15, "t1_15", twinstr, &GENUS, {72, 28, 112, 0}, 0, 0, 0 };
436

    
437
/*
 * Registration entry point: passes the t1_15 kernel and its descriptor to
 * X(kdft_dit_register) for the given planner.  Stray line-number lines left
 * by a web-page extraction were removed from inside the body.
 */
void X(codelet_t1_15) (planner *p) {
     X(kdft_dit_register) (p, t1_15, &desc);
}
440
#else
441

    
442
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 15 -name t1_15 -include dft/scalar/t.h */
443

    
444
/*
445
 * This function contains 184 FP additions, 112 FP multiplications,
446
 * (or, 128 additions, 56 multiplications, 56 fused multiply/add),
447
 * 65 stack variables, 6 constants, and 60 memory accesses
448
 */
449
#include "dft/scalar/t.h"
450

    
451
static void t1_15(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
452
{
453
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
454
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
455
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
456
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
457
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
458
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
459
     {
460
          INT m;
461
          for (m = mb, W = W + (mb * 28); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 28, MAKE_VOLATILE_STRIDE(30, rs)) {
462
               E T1q, T34, Td, T1n, T2S, T35, T13, T1k, T1l, T2E, T2F, T2O, T1H, T1T, T2k;
463
               E T2t, T2f, T2s, T1M, T1U, Tu, TL, TM, T2H, T2I, T2N, T1w, T1Q, T29, T2w;
464
               E T24, T2v, T1B, T1R;
465
               {
466
                    E T1, T2R, T6, T1o, Tb, T1p, Tc, T2Q;
467
                    T1 = ri[0];
468
                    T2R = ii[0];
469
                    {
470
                         E T3, T5, T2, T4;
471
                         T3 = ri[WS(rs, 5)];
472
                         T5 = ii[WS(rs, 5)];
473
                         T2 = W[8];
474
                         T4 = W[9];
475
                         T6 = FMA(T2, T3, T4 * T5);
476
                         T1o = FNMS(T4, T3, T2 * T5);
477
                    }
478
                    {
479
                         E T8, Ta, T7, T9;
480
                         T8 = ri[WS(rs, 10)];
481
                         Ta = ii[WS(rs, 10)];
482
                         T7 = W[18];
483
                         T9 = W[19];
484
                         Tb = FMA(T7, T8, T9 * Ta);
485
                         T1p = FNMS(T9, T8, T7 * Ta);
486
                    }
487
                    T1q = KP866025403 * (T1o - T1p);
488
                    T34 = KP866025403 * (Tb - T6);
489
                    Tc = T6 + Tb;
490
                    Td = T1 + Tc;
491
                    T1n = FNMS(KP500000000, Tc, T1);
492
                    T2Q = T1o + T1p;
493
                    T2S = T2Q + T2R;
494
                    T35 = FNMS(KP500000000, T2Q, T2R);
495
               }
496
               {
497
                    E TR, T2c, T18, T2h, TW, T1E, T11, T1F, T12, T2d, T1d, T1J, T1i, T1K, T1j;
498
                    E T2i;
499
                    {
500
                         E TO, TQ, TN, TP;
501
                         TO = ri[WS(rs, 6)];
502
                         TQ = ii[WS(rs, 6)];
503
                         TN = W[10];
504
                         TP = W[11];
505
                         TR = FMA(TN, TO, TP * TQ);
506
                         T2c = FNMS(TP, TO, TN * TQ);
507
                    }
508
                    {
509
                         E T15, T17, T14, T16;
510
                         T15 = ri[WS(rs, 9)];
511
                         T17 = ii[WS(rs, 9)];
512
                         T14 = W[16];
513
                         T16 = W[17];
514
                         T18 = FMA(T14, T15, T16 * T17);
515
                         T2h = FNMS(T16, T15, T14 * T17);
516
                    }
517
                    {
518
                         E TT, TV, TS, TU;
519
                         TT = ri[WS(rs, 11)];
520
                         TV = ii[WS(rs, 11)];
521
                         TS = W[20];
522
                         TU = W[21];
523
                         TW = FMA(TS, TT, TU * TV);
524
                         T1E = FNMS(TU, TT, TS * TV);
525
                    }
526
                    {
527
                         E TY, T10, TX, TZ;
528
                         TY = ri[WS(rs, 1)];
529
                         T10 = ii[WS(rs, 1)];
530
                         TX = W[0];
531
                         TZ = W[1];
532
                         T11 = FMA(TX, TY, TZ * T10);
533
                         T1F = FNMS(TZ, TY, TX * T10);
534
                    }
535
                    T12 = TW + T11;
536
                    T2d = T1E + T1F;
537
                    {
538
                         E T1a, T1c, T19, T1b;
539
                         T1a = ri[WS(rs, 14)];
540
                         T1c = ii[WS(rs, 14)];
541
                         T19 = W[26];
542
                         T1b = W[27];
543
                         T1d = FMA(T19, T1a, T1b * T1c);
544
                         T1J = FNMS(T1b, T1a, T19 * T1c);
545
                    }
546
                    {
547
                         E T1f, T1h, T1e, T1g;
548
                         T1f = ri[WS(rs, 4)];
549
                         T1h = ii[WS(rs, 4)];
550
                         T1e = W[6];
551
                         T1g = W[7];
552
                         T1i = FMA(T1e, T1f, T1g * T1h);
553
                         T1K = FNMS(T1g, T1f, T1e * T1h);
554
                    }
555
                    T1j = T1d + T1i;
556
                    T2i = T1J + T1K;
557
                    {
558
                         E T1D, T1G, T2g, T2j;
559
                         T13 = TR + T12;
560
                         T1k = T18 + T1j;
561
                         T1l = T13 + T1k;
562
                         T2E = T2c + T2d;
563
                         T2F = T2h + T2i;
564
                         T2O = T2E + T2F;
565
                         T1D = FNMS(KP500000000, T12, TR);
566
                         T1G = KP866025403 * (T1E - T1F);
567
                         T1H = T1D - T1G;
568
                         T1T = T1D + T1G;
569
                         T2g = KP866025403 * (T1i - T1d);
570
                         T2j = FNMS(KP500000000, T2i, T2h);
571
                         T2k = T2g + T2j;
572
                         T2t = T2j - T2g;
573
                         {
574
                              E T2b, T2e, T1I, T1L;
575
                              T2b = KP866025403 * (T11 - TW);
576
                              T2e = FNMS(KP500000000, T2d, T2c);
577
                              T2f = T2b + T2e;
578
                              T2s = T2e - T2b;
579
                              T1I = FNMS(KP500000000, T1j, T18);
580
                              T1L = KP866025403 * (T1J - T1K);
581
                              T1M = T1I - T1L;
582
                              T1U = T1I + T1L;
583
                         }
584
                    }
585
               }
586
               {
587
                    E Ti, T21, Tz, T26, Tn, T1t, Ts, T1u, Tt, T22, TE, T1y, TJ, T1z, TK;
588
                    E T27;
589
                    {
590
                         E Tf, Th, Te, Tg;
591
                         Tf = ri[WS(rs, 3)];
592
                         Th = ii[WS(rs, 3)];
593
                         Te = W[4];
594
                         Tg = W[5];
595
                         Ti = FMA(Te, Tf, Tg * Th);
596
                         T21 = FNMS(Tg, Tf, Te * Th);
597
                    }
598
                    {
599
                         E Tw, Ty, Tv, Tx;
600
                         Tw = ri[WS(rs, 12)];
601
                         Ty = ii[WS(rs, 12)];
602
                         Tv = W[22];
603
                         Tx = W[23];
604
                         Tz = FMA(Tv, Tw, Tx * Ty);
605
                         T26 = FNMS(Tx, Tw, Tv * Ty);
606
                    }
607
                    {
608
                         E Tk, Tm, Tj, Tl;
609
                         Tk = ri[WS(rs, 8)];
610
                         Tm = ii[WS(rs, 8)];
611
                         Tj = W[14];
612
                         Tl = W[15];
613
                         Tn = FMA(Tj, Tk, Tl * Tm);
614
                         T1t = FNMS(Tl, Tk, Tj * Tm);
615
                    }
616
                    {
617
                         E Tp, Tr, To, Tq;
618
                         Tp = ri[WS(rs, 13)];
619
                         Tr = ii[WS(rs, 13)];
620
                         To = W[24];
621
                         Tq = W[25];
622
                         Ts = FMA(To, Tp, Tq * Tr);
623
                         T1u = FNMS(Tq, Tp, To * Tr);
624
                    }
625
                    Tt = Tn + Ts;
626
                    T22 = T1t + T1u;
627
                    {
628
                         E TB, TD, TA, TC;
629
                         TB = ri[WS(rs, 2)];
630
                         TD = ii[WS(rs, 2)];
631
                         TA = W[2];
632
                         TC = W[3];
633
                         TE = FMA(TA, TB, TC * TD);
634
                         T1y = FNMS(TC, TB, TA * TD);
635
                    }
636
                    {
637
                         E TG, TI, TF, TH;
638
                         TG = ri[WS(rs, 7)];
639
                         TI = ii[WS(rs, 7)];
640
                         TF = W[12];
641
                         TH = W[13];
642
                         TJ = FMA(TF, TG, TH * TI);
643
                         T1z = FNMS(TH, TG, TF * TI);
644
                    }
645
                    TK = TE + TJ;
646
                    T27 = T1y + T1z;
647
                    {
648
                         E T1s, T1v, T25, T28;
649
                         Tu = Ti + Tt;
650
                         TL = Tz + TK;
651
                         TM = Tu + TL;
652
                         T2H = T21 + T22;
653
                         T2I = T26 + T27;
654
                         T2N = T2H + T2I;
655
                         T1s = FNMS(KP500000000, Tt, Ti);
656
                         T1v = KP866025403 * (T1t - T1u);
657
                         T1w = T1s - T1v;
658
                         T1Q = T1s + T1v;
659
                         T25 = KP866025403 * (TJ - TE);
660
                         T28 = FNMS(KP500000000, T27, T26);
661
                         T29 = T25 + T28;
662
                         T2w = T28 - T25;
663
                         {
664
                              E T20, T23, T1x, T1A;
665
                              T20 = KP866025403 * (Ts - Tn);
666
                              T23 = FNMS(KP500000000, T22, T21);
667
                              T24 = T20 + T23;
668
                              T2v = T23 - T20;
669
                              T1x = FNMS(KP500000000, TK, Tz);
670
                              T1A = KP866025403 * (T1y - T1z);
671
                              T1B = T1x - T1A;
672
                              T1R = T1x + T1A;
673
                         }
674
                    }
675
               }
676
               {
677
                    E T2C, T1m, T2B, T2K, T2M, T2G, T2J, T2L, T2D;
678
                    T2C = KP559016994 * (TM - T1l);
679
                    T1m = TM + T1l;
680
                    T2B = FNMS(KP250000000, T1m, Td);
681
                    T2G = T2E - T2F;
682
                    T2J = T2H - T2I;
683
                    T2K = FNMS(KP587785252, T2J, KP951056516 * T2G);
684
                    T2M = FMA(KP951056516, T2J, KP587785252 * T2G);
685
                    ri[0] = Td + T1m;
686
                    T2L = T2C + T2B;
687
                    ri[WS(rs, 9)] = T2L - T2M;
688
                    ri[WS(rs, 6)] = T2L + T2M;
689
                    T2D = T2B - T2C;
690
                    ri[WS(rs, 12)] = T2D - T2K;
691
                    ri[WS(rs, 3)] = T2D + T2K;
692
               }
693
               {
694
                    E T2U, T2P, T2T, T2Y, T30, T2W, T2X, T2Z, T2V;
695
                    T2U = KP559016994 * (T2N - T2O);
696
                    T2P = T2N + T2O;
697
                    T2T = FNMS(KP250000000, T2P, T2S);
698
                    T2W = T13 - T1k;
699
                    T2X = Tu - TL;
700
                    T2Y = FNMS(KP587785252, T2X, KP951056516 * T2W);
701
                    T30 = FMA(KP951056516, T2X, KP587785252 * T2W);
702
                    ii[0] = T2P + T2S;
703
                    T2Z = T2U + T2T;
704
                    ii[WS(rs, 6)] = T2Z - T30;
705
                    ii[WS(rs, 9)] = T30 + T2Z;
706
                    T2V = T2T - T2U;
707
                    ii[WS(rs, 3)] = T2V - T2Y;
708
                    ii[WS(rs, 12)] = T2Y + T2V;
709
               }
710
               {
711
                    E T2y, T2A, T1r, T1O, T2p, T2q, T2z, T2r;
712
                    {
713
                         E T2u, T2x, T1C, T1N;
714
                         T2u = T2s - T2t;
715
                         T2x = T2v - T2w;
716
                         T2y = FNMS(KP587785252, T2x, KP951056516 * T2u);
717
                         T2A = FMA(KP951056516, T2x, KP587785252 * T2u);
718
                         T1r = T1n - T1q;
719
                         T1C = T1w + T1B;
720
                         T1N = T1H + T1M;
721
                         T1O = T1C + T1N;
722
                         T2p = FNMS(KP250000000, T1O, T1r);
723
                         T2q = KP559016994 * (T1C - T1N);
724
                    }
725
                    ri[WS(rs, 5)] = T1r + T1O;
726
                    T2z = T2q + T2p;
727
                    ri[WS(rs, 14)] = T2z - T2A;
728
                    ri[WS(rs, 11)] = T2z + T2A;
729
                    T2r = T2p - T2q;
730
                    ri[WS(rs, 2)] = T2r - T2y;
731
                    ri[WS(rs, 8)] = T2r + T2y;
732
               }
733
               {
734
                    E T3h, T3q, T3i, T3l, T3m, T3n, T3p, T3o;
735
                    {
736
                         E T3f, T3g, T3j, T3k;
737
                         T3f = T1H - T1M;
738
                         T3g = T1w - T1B;
739
                         T3h = FNMS(KP587785252, T3g, KP951056516 * T3f);
740
                         T3q = FMA(KP951056516, T3g, KP587785252 * T3f);
741
                         T3i = T35 - T34;
742
                         T3j = T2v + T2w;
743
                         T3k = T2s + T2t;
744
                         T3l = T3j + T3k;
745
                         T3m = FNMS(KP250000000, T3l, T3i);
746
                         T3n = KP559016994 * (T3j - T3k);
747
                    }
748
                    ii[WS(rs, 5)] = T3l + T3i;
749
                    T3p = T3n + T3m;
750
                    ii[WS(rs, 11)] = T3p - T3q;
751
                    ii[WS(rs, 14)] = T3q + T3p;
752
                    T3o = T3m - T3n;
753
                    ii[WS(rs, 2)] = T3h + T3o;
754
                    ii[WS(rs, 8)] = T3o - T3h;
755
               }
756
               {
757
                    E T3c, T3d, T36, T37, T33, T38, T3e, T39;
758
                    {
759
                         E T3a, T3b, T31, T32;
760
                         T3a = T1Q - T1R;
761
                         T3b = T1T - T1U;
762
                         T3c = FMA(KP951056516, T3a, KP587785252 * T3b);
763
                         T3d = FNMS(KP587785252, T3a, KP951056516 * T3b);
764
                         T36 = T34 + T35;
765
                         T31 = T24 + T29;
766
                         T32 = T2f + T2k;
767
                         T37 = T31 + T32;
768
                         T33 = KP559016994 * (T31 - T32);
769
                         T38 = FNMS(KP250000000, T37, T36);
770
                    }
771
                    ii[WS(rs, 10)] = T37 + T36;
772
                    T3e = T38 - T33;
773
                    ii[WS(rs, 7)] = T3d + T3e;
774
                    ii[WS(rs, 13)] = T3e - T3d;
775
                    T39 = T33 + T38;
776
                    ii[WS(rs, 1)] = T39 - T3c;
777
                    ii[WS(rs, 4)] = T3c + T39;
778
               }
779
               {
780
                    E T2m, T2o, T1P, T1W, T1X, T1Y, T2n, T1Z;
781
                    {
782
                         E T2a, T2l, T1S, T1V;
783
                         T2a = T24 - T29;
784
                         T2l = T2f - T2k;
785
                         T2m = FMA(KP951056516, T2a, KP587785252 * T2l);
786
                         T2o = FNMS(KP587785252, T2a, KP951056516 * T2l);
787
                         T1P = T1n + T1q;
788
                         T1S = T1Q + T1R;
789
                         T1V = T1T + T1U;
790
                         T1W = T1S + T1V;
791
                         T1X = KP559016994 * (T1S - T1V);
792
                         T1Y = FNMS(KP250000000, T1W, T1P);
793
                    }
794
                    ri[WS(rs, 10)] = T1P + T1W;
795
                    T2n = T1Y - T1X;
796
                    ri[WS(rs, 7)] = T2n - T2o;
797
                    ri[WS(rs, 13)] = T2n + T2o;
798
                    T1Z = T1X + T1Y;
799
                    ri[WS(rs, 4)] = T1Z - T2m;
800
                    ri[WS(rs, 1)] = T1Z + T2m;
801
               }
802
          }
803
     }
804
}
805

    
806
/*
 * Twiddle-instruction table for this codelet; it is referenced by the
 * codelet descriptor `desc`.
 * NOTE(review): presumably the TW_FULL entry requests the full set of
 * twiddle factors for a size-15 step and the TW_NEXT entry terminates the
 * list -- confirm against the tw_instr definition in the FFTW kernel headers.
 */
static const tw_instr twinstr[] = {
807
     {TW_FULL, 0, 15},
808
     {TW_NEXT, 1, 0}
809
};
810

    
811
/*
 * Descriptor passed to the registration call for the t1_15 codelet. The
 * initializer carries the value 15, the name string "t1_15", the twiddle
 * table `twinstr`, and the address of GENUS.
 * NOTE(review): {128, 56, 56, 0} looks like the operation counts
 * (adds, muls, fmas, other) and the trailing zeros like stride
 * constraints -- confirm against ct_desc in dft/codelet-dft.h.
 */
static const ct_desc desc = { 15, "t1_15", twinstr, &GENUS, {128, 56, 56, 0}, 0, 0, 0 };
812

    
813
/*
 * Registration entry point for this codelet: hands the t1_15 kernel and
 * its descriptor `desc` to X(kdft_dit_register) for the planner `p`.
 */
void X(codelet_t1_15) (planner *p) {
814
     X(kdft_dit_register) (p, t1_15, &desc);
815
}
816
#endif