To check out this repository, please `hg clone` the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / q1_6.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (35.8 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:31 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twidsq.native -fma -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 6 -name q1_6 -include dft/scalar/q.h */
29

    
30
/*
31
 * This function contains 276 FP additions, 192 FP multiplications,
32
 * (or, 144 additions, 60 multiplications, 132 fused multiply/add),
33
 * 109 stack variables, 2 constants, and 144 memory accesses
34
 */
35
#include "dft/scalar/q.h"
36

    
37
/*
 * q1_6, FMA variant (this copy is compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).  Machine-generated by genfft's
 * gen_twidsq with "-dif -n 6"; do not hand-edit the arithmetic.
 *
 * For each m in [mb, me) the kernel works in place on a 6x6 block of
 * complex values held as separate real (rio) and imaginary (iio) arrays:
 *
 *   - It reads element (v, r) at offset WS(vs, v) + WS(rs, r), v, r = 0..5.
 *   - For each v it combines the six values r = 0..5 into six results
 *     k = 0..5 using the constants KP500000000 and KP866025403.
 *   - It writes result k of row v to offset WS(vs, k) + WS(rs, v), i.e.
 *     the block is written back transposed.
 *   - Result k = 0 is stored as a plain sum.  Results k = 1..5 are first
 *     combined with the twiddle pair (W[2k-2], W[2k-1]) as
 *         re' = W[2k-2] * re + W[2k-1] * im
 *         im' = W[2k-2] * im - W[2k-1] * re
 *     NOTE(review): this reading assumes FMA(a, b, c) = a*b + c and
 *     FNMS(a, b, c) = c - a*b, as defined in project headers not visible
 *     here -- confirm.
 *
 * Every load from rio/iio appears before the first store, so the in-place
 * transposed write-back cannot clobber an input that is still needed.
 *
 * Per iteration rio and iio advance by ms and W advances by 10 entries;
 * W is pre-offset by mb * 10 before the first iteration.
 */
static void q1_6(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
38
{
39
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
40
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
41
     {
42
          INT m;
43
          for (m = mb, W = W + (mb * 10); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
44
               E T3, Tc, Tw, TW, Ta, TM, Tf, Tg, Tt, TT, Tn, TP, Tu, Tv, TU;
45
               E TV, T17, T1g, T1A, T20, T1e, T1Q, T1j, T1k, T1x, T1X, T1r, T1T, T1y, T1z;
46
               E T1Y, T1Z, T2B, T31, T2v, T2X, T2C, T2D, T32, T33, T2b, T2k, T2E, T34, T2i;
47
               E T2U, T2n, T2o, T3f, T3o, T3I, T48, T3m, T3Y, T3r, T3s, T3F, T45, T3z, T41;
48
               E T3G, T3H, T46, T47, T4j, T4s, T4M, T5c, T4q, T52, T4v, T4w, T4J, T59, T4D;
49
               E T55, T4K, T4L, T5a, T5b, T5N, T6d, T5H, T69, T5O, T5P, T6e, T6f, T5n, T5w;
50
               E T5Q, T6g, T5u, T66, T5z, T5A;
51
               /* Load phase: each brace block below reads the six rio (or iio)
                  values of one row v = 0..5 and forms the sums/differences that
                  the store phase reuses. */
               {
52
                    E T9, Te, T6, Td, T1, T2;
53
                    T1 = rio[0];
54
                    T2 = rio[WS(rs, 3)];
55
                    T3 = T1 + T2;
56
                    Tc = T1 - T2;
57
                    {
58
                         E T7, T8, T4, T5;
59
                         T7 = rio[WS(rs, 4)];
60
                         T8 = rio[WS(rs, 1)];
61
                         T9 = T7 + T8;
62
                         Te = T7 - T8;
63
                         T4 = rio[WS(rs, 2)];
64
                         T5 = rio[WS(rs, 5)];
65
                         T6 = T4 + T5;
66
                         Td = T4 - T5;
67
                    }
68
                    Tw = Te - Td;
69
                    TW = T9 - T6;
70
                    Ta = T6 + T9;
71
                    TM = FNMS(KP500000000, Ta, T3);
72
                    Tf = Td + Te;
73
                    Tg = FNMS(KP500000000, Tf, Tc);
74
               }
75
               {
76
                    E Tj, TN, Tm, TO, Th, Ti;
77
                    Th = iio[WS(rs, 2)];
78
                    Ti = iio[WS(rs, 5)];
79
                    Tj = Th - Ti;
80
                    TN = Th + Ti;
81
                    {
82
                         E Tr, Ts, Tk, Tl;
83
                         Tr = iio[0];
84
                         Ts = iio[WS(rs, 3)];
85
                         Tt = Tr - Ts;
86
                         TT = Tr + Ts;
87
                         Tk = iio[WS(rs, 4)];
88
                         Tl = iio[WS(rs, 1)];
89
                         Tm = Tk - Tl;
90
                         TO = Tk + Tl;
91
                    }
92
                    Tn = Tj - Tm;
93
                    TP = TN - TO;
94
                    Tu = Tj + Tm;
95
                    Tv = FNMS(KP500000000, Tu, Tt);
96
                    TU = TN + TO;
97
                    TV = FNMS(KP500000000, TU, TT);
98
               }
99
               {
100
                    E T1d, T1i, T1a, T1h, T15, T16;
101
                    T15 = rio[WS(vs, 1)];
102
                    T16 = rio[WS(vs, 1) + WS(rs, 3)];
103
                    T17 = T15 + T16;
104
                    T1g = T15 - T16;
105
                    {
106
                         E T1b, T1c, T18, T19;
107
                         T1b = rio[WS(vs, 1) + WS(rs, 4)];
108
                         T1c = rio[WS(vs, 1) + WS(rs, 1)];
109
                         T1d = T1b + T1c;
110
                         T1i = T1b - T1c;
111
                         T18 = rio[WS(vs, 1) + WS(rs, 2)];
112
                         T19 = rio[WS(vs, 1) + WS(rs, 5)];
113
                         T1a = T18 + T19;
114
                         T1h = T18 - T19;
115
                    }
116
                    T1A = T1i - T1h;
117
                    T20 = T1d - T1a;
118
                    T1e = T1a + T1d;
119
                    T1Q = FNMS(KP500000000, T1e, T17);
120
                    T1j = T1h + T1i;
121
                    T1k = FNMS(KP500000000, T1j, T1g);
122
               }
123
               {
124
                    E T1n, T1R, T1q, T1S, T1l, T1m;
125
                    T1l = iio[WS(vs, 1) + WS(rs, 2)];
126
                    T1m = iio[WS(vs, 1) + WS(rs, 5)];
127
                    T1n = T1l - T1m;
128
                    T1R = T1l + T1m;
129
                    {
130
                         E T1v, T1w, T1o, T1p;
131
                         T1v = iio[WS(vs, 1)];
132
                         T1w = iio[WS(vs, 1) + WS(rs, 3)];
133
                         T1x = T1v - T1w;
134
                         T1X = T1v + T1w;
135
                         T1o = iio[WS(vs, 1) + WS(rs, 4)];
136
                         T1p = iio[WS(vs, 1) + WS(rs, 1)];
137
                         T1q = T1o - T1p;
138
                         T1S = T1o + T1p;
139
                    }
140
                    T1r = T1n - T1q;
141
                    T1T = T1R - T1S;
142
                    T1y = T1n + T1q;
143
                    T1z = FNMS(KP500000000, T1y, T1x);
144
                    T1Y = T1R + T1S;
145
                    T1Z = FNMS(KP500000000, T1Y, T1X);
146
               }
147
               {
148
                    E T2r, T2V, T2u, T2W, T2p, T2q;
149
                    T2p = iio[WS(vs, 2) + WS(rs, 2)];
150
                    T2q = iio[WS(vs, 2) + WS(rs, 5)];
151
                    T2r = T2p - T2q;
152
                    T2V = T2p + T2q;
153
                    {
154
                         E T2z, T2A, T2s, T2t;
155
                         T2z = iio[WS(vs, 2)];
156
                         T2A = iio[WS(vs, 2) + WS(rs, 3)];
157
                         T2B = T2z - T2A;
158
                         T31 = T2z + T2A;
159
                         T2s = iio[WS(vs, 2) + WS(rs, 4)];
160
                         T2t = iio[WS(vs, 2) + WS(rs, 1)];
161
                         T2u = T2s - T2t;
162
                         T2W = T2s + T2t;
163
                    }
164
                    T2v = T2r - T2u;
165
                    T2X = T2V - T2W;
166
                    T2C = T2r + T2u;
167
                    T2D = FNMS(KP500000000, T2C, T2B);
168
                    T32 = T2V + T2W;
169
                    T33 = FNMS(KP500000000, T32, T31);
170
               }
171
               {
172
                    E T2h, T2m, T2e, T2l, T29, T2a;
173
                    T29 = rio[WS(vs, 2)];
174
                    T2a = rio[WS(vs, 2) + WS(rs, 3)];
175
                    T2b = T29 + T2a;
176
                    T2k = T29 - T2a;
177
                    {
178
                         E T2f, T2g, T2c, T2d;
179
                         T2f = rio[WS(vs, 2) + WS(rs, 4)];
180
                         T2g = rio[WS(vs, 2) + WS(rs, 1)];
181
                         T2h = T2f + T2g;
182
                         T2m = T2f - T2g;
183
                         T2c = rio[WS(vs, 2) + WS(rs, 2)];
184
                         T2d = rio[WS(vs, 2) + WS(rs, 5)];
185
                         T2e = T2c + T2d;
186
                         T2l = T2c - T2d;
187
                    }
188
                    T2E = T2m - T2l;
189
                    T34 = T2h - T2e;
190
                    T2i = T2e + T2h;
191
                    T2U = FNMS(KP500000000, T2i, T2b);
192
                    T2n = T2l + T2m;
193
                    T2o = FNMS(KP500000000, T2n, T2k);
194
               }
195
               {
196
                    E T3l, T3q, T3i, T3p, T3d, T3e;
197
                    T3d = rio[WS(vs, 3)];
198
                    T3e = rio[WS(vs, 3) + WS(rs, 3)];
199
                    T3f = T3d + T3e;
200
                    T3o = T3d - T3e;
201
                    {
202
                         E T3j, T3k, T3g, T3h;
203
                         T3j = rio[WS(vs, 3) + WS(rs, 4)];
204
                         T3k = rio[WS(vs, 3) + WS(rs, 1)];
205
                         T3l = T3j + T3k;
206
                         T3q = T3j - T3k;
207
                         T3g = rio[WS(vs, 3) + WS(rs, 2)];
208
                         T3h = rio[WS(vs, 3) + WS(rs, 5)];
209
                         T3i = T3g + T3h;
210
                         T3p = T3g - T3h;
211
                    }
212
                    T3I = T3q - T3p;
213
                    T48 = T3l - T3i;
214
                    T3m = T3i + T3l;
215
                    T3Y = FNMS(KP500000000, T3m, T3f);
216
                    T3r = T3p + T3q;
217
                    T3s = FNMS(KP500000000, T3r, T3o);
218
               }
219
               {
220
                    E T3v, T3Z, T3y, T40, T3t, T3u;
221
                    T3t = iio[WS(vs, 3) + WS(rs, 2)];
222
                    T3u = iio[WS(vs, 3) + WS(rs, 5)];
223
                    T3v = T3t - T3u;
224
                    T3Z = T3t + T3u;
225
                    {
226
                         E T3D, T3E, T3w, T3x;
227
                         T3D = iio[WS(vs, 3)];
228
                         T3E = iio[WS(vs, 3) + WS(rs, 3)];
229
                         T3F = T3D - T3E;
230
                         T45 = T3D + T3E;
231
                         T3w = iio[WS(vs, 3) + WS(rs, 4)];
232
                         T3x = iio[WS(vs, 3) + WS(rs, 1)];
233
                         T3y = T3w - T3x;
234
                         T40 = T3w + T3x;
235
                    }
236
                    T3z = T3v - T3y;
237
                    T41 = T3Z - T40;
238
                    T3G = T3v + T3y;
239
                    T3H = FNMS(KP500000000, T3G, T3F);
240
                    T46 = T3Z + T40;
241
                    T47 = FNMS(KP500000000, T46, T45);
242
               }
243
               {
244
                    E T4p, T4u, T4m, T4t, T4h, T4i;
245
                    T4h = rio[WS(vs, 4)];
246
                    T4i = rio[WS(vs, 4) + WS(rs, 3)];
247
                    T4j = T4h + T4i;
248
                    T4s = T4h - T4i;
249
                    {
250
                         E T4n, T4o, T4k, T4l;
251
                         T4n = rio[WS(vs, 4) + WS(rs, 4)];
252
                         T4o = rio[WS(vs, 4) + WS(rs, 1)];
253
                         T4p = T4n + T4o;
254
                         T4u = T4n - T4o;
255
                         T4k = rio[WS(vs, 4) + WS(rs, 2)];
256
                         T4l = rio[WS(vs, 4) + WS(rs, 5)];
257
                         T4m = T4k + T4l;
258
                         T4t = T4k - T4l;
259
                    }
260
                    T4M = T4u - T4t;
261
                    T5c = T4p - T4m;
262
                    T4q = T4m + T4p;
263
                    T52 = FNMS(KP500000000, T4q, T4j);
264
                    T4v = T4t + T4u;
265
                    T4w = FNMS(KP500000000, T4v, T4s);
266
               }
267
               {
268
                    E T4z, T53, T4C, T54, T4x, T4y;
269
                    T4x = iio[WS(vs, 4) + WS(rs, 2)];
270
                    T4y = iio[WS(vs, 4) + WS(rs, 5)];
271
                    T4z = T4x - T4y;
272
                    T53 = T4x + T4y;
273
                    {
274
                         E T4H, T4I, T4A, T4B;
275
                         T4H = iio[WS(vs, 4)];
276
                         T4I = iio[WS(vs, 4) + WS(rs, 3)];
277
                         T4J = T4H - T4I;
278
                         T59 = T4H + T4I;
279
                         T4A = iio[WS(vs, 4) + WS(rs, 4)];
280
                         T4B = iio[WS(vs, 4) + WS(rs, 1)];
281
                         T4C = T4A - T4B;
282
                         T54 = T4A + T4B;
283
                    }
284
                    T4D = T4z - T4C;
285
                    T55 = T53 - T54;
286
                    T4K = T4z + T4C;
287
                    T4L = FNMS(KP500000000, T4K, T4J);
288
                    T5a = T53 + T54;
289
                    T5b = FNMS(KP500000000, T5a, T59);
290
               }
291
               {
292
                    E T5D, T67, T5G, T68, T5B, T5C;
293
                    T5B = iio[WS(vs, 5) + WS(rs, 2)];
294
                    T5C = iio[WS(vs, 5) + WS(rs, 5)];
295
                    T5D = T5B - T5C;
296
                    T67 = T5B + T5C;
297
                    {
298
                         E T5L, T5M, T5E, T5F;
299
                         T5L = iio[WS(vs, 5)];
300
                         T5M = iio[WS(vs, 5) + WS(rs, 3)];
301
                         T5N = T5L - T5M;
302
                         T6d = T5L + T5M;
303
                         T5E = iio[WS(vs, 5) + WS(rs, 4)];
304
                         T5F = iio[WS(vs, 5) + WS(rs, 1)];
305
                         T5G = T5E - T5F;
306
                         T68 = T5E + T5F;
307
                    }
308
                    T5H = T5D - T5G;
309
                    T69 = T67 - T68;
310
                    T5O = T5D + T5G;
311
                    T5P = FNMS(KP500000000, T5O, T5N);
312
                    T6e = T67 + T68;
313
                    T6f = FNMS(KP500000000, T6e, T6d);
314
               }
315
               {
316
                    E T5t, T5y, T5q, T5x, T5l, T5m;
317
                    T5l = rio[WS(vs, 5)];
318
                    T5m = rio[WS(vs, 5) + WS(rs, 3)];
319
                    T5n = T5l + T5m;
320
                    T5w = T5l - T5m;
321
                    {
322
                         E T5r, T5s, T5o, T5p;
323
                         T5r = rio[WS(vs, 5) + WS(rs, 4)];
324
                         T5s = rio[WS(vs, 5) + WS(rs, 1)];
325
                         T5t = T5r + T5s;
326
                         T5y = T5r - T5s;
327
                         T5o = rio[WS(vs, 5) + WS(rs, 2)];
328
                         T5p = rio[WS(vs, 5) + WS(rs, 5)];
329
                         T5q = T5o + T5p;
330
                         T5x = T5o - T5p;
331
                    }
332
                    T5Q = T5y - T5x;
333
                    T6g = T5t - T5q;
334
                    T5u = T5q + T5t;
335
                    T66 = FNMS(KP500000000, T5u, T5n);
336
                    T5z = T5x + T5y;
337
                    T5A = FNMS(KP500000000, T5z, T5w);
338
               }
339
               /* Store phase, k = 0: the plain sum of row v goes to offset
                  WS(rs, v) (no vs term, no twiddle). */
               rio[0] = T3 + Ta;
340
               iio[0] = TT + TU;
341
               rio[WS(rs, 1)] = T17 + T1e;
342
               iio[WS(rs, 1)] = T1X + T1Y;
343
               rio[WS(rs, 2)] = T2b + T2i;
344
               iio[WS(rs, 2)] = T31 + T32;
345
               iio[WS(rs, 4)] = T59 + T5a;
346
               rio[WS(rs, 4)] = T4j + T4q;
347
               rio[WS(rs, 3)] = T3f + T3m;
348
               iio[WS(rs, 3)] = T45 + T46;
349
               rio[WS(rs, 5)] = T5n + T5u;
350
               iio[WS(rs, 5)] = T6d + T6e;
351
               /* Store phase, k = 1..5: each brace block below finishes one
                  (k, v) result, combines it with W[2k-2] and W[2k-1], and
                  stores it at WS(vs, k) + WS(rs, v).  The blocks are emitted
                  in the generator's scheduling order, not in (k, v) order. */
               {
352
                    E To, Tx, Tp, Ty, Tb, Tq;
353
                    To = FMA(KP866025403, Tn, Tg);
354
                    Tx = FMA(KP866025403, Tw, Tv);
355
                    Tb = W[0];
356
                    Tp = Tb * To;
357
                    Ty = Tb * Tx;
358
                    Tq = W[1];
359
                    rio[WS(vs, 1)] = FMA(Tq, Tx, Tp);
360
                    iio[WS(vs, 1)] = FNMS(Tq, To, Ty);
361
               }
362
               {
363
                    E TG, TJ, TH, TK, TF, TI;
364
                    TG = Tc + Tf;
365
                    TJ = Tt + Tu;
366
                    TF = W[4];
367
                    TH = TF * TG;
368
                    TK = TF * TJ;
369
                    TI = W[5];
370
                    rio[WS(vs, 3)] = FMA(TI, TJ, TH);
371
                    iio[WS(vs, 3)] = FNMS(TI, TG, TK);
372
               }
373
               {
374
                    E T10, T13, T11, T14, TZ, T12;
375
                    T10 = FMA(KP866025403, TP, TM);
376
                    T13 = FMA(KP866025403, TW, TV);
377
                    TZ = W[6];
378
                    T11 = TZ * T10;
379
                    T14 = TZ * T13;
380
                    T12 = W[7];
381
                    rio[WS(vs, 4)] = FMA(T12, T13, T11);
382
                    iio[WS(vs, 4)] = FNMS(T12, T10, T14);
383
               }
384
               {
385
                    E T60, T63, T61, T64, T5Z, T62;
386
                    T60 = T5w + T5z;
387
                    T63 = T5N + T5O;
388
                    T5Z = W[4];
389
                    T61 = T5Z * T60;
390
                    T64 = T5Z * T63;
391
                    T62 = W[5];
392
                    rio[WS(vs, 3) + WS(rs, 5)] = FMA(T62, T63, T61);
393
                    iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T62, T60, T64);
394
               }
395
               {
396
                    E T6k, T6n, T6l, T6o, T6j, T6m;
397
                    T6k = FMA(KP866025403, T69, T66);
398
                    T6n = FMA(KP866025403, T6g, T6f);
399
                    T6j = W[6];
400
                    T6l = T6j * T6k;
401
                    T6o = T6j * T6n;
402
                    T6m = W[7];
403
                    rio[WS(vs, 4) + WS(rs, 5)] = FMA(T6m, T6n, T6l);
404
                    iio[WS(vs, 4) + WS(rs, 5)] = FNMS(T6m, T6k, T6o);
405
               }
406
               {
407
                    E TA, TD, TB, TE, Tz, TC;
408
                    TA = FNMS(KP866025403, Tn, Tg);
409
                    TD = FNMS(KP866025403, Tw, Tv);
410
                    Tz = W[8];
411
                    TB = Tz * TA;
412
                    TE = Tz * TD;
413
                    TC = W[9];
414
                    rio[WS(vs, 5)] = FMA(TC, TD, TB);
415
                    iio[WS(vs, 5)] = FNMS(TC, TA, TE);
416
               }
417
               {
418
                    E TQ, TX, TR, TY, TL, TS;
419
                    TQ = FNMS(KP866025403, TP, TM);
420
                    TX = FNMS(KP866025403, TW, TV);
421
                    TL = W[2];
422
                    TR = TL * TQ;
423
                    TY = TL * TX;
424
                    TS = W[3];
425
                    rio[WS(vs, 2)] = FMA(TS, TX, TR);
426
                    iio[WS(vs, 2)] = FNMS(TS, TQ, TY);
427
               }
428
               {
429
                    E T5U, T5X, T5V, T5Y, T5T, T5W;
430
                    T5U = FNMS(KP866025403, T5H, T5A);
431
                    T5X = FNMS(KP866025403, T5Q, T5P);
432
                    T5T = W[8];
433
                    T5V = T5T * T5U;
434
                    T5Y = T5T * T5X;
435
                    T5W = W[9];
436
                    rio[WS(vs, 5) + WS(rs, 5)] = FMA(T5W, T5X, T5V);
437
                    iio[WS(vs, 5) + WS(rs, 5)] = FNMS(T5W, T5U, T5Y);
438
               }
439
               {
440
                    E T6a, T6h, T6b, T6i, T65, T6c;
441
                    T6a = FNMS(KP866025403, T69, T66);
442
                    T6h = FNMS(KP866025403, T6g, T6f);
443
                    T65 = W[2];
444
                    T6b = T65 * T6a;
445
                    T6i = T65 * T6h;
446
                    T6c = W[3];
447
                    rio[WS(vs, 2) + WS(rs, 5)] = FMA(T6c, T6h, T6b);
448
                    iio[WS(vs, 2) + WS(rs, 5)] = FNMS(T6c, T6a, T6i);
449
               }
450
               {
451
                    E T5I, T5R, T5J, T5S, T5v, T5K;
452
                    T5I = FMA(KP866025403, T5H, T5A);
453
                    T5R = FMA(KP866025403, T5Q, T5P);
454
                    T5v = W[0];
455
                    T5J = T5v * T5I;
456
                    T5S = T5v * T5R;
457
                    T5K = W[1];
458
                    rio[WS(vs, 1) + WS(rs, 5)] = FMA(T5K, T5R, T5J);
459
                    iio[WS(vs, 1) + WS(rs, 5)] = FNMS(T5K, T5I, T5S);
460
               }
461
               {
462
                    E T1s, T1B, T1t, T1C, T1f, T1u;
463
                    T1s = FMA(KP866025403, T1r, T1k);
464
                    T1B = FMA(KP866025403, T1A, T1z);
465
                    T1f = W[0];
466
                    T1t = T1f * T1s;
467
                    T1C = T1f * T1B;
468
                    T1u = W[1];
469
                    rio[WS(vs, 1) + WS(rs, 1)] = FMA(T1u, T1B, T1t);
470
                    iio[WS(vs, 1) + WS(rs, 1)] = FNMS(T1u, T1s, T1C);
471
               }
472
               {
473
                    E T3S, T3V, T3T, T3W, T3R, T3U;
474
                    T3S = T3o + T3r;
475
                    T3V = T3F + T3G;
476
                    T3R = W[4];
477
                    T3T = T3R * T3S;
478
                    T3W = T3R * T3V;
479
                    T3U = W[5];
480
                    rio[WS(vs, 3) + WS(rs, 3)] = FMA(T3U, T3V, T3T);
481
                    iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T3U, T3S, T3W);
482
               }
483
               {
484
                    E T3A, T3J, T3B, T3K, T3n, T3C;
485
                    T3A = FMA(KP866025403, T3z, T3s);
486
                    T3J = FMA(KP866025403, T3I, T3H);
487
                    T3n = W[0];
488
                    T3B = T3n * T3A;
489
                    T3K = T3n * T3J;
490
                    T3C = W[1];
491
                    rio[WS(vs, 1) + WS(rs, 3)] = FMA(T3C, T3J, T3B);
492
                    iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T3C, T3A, T3K);
493
               }
494
               {
495
                    E T56, T5d, T57, T5e, T51, T58;
496
                    T56 = FNMS(KP866025403, T55, T52);
497
                    T5d = FNMS(KP866025403, T5c, T5b);
498
                    T51 = W[2];
499
                    T57 = T51 * T56;
500
                    T5e = T51 * T5d;
501
                    T58 = W[3];
502
                    rio[WS(vs, 2) + WS(rs, 4)] = FMA(T58, T5d, T57);
503
                    iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T58, T56, T5e);
504
               }
505
               {
506
                    E T2Y, T35, T2Z, T36, T2T, T30;
507
                    T2Y = FNMS(KP866025403, T2X, T2U);
508
                    T35 = FNMS(KP866025403, T34, T33);
509
                    T2T = W[2];
510
                    T2Z = T2T * T2Y;
511
                    T36 = T2T * T35;
512
                    T30 = W[3];
513
                    rio[WS(vs, 2) + WS(rs, 2)] = FMA(T30, T35, T2Z);
514
                    iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T30, T2Y, T36);
515
               }
516
               {
517
                    E T3M, T3P, T3N, T3Q, T3L, T3O;
518
                    T3M = FNMS(KP866025403, T3z, T3s);
519
                    T3P = FNMS(KP866025403, T3I, T3H);
520
                    T3L = W[8];
521
                    T3N = T3L * T3M;
522
                    T3Q = T3L * T3P;
523
                    T3O = W[9];
524
                    rio[WS(vs, 5) + WS(rs, 3)] = FMA(T3O, T3P, T3N);
525
                    iio[WS(vs, 5) + WS(rs, 3)] = FNMS(T3O, T3M, T3Q);
526
               }
527
               {
528
                    E T38, T3b, T39, T3c, T37, T3a;
529
                    T38 = FMA(KP866025403, T2X, T2U);
530
                    T3b = FMA(KP866025403, T34, T33);
531
                    T37 = W[6];
532
                    T39 = T37 * T38;
533
                    T3c = T37 * T3b;
534
                    T3a = W[7];
535
                    rio[WS(vs, 4) + WS(rs, 2)] = FMA(T3a, T3b, T39);
536
                    iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T3a, T38, T3c);
537
               }
538
               {
539
                    E T1E, T1H, T1F, T1I, T1D, T1G;
540
                    T1E = FNMS(KP866025403, T1r, T1k);
541
                    T1H = FNMS(KP866025403, T1A, T1z);
542
                    T1D = W[8];
543
                    T1F = T1D * T1E;
544
                    T1I = T1D * T1H;
545
                    T1G = W[9];
546
                    rio[WS(vs, 5) + WS(rs, 1)] = FMA(T1G, T1H, T1F);
547
                    iio[WS(vs, 5) + WS(rs, 1)] = FNMS(T1G, T1E, T1I);
548
               }
549
               {
550
                    E T5g, T5j, T5h, T5k, T5f, T5i;
551
                    T5g = FMA(KP866025403, T55, T52);
552
                    T5j = FMA(KP866025403, T5c, T5b);
553
                    T5f = W[6];
554
                    T5h = T5f * T5g;
555
                    T5k = T5f * T5j;
556
                    T5i = W[7];
557
                    rio[WS(vs, 4) + WS(rs, 4)] = FMA(T5i, T5j, T5h);
558
                    iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T5i, T5g, T5k);
559
               }
560
               {
561
                    E T1K, T1N, T1L, T1O, T1J, T1M;
562
                    T1K = T1g + T1j;
563
                    T1N = T1x + T1y;
564
                    T1J = W[4];
565
                    T1L = T1J * T1K;
566
                    T1O = T1J * T1N;
567
                    T1M = W[5];
568
                    rio[WS(vs, 3) + WS(rs, 1)] = FMA(T1M, T1N, T1L);
569
                    iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T1M, T1K, T1O);
570
               }
571
               {
572
                    E T4W, T4Z, T4X, T50, T4V, T4Y;
573
                    T4W = T4s + T4v;
574
                    T4Z = T4J + T4K;
575
                    T4V = W[4];
576
                    T4X = T4V * T4W;
577
                    T50 = T4V * T4Z;
578
                    T4Y = W[5];
579
                    rio[WS(vs, 3) + WS(rs, 4)] = FMA(T4Y, T4Z, T4X);
580
                    iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T4Y, T4W, T50);
581
               }
582
               {
583
                    E T4E, T4N, T4F, T4O, T4r, T4G;
584
                    T4E = FMA(KP866025403, T4D, T4w);
585
                    T4N = FMA(KP866025403, T4M, T4L);
586
                    T4r = W[0];
587
                    T4F = T4r * T4E;
588
                    T4O = T4r * T4N;
589
                    T4G = W[1];
590
                    rio[WS(vs, 1) + WS(rs, 4)] = FMA(T4G, T4N, T4F);
591
                    iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T4G, T4E, T4O);
592
               }
593
               {
594
                    E T2O, T2R, T2P, T2S, T2N, T2Q;
595
                    T2O = T2k + T2n;
596
                    T2R = T2B + T2C;
597
                    T2N = W[4];
598
                    T2P = T2N * T2O;
599
                    T2S = T2N * T2R;
600
                    T2Q = W[5];
601
                    rio[WS(vs, 3) + WS(rs, 2)] = FMA(T2Q, T2R, T2P);
602
                    iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T2Q, T2O, T2S);
603
               }
604
               {
605
                    E T2w, T2F, T2x, T2G, T2j, T2y;
606
                    T2w = FMA(KP866025403, T2v, T2o);
607
                    T2F = FMA(KP866025403, T2E, T2D);
608
                    T2j = W[0];
609
                    T2x = T2j * T2w;
610
                    T2G = T2j * T2F;
611
                    T2y = W[1];
612
                    rio[WS(vs, 1) + WS(rs, 2)] = FMA(T2y, T2F, T2x);
613
                    iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T2y, T2w, T2G);
614
               }
615
               {
616
                    E T24, T27, T25, T28, T23, T26;
617
                    T24 = FMA(KP866025403, T1T, T1Q);
618
                    T27 = FMA(KP866025403, T20, T1Z);
619
                    T23 = W[6];
620
                    T25 = T23 * T24;
621
                    T28 = T23 * T27;
622
                    T26 = W[7];
623
                    rio[WS(vs, 4) + WS(rs, 1)] = FMA(T26, T27, T25);
624
                    iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T26, T24, T28);
625
               }
626
               {
627
                    E T42, T49, T43, T4a, T3X, T44;
628
                    T42 = FNMS(KP866025403, T41, T3Y);
629
                    T49 = FNMS(KP866025403, T48, T47);
630
                    T3X = W[2];
631
                    T43 = T3X * T42;
632
                    T4a = T3X * T49;
633
                    T44 = W[3];
634
                    rio[WS(vs, 2) + WS(rs, 3)] = FMA(T44, T49, T43);
635
                    iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T44, T42, T4a);
636
               }
637
               {
638
                    E T2I, T2L, T2J, T2M, T2H, T2K;
639
                    T2I = FNMS(KP866025403, T2v, T2o);
640
                    T2L = FNMS(KP866025403, T2E, T2D);
641
                    T2H = W[8];
642
                    T2J = T2H * T2I;
643
                    T2M = T2H * T2L;
644
                    T2K = W[9];
645
                    rio[WS(vs, 5) + WS(rs, 2)] = FMA(T2K, T2L, T2J);
646
                    iio[WS(vs, 5) + WS(rs, 2)] = FNMS(T2K, T2I, T2M);
647
               }
648
               {
649
                    E T4Q, T4T, T4R, T4U, T4P, T4S;
650
                    T4Q = FNMS(KP866025403, T4D, T4w);
651
                    T4T = FNMS(KP866025403, T4M, T4L);
652
                    T4P = W[8];
653
                    T4R = T4P * T4Q;
654
                    T4U = T4P * T4T;
655
                    T4S = W[9];
656
                    rio[WS(vs, 5) + WS(rs, 4)] = FMA(T4S, T4T, T4R);
657
                    iio[WS(vs, 5) + WS(rs, 4)] = FNMS(T4S, T4Q, T4U);
658
               }
659
               {
660
                    E T1U, T21, T1V, T22, T1P, T1W;
661
                    T1U = FNMS(KP866025403, T1T, T1Q);
662
                    T21 = FNMS(KP866025403, T20, T1Z);
663
                    T1P = W[2];
664
                    T1V = T1P * T1U;
665
                    T22 = T1P * T21;
666
                    T1W = W[3];
667
                    rio[WS(vs, 2) + WS(rs, 1)] = FMA(T1W, T21, T1V);
668
                    iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T1W, T1U, T22);
669
               }
670
               {
671
                    E T4c, T4f, T4d, T4g, T4b, T4e;
672
                    T4c = FMA(KP866025403, T41, T3Y);
673
                    T4f = FMA(KP866025403, T48, T47);
674
                    T4b = W[6];
675
                    T4d = T4b * T4c;
676
                    T4g = T4b * T4f;
677
                    T4e = W[7];
678
                    rio[WS(vs, 4) + WS(rs, 3)] = FMA(T4e, T4f, T4d);
679
                    iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T4e, T4c, T4g);
680
               }
681
          }
682
     }
683
}
684

    
685
/*
 * Twiddle-instruction table handed to the planner through `desc`.
 * It holds two entries: {TW_FULL, 0, 6} followed by {TW_NEXT, 1, 0}.
 * NOTE(review): tw_instr's field meanings are declared in project headers
 * not visible here; presumably TW_FULL with 6 requests the full twiddle set
 * for size 6 (q1_6 consumes W[0..9], i.e. 5 pairs, per iteration) and
 * TW_NEXT terminates the list -- confirm against the tw_instr definition.
 */
static const tw_instr twinstr[] = {
686
     {TW_FULL, 0, 6},
687
     {TW_NEXT, 1, 0}
688
};
689

    
690
/*
 * Descriptor registered together with q1_6: size 6, the name string "q1_6",
 * the twinstr table, &GENUS, and the tuple {144, 60, 132, 0}.  The first
 * three tuple values match the "144 additions, 60 multiplications, 132 fused
 * multiply/add" figures in the generator's comment for this FMA variant.
 * NOTE(review): ct_desc is a positional initializer of a project type; the
 * meaning of the trailing 0, 0, 0 fields is not visible here -- confirm
 * against the ct_desc definition before changing anything.
 */
static const ct_desc desc = { 6, "q1_6", twinstr, &GENUS, {144, 60, 132, 0}, 0, 0, 0 };
691

    
692
/*
 * Registration entry point: hands the q1_6 kernel and its descriptor to the
 * planner `p` through kdft_difsq_register.
 */
void X(codelet_q1_6) (planner *p) {
     X(kdft_difsq_register) (p, q1_6, &desc);
}
695
#else
696

    
697
/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 6 -name q1_6 -include dft/scalar/q.h */
698

    
699
/*
 * This function contains 276 FP additions, 168 FP multiplications,
 * (or, 192 additions, 84 multiplications, 84 fused multiply/add),
 * 85 stack variables, 2 constants, and 144 memory accesses
 */
704
#include "dft/scalar/q.h"
705

    
706
/*
 * q1_6 -- size-6 twiddle-square DIF kernel (non-FMA variant), as produced by
 * gen_twidsq with "-dif -n 6".
 *
 * For every m in [mb, me) the kernel works in place on a 6 x 6 grid of
 * complex values whose real parts live in rio and imaginary parts in iio:
 *
 *   - the input element (v, r) is read from offset WS(vs, v) + WS(rs, r);
 *   - a 6-point transform is evaluated along r for each v, built from
 *     sum/difference pairs (r, r+3) combined with the constants
 *     KP500000000 (1/2) and KP866025403 (sqrt(3)/2);
 *   - output k of row v is written to offset WS(vs, k) + WS(rs, v), i.e. the
 *     grid is stored transposed;
 *   - outputs k = 1..5 are combined with the twiddle pair
 *     (W[2*(k-1)], W[2*(k-1)+1]); output k = 0 is stored untwiddled.
 *
 * All 72 loads of an iteration are issued before the first store, which is
 * what makes the in-place transposed write-back safe.
 *
 * Parameters:
 *   rio, iio  real / imaginary data arrays; both advance by ms per iteration
 *   W         twiddle table; 10 values are consumed per iteration and the
 *             pointer starts at W + mb * 10
 *   rs, vs    strides, only ever used through the WS() macro
 *   mb, me    half-open iteration range
 *   ms        per-iteration advance of rio and iio
 *
 * NOTE(review): the twiddle stores are written as
 *     re = FMA(Wr, a, Wi * b);   im = FNMS(Wi, a, Wr * b);
 * Assuming FMA(x, y, z) = x*y + z and FNMS(x, y, z) = z - x*y (the macros are
 * defined outside this file), this is re = Wr*a + Wi*b, im = Wr*b - Wi*a.
 */
static void q1_6(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
{
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     {
          INT m;
          for (m = mb, W = W + (mb * 10); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 10, MAKE_VOLATILE_STRIDE(12, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
               E T3, Tc, Tt, TM, TX, T16, T1n, T1G, T2h, T2A, T1R, T20, T2L, T2U, T3b;
               E T3u, T3F, T3O, T45, T4o, T4Z, T5i, T4z, T4I, Ta, TP, Tf, Tq, Tn, TN;
               E Tu, TJ, T14, T1J, T19, T1k, T1h, T1H, T1o, T1D, T2b, T2B, T2i, T2x, T1Y;
               E T2D, T23, T2e, T2S, T3x, T2X, T38, T35, T3v, T3c, T3r, T3M, T4r, T3R, T42;
               E T3Z, T4p, T46, T4l, T4T, T5j, T50, T5f, T4G, T5l, T4L, T4W;
               /* Load elements r = 0 and r = 3 of rows v = 0..2; keep sum and difference. */
               {
                    E T1, T2, T1l, T1m;
                    T1 = rio[0];
                    T2 = rio[WS(rs, 3)];
                    T3 = T1 + T2;
                    Tc = T1 - T2;
                    {
                         E Tr, Ts, TV, TW;
                         Tr = iio[0];
                         Ts = iio[WS(rs, 3)];
                         Tt = Tr - Ts;
                         TM = Tr + Ts;
                         TV = rio[WS(vs, 1)];
                         TW = rio[WS(vs, 1) + WS(rs, 3)];
                         TX = TV + TW;
                         T16 = TV - TW;
                    }
                    T1l = iio[WS(vs, 1)];
                    T1m = iio[WS(vs, 1) + WS(rs, 3)];
                    T1n = T1l - T1m;
                    T1G = T1l + T1m;
                    {
                         E T2f, T2g, T1P, T1Q;
                         T2f = iio[WS(vs, 2)];
                         T2g = iio[WS(vs, 2) + WS(rs, 3)];
                         T2h = T2f - T2g;
                         T2A = T2f + T2g;
                         T1P = rio[WS(vs, 2)];
                         T1Q = rio[WS(vs, 2) + WS(rs, 3)];
                         T1R = T1P + T1Q;
                         T20 = T1P - T1Q;
                    }
               }
               /* Same for rows v = 3..5. */
               {
                    E T2J, T2K, T43, T44;
                    T2J = rio[WS(vs, 3)];
                    T2K = rio[WS(vs, 3) + WS(rs, 3)];
                    T2L = T2J + T2K;
                    T2U = T2J - T2K;
                    {
                         E T39, T3a, T3D, T3E;
                         T39 = iio[WS(vs, 3)];
                         T3a = iio[WS(vs, 3) + WS(rs, 3)];
                         T3b = T39 - T3a;
                         T3u = T39 + T3a;
                         T3D = rio[WS(vs, 4)];
                         T3E = rio[WS(vs, 4) + WS(rs, 3)];
                         T3F = T3D + T3E;
                         T3O = T3D - T3E;
                    }
                    T43 = iio[WS(vs, 4)];
                    T44 = iio[WS(vs, 4) + WS(rs, 3)];
                    T45 = T43 - T44;
                    T4o = T43 + T44;
                    {
                         E T4X, T4Y, T4x, T4y;
                         T4X = iio[WS(vs, 5)];
                         T4Y = iio[WS(vs, 5) + WS(rs, 3)];
                         T4Z = T4X - T4Y;
                         T5i = T4X + T4Y;
                         T4x = rio[WS(vs, 5)];
                         T4y = rio[WS(vs, 5) + WS(rs, 3)];
                         T4z = T4x + T4y;
                         T4I = T4x - T4y;
                    }
               }
               /*
                * Remaining loads: element pairs (2, 5) and (4, 1) of each row,
                * real and imaginary parts handled in separate blocks.
                */
               /* Row v = 0, real parts. */
               {
                    E T6, Td, T9, Te;
                    {
                         E T4, T5, T7, T8;
                         T4 = rio[WS(rs, 2)];
                         T5 = rio[WS(rs, 5)];
                         T6 = T4 + T5;
                         Td = T4 - T5;
                         T7 = rio[WS(rs, 4)];
                         T8 = rio[WS(rs, 1)];
                         T9 = T7 + T8;
                         Te = T7 - T8;
                    }
                    Ta = T6 + T9;
                    TP = KP866025403 * (T9 - T6);
                    Tf = Td + Te;
                    Tq = KP866025403 * (Te - Td);
               }
               /* Row v = 0, imaginary parts. */
               {
                    E Tj, TH, Tm, TI;
                    {
                         E Th, Ti, Tk, Tl;
                         Th = iio[WS(rs, 2)];
                         Ti = iio[WS(rs, 5)];
                         Tj = Th - Ti;
                         TH = Th + Ti;
                         Tk = iio[WS(rs, 4)];
                         Tl = iio[WS(rs, 1)];
                         Tm = Tk - Tl;
                         TI = Tk + Tl;
                    }
                    Tn = KP866025403 * (Tj - Tm);
                    TN = TH + TI;
                    Tu = Tj + Tm;
                    TJ = KP866025403 * (TH - TI);
               }
               /* Row v = 1, real parts. */
               {
                    E T10, T17, T13, T18;
                    {
                         E TY, TZ, T11, T12;
                         TY = rio[WS(vs, 1) + WS(rs, 2)];
                         TZ = rio[WS(vs, 1) + WS(rs, 5)];
                         T10 = TY + TZ;
                         T17 = TY - TZ;
                         T11 = rio[WS(vs, 1) + WS(rs, 4)];
                         T12 = rio[WS(vs, 1) + WS(rs, 1)];
                         T13 = T11 + T12;
                         T18 = T11 - T12;
                    }
                    T14 = T10 + T13;
                    T1J = KP866025403 * (T13 - T10);
                    T19 = T17 + T18;
                    T1k = KP866025403 * (T18 - T17);
               }
               /* Row v = 1, imaginary parts. */
               {
                    E T1d, T1B, T1g, T1C;
                    {
                         E T1b, T1c, T1e, T1f;
                         T1b = iio[WS(vs, 1) + WS(rs, 2)];
                         T1c = iio[WS(vs, 1) + WS(rs, 5)];
                         T1d = T1b - T1c;
                         T1B = T1b + T1c;
                         T1e = iio[WS(vs, 1) + WS(rs, 4)];
                         T1f = iio[WS(vs, 1) + WS(rs, 1)];
                         T1g = T1e - T1f;
                         T1C = T1e + T1f;
                    }
                    T1h = KP866025403 * (T1d - T1g);
                    T1H = T1B + T1C;
                    T1o = T1d + T1g;
                    T1D = KP866025403 * (T1B - T1C);
               }
               /* Row v = 2, imaginary parts. */
               {
                    E T27, T2v, T2a, T2w;
                    {
                         E T25, T26, T28, T29;
                         T25 = iio[WS(vs, 2) + WS(rs, 2)];
                         T26 = iio[WS(vs, 2) + WS(rs, 5)];
                         T27 = T25 - T26;
                         T2v = T25 + T26;
                         T28 = iio[WS(vs, 2) + WS(rs, 4)];
                         T29 = iio[WS(vs, 2) + WS(rs, 1)];
                         T2a = T28 - T29;
                         T2w = T28 + T29;
                    }
                    T2b = KP866025403 * (T27 - T2a);
                    T2B = T2v + T2w;
                    T2i = T27 + T2a;
                    T2x = KP866025403 * (T2v - T2w);
               }
               /* Row v = 2, real parts. */
               {
                    E T1U, T21, T1X, T22;
                    {
                         E T1S, T1T, T1V, T1W;
                         T1S = rio[WS(vs, 2) + WS(rs, 2)];
                         T1T = rio[WS(vs, 2) + WS(rs, 5)];
                         T1U = T1S + T1T;
                         T21 = T1S - T1T;
                         T1V = rio[WS(vs, 2) + WS(rs, 4)];
                         T1W = rio[WS(vs, 2) + WS(rs, 1)];
                         T1X = T1V + T1W;
                         T22 = T1V - T1W;
                    }
                    T1Y = T1U + T1X;
                    T2D = KP866025403 * (T1X - T1U);
                    T23 = T21 + T22;
                    T2e = KP866025403 * (T22 - T21);
               }
               /* Row v = 3, real parts. */
               {
                    E T2O, T2V, T2R, T2W;
                    {
                         E T2M, T2N, T2P, T2Q;
                         T2M = rio[WS(vs, 3) + WS(rs, 2)];
                         T2N = rio[WS(vs, 3) + WS(rs, 5)];
                         T2O = T2M + T2N;
                         T2V = T2M - T2N;
                         T2P = rio[WS(vs, 3) + WS(rs, 4)];
                         T2Q = rio[WS(vs, 3) + WS(rs, 1)];
                         T2R = T2P + T2Q;
                         T2W = T2P - T2Q;
                    }
                    T2S = T2O + T2R;
                    T3x = KP866025403 * (T2R - T2O);
                    T2X = T2V + T2W;
                    T38 = KP866025403 * (T2W - T2V);
               }
               /* Row v = 3, imaginary parts. */
               {
                    E T31, T3p, T34, T3q;
                    {
                         E T2Z, T30, T32, T33;
                         T2Z = iio[WS(vs, 3) + WS(rs, 2)];
                         T30 = iio[WS(vs, 3) + WS(rs, 5)];
                         T31 = T2Z - T30;
                         T3p = T2Z + T30;
                         T32 = iio[WS(vs, 3) + WS(rs, 4)];
                         T33 = iio[WS(vs, 3) + WS(rs, 1)];
                         T34 = T32 - T33;
                         T3q = T32 + T33;
                    }
                    T35 = KP866025403 * (T31 - T34);
                    T3v = T3p + T3q;
                    T3c = T31 + T34;
                    T3r = KP866025403 * (T3p - T3q);
               }
               /* Row v = 4, real parts. */
               {
                    E T3I, T3P, T3L, T3Q;
                    {
                         E T3G, T3H, T3J, T3K;
                         T3G = rio[WS(vs, 4) + WS(rs, 2)];
                         T3H = rio[WS(vs, 4) + WS(rs, 5)];
                         T3I = T3G + T3H;
                         T3P = T3G - T3H;
                         T3J = rio[WS(vs, 4) + WS(rs, 4)];
                         T3K = rio[WS(vs, 4) + WS(rs, 1)];
                         T3L = T3J + T3K;
                         T3Q = T3J - T3K;
                    }
                    T3M = T3I + T3L;
                    T4r = KP866025403 * (T3L - T3I);
                    T3R = T3P + T3Q;
                    T42 = KP866025403 * (T3Q - T3P);
               }
               /* Row v = 4, imaginary parts. */
               {
                    E T3V, T4j, T3Y, T4k;
                    {
                         E T3T, T3U, T3W, T3X;
                         T3T = iio[WS(vs, 4) + WS(rs, 2)];
                         T3U = iio[WS(vs, 4) + WS(rs, 5)];
                         T3V = T3T - T3U;
                         T4j = T3T + T3U;
                         T3W = iio[WS(vs, 4) + WS(rs, 4)];
                         T3X = iio[WS(vs, 4) + WS(rs, 1)];
                         T3Y = T3W - T3X;
                         T4k = T3W + T3X;
                    }
                    T3Z = KP866025403 * (T3V - T3Y);
                    T4p = T4j + T4k;
                    T46 = T3V + T3Y;
                    T4l = KP866025403 * (T4j - T4k);
               }
               /* Row v = 5, imaginary parts. */
               {
                    E T4P, T5d, T4S, T5e;
                    {
                         E T4N, T4O, T4Q, T4R;
                         T4N = iio[WS(vs, 5) + WS(rs, 2)];
                         T4O = iio[WS(vs, 5) + WS(rs, 5)];
                         T4P = T4N - T4O;
                         T5d = T4N + T4O;
                         T4Q = iio[WS(vs, 5) + WS(rs, 4)];
                         T4R = iio[WS(vs, 5) + WS(rs, 1)];
                         T4S = T4Q - T4R;
                         T5e = T4Q + T4R;
                    }
                    T4T = KP866025403 * (T4P - T4S);
                    T5j = T5d + T5e;
                    T50 = T4P + T4S;
                    T5f = KP866025403 * (T5d - T5e);
               }
               /* Row v = 5, real parts. */
               {
                    E T4C, T4J, T4F, T4K;
                    {
                         E T4A, T4B, T4D, T4E;
                         T4A = rio[WS(vs, 5) + WS(rs, 2)];
                         T4B = rio[WS(vs, 5) + WS(rs, 5)];
                         T4C = T4A + T4B;
                         T4J = T4A - T4B;
                         T4D = rio[WS(vs, 5) + WS(rs, 4)];
                         T4E = rio[WS(vs, 5) + WS(rs, 1)];
                         T4F = T4D + T4E;
                         T4K = T4D - T4E;
                    }
                    T4G = T4C + T4F;
                    T5l = KP866025403 * (T4F - T4C);
                    T4L = T4J + T4K;
                    T4W = KP866025403 * (T4K - T4J);
               }
               /*
                * Output 0 of every row (plain sum of its six inputs), stored
                * untwiddled at offset WS(rs, v).  All loads are complete by now.
                */
               rio[0] = T3 + Ta;
               iio[0] = TM + TN;
               rio[WS(rs, 1)] = TX + T14;
               iio[WS(rs, 1)] = T1G + T1H;
               rio[WS(rs, 3)] = T2L + T2S;
               rio[WS(rs, 2)] = T1R + T1Y;
               iio[WS(rs, 2)] = T2A + T2B;
               iio[WS(rs, 3)] = T3u + T3v;
               iio[WS(rs, 4)] = T4o + T4p;
               iio[WS(rs, 5)] = T5i + T5j;
               rio[WS(rs, 5)] = T4z + T4G;
               rio[WS(rs, 4)] = T3F + T3M;
               /*
                * Output 3 of every row: built from the difference terms only,
                * combined with the twiddle pair (W[4], W[5]) and stored at
                * WS(vs, 3) + WS(rs, v).
                */
               {
                    E T1w, T1y, T1v, T1x;
                    T1w = T16 + T19;
                    T1y = T1n + T1o;
                    T1v = W[4];
                    T1x = W[5];
                    rio[WS(vs, 3) + WS(rs, 1)] = FMA(T1v, T1w, T1x * T1y);
                    iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T1x, T1w, T1v * T1y);
               }
               {
                    E T58, T5a, T57, T59;
                    T58 = T4I + T4L;
                    T5a = T4Z + T50;
                    T57 = W[4];
                    T59 = W[5];
                    rio[WS(vs, 3) + WS(rs, 5)] = FMA(T57, T58, T59 * T5a);
                    iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T59, T58, T57 * T5a);
               }
               {
                    E TC, TE, TB, TD;
                    TC = Tc + Tf;
                    TE = Tt + Tu;
                    TB = W[4];
                    TD = W[5];
                    rio[WS(vs, 3)] = FMA(TB, TC, TD * TE);
                    iio[WS(vs, 3)] = FNMS(TD, TC, TB * TE);
               }
               {
                    E T4e, T4g, T4d, T4f;
                    T4e = T3O + T3R;
                    T4g = T45 + T46;
                    T4d = W[4];
                    T4f = W[5];
                    rio[WS(vs, 3) + WS(rs, 4)] = FMA(T4d, T4e, T4f * T4g);
                    iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T4f, T4e, T4d * T4g);
               }
               {
                    E T3k, T3m, T3j, T3l;
                    T3k = T2U + T2X;
                    T3m = T3b + T3c;
                    T3j = W[4];
                    T3l = W[5];
                    rio[WS(vs, 3) + WS(rs, 3)] = FMA(T3j, T3k, T3l * T3m);
                    iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T3l, T3k, T3j * T3m);
               }
               {
                    E T2q, T2s, T2p, T2r;
                    T2q = T20 + T23;
                    T2s = T2h + T2i;
                    T2p = W[4];
                    T2r = W[5];
                    rio[WS(vs, 3) + WS(rs, 2)] = FMA(T2p, T2q, T2r * T2s);
                    iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T2r, T2q, T2p * T2s);
               }
               /*
                * Remaining outputs, one block per row.  Blocks that start from
                * the sum terms produce outputs 2 and 4 (twiddles W[2..3] and
                * W[6..7]); blocks that start from the difference terms produce
                * outputs 1 and 5 (twiddles W[0..1] and W[8..9]).
                */
               /* Row v = 5, outputs 2 and 4. */
               {
                    E T5g, T5o, T5m, T5q, T5c, T5k;
                    T5c = FNMS(KP500000000, T4G, T4z);
                    T5g = T5c - T5f;
                    T5o = T5c + T5f;
                    T5k = FNMS(KP500000000, T5j, T5i);
                    T5m = T5k - T5l;
                    T5q = T5l + T5k;
                    {
                         E T5b, T5h, T5n, T5p;
                         T5b = W[2];
                         T5h = W[3];
                         rio[WS(vs, 2) + WS(rs, 5)] = FMA(T5b, T5g, T5h * T5m);
                         iio[WS(vs, 2) + WS(rs, 5)] = FNMS(T5h, T5g, T5b * T5m);
                         T5n = W[6];
                         T5p = W[7];
                         rio[WS(vs, 4) + WS(rs, 5)] = FMA(T5n, T5o, T5p * T5q);
                         iio[WS(vs, 4) + WS(rs, 5)] = FNMS(T5p, T5o, T5n * T5q);
                    }
               }
               /* Row v = 0, outputs 1 and 5. */
               {
                    E To, Ty, Tw, TA, Tg, Tv;
                    Tg = FNMS(KP500000000, Tf, Tc);
                    To = Tg + Tn;
                    Ty = Tg - Tn;
                    Tv = FNMS(KP500000000, Tu, Tt);
                    Tw = Tq + Tv;
                    TA = Tv - Tq;
                    {
                         E Tb, Tp, Tx, Tz;
                         Tb = W[0];
                         Tp = W[1];
                         rio[WS(vs, 1)] = FMA(Tb, To, Tp * Tw);
                         iio[WS(vs, 1)] = FNMS(Tp, To, Tb * Tw);
                         Tx = W[8];
                         Tz = W[9];
                         rio[WS(vs, 5)] = FMA(Tx, Ty, Tz * TA);
                         iio[WS(vs, 5)] = FNMS(Tz, Ty, Tx * TA);
                    }
               }
               /* Row v = 3, outputs 1 and 5. */
               {
                    E T36, T3g, T3e, T3i, T2Y, T3d;
                    T2Y = FNMS(KP500000000, T2X, T2U);
                    T36 = T2Y + T35;
                    T3g = T2Y - T35;
                    T3d = FNMS(KP500000000, T3c, T3b);
                    T3e = T38 + T3d;
                    T3i = T3d - T38;
                    {
                         E T2T, T37, T3f, T3h;
                         T2T = W[0];
                         T37 = W[1];
                         rio[WS(vs, 1) + WS(rs, 3)] = FMA(T2T, T36, T37 * T3e);
                         iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T37, T36, T2T * T3e);
                         T3f = W[8];
                         T3h = W[9];
                         rio[WS(vs, 5) + WS(rs, 3)] = FMA(T3f, T3g, T3h * T3i);
                         iio[WS(vs, 5) + WS(rs, 3)] = FNMS(T3h, T3g, T3f * T3i);
                    }
               }
               /* Row v = 2, outputs 2 and 4. */
               {
                    E T2y, T2G, T2E, T2I, T2u, T2C;
                    T2u = FNMS(KP500000000, T1Y, T1R);
                    T2y = T2u - T2x;
                    T2G = T2u + T2x;
                    T2C = FNMS(KP500000000, T2B, T2A);
                    T2E = T2C - T2D;
                    T2I = T2D + T2C;
                    {
                         E T2t, T2z, T2F, T2H;
                         T2t = W[2];
                         T2z = W[3];
                         rio[WS(vs, 2) + WS(rs, 2)] = FMA(T2t, T2y, T2z * T2E);
                         iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T2z, T2y, T2t * T2E);
                         T2F = W[6];
                         T2H = W[7];
                         rio[WS(vs, 4) + WS(rs, 2)] = FMA(T2F, T2G, T2H * T2I);
                         iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T2H, T2G, T2F * T2I);
                    }
               }
               /* Row v = 3, outputs 2 and 4. */
               {
                    E T3s, T3A, T3y, T3C, T3o, T3w;
                    T3o = FNMS(KP500000000, T2S, T2L);
                    T3s = T3o - T3r;
                    T3A = T3o + T3r;
                    T3w = FNMS(KP500000000, T3v, T3u);
                    T3y = T3w - T3x;
                    T3C = T3x + T3w;
                    {
                         E T3n, T3t, T3z, T3B;
                         T3n = W[2];
                         T3t = W[3];
                         rio[WS(vs, 2) + WS(rs, 3)] = FMA(T3n, T3s, T3t * T3y);
                         iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T3t, T3s, T3n * T3y);
                         T3z = W[6];
                         T3B = W[7];
                         rio[WS(vs, 4) + WS(rs, 3)] = FMA(T3z, T3A, T3B * T3C);
                         iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T3B, T3A, T3z * T3C);
                    }
               }
               /* Row v = 1, outputs 2 and 4. */
               {
                    E T1E, T1M, T1K, T1O, T1A, T1I;
                    T1A = FNMS(KP500000000, T14, TX);
                    T1E = T1A - T1D;
                    T1M = T1A + T1D;
                    T1I = FNMS(KP500000000, T1H, T1G);
                    T1K = T1I - T1J;
                    T1O = T1J + T1I;
                    {
                         E T1z, T1F, T1L, T1N;
                         T1z = W[2];
                         T1F = W[3];
                         rio[WS(vs, 2) + WS(rs, 1)] = FMA(T1z, T1E, T1F * T1K);
                         iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T1F, T1E, T1z * T1K);
                         T1L = W[6];
                         T1N = W[7];
                         rio[WS(vs, 4) + WS(rs, 1)] = FMA(T1L, T1M, T1N * T1O);
                         iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T1N, T1M, T1L * T1O);
                    }
               }
               /* Row v = 4, outputs 2 and 4. */
               {
                    E T4m, T4u, T4s, T4w, T4i, T4q;
                    T4i = FNMS(KP500000000, T3M, T3F);
                    T4m = T4i - T4l;
                    T4u = T4i + T4l;
                    T4q = FNMS(KP500000000, T4p, T4o);
                    T4s = T4q - T4r;
                    T4w = T4r + T4q;
                    {
                         E T4h, T4n, T4t, T4v;
                         T4h = W[2];
                         T4n = W[3];
                         rio[WS(vs, 2) + WS(rs, 4)] = FMA(T4h, T4m, T4n * T4s);
                         iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T4n, T4m, T4h * T4s);
                         T4t = W[6];
                         T4v = W[7];
                         rio[WS(vs, 4) + WS(rs, 4)] = FMA(T4t, T4u, T4v * T4w);
                         iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T4v, T4u, T4t * T4w);
                    }
               }
               /* Row v = 0, outputs 2 and 4. */
               {
                    E TK, TS, TQ, TU, TG, TO;
                    TG = FNMS(KP500000000, Ta, T3);
                    TK = TG - TJ;
                    TS = TG + TJ;
                    TO = FNMS(KP500000000, TN, TM);
                    TQ = TO - TP;
                    TU = TP + TO;
                    {
                         E TF, TL, TR, TT;
                         TF = W[2];
                         TL = W[3];
                         rio[WS(vs, 2)] = FMA(TF, TK, TL * TQ);
                         iio[WS(vs, 2)] = FNMS(TL, TK, TF * TQ);
                         TR = W[6];
                         TT = W[7];
                         rio[WS(vs, 4)] = FMA(TR, TS, TT * TU);
                         iio[WS(vs, 4)] = FNMS(TT, TS, TR * TU);
                    }
               }
               /* Row v = 2, outputs 1 and 5. */
               {
                    E T2c, T2m, T2k, T2o, T24, T2j;
                    T24 = FNMS(KP500000000, T23, T20);
                    T2c = T24 + T2b;
                    T2m = T24 - T2b;
                    T2j = FNMS(KP500000000, T2i, T2h);
                    T2k = T2e + T2j;
                    T2o = T2j - T2e;
                    {
                         E T1Z, T2d, T2l, T2n;
                         T1Z = W[0];
                         T2d = W[1];
                         rio[WS(vs, 1) + WS(rs, 2)] = FMA(T1Z, T2c, T2d * T2k);
                         iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T2d, T2c, T1Z * T2k);
                         T2l = W[8];
                         T2n = W[9];
                         rio[WS(vs, 5) + WS(rs, 2)] = FMA(T2l, T2m, T2n * T2o);
                         iio[WS(vs, 5) + WS(rs, 2)] = FNMS(T2n, T2m, T2l * T2o);
                    }
               }
               /* Row v = 4, outputs 1 and 5. */
               {
                    E T40, T4a, T48, T4c, T3S, T47;
                    T3S = FNMS(KP500000000, T3R, T3O);
                    T40 = T3S + T3Z;
                    T4a = T3S - T3Z;
                    T47 = FNMS(KP500000000, T46, T45);
                    T48 = T42 + T47;
                    T4c = T47 - T42;
                    {
                         E T3N, T41, T49, T4b;
                         T3N = W[0];
                         T41 = W[1];
                         rio[WS(vs, 1) + WS(rs, 4)] = FMA(T3N, T40, T41 * T48);
                         iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T41, T40, T3N * T48);
                         T49 = W[8];
                         T4b = W[9];
                         rio[WS(vs, 5) + WS(rs, 4)] = FMA(T49, T4a, T4b * T4c);
                         iio[WS(vs, 5) + WS(rs, 4)] = FNMS(T4b, T4a, T49 * T4c);
                    }
               }
               /* Row v = 1, outputs 1 and 5. */
               {
                    E T1i, T1s, T1q, T1u, T1a, T1p;
                    T1a = FNMS(KP500000000, T19, T16);
                    T1i = T1a + T1h;
                    T1s = T1a - T1h;
                    T1p = FNMS(KP500000000, T1o, T1n);
                    T1q = T1k + T1p;
                    T1u = T1p - T1k;
                    {
                         E T15, T1j, T1r, T1t;
                         T15 = W[0];
                         T1j = W[1];
                         rio[WS(vs, 1) + WS(rs, 1)] = FMA(T15, T1i, T1j * T1q);
                         iio[WS(vs, 1) + WS(rs, 1)] = FNMS(T1j, T1i, T15 * T1q);
                         T1r = W[8];
                         T1t = W[9];
                         rio[WS(vs, 5) + WS(rs, 1)] = FMA(T1r, T1s, T1t * T1u);
                         iio[WS(vs, 5) + WS(rs, 1)] = FNMS(T1t, T1s, T1r * T1u);
                    }
               }
               /* Row v = 5, outputs 1 and 5. */
               {
                    E T4U, T54, T52, T56, T4M, T51;
                    T4M = FNMS(KP500000000, T4L, T4I);
                    T4U = T4M + T4T;
                    T54 = T4M - T4T;
                    T51 = FNMS(KP500000000, T50, T4Z);
                    T52 = T4W + T51;
                    T56 = T51 - T4W;
                    {
                         E T4H, T4V, T53, T55;
                         T4H = W[0];
                         T4V = W[1];
                         rio[WS(vs, 1) + WS(rs, 5)] = FMA(T4H, T4U, T4V * T52);
                         iio[WS(vs, 1) + WS(rs, 5)] = FNMS(T4V, T4U, T4H * T52);
                         T53 = W[8];
                         T55 = W[9];
                         rio[WS(vs, 5) + WS(rs, 5)] = FMA(T53, T54, T55 * T56);
                         iio[WS(vs, 5) + WS(rs, 5)] = FNMS(T55, T54, T53 * T56);
                    }
               }
          }
     }
}
1309

    
1310
static const tw_instr twinstr[] = {
1311
     {TW_FULL, 0, 6},
1312
     {TW_NEXT, 1, 0}
1313
};
1314

    
1315
/*
 * Descriptor passed to the planner when this (non-FMA) variant of the codelet
 * is registered.  Field meanings below are inferred from the values;
 * NOTE(review): confirm against the ct_desc declaration.
 */
static const ct_desc desc = {
     6,                     /* transform size (generator option -n 6) */
     "q1_6",                /* codelet name */
     twinstr,               /* twiddle instruction table */
     &GENUS,
     {192, 84, 84, 0},      /* 192 additions, 84 multiplications, 84 fused multiply/adds */
     0,
     0,
     0
};
1316

    
1317
/*
 * Registration entry point: hands the q1_6 kernel and its descriptor to the
 * planner `p` through kdft_difsq_register.
 */
void X(codelet_q1_6) (planner *p) {
     X(kdft_difsq_register) (p, q1_6, &desc);
}
1320
#endif