To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / n1_16.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (14.5 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:11 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 16 -name n1_16 -include dft/scalar/n.h */
29

    
30
/*
31
 * This function contains 144 FP additions, 40 FP multiplications,
32
 * (or, 104 additions, 0 multiplications, 40 fused multiply/add),
33
 * 50 stack variables, 3 constants, and 64 memory accesses
34
 */
35
#include "dft/scalar/n.h"
36

    
37
/*
 * n1_16, FMA variant: the size-16 kernel emitted by genfft's gen_notw
 * (see the "Generated by" comment above).
 *
 *   ri, ii   - input arrays (named as real / imaginary parts); element k is
 *              read at index WS(is, k), for k = 0..15.
 *   ro, io   - output arrays; element k is written at index WS(os, k).
 *   is, os   - input and output strides, only ever used through WS().
 *   v        - number of times the 16-point body is executed.
 *   ivs, ovs - added to the input / output pointers after every iteration.
 *
 * Each iteration performs 32 loads per input pair (16 from ri, 16 from ii)
 * and 32 stores (16 to ro, 16 to io); nothing is carried between iterations.
 *
 * NOTE(review): R, E, INT, stride, WS, DK, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are not defined in this file - presumably they come
 * from dft/codelet-dft.h / dft/scalar/n.h.  Comparing with the non-FMA
 * branch of this file, FMA(a, b, c) is used as a*b + c and FNMS(a, b, c)
 * as c - a*b.
 * NOTE(review): the sign pattern (e.g. ro[4] adds ii[1] and subtracts ii[3])
 * looks like the exp(-2*pi*i*j*k/16) convention - confirm against the FFTW
 * documentation before relying on it.
 */
static void n1_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
38
{
39
     /* numerically cos(pi/8) */
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
40
     /* numerically sqrt(2) - 1 = tan(pi/8) */
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
41
     /* numerically sqrt(2)/2 */
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
42
     {
43
          INT i;
44
          /* One 16-point transform per iteration; pointers advance by ivs / ovs. */
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) {
45
               /* Intermediates shared between the load stages and the store stages below. */
               E T7, T1R, T25, TC, TN, T1x, T1H, T1l, Tt, T22, T2h, T1b, T1g, T1E, T1Z;
46
               E T1D, Te, T1S, T26, TJ, TQ, T1m, T1n, TT, Tm, T1X, T2g, T10, T15, T1B;
47
               E T1U, T1A;
48
               /* Load inputs 0, 8, 4, 12: sums/differences of pairs (0,8) and (4,12), then of those. */
               {
49
                    E T3, TL, Ty, T1k, T6, T1j, TB, TM;
50
                    {
51
                         E T1, T2, Tw, Tx;
52
                         T1 = ri[0];
53
                         T2 = ri[WS(is, 8)];
54
                         T3 = T1 + T2;
55
                         TL = T1 - T2;
56
                         Tw = ii[0];
57
                         Tx = ii[WS(is, 8)];
58
                         Ty = Tw + Tx;
59
                         T1k = Tw - Tx;
60
                    }
61
                    {
62
                         E T4, T5, Tz, TA;
63
                         T4 = ri[WS(is, 4)];
64
                         T5 = ri[WS(is, 12)];
65
                         T6 = T4 + T5;
66
                         T1j = T4 - T5;
67
                         Tz = ii[WS(is, 4)];
68
                         TA = ii[WS(is, 12)];
69
                         TB = Tz + TA;
70
                         TM = Tz - TA;
71
                    }
72
                    T7 = T3 + T6;
73
                    T1R = T3 - T6;
74
                    T25 = Ty - TB;
75
                    TC = Ty + TB;
76
                    TN = TL - TM;
77
                    T1x = TL + TM;
78
                    T1H = T1k - T1j;
79
                    T1l = T1j + T1k;
80
               }
81
               /* Load inputs 15, 7, 3, 11: pairs (15,7) and (3,11). */
               {
82
                    E Tp, T1c, T1a, T20, Ts, T17, T1f, T21;
83
                    {
84
                         E Tn, To, T18, T19;
85
                         Tn = ri[WS(is, 15)];
86
                         To = ri[WS(is, 7)];
87
                         Tp = Tn + To;
88
                         T1c = Tn - To;
89
                         T18 = ii[WS(is, 15)];
90
                         T19 = ii[WS(is, 7)];
91
                         T1a = T18 - T19;
92
                         T20 = T18 + T19;
93
                    }
94
                    {
95
                         E Tq, Tr, T1d, T1e;
96
                         Tq = ri[WS(is, 3)];
97
                         Tr = ri[WS(is, 11)];
98
                         Ts = Tq + Tr;
99
                         T17 = Tq - Tr;
100
                         T1d = ii[WS(is, 3)];
101
                         T1e = ii[WS(is, 11)];
102
                         T1f = T1d - T1e;
103
                         T21 = T1d + T1e;
104
                    }
105
                    Tt = Tp + Ts;
106
                    T22 = T20 - T21;
107
                    T2h = T20 + T21;
108
                    T1b = T17 + T1a;
109
                    T1g = T1c - T1f;
110
                    T1E = T1a - T17;
111
                    T1Z = Tp - Ts;
112
                    T1D = T1c + T1f;
113
               }
114
               /* Load inputs 2, 10, 14, 6: pairs (2,10) and (14,6). */
               {
115
                    E Ta, TP, TF, TO, Td, TR, TI, TS;
116
                    {
117
                         E T8, T9, TD, TE;
118
                         T8 = ri[WS(is, 2)];
119
                         T9 = ri[WS(is, 10)];
120
                         Ta = T8 + T9;
121
                         TP = T8 - T9;
122
                         TD = ii[WS(is, 2)];
123
                         TE = ii[WS(is, 10)];
124
                         TF = TD + TE;
125
                         TO = TD - TE;
126
                    }
127
                    {
128
                         E Tb, Tc, TG, TH;
129
                         Tb = ri[WS(is, 14)];
130
                         Tc = ri[WS(is, 6)];
131
                         Td = Tb + Tc;
132
                         TR = Tb - Tc;
133
                         TG = ii[WS(is, 14)];
134
                         TH = ii[WS(is, 6)];
135
                         TI = TG + TH;
136
                         TS = TG - TH;
137
                    }
138
                    Te = Ta + Td;
139
                    T1S = TF - TI;
140
                    T26 = Td - Ta;
141
                    TJ = TF + TI;
142
                    TQ = TO - TP;
143
                    T1m = TR - TS;
144
                    T1n = TP + TO;
145
                    TT = TR + TS;
146
               }
147
               /* Load inputs 1, 9, 5, 13: pairs (1,9) and (5,13). */
               {
148
                    E Ti, T11, TZ, T1V, Tl, TW, T14, T1W;
149
                    {
150
                         E Tg, Th, TX, TY;
151
                         Tg = ri[WS(is, 1)];
152
                         Th = ri[WS(is, 9)];
153
                         Ti = Tg + Th;
154
                         T11 = Tg - Th;
155
                         TX = ii[WS(is, 1)];
156
                         TY = ii[WS(is, 9)];
157
                         TZ = TX - TY;
158
                         T1V = TX + TY;
159
                    }
160
                    {
161
                         E Tj, Tk, T12, T13;
162
                         Tj = ri[WS(is, 5)];
163
                         Tk = ri[WS(is, 13)];
164
                         Tl = Tj + Tk;
165
                         TW = Tj - Tk;
166
                         T12 = ii[WS(is, 5)];
167
                         T13 = ii[WS(is, 13)];
168
                         T14 = T12 - T13;
169
                         T1W = T12 + T13;
170
                    }
171
                    Tm = Ti + Tl;
172
                    T1X = T1V - T1W;
173
                    T2g = T1V + T1W;
174
                    T10 = TW + TZ;
175
                    T15 = T11 - T14;
176
                    T1B = TZ - TW;
177
                    T1U = Ti - Tl;
178
                    T1A = T11 + T14;
179
               }
180
               /* Store outputs 0 and 8 (real and imaginary): additions only, no constants. */
               {
181
                    E Tf, Tu, T2j, T2k;
182
                    Tf = T7 + Te;
183
                    Tu = Tm + Tt;
184
                    ro[WS(os, 8)] = Tf - Tu;
185
                    ro[0] = Tf + Tu;
186
                    T2j = TC + TJ;
187
                    T2k = T2g + T2h;
188
                    io[WS(os, 8)] = T2j - T2k;
189
                    io[0] = T2j + T2k;
190
               }
191
               /* Store outputs 4 and 12: additions only, no constants. */
               {
192
                    E Tv, TK, T2f, T2i;
193
                    Tv = Tt - Tm;
194
                    TK = TC - TJ;
195
                    io[WS(os, 4)] = Tv + TK;
196
                    io[WS(os, 12)] = TK - Tv;
197
                    T2f = T7 - Te;
198
                    T2i = T2g - T2h;
199
                    ro[WS(os, 12)] = T2f - T2i;
200
                    ro[WS(os, 4)] = T2f + T2i;
201
               }
202
               /* Store ro[10], io[6], ro[2], io[14]: one KP707106781 multiply-add each. */
               {
203
                    E T1T, T27, T24, T28, T1Y, T23;
204
                    T1T = T1R + T1S;
205
                    T27 = T25 - T26;
206
                    T1Y = T1U + T1X;
207
                    T23 = T1Z - T22;
208
                    T24 = T1Y + T23;
209
                    T28 = T23 - T1Y;
210
                    ro[WS(os, 10)] = FNMS(KP707106781, T24, T1T);
211
                    io[WS(os, 6)] = FMA(KP707106781, T28, T27);
212
                    ro[WS(os, 2)] = FMA(KP707106781, T24, T1T);
213
                    io[WS(os, 14)] = FNMS(KP707106781, T28, T27);
214
               }
215
               /* Store ro[14], io[2], ro[6], io[10]: one KP707106781 multiply-add each. */
               {
216
                    E T29, T2d, T2c, T2e, T2a, T2b;
217
                    T29 = T1R - T1S;
218
                    T2d = T26 + T25;
219
                    T2a = T1X - T1U;
220
                    T2b = T1Z + T22;
221
                    T2c = T2a - T2b;
222
                    T2e = T2a + T2b;
223
                    ro[WS(os, 14)] = FNMS(KP707106781, T2c, T29);
224
                    io[WS(os, 2)] = FMA(KP707106781, T2e, T2d);
225
                    ro[WS(os, 6)] = FMA(KP707106781, T2c, T29);
226
                    io[WS(os, 10)] = FNMS(KP707106781, T2e, T2d);
227
               }
228
               /* Store outputs 3, 7, 11, 15: uses all three constants. */
               {
229
                    E TV, T1v, T1p, T1r, T1i, T1q, T1u, T1w, TU, T1o;
230
                    TU = TQ - TT;
231
                    TV = FMA(KP707106781, TU, TN);
232
                    T1v = FNMS(KP707106781, TU, TN);
233
                    T1o = T1m - T1n;
234
                    T1p = FNMS(KP707106781, T1o, T1l);
235
                    T1r = FMA(KP707106781, T1o, T1l);
236
                    {
237
                         E T16, T1h, T1s, T1t;
238
                         T16 = FMA(KP414213562, T15, T10);
239
                         T1h = FNMS(KP414213562, T1g, T1b);
240
                         T1i = T16 - T1h;
241
                         T1q = T16 + T1h;
242
                         T1s = FMA(KP414213562, T1b, T1g);
243
                         T1t = FNMS(KP414213562, T10, T15);
244
                         T1u = T1s - T1t;
245
                         T1w = T1t + T1s;
246
                    }
247
                    ro[WS(os, 11)] = FNMS(KP923879532, T1i, TV);
248
                    io[WS(os, 11)] = FNMS(KP923879532, T1u, T1r);
249
                    ro[WS(os, 3)] = FMA(KP923879532, T1i, TV);
250
                    io[WS(os, 3)] = FMA(KP923879532, T1u, T1r);
251
                    io[WS(os, 7)] = FNMS(KP923879532, T1q, T1p);
252
                    ro[WS(os, 7)] = FNMS(KP923879532, T1w, T1v);
253
                    io[WS(os, 15)] = FMA(KP923879532, T1q, T1p);
254
                    ro[WS(os, 15)] = FMA(KP923879532, T1w, T1v);
255
               }
256
               /* Store outputs 1, 5, 9, 13: uses all three constants. */
               {
257
                    E T1z, T1L, T1J, T1P, T1G, T1K, T1O, T1Q, T1y, T1I;
258
                    T1y = T1n + T1m;
259
                    T1z = FMA(KP707106781, T1y, T1x);
260
                    T1L = FNMS(KP707106781, T1y, T1x);
261
                    T1I = TQ + TT;
262
                    T1J = FNMS(KP707106781, T1I, T1H);
263
                    T1P = FMA(KP707106781, T1I, T1H);
264
                    {
265
                         E T1C, T1F, T1M, T1N;
266
                         T1C = FMA(KP414213562, T1B, T1A);
267
                         T1F = FNMS(KP414213562, T1E, T1D);
268
                         T1G = T1C + T1F;
269
                         T1K = T1F - T1C;
270
                         T1M = FNMS(KP414213562, T1A, T1B);
271
                         T1N = FMA(KP414213562, T1D, T1E);
272
                         T1O = T1M - T1N;
273
                         T1Q = T1M + T1N;
274
                    }
275
                    ro[WS(os, 9)] = FNMS(KP923879532, T1G, T1z);
276
                    io[WS(os, 9)] = FNMS(KP923879532, T1Q, T1P);
277
                    ro[WS(os, 1)] = FMA(KP923879532, T1G, T1z);
278
                    io[WS(os, 1)] = FMA(KP923879532, T1Q, T1P);
279
                    io[WS(os, 13)] = FNMS(KP923879532, T1K, T1J);
280
                    ro[WS(os, 13)] = FNMS(KP923879532, T1O, T1L);
281
                    io[WS(os, 5)] = FMA(KP923879532, T1K, T1J);
282
                    ro[WS(os, 5)] = FMA(KP923879532, T1O, T1L);
283
               }
284
          }
285
     }
286
}
287

    
288
/*
 * Descriptor handed to the planner together with n1_16 (FMA variant):
 * transform size 16, codelet name "n1_16", and an operation-count tuple
 * whose first three entries (104, 0, 40) match the additions /
 * multiplications / fused multiply-adds quoted in the generator comment
 * above.
 * NOTE(review): the layout of kdft_desc, the meaning of the fourth count and
 * of the four trailing zeros, and the definition of GENUS are not visible in
 * this file - check dft/codelet-dft.h and dft/scalar/n.h.
 */
static const kdft_desc desc = { 16, "n1_16", {104, 0, 40, 0}, &GENUS, 0, 0, 0, 0 };
289

    
290
/*
 * Registration entry point for this codelet: passes the planner `p`, the
 * n1_16 kernel defined above and its descriptor to X(kdft_register).
 * NOTE(review): X() is defined outside this file - presumably the FFTW
 * symbol-prefixing macro; confirm in the project headers.
 */
void X(codelet_n1_16) (planner *p) {
291
     X(kdft_register) (p, n1_16, &desc);
292
}
293

    
294
#else
295

    
296
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 16 -name n1_16 -include dft/scalar/n.h */
297

    
298
/*
299
 * This function contains 144 FP additions, 24 FP multiplications,
300
 * (or, 136 additions, 16 multiplications, 8 fused multiply/add),
301
 * 50 stack variables, 3 constants, and 64 memory accesses
302
 */
303
#include "dft/scalar/n.h"
304

    
305
/*
 * n1_16, non-FMA variant: the size-16 kernel emitted by genfft's gen_notw
 * without the -fma option (see the "Generated by" comment above).  It is
 * compiled only when neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA
 * is defined.
 *
 *   ri, ii   - input arrays (named as real / imaginary parts); element k is
 *              read at index WS(is, k), for k = 0..15.
 *   ro, io   - output arrays; element k is written at index WS(os, k).
 *   is, os   - input and output strides, only ever used through WS().
 *   v        - number of times the 16-point body is executed.
 *   ivs, ovs - added to the input / output pointers after every iteration.
 *
 * Each iteration performs 32 loads (16 from ri, 16 from ii) and 32 stores
 * (16 to ro, 16 to io); nothing is carried between iterations.
 *
 * NOTE(review): R, E, INT, stride, WS, DK, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are not defined in this file - presumably they come
 * from dft/codelet-dft.h / dft/scalar/n.h.  FMA(a, b, c) is presumably
 * a*b + c and FNMS(a, b, c) presumably c - a*b; confirm there.
 * NOTE(review): the sign pattern (e.g. ro[4] adds ii[1] and subtracts ii[3])
 * looks like the exp(-2*pi*i*j*k/16) convention - confirm against the FFTW
 * documentation before relying on it.
 */
static void n1_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
306
{
307
     /* numerically sin(pi/8) */
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
308
     /* numerically cos(pi/8) */
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
309
     /* numerically sqrt(2)/2 */
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
310
     {
311
          INT i;
312
          /* One 16-point transform per iteration; pointers advance by ivs / ovs. */
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(64, is), MAKE_VOLATILE_STRIDE(64, os)) {
313
               /* Intermediates shared between the load stages and the store stages below. */
               E T7, T1R, T25, TC, TN, T1x, T1H, T1l, Tt, T22, T2h, T1b, T1g, T1E, T1Z;
314
               E T1D, Te, T1S, T26, TJ, TQ, T1m, T1n, TT, Tm, T1X, T2g, T10, T15, T1B;
315
               E T1U, T1A;
316
               /* Load inputs 0, 8, 4, 12: sums/differences of pairs (0,8) and (4,12), then of those. */
               {
317
                    E T3, TL, Ty, T1k, T6, T1j, TB, TM;
318
                    {
319
                         E T1, T2, Tw, Tx;
320
                         T1 = ri[0];
321
                         T2 = ri[WS(is, 8)];
322
                         T3 = T1 + T2;
323
                         TL = T1 - T2;
324
                         Tw = ii[0];
325
                         Tx = ii[WS(is, 8)];
326
                         Ty = Tw + Tx;
327
                         T1k = Tw - Tx;
328
                    }
329
                    {
330
                         E T4, T5, Tz, TA;
331
                         T4 = ri[WS(is, 4)];
332
                         T5 = ri[WS(is, 12)];
333
                         T6 = T4 + T5;
334
                         T1j = T4 - T5;
335
                         Tz = ii[WS(is, 4)];
336
                         TA = ii[WS(is, 12)];
337
                         TB = Tz + TA;
338
                         TM = Tz - TA;
339
                    }
340
                    T7 = T3 + T6;
341
                    T1R = T3 - T6;
342
                    T25 = Ty - TB;
343
                    TC = Ty + TB;
344
                    TN = TL - TM;
345
                    T1x = TL + TM;
346
                    T1H = T1k - T1j;
347
                    T1l = T1j + T1k;
348
               }
349
               /* Load inputs 15, 7, 3, 11: pairs (15,7) and (3,11). */
               {
350
                    E Tp, T17, T1f, T20, Ts, T1c, T1a, T21;
351
                    {
352
                         E Tn, To, T1d, T1e;
353
                         Tn = ri[WS(is, 15)];
354
                         To = ri[WS(is, 7)];
355
                         Tp = Tn + To;
356
                         T17 = Tn - To;
357
                         T1d = ii[WS(is, 15)];
358
                         T1e = ii[WS(is, 7)];
359
                         T1f = T1d - T1e;
360
                         T20 = T1d + T1e;
361
                    }
362
                    {
363
                         E Tq, Tr, T18, T19;
364
                         Tq = ri[WS(is, 3)];
365
                         Tr = ri[WS(is, 11)];
366
                         Ts = Tq + Tr;
367
                         T1c = Tq - Tr;
368
                         T18 = ii[WS(is, 3)];
369
                         T19 = ii[WS(is, 11)];
370
                         T1a = T18 - T19;
371
                         T21 = T18 + T19;
372
                    }
373
                    Tt = Tp + Ts;
374
                    T22 = T20 - T21;
375
                    T2h = T20 + T21;
376
                    T1b = T17 - T1a;
377
                    T1g = T1c + T1f;
378
                    T1E = T1f - T1c;
379
                    T1Z = Tp - Ts;
380
                    T1D = T17 + T1a;
381
               }
382
               /* Load inputs 2, 10, 14, 6: pairs (2,10) and (14,6). */
               {
383
                    E Ta, TP, TF, TO, Td, TR, TI, TS;
384
                    {
385
                         E T8, T9, TD, TE;
386
                         T8 = ri[WS(is, 2)];
387
                         T9 = ri[WS(is, 10)];
388
                         Ta = T8 + T9;
389
                         TP = T8 - T9;
390
                         TD = ii[WS(is, 2)];
391
                         TE = ii[WS(is, 10)];
392
                         TF = TD + TE;
393
                         TO = TD - TE;
394
                    }
395
                    {
396
                         E Tb, Tc, TG, TH;
397
                         Tb = ri[WS(is, 14)];
398
                         Tc = ri[WS(is, 6)];
399
                         Td = Tb + Tc;
400
                         TR = Tb - Tc;
401
                         TG = ii[WS(is, 14)];
402
                         TH = ii[WS(is, 6)];
403
                         TI = TG + TH;
404
                         TS = TG - TH;
405
                    }
406
                    Te = Ta + Td;
407
                    T1S = TF - TI;
408
                    T26 = Td - Ta;
409
                    TJ = TF + TI;
410
                    TQ = TO - TP;
411
                    T1m = TR - TS;
412
                    T1n = TP + TO;
413
                    TT = TR + TS;
414
               }
415
               /* Load inputs 1, 9, 5, 13: pairs (1,9) and (5,13). */
               {
416
                    E Ti, T11, TZ, T1V, Tl, TW, T14, T1W;
417
                    {
418
                         E Tg, Th, TX, TY;
419
                         Tg = ri[WS(is, 1)];
420
                         Th = ri[WS(is, 9)];
421
                         Ti = Tg + Th;
422
                         T11 = Tg - Th;
423
                         TX = ii[WS(is, 1)];
424
                         TY = ii[WS(is, 9)];
425
                         TZ = TX - TY;
426
                         T1V = TX + TY;
427
                    }
428
                    {
429
                         E Tj, Tk, T12, T13;
430
                         Tj = ri[WS(is, 5)];
431
                         Tk = ri[WS(is, 13)];
432
                         Tl = Tj + Tk;
433
                         TW = Tj - Tk;
434
                         T12 = ii[WS(is, 5)];
435
                         T13 = ii[WS(is, 13)];
436
                         T14 = T12 - T13;
437
                         T1W = T12 + T13;
438
                    }
439
                    Tm = Ti + Tl;
440
                    T1X = T1V - T1W;
441
                    T2g = T1V + T1W;
442
                    T10 = TW + TZ;
443
                    T15 = T11 - T14;
444
                    T1B = T11 + T14;
445
                    T1U = Ti - Tl;
446
                    T1A = TZ - TW;
447
               }
448
               /* Store outputs 0 and 8 (real and imaginary): additions only, no constants. */
               {
449
                    E Tf, Tu, T2j, T2k;
450
                    Tf = T7 + Te;
451
                    Tu = Tm + Tt;
452
                    ro[WS(os, 8)] = Tf - Tu;
453
                    ro[0] = Tf + Tu;
454
                    T2j = TC + TJ;
455
                    T2k = T2g + T2h;
456
                    io[WS(os, 8)] = T2j - T2k;
457
                    io[0] = T2j + T2k;
458
               }
459
               /* Store outputs 4 and 12: additions only, no constants. */
               {
460
                    E Tv, TK, T2f, T2i;
461
                    Tv = Tt - Tm;
462
                    TK = TC - TJ;
463
                    io[WS(os, 4)] = Tv + TK;
464
                    io[WS(os, 12)] = TK - Tv;
465
                    T2f = T7 - Te;
466
                    T2i = T2g - T2h;
467
                    ro[WS(os, 12)] = T2f - T2i;
468
                    ro[WS(os, 4)] = T2f + T2i;
469
               }
470
               /* Store ro[10], io[6], ro[2], io[14]: scaled by KP707106781 only. */
               {
471
                    E T1T, T27, T24, T28, T1Y, T23;
472
                    T1T = T1R + T1S;
473
                    T27 = T25 - T26;
474
                    T1Y = T1U + T1X;
475
                    T23 = T1Z - T22;
476
                    T24 = KP707106781 * (T1Y + T23);
477
                    T28 = KP707106781 * (T23 - T1Y);
478
                    ro[WS(os, 10)] = T1T - T24;
479
                    io[WS(os, 6)] = T27 + T28;
480
                    ro[WS(os, 2)] = T1T + T24;
481
                    io[WS(os, 14)] = T27 - T28;
482
               }
483
               /* Store ro[14], io[2], ro[6], io[10]: scaled by KP707106781 only. */
               {
484
                    E T29, T2d, T2c, T2e, T2a, T2b;
485
                    T29 = T1R - T1S;
486
                    T2d = T26 + T25;
487
                    T2a = T1X - T1U;
488
                    T2b = T1Z + T22;
489
                    T2c = KP707106781 * (T2a - T2b);
490
                    T2e = KP707106781 * (T2a + T2b);
491
                    ro[WS(os, 14)] = T29 - T2c;
492
                    io[WS(os, 2)] = T2d + T2e;
493
                    ro[WS(os, 6)] = T29 + T2c;
494
                    io[WS(os, 10)] = T2d - T2e;
495
               }
496
               /* Store outputs 3, 7, 11, 15: uses all three constants. */
               {
497
                    E TV, T1r, T1p, T1v, T1i, T1q, T1u, T1w, TU, T1o;
498
                    TU = KP707106781 * (TQ - TT);
499
                    TV = TN + TU;
500
                    T1r = TN - TU;
501
                    T1o = KP707106781 * (T1m - T1n);
502
                    T1p = T1l - T1o;
503
                    T1v = T1l + T1o;
504
                    {
505
                         E T16, T1h, T1s, T1t;
506
                         T16 = FMA(KP923879532, T10, KP382683432 * T15);
507
                         T1h = FNMS(KP923879532, T1g, KP382683432 * T1b);
508
                         T1i = T16 + T1h;
509
                         T1q = T1h - T16;
510
                         T1s = FNMS(KP923879532, T15, KP382683432 * T10);
511
                         T1t = FMA(KP382683432, T1g, KP923879532 * T1b);
512
                         T1u = T1s - T1t;
513
                         T1w = T1s + T1t;
514
                    }
515
                    ro[WS(os, 11)] = TV - T1i;
516
                    io[WS(os, 11)] = T1v - T1w;
517
                    ro[WS(os, 3)] = TV + T1i;
518
                    io[WS(os, 3)] = T1v + T1w;
519
                    io[WS(os, 15)] = T1p - T1q;
520
                    ro[WS(os, 15)] = T1r - T1u;
521
                    io[WS(os, 7)] = T1p + T1q;
522
                    ro[WS(os, 7)] = T1r + T1u;
523
               }
524
               /* Store outputs 1, 5, 9, 13: uses all three constants. */
               {
525
                    E T1z, T1L, T1J, T1P, T1G, T1K, T1O, T1Q, T1y, T1I;
526
                    T1y = KP707106781 * (T1n + T1m);
527
                    T1z = T1x + T1y;
528
                    T1L = T1x - T1y;
529
                    T1I = KP707106781 * (TQ + TT);
530
                    T1J = T1H - T1I;
531
                    T1P = T1H + T1I;
532
                    {
533
                         E T1C, T1F, T1M, T1N;
534
                         T1C = FMA(KP382683432, T1A, KP923879532 * T1B);
535
                         T1F = FNMS(KP382683432, T1E, KP923879532 * T1D);
536
                         T1G = T1C + T1F;
537
                         T1K = T1F - T1C;
538
                         T1M = FNMS(KP382683432, T1B, KP923879532 * T1A);
539
                         T1N = FMA(KP923879532, T1E, KP382683432 * T1D);
540
                         T1O = T1M - T1N;
541
                         T1Q = T1M + T1N;
542
                    }
543
                    ro[WS(os, 9)] = T1z - T1G;
544
                    io[WS(os, 9)] = T1P - T1Q;
545
                    ro[WS(os, 1)] = T1z + T1G;
546
                    io[WS(os, 1)] = T1P + T1Q;
547
                    io[WS(os, 13)] = T1J - T1K;
548
                    ro[WS(os, 13)] = T1L - T1O;
549
                    io[WS(os, 5)] = T1J + T1K;
550
                    ro[WS(os, 5)] = T1L + T1O;
551
               }
552
          }
553
     }
554
}
555

    
556
/*
 * Descriptor handed to the planner together with n1_16 (non-FMA variant):
 * transform size 16, codelet name "n1_16", and an operation-count tuple
 * whose first three entries (136, 16, 8) match the additions /
 * multiplications / fused multiply-adds quoted in the generator comment
 * above.
 * NOTE(review): the layout of kdft_desc, the meaning of the fourth count and
 * of the four trailing zeros, and the definition of GENUS are not visible in
 * this file - check dft/codelet-dft.h and dft/scalar/n.h.
 */
static const kdft_desc desc = { 16, "n1_16", {136, 16, 8, 0}, &GENUS, 0, 0, 0, 0 };
557

    
558
/*
 * Registration entry point for this codelet: passes the planner `p`, the
 * n1_16 kernel defined above and its descriptor to X(kdft_register).
 * NOTE(review): X() is defined outside this file - presumably the FFTW
 * symbol-prefixing macro; confirm in the project headers.
 */
void X(codelet_n1_16) (planner *p) {
559
     X(kdft_register) (p, n1_16, &desc);
560
}
561

    
562
#endif