To check out this repository, please run `hg clone` on the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / q1_4.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (13.8 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:29 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twidsq.native -fma -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 4 -name q1_4 -include dft/scalar/q.h */
29

    
30
/*
31
 * This function contains 88 FP additions, 48 FP multiplications,
32
 * (or, 64 additions, 24 multiplications, 24 fused multiply/add),
33
 * 51 stack variables, 0 constants, and 64 memory accesses
34
 */
35
#include "dft/scalar/q.h"
36

    
37
/*
 * q1_4, variant compiled when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA
 * is defined.
 *
 * Each loop iteration works in place on a 4 x 4 grid of complex values whose
 * real parts live in rio and imaginary parts in iio, at offsets
 * WS(vs, v) + WS(rs, r) for v, r in 0..3:
 *   - all 32 values are loaded into locals before anything is stored;
 *   - for each v, the four values along rs are reduced to sums and
 *     differences (the terms of a 4-point butterfly);
 *   - result k of row v is written back at WS(vs, k) + WS(rs, v), so the
 *     v and k indices trade places between input and output;
 *   - result 0 is stored as a plain sum, results k = 1..3 are combined with
 *     the twiddle pair W[2k-2], W[2k-1] through FMA/FNMS.
 * NOTE(review): FMA, FNMS, WS, E, R and MAKE_VOLATILE_STRIDE are defined
 * outside this file; FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c and
 * c - a*b -- confirm against the project headers.
 *
 * rio, iio: real / imaginary data, read and overwritten.
 * W:        twiddle table; six values are consumed per iteration, starting
 *           at W + mb * 6.
 * rs, vs:   strides passed to WS() to address the two grid dimensions.
 * mb, me:   the loop runs for m = mb, mb + 1, ..., me - 1.
 * ms:       amount added to rio and iio after each iteration.
 */
static void q1_4(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
38
{
39
     {
40
          INT m;
41
          /* Per iteration: advance rio/iio by ms and W by 6 (three twiddle pairs). */
          for (m = mb, W = W + (mb * 6); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 6, MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
42
               /* Butterfly terms that must survive until the store phase below. */
               E T3, Tv, Tw, T6, Tc, Tf, Tx, Ts, Tm, Ti, T1H, T29, T2a, T1K, T1Q;
43
               E T1T, T2b, T26, T20, T1W, TB, T13, T14, TE, TK, TN, T15, T10, TU, TQ;
44
               E T19, T1B, T1C, T1c, T1i, T1l, T1D, T1y, T1s, T1o;
45
               /* Row v = 0: load the four complex inputs along rs and form sums/differences. */
               {
46
                    E T1, T2, Tb, Tg, Th, T8;
47
                    {
48
                         E T9, Ta, T4, T5;
49
                         T1 = rio[0];
50
                         T2 = rio[WS(rs, 2)];
51
                         T3 = T1 + T2;
52
                         T9 = iio[0];
53
                         Ta = iio[WS(rs, 2)];
54
                         Tb = T9 - Ta;
55
                         Tv = T9 + Ta;
56
                         Tg = iio[WS(rs, 1)];
57
                         Th = iio[WS(rs, 3)];
58
                         Tw = Tg + Th;
59
                         T4 = rio[WS(rs, 1)];
60
                         T5 = rio[WS(rs, 3)];
61
                         T6 = T4 + T5;
62
                         T8 = T4 - T5;
63
                    }
64
                    Tc = T8 + Tb;
65
                    Tf = T1 - T2;
66
                    Tx = Tv - Tw;
67
                    Ts = T3 - T6;
68
                    Tm = Tb - T8;
69
                    Ti = Tg - Th;
70
               }
71
               /* Row v = 3: same load/combine pattern at base offset WS(vs, 3). */
               {
72
                    E T1F, T1G, T1P, T1U, T1V, T1M;
73
                    {
74
                         E T1N, T1O, T1I, T1J;
75
                         T1F = rio[WS(vs, 3)];
76
                         T1G = rio[WS(vs, 3) + WS(rs, 2)];
77
                         T1H = T1F + T1G;
78
                         T1N = iio[WS(vs, 3)];
79
                         T1O = iio[WS(vs, 3) + WS(rs, 2)];
80
                         T1P = T1N - T1O;
81
                         T29 = T1N + T1O;
82
                         T1U = iio[WS(vs, 3) + WS(rs, 1)];
83
                         T1V = iio[WS(vs, 3) + WS(rs, 3)];
84
                         T2a = T1U + T1V;
85
                         T1I = rio[WS(vs, 3) + WS(rs, 1)];
86
                         T1J = rio[WS(vs, 3) + WS(rs, 3)];
87
                         T1K = T1I + T1J;
88
                         T1M = T1I - T1J;
89
                    }
90
                    T1Q = T1M + T1P;
91
                    T1T = T1F - T1G;
92
                    T2b = T29 - T2a;
93
                    T26 = T1H - T1K;
94
                    T20 = T1P - T1M;
95
                    T1W = T1U - T1V;
96
               }
97
               /* Row v = 1: same load/combine pattern at base offset WS(vs, 1). */
               {
98
                    E Tz, TA, TJ, TO, TP, TG;
99
                    {
100
                         E TH, TI, TC, TD;
101
                         Tz = rio[WS(vs, 1)];
102
                         TA = rio[WS(vs, 1) + WS(rs, 2)];
103
                         TB = Tz + TA;
104
                         TH = iio[WS(vs, 1)];
105
                         TI = iio[WS(vs, 1) + WS(rs, 2)];
106
                         TJ = TH - TI;
107
                         T13 = TH + TI;
108
                         TO = iio[WS(vs, 1) + WS(rs, 1)];
109
                         TP = iio[WS(vs, 1) + WS(rs, 3)];
110
                         T14 = TO + TP;
111
                         TC = rio[WS(vs, 1) + WS(rs, 1)];
112
                         TD = rio[WS(vs, 1) + WS(rs, 3)];
113
                         TE = TC + TD;
114
                         TG = TC - TD;
115
                    }
116
                    TK = TG + TJ;
117
                    TN = Tz - TA;
118
                    T15 = T13 - T14;
119
                    T10 = TB - TE;
120
                    TU = TJ - TG;
121
                    TQ = TO - TP;
122
               }
123
               /* Row v = 2: same load/combine pattern at base offset WS(vs, 2). */
               {
124
                    E T17, T18, T1h, T1m, T1n, T1e;
125
                    {
126
                         E T1f, T1g, T1a, T1b;
127
                         T17 = rio[WS(vs, 2)];
128
                         T18 = rio[WS(vs, 2) + WS(rs, 2)];
129
                         T19 = T17 + T18;
130
                         T1f = iio[WS(vs, 2)];
131
                         T1g = iio[WS(vs, 2) + WS(rs, 2)];
132
                         T1h = T1f - T1g;
133
                         T1B = T1f + T1g;
134
                         T1m = iio[WS(vs, 2) + WS(rs, 1)];
135
                         T1n = iio[WS(vs, 2) + WS(rs, 3)];
136
                         T1C = T1m + T1n;
137
                         T1a = rio[WS(vs, 2) + WS(rs, 1)];
138
                         T1b = rio[WS(vs, 2) + WS(rs, 3)];
139
                         T1c = T1a + T1b;
140
                         T1e = T1a - T1b;
141
                    }
142
                    T1i = T1e + T1h;
143
                    T1l = T17 - T18;
144
                    T1D = T1B - T1C;
145
                    T1y = T19 - T1c;
146
                    T1s = T1h - T1e;
147
                    T1o = T1m - T1n;
148
               }
149
               /*
                * Store phase. Every input has been read, so overwriting is safe.
                * Result 0 of row v (sum of all four inputs) goes to WS(rs, v)
                * and involves no twiddle factor.
                */
               rio[0] = T3 + T6;
150
               iio[0] = Tv + Tw;
151
               rio[WS(rs, 1)] = TB + TE;
152
               iio[WS(rs, 1)] = T13 + T14;
153
               rio[WS(rs, 2)] = T19 + T1c;
154
               iio[WS(rs, 2)] = T1B + T1C;
155
               iio[WS(rs, 3)] = T29 + T2a;
156
               rio[WS(rs, 3)] = T1H + T1K;
157
               /* Result 2 of row 0, combined with W[2], W[3]. */
               {
158
                    E Tt, Ty, Tr, Tu;
159
                    Tr = W[2];
160
                    Tt = Tr * Ts;
161
                    Ty = Tr * Tx;
162
                    Tu = W[3];
163
                    rio[WS(vs, 2)] = FMA(Tu, Tx, Tt);
164
                    iio[WS(vs, 2)] = FNMS(Tu, Ts, Ty);
165
               }
166
               /* Result 2 of row 3, combined with W[2], W[3]. */
               {
167
                    E T27, T2c, T25, T28;
168
                    T25 = W[2];
169
                    T27 = T25 * T26;
170
                    T2c = T25 * T2b;
171
                    T28 = W[3];
172
                    rio[WS(vs, 2) + WS(rs, 3)] = FMA(T28, T2b, T27);
173
                    iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T28, T26, T2c);
174
               }
175
               /* Result 2 of row 1, combined with W[2], W[3]. */
               {
176
                    E T11, T16, TZ, T12;
177
                    TZ = W[2];
178
                    T11 = TZ * T10;
179
                    T16 = TZ * T15;
180
                    T12 = W[3];
181
                    rio[WS(vs, 2) + WS(rs, 1)] = FMA(T12, T15, T11);
182
                    iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T12, T10, T16);
183
               }
184
               /* Result 2 of row 2, combined with W[2], W[3]. */
               {
185
                    E T1z, T1E, T1x, T1A;
186
                    T1x = W[2];
187
                    T1z = T1x * T1y;
188
                    T1E = T1x * T1D;
189
                    T1A = W[3];
190
                    rio[WS(vs, 2) + WS(rs, 2)] = FMA(T1A, T1D, T1z);
191
                    iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T1A, T1y, T1E);
192
               }
193
               /* Result 3 of row 0, combined with W[4], W[5]. */
               {
194
                    E Tj, Te, Tk, T7, Td;
195
                    Tj = Tf - Ti;
196
                    Te = W[5];
197
                    Tk = Te * Tc;
198
                    T7 = W[4];
199
                    Td = T7 * Tc;
200
                    iio[WS(vs, 3)] = FNMS(Te, Tj, Td);
201
                    rio[WS(vs, 3)] = FMA(T7, Tj, Tk);
202
               }
203
               /* Result 3 of row 2, combined with W[4], W[5]. */
               {
204
                    E T1p, T1k, T1q, T1d, T1j;
205
                    T1p = T1l - T1o;
206
                    T1k = W[5];
207
                    T1q = T1k * T1i;
208
                    T1d = W[4];
209
                    T1j = T1d * T1i;
210
                    iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T1k, T1p, T1j);
211
                    rio[WS(vs, 3) + WS(rs, 2)] = FMA(T1d, T1p, T1q);
212
               }
213
               /* Result 1 of row 3, combined with W[0], W[1]. */
               {
214
                    E T23, T22, T24, T1Z, T21;
215
                    T23 = T1T + T1W;
216
                    T22 = W[1];
217
                    T24 = T22 * T20;
218
                    T1Z = W[0];
219
                    T21 = T1Z * T20;
220
                    iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T22, T23, T21);
221
                    rio[WS(vs, 1) + WS(rs, 3)] = FMA(T1Z, T23, T24);
222
               }
223
               /* Result 1 of row 1, combined with W[0], W[1]. */
               {
224
                    E TX, TW, TY, TT, TV;
225
                    TX = TN + TQ;
226
                    TW = W[1];
227
                    TY = TW * TU;
228
                    TT = W[0];
229
                    TV = TT * TU;
230
                    iio[WS(vs, 1) + WS(rs, 1)] = FNMS(TW, TX, TV);
231
                    rio[WS(vs, 1) + WS(rs, 1)] = FMA(TT, TX, TY);
232
               }
233
               /* Result 3 of row 1, combined with W[4], W[5]. */
               {
234
                    E TR, TM, TS, TF, TL;
235
                    TR = TN - TQ;
236
                    TM = W[5];
237
                    TS = TM * TK;
238
                    TF = W[4];
239
                    TL = TF * TK;
240
                    iio[WS(vs, 3) + WS(rs, 1)] = FNMS(TM, TR, TL);
241
                    rio[WS(vs, 3) + WS(rs, 1)] = FMA(TF, TR, TS);
242
               }
243
               /* Result 1 of row 0, combined with W[0], W[1]. */
               {
244
                    E Tp, To, Tq, Tl, Tn;
245
                    Tp = Tf + Ti;
246
                    To = W[1];
247
                    Tq = To * Tm;
248
                    Tl = W[0];
249
                    Tn = Tl * Tm;
250
                    iio[WS(vs, 1)] = FNMS(To, Tp, Tn);
251
                    rio[WS(vs, 1)] = FMA(Tl, Tp, Tq);
252
               }
253
               /* Result 1 of row 2, combined with W[0], W[1]. */
               {
254
                    E T1v, T1u, T1w, T1r, T1t;
255
                    T1v = T1l + T1o;
256
                    T1u = W[1];
257
                    T1w = T1u * T1s;
258
                    T1r = W[0];
259
                    T1t = T1r * T1s;
260
                    iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T1u, T1v, T1t);
261
                    rio[WS(vs, 1) + WS(rs, 2)] = FMA(T1r, T1v, T1w);
262
               }
263
               /* Result 3 of row 3, combined with W[4], W[5]. */
               {
264
                    E T1X, T1S, T1Y, T1L, T1R;
265
                    T1X = T1T - T1W;
266
                    T1S = W[5];
267
                    T1Y = T1S * T1Q;
268
                    T1L = W[4];
269
                    T1R = T1L * T1Q;
270
                    iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T1S, T1X, T1R);
271
                    rio[WS(vs, 3) + WS(rs, 3)] = FMA(T1L, T1X, T1Y);
272
               }
273
          }
274
     }
275
}
276

    
277
static const tw_instr twinstr[] = {
278
     {TW_FULL, 0, 4},
279
     {TW_NEXT, 1, 0}
280
};
281

    
282
/*
 * Descriptor passed to the registration call for this codelet.
 * NOTE(review): ct_desc and GENUS are declared outside this file; the field
 * meanings noted below are inferred from the generator comments in this file
 * and should be confirmed against the ct_desc declaration.
 */
static const ct_desc desc = {
     4,                /* matches the "-n 4" size on the generator command line */
     "q1_4",           /* codelet name string */
     twinstr,          /* twiddle instruction table */
     &GENUS,           /* supplied by an included header */
     {64, 24, 24, 0},  /* matches "64 additions, 24 multiplications, 24 fused multiply/add"; 4th count unexplained here */
     0,                /* NOTE(review): meaning of the three trailing zeros is not visible in this file */
     0,
     0
};
283

    
284
/*
 * Registration entry point: hands the q1_4 kernel and its descriptor to the
 * planner p through kdft_difsq_register.
 * NOTE(review): X() is a macro defined outside this file, presumably adding
 * the library's name prefix -- confirm.
 */
void X(codelet_q1_4)(planner *p)
{
     X(kdft_difsq_register)(p, q1_4, &desc);
}
287
#else
288

    
289
/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 4 -name q1_4 -include dft/scalar/q.h */
290

    
291
/*
292
 * This function contains 88 FP additions, 48 FP multiplications,
293
 * (or, 64 additions, 24 multiplications, 24 fused multiply/add),
294
 * 37 stack variables, 0 constants, and 64 memory accesses
295
 */
296
#include "dft/scalar/q.h"
297

    
298
/*
 * q1_4, variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined (the #else branch).
 *
 * Each loop iteration works in place on a 4 x 4 grid of complex values whose
 * real parts live in rio and imaginary parts in iio, at offsets
 * WS(vs, v) + WS(rs, r) for v, r in 0..3:
 *   - all 32 values are loaded into locals before anything is stored;
 *   - for each v, the four values along rs are reduced to sums and
 *     differences (the terms of a 4-point butterfly);
 *   - result k of row v is written back at WS(vs, k) + WS(rs, v), so the
 *     v and k indices trade places between input and output;
 *   - result 0 is stored as a plain sum, results k = 1..3 are combined with
 *     the twiddle pair W[2k-2], W[2k-1] through FMA/FNMS.
 * NOTE(review): FMA, FNMS, WS, E, R and MAKE_VOLATILE_STRIDE are defined
 * outside this file; FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c and
 * c - a*b -- confirm against the project headers.
 *
 * rio, iio: real / imaginary data, read and overwritten.
 * W:        twiddle table; six values are consumed per iteration, starting
 *           at W + mb * 6.
 * rs, vs:   strides passed to WS() to address the two grid dimensions.
 * mb, me:   the loop runs for m = mb, mb + 1, ..., me - 1.
 * ms:       amount added to rio and iio after each iteration.
 */
static void q1_4(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
299
{
300
     {
301
          INT m;
302
          /* Per iteration: advance rio/iio by ms and W by 6 (three twiddle pairs). */
          for (m = mb, W = W + (mb * 6); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 6, MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
303
               /* Pairwise sums/differences kept alive until the store phase below. */
               E T3, Te, Tb, Tq, T6, T8, Th, Tr, Tv, TG, TD, TS, Ty, TA, TJ;
304
               E TT, TX, T18, T15, T1k, T10, T12, T1b, T1l, T1p, T1A, T1x, T1M, T1s, T1u;
305
               E T1D, T1N;
306
               /* Row v = 0, elements r = 0 and r = 2: sum and difference of real and imaginary parts. */
               {
307
                    E T1, T2, T9, Ta;
308
                    T1 = rio[0];
309
                    T2 = rio[WS(rs, 2)];
310
                    T3 = T1 + T2;
311
                    Te = T1 - T2;
312
                    T9 = iio[0];
313
                    Ta = iio[WS(rs, 2)];
314
                    Tb = T9 - Ta;
315
                    Tq = T9 + Ta;
316
               }
317
               /* Row v = 0, elements r = 1 and r = 3. */
               {
318
                    E T4, T5, Tf, Tg;
319
                    T4 = rio[WS(rs, 1)];
320
                    T5 = rio[WS(rs, 3)];
321
                    T6 = T4 + T5;
322
                    T8 = T4 - T5;
323
                    Tf = iio[WS(rs, 1)];
324
                    Tg = iio[WS(rs, 3)];
325
                    Th = Tf - Tg;
326
                    Tr = Tf + Tg;
327
               }
328
               /* Row v = 1, elements r = 0 and r = 2. */
               {
329
                    E Tt, Tu, TB, TC;
330
                    Tt = rio[WS(vs, 1)];
331
                    Tu = rio[WS(vs, 1) + WS(rs, 2)];
332
                    Tv = Tt + Tu;
333
                    TG = Tt - Tu;
334
                    TB = iio[WS(vs, 1)];
335
                    TC = iio[WS(vs, 1) + WS(rs, 2)];
336
                    TD = TB - TC;
337
                    TS = TB + TC;
338
               }
339
               /* Row v = 1, elements r = 1 and r = 3. */
               {
340
                    E Tw, Tx, TH, TI;
341
                    Tw = rio[WS(vs, 1) + WS(rs, 1)];
342
                    Tx = rio[WS(vs, 1) + WS(rs, 3)];
343
                    Ty = Tw + Tx;
344
                    TA = Tw - Tx;
345
                    TH = iio[WS(vs, 1) + WS(rs, 1)];
346
                    TI = iio[WS(vs, 1) + WS(rs, 3)];
347
                    TJ = TH - TI;
348
                    TT = TH + TI;
349
               }
350
               /* Row v = 2, elements r = 0 and r = 2. */
               {
351
                    E TV, TW, T13, T14;
352
                    TV = rio[WS(vs, 2)];
353
                    TW = rio[WS(vs, 2) + WS(rs, 2)];
354
                    TX = TV + TW;
355
                    T18 = TV - TW;
356
                    T13 = iio[WS(vs, 2)];
357
                    T14 = iio[WS(vs, 2) + WS(rs, 2)];
358
                    T15 = T13 - T14;
359
                    T1k = T13 + T14;
360
               }
361
               /* Row v = 2, elements r = 1 and r = 3. */
               {
362
                    E TY, TZ, T19, T1a;
363
                    TY = rio[WS(vs, 2) + WS(rs, 1)];
364
                    TZ = rio[WS(vs, 2) + WS(rs, 3)];
365
                    T10 = TY + TZ;
366
                    T12 = TY - TZ;
367
                    T19 = iio[WS(vs, 2) + WS(rs, 1)];
368
                    T1a = iio[WS(vs, 2) + WS(rs, 3)];
369
                    T1b = T19 - T1a;
370
                    T1l = T19 + T1a;
371
               }
372
               /* Row v = 3, elements r = 0 and r = 2. */
               {
373
                    E T1n, T1o, T1v, T1w;
374
                    T1n = rio[WS(vs, 3)];
375
                    T1o = rio[WS(vs, 3) + WS(rs, 2)];
376
                    T1p = T1n + T1o;
377
                    T1A = T1n - T1o;
378
                    T1v = iio[WS(vs, 3)];
379
                    T1w = iio[WS(vs, 3) + WS(rs, 2)];
380
                    T1x = T1v - T1w;
381
                    T1M = T1v + T1w;
382
               }
383
               /* Row v = 3, elements r = 1 and r = 3. */
               {
384
                    E T1q, T1r, T1B, T1C;
385
                    T1q = rio[WS(vs, 3) + WS(rs, 1)];
386
                    T1r = rio[WS(vs, 3) + WS(rs, 3)];
387
                    T1s = T1q + T1r;
388
                    T1u = T1q - T1r;
389
                    T1B = iio[WS(vs, 3) + WS(rs, 1)];
390
                    T1C = iio[WS(vs, 3) + WS(rs, 3)];
391
                    T1D = T1B - T1C;
392
                    T1N = T1B + T1C;
393
               }
394
               /*
                * Store phase. Every input has been read, so overwriting is safe.
                * Result 0 of row v (sum of all four inputs) goes to WS(rs, v)
                * and involves no twiddle factor.
                */
               rio[0] = T3 + T6;
395
               iio[0] = Tq + Tr;
396
               rio[WS(rs, 1)] = Tv + Ty;
397
               iio[WS(rs, 1)] = TS + TT;
398
               rio[WS(rs, 2)] = TX + T10;
399
               iio[WS(rs, 2)] = T1k + T1l;
400
               iio[WS(rs, 3)] = T1M + T1N;
401
               rio[WS(rs, 3)] = T1p + T1s;
402
               /* Result 3 of row 0, combined with W[4], W[5]. */
               {
403
                    E Tc, Ti, T7, Td;
404
                    Tc = T8 + Tb;
405
                    Ti = Te - Th;
406
                    T7 = W[4];
407
                    Td = W[5];
408
                    iio[WS(vs, 3)] = FNMS(Td, Ti, T7 * Tc);
409
                    rio[WS(vs, 3)] = FMA(Td, Tc, T7 * Ti);
410
               }
411
               /* Result 2 of row 3, combined with W[2], W[3]. */
               {
412
                    E T1K, T1O, T1J, T1L;
413
                    T1K = T1p - T1s;
414
                    T1O = T1M - T1N;
415
                    T1J = W[2];
416
                    T1L = W[3];
417
                    rio[WS(vs, 2) + WS(rs, 3)] = FMA(T1J, T1K, T1L * T1O);
418
                    iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T1L, T1K, T1J * T1O);
419
               }
420
               /* Result 1 of row 0, combined with W[0], W[1]. */
               {
421
                    E Tk, Tm, Tj, Tl;
422
                    Tk = Tb - T8;
423
                    Tm = Te + Th;
424
                    Tj = W[0];
425
                    Tl = W[1];
426
                    iio[WS(vs, 1)] = FNMS(Tl, Tm, Tj * Tk);
427
                    rio[WS(vs, 1)] = FMA(Tl, Tk, Tj * Tm);
428
               }
429
               /* Result 2 of row 0, combined with W[2], W[3]. */
               {
430
                    E To, Ts, Tn, Tp;
431
                    To = T3 - T6;
432
                    Ts = Tq - Tr;
433
                    Tn = W[2];
434
                    Tp = W[3];
435
                    rio[WS(vs, 2)] = FMA(Tn, To, Tp * Ts);
436
                    iio[WS(vs, 2)] = FNMS(Tp, To, Tn * Ts);
437
               }
438
               /* Result 3 of row 2, combined with W[4], W[5]. */
               {
439
                    E T16, T1c, T11, T17;
440
                    T16 = T12 + T15;
441
                    T1c = T18 - T1b;
442
                    T11 = W[4];
443
                    T17 = W[5];
444
                    iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T17, T1c, T11 * T16);
445
                    rio[WS(vs, 3) + WS(rs, 2)] = FMA(T17, T16, T11 * T1c);
446
               }
447
               /* Result 1 of row 3, combined with W[0], W[1]. */
               {
448
                    E T1G, T1I, T1F, T1H;
449
                    T1G = T1x - T1u;
450
                    T1I = T1A + T1D;
451
                    T1F = W[0];
452
                    T1H = W[1];
453
                    iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T1H, T1I, T1F * T1G);
454
                    rio[WS(vs, 1) + WS(rs, 3)] = FMA(T1H, T1G, T1F * T1I);
455
               }
456
               /* Result 2 of row 1, combined with W[2], W[3]. */
               {
457
                    E TQ, TU, TP, TR;
458
                    TQ = Tv - Ty;
459
                    TU = TS - TT;
460
                    TP = W[2];
461
                    TR = W[3];
462
                    rio[WS(vs, 2) + WS(rs, 1)] = FMA(TP, TQ, TR * TU);
463
                    iio[WS(vs, 2) + WS(rs, 1)] = FNMS(TR, TQ, TP * TU);
464
               }
465
               /* Result 1 of row 2, combined with W[0], W[1]. */
               {
466
                    E T1e, T1g, T1d, T1f;
467
                    T1e = T15 - T12;
468
                    T1g = T18 + T1b;
469
                    T1d = W[0];
470
                    T1f = W[1];
471
                    iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T1f, T1g, T1d * T1e);
472
                    rio[WS(vs, 1) + WS(rs, 2)] = FMA(T1f, T1e, T1d * T1g);
473
               }
474
               /* Result 2 of row 2, combined with W[2], W[3]. */
               {
475
                    E T1i, T1m, T1h, T1j;
476
                    T1i = TX - T10;
477
                    T1m = T1k - T1l;
478
                    T1h = W[2];
479
                    T1j = W[3];
480
                    rio[WS(vs, 2) + WS(rs, 2)] = FMA(T1h, T1i, T1j * T1m);
481
                    iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T1j, T1i, T1h * T1m);
482
               }
483
               /* Result 3 of row 3, combined with W[4], W[5]. */
               {
484
                    E T1y, T1E, T1t, T1z;
485
                    T1y = T1u + T1x;
486
                    T1E = T1A - T1D;
487
                    T1t = W[4];
488
                    T1z = W[5];
489
                    iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T1z, T1E, T1t * T1y);
490
                    rio[WS(vs, 3) + WS(rs, 3)] = FMA(T1z, T1y, T1t * T1E);
491
               }
492
               /* Result 1 of row 1, combined with W[0], W[1]. */
               {
493
                    E TM, TO, TL, TN;
494
                    TM = TD - TA;
495
                    TO = TG + TJ;
496
                    TL = W[0];
497
                    TN = W[1];
498
                    iio[WS(vs, 1) + WS(rs, 1)] = FNMS(TN, TO, TL * TM);
499
                    rio[WS(vs, 1) + WS(rs, 1)] = FMA(TN, TM, TL * TO);
500
               }
501
               /* Result 3 of row 1, combined with W[4], W[5]. */
               {
502
                    E TE, TK, Tz, TF;
503
                    TE = TA + TD;
504
                    TK = TG - TJ;
505
                    Tz = W[4];
506
                    TF = W[5];
507
                    iio[WS(vs, 3) + WS(rs, 1)] = FNMS(TF, TK, Tz * TE);
508
                    rio[WS(vs, 3) + WS(rs, 1)] = FMA(TF, TE, Tz * TK);
509
               }
510
          }
511
     }
512
}
513

    
514
static const tw_instr twinstr[] = {
515
     {TW_FULL, 0, 4},
516
     {TW_NEXT, 1, 0}
517
};
518

    
519
/*
 * Descriptor passed to the registration call for this codelet.
 * NOTE(review): ct_desc and GENUS are declared outside this file; the field
 * meanings noted below are inferred from the generator comments in this file
 * and should be confirmed against the ct_desc declaration.
 */
static const ct_desc desc = {
     4,                /* matches the "-n 4" size on the generator command line */
     "q1_4",           /* codelet name string */
     twinstr,          /* twiddle instruction table */
     &GENUS,           /* supplied by an included header */
     {64, 24, 24, 0},  /* matches "64 additions, 24 multiplications, 24 fused multiply/add"; 4th count unexplained here */
     0,                /* NOTE(review): meaning of the three trailing zeros is not visible in this file */
     0,
     0
};
520

    
521
/*
 * Registration entry point: hands the q1_4 kernel and its descriptor to the
 * planner p through kdft_difsq_register.
 * NOTE(review): X() is a macro defined outside this file, presumably adding
 * the library's name prefix -- confirm.
 */
void X(codelet_q1_4)(planner *p)
{
     X(kdft_difsq_register)(p, q1_4, &desc);
}
524
#endif