To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t1_9.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (13.7 KB)

1
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:13 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 9 -name t1_9 -include dft/scalar/t.h */
29

    
30
/*
 * This function contains 96 FP additions, 88 FP multiplications,
 * (or, 24 additions, 16 multiplications, 72 fused multiply/add),
 * 55 stack variables, 10 constants, and 36 memory accesses
 */
35
#include "dft/scalar/t.h"
36

    
37
/*
 * t1_9 (FMA-preferring variant, generated by genfft with "-fma -n 9").
 *
 * For every m in [mb, me) the routine reads nine (ri, ii) pairs located at
 * offsets 0 and WS(rs, 1) .. WS(rs, 8), scales pairs 1..8 by the twiddle
 * pair (W[2k-2], W[2k-1]) for input k, combines them, and stores the nine
 * results back into the same ri/ii locations (the update is in place).
 *
 * Parameters:
 *   ri, ii  - arrays that are both read and written; presumably the real
 *             and imaginary parts of the data (TODO confirm with caller).
 *   W       - twiddle table; 16 values are consumed per iteration, and the
 *             pointer is first advanced by mb * 16.
 *   rs      - stride handed to WS() to locate inputs/outputs 1..8.
 *   mb, me  - half-open iteration range [mb, me).
 *   ms      - amount added to ri and ii after each iteration.
 *
 * Notes:
 *   - Assumes FMA(a, b, c) evaluates a * b + c and FNMS(a, b, c) evaluates
 *     c - a * b, as provided by the project headers (not visible here).
 *     Under that reading each twiddle step computes
 *         re = Wr * xr + Wi * xi,   im = Wr * xi - Wi * xr.
 *   - The arithmetic statements are machine generated; they are kept
 *     exactly as emitted so that results stay bit-for-bit unchanged.
 */
static void t1_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP852868531, +0.852868531952443209628250963940074071936020296);
     DK(KP492403876, +0.492403876506104029683371512294761506835321626);
     DK(KP984807753, +0.984807753012208059366743024589523013670643252);
     DK(KP954188894, +0.954188894138671133499268364187245676532219158);
     DK(KP363970234, +0.363970234266202361351047882776834043890471784);
     DK(KP777861913, +0.777861913430206160028177977318626690410586096);
     DK(KP839099631, +0.839099631177280011763127298123181364687434283);
     DK(KP176326980, +0.176326980708464973471090386868618986121633062);
     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
     {
          INT m;
          for (m = mb, W = W + (mb * 16); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
               E T1, T1R, Te, T1W, T10, T1Q, T1l, T1r, Ty, T1p, Tl, T1o, T1g, T1q, T1a;
               E T1d, TS, T18, TF, T13, T19, T1c;
               /* Input 0 is used without a twiddle factor. */
               T1 = ri[0];
               T1R = ii[0];
               /* Inputs 3 and 6, scaled by (W[4], W[5]) and (W[10], W[11]). */
               {
                    E T3, T6, T4, TW, T9, Tc, Ta, TY, T2, T8;
                    T3 = ri[WS(rs, 3)];
                    T6 = ii[WS(rs, 3)];
                    T2 = W[4];
                    T4 = T2 * T3;
                    TW = T2 * T6;
                    T9 = ri[WS(rs, 6)];
                    Tc = ii[WS(rs, 6)];
                    T8 = W[10];
                    Ta = T8 * T9;
                    TY = T8 * Tc;
                    {
                         E T7, TX, Td, TZ, T5, Tb;
                         T5 = W[5];
                         T7 = FMA(T5, T6, T4);
                         TX = FNMS(T5, T3, TW);
                         Tb = W[11];
                         Td = FMA(Tb, Tc, Ta);
                         TZ = FNMS(Tb, T9, TY);
                         Te = T7 + Td;
                         T1W = Td - T7;
                         T10 = TX - TZ;
                         T1Q = TX + TZ;
                    }
               }
               /* Inputs 1, 4 and 7, scaled by (W[0], W[1]), (W[6], W[7]), (W[12], W[13]). */
               {
                    E Th, Tk, Ti, T1n, Tx, T1i, Tr, T1k, Tg, Tj;
                    Th = ri[WS(rs, 1)];
                    Tk = ii[WS(rs, 1)];
                    Tg = W[0];
                    Ti = Tg * Th;
                    T1n = Tg * Tk;
                    {
                         E Tt, Tw, Tu, T1h, Ts, Tv;
                         Tt = ri[WS(rs, 7)];
                         Tw = ii[WS(rs, 7)];
                         Ts = W[12];
                         Tu = Ts * Tt;
                         T1h = Ts * Tw;
                         Tv = W[13];
                         Tx = FMA(Tv, Tw, Tu);
                         T1i = FNMS(Tv, Tt, T1h);
                    }
                    {
                         E Tn, Tq, To, T1j, Tm, Tp;
                         Tn = ri[WS(rs, 4)];
                         Tq = ii[WS(rs, 4)];
                         Tm = W[6];
                         To = Tm * Tn;
                         T1j = Tm * Tq;
                         Tp = W[7];
                         Tr = FMA(Tp, Tq, To);
                         T1k = FNMS(Tp, Tn, T1j);
                    }
                    T1l = T1i - T1k;
                    T1r = Tr - Tx;
                    Ty = Tr + Tx;
                    T1p = T1k + T1i;
                    Tj = W[1];
                    Tl = FMA(Tj, Tk, Ti);
                    T1o = FNMS(Tj, Th, T1n);
                    T1g = FNMS(KP500000000, Ty, Tl);
                    T1q = FNMS(KP500000000, T1p, T1o);
               }
               /* Inputs 2, 5 and 8, scaled by (W[2], W[3]), (W[8], W[9]), (W[14], W[15]). */
               {
                    E TB, TE, TC, T12, TR, T17, TL, T15, TA, TD;
                    TB = ri[WS(rs, 2)];
                    TE = ii[WS(rs, 2)];
                    TA = W[2];
                    TC = TA * TB;
                    T12 = TA * TE;
                    {
                         E TN, TQ, TO, T16, TM, TP;
                         TN = ri[WS(rs, 8)];
                         TQ = ii[WS(rs, 8)];
                         TM = W[14];
                         TO = TM * TN;
                         T16 = TM * TQ;
                         TP = W[15];
                         TR = FMA(TP, TQ, TO);
                         T17 = FNMS(TP, TN, T16);
                    }
                    {
                         E TH, TK, TI, T14, TG, TJ;
                         TH = ri[WS(rs, 5)];
                         TK = ii[WS(rs, 5)];
                         TG = W[8];
                         TI = TG * TH;
                         T14 = TG * TK;
                         TJ = W[9];
                         TL = FMA(TJ, TK, TI);
                         T15 = FNMS(TJ, TH, T14);
                    }
                    T1a = TR - TL;
                    T1d = T15 - T17;
                    TS = TL + TR;
                    T18 = T15 + T17;
                    TD = W[3];
                    TF = FMA(TD, TE, TC);
                    T13 = FNMS(TD, TB, T12);
                    T19 = FNMS(KP500000000, T18, T13);
                    T1c = FNMS(KP500000000, TS, TF);
               }
               /* Outputs 0, 3 and 6. */
               {
                    E Tf, T1S, TU, T1U, T1O, T1P, T1L, T1T;
                    Tf = T1 + Te;
                    T1S = T1Q + T1R;
                    {
                         E Tz, TT, T1M, T1N;
                         Tz = Tl + Ty;
                         TT = TF + TS;
                         TU = Tz + TT;
                         T1U = TT - Tz;
                         T1M = T1o + T1p;
                         T1N = T13 + T18;
                         T1O = T1M - T1N;
                         T1P = T1M + T1N;
                    }
                    ri[0] = Tf + TU;
                    ii[0] = T1P + T1S;
                    T1L = FNMS(KP500000000, TU, Tf);
                    ri[WS(rs, 6)] = FNMS(KP866025403, T1O, T1L);
                    ri[WS(rs, 3)] = FMA(KP866025403, T1O, T1L);
                    T1T = FNMS(KP500000000, T1P, T1S);
                    ii[WS(rs, 3)] = FMA(KP866025403, T1U, T1T);
                    ii[WS(rs, 6)] = FNMS(KP866025403, T1U, T1T);
               }
               /* Outputs 1, 2, 4, 5, 7 and 8. */
               {
                    E T11, T1z, T1X, T21, T1f, T1w, T1t, T1x, T1u, T1Y, T1C, T1I, T1F, T1J, T1G;
                    E T22, TV, T1V;
                    TV = FNMS(KP500000000, Te, T1);
                    T11 = FMA(KP866025403, T10, TV);
                    T1z = FNMS(KP866025403, T10, TV);
                    T1V = FNMS(KP500000000, T1Q, T1R);
                    T1X = FMA(KP866025403, T1W, T1V);
                    T21 = FNMS(KP866025403, T1W, T1V);
                    {
                         E T1b, T1e, T1m, T1s;
                         T1b = FMA(KP866025403, T1a, T19);
                         T1e = FMA(KP866025403, T1d, T1c);
                         T1f = FMA(KP176326980, T1e, T1b);
                         T1w = FNMS(KP176326980, T1b, T1e);
                         T1m = FNMS(KP866025403, T1l, T1g);
                         T1s = FNMS(KP866025403, T1r, T1q);
                         T1t = FMA(KP839099631, T1s, T1m);
                         T1x = FNMS(KP839099631, T1m, T1s);
                    }
                    T1u = FMA(KP777861913, T1t, T1f);
                    T1Y = FNMS(KP777861913, T1x, T1w);
                    {
                         E T1A, T1B, T1D, T1E;
                         T1A = FMA(KP866025403, T1r, T1q);
                         T1B = FMA(KP866025403, T1l, T1g);
                         T1C = FMA(KP176326980, T1B, T1A);
                         T1I = FNMS(KP176326980, T1A, T1B);
                         T1D = FNMS(KP866025403, T1d, T1c);
                         T1E = FNMS(KP866025403, T1a, T19);
                         T1F = FNMS(KP363970234, T1E, T1D);
                         T1J = FMA(KP363970234, T1D, T1E);
                    }
                    T1G = FNMS(KP954188894, T1F, T1C);
                    T22 = FMA(KP954188894, T1J, T1I);
                    ri[WS(rs, 1)] = FMA(KP984807753, T1u, T11);
                    ii[WS(rs, 1)] = FNMS(KP984807753, T1Y, T1X);
                    ri[WS(rs, 2)] = FMA(KP984807753, T1G, T1z);
                    ii[WS(rs, 2)] = FNMS(KP984807753, T22, T21);
                    {
                         E T1v, T1y, T1Z, T20;
                         T1v = FNMS(KP492403876, T1u, T11);
                         T1y = FMA(KP777861913, T1x, T1w);
                         ri[WS(rs, 4)] = FMA(KP852868531, T1y, T1v);
                         ri[WS(rs, 7)] = FNMS(KP852868531, T1y, T1v);
                         T1Z = FMA(KP492403876, T1Y, T1X);
                         T20 = FNMS(KP777861913, T1t, T1f);
                         ii[WS(rs, 4)] = FMA(KP852868531, T20, T1Z);
                         ii[WS(rs, 7)] = FNMS(KP852868531, T20, T1Z);
                    }
                    {
                         E T1H, T1K, T23, T24;
                         T1H = FNMS(KP492403876, T1G, T1z);
                         T1K = FNMS(KP954188894, T1J, T1I);
                         ri[WS(rs, 5)] = FNMS(KP852868531, T1K, T1H);
                         ri[WS(rs, 8)] = FMA(KP852868531, T1K, T1H);
                         T23 = FMA(KP492403876, T22, T21);
                         T24 = FMA(KP954188894, T1F, T1C);
                         ii[WS(rs, 5)] = FNMS(KP852868531, T24, T23);
                         ii[WS(rs, 8)] = FMA(KP852868531, T24, T23);
                    }
               }
          }
     }
}
249

    
250
static const tw_instr twinstr[] = {
251
     {TW_FULL, 0, 9},
252
     {TW_NEXT, 1, 0}
253
};
254

    
255
/*
 * Descriptor passed to the planner when this codelet is registered.
 * Fields are positional; their names live in the ct_desc declaration,
 * which is not visible in this file.
 */
static const ct_desc desc = {
     9,                  /* presumably the transform size (generator was run with -n 9) */
     "t1_9",             /* codelet name */
     twinstr,            /* twiddle instruction list defined above */
     &GENUS,
     {24, 16, 72, 0},    /* matches the generator's count: 24 adds, 16 muls, 72 fused multiply/adds */
     0,
     0,
     0
};
256

    
257
/*
 * Registration entry point: hands the t1_9 kernel and its descriptor to the
 * planner `p` through kdft_dit_register.  X() is the project's name-mangling
 * macro (defined elsewhere).
 */
void X(codelet_t1_9) (planner *p) {
     X(kdft_dit_register) (p, t1_9, &desc);
}
260
#else
261

    
262
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 9 -name t1_9 -include dft/scalar/t.h */
263

    
264
/*
 * This function contains 96 FP additions, 72 FP multiplications,
 * (or, 60 additions, 36 multiplications, 36 fused multiply/add),
 * 41 stack variables, 8 constants, and 36 memory accesses
 */
269
#include "dft/scalar/t.h"
270

    
271
/*
 * t1_9 (non-FMA variant, generated by genfft with "-n 9").
 *
 * For every m in [mb, me) the routine reads nine (ri, ii) pairs located at
 * offsets 0 and WS(rs, 1) .. WS(rs, 8), scales pairs 1..8 by the twiddle
 * pair (W[2k-2], W[2k-1]) for input k, combines them, and stores the nine
 * results back into the same ri/ii locations (the update is in place).
 *
 * Parameters:
 *   ri, ii  - arrays that are both read and written; presumably the real
 *             and imaginary parts of the data (TODO confirm with caller).
 *   W       - twiddle table; 16 values are consumed per iteration, and the
 *             pointer is first advanced by mb * 16.
 *   rs      - stride handed to WS() to locate inputs/outputs 1..8.
 *   mb, me  - half-open iteration range [mb, me).
 *   ms      - amount added to ri and ii after each iteration.
 *
 * Notes:
 *   - Assumes FMA(a, b, c) evaluates a * b + c and FNMS(a, b, c) evaluates
 *     c - a * b, as provided by the project headers (not visible here).
 *     Under that reading each twiddle step computes
 *         re = Wr * xr + Wi * xi,   im = Wr * xi - Wi * xr.
 *   - The arithmetic statements are machine generated; they are kept
 *     exactly as emitted so that results stay bit-for-bit unchanged.
 */
static void t1_9(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP939692620, +0.939692620785908384054109277324731469936208134);
     DK(KP342020143, +0.342020143325668733044099614682259580763083368);
     DK(KP984807753, +0.984807753012208059366743024589523013670643252);
     DK(KP173648177, +0.173648177666930348851716626769314796000375677);
     DK(KP642787609, +0.642787609686539326322643409907263432907559884);
     DK(KP766044443, +0.766044443118978035202392650555416673935832457);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
     {
          INT m;
          for (m = mb, W = W + (mb * 16); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 16, MAKE_VOLATILE_STRIDE(18, rs)) {
               E T1, T1B, TQ, T1G, Tc, TN, T1A, T1H, TL, T1x, T17, T1o, T1c, T1n, Tu;
               E T1w, TW, T1k, T11, T1l;
               /* Input 0 (no twiddle) together with twiddled inputs 3 and 6. */
               {
                    E T6, TO, Tb, TP;
                    T1 = ri[0];
                    T1B = ii[0];
                    {
                         E T3, T5, T2, T4;
                         T3 = ri[WS(rs, 3)];
                         T5 = ii[WS(rs, 3)];
                         T2 = W[4];
                         T4 = W[5];
                         T6 = FMA(T2, T3, T4 * T5);
                         TO = FNMS(T4, T3, T2 * T5);
                    }
                    {
                         E T8, Ta, T7, T9;
                         T8 = ri[WS(rs, 6)];
                         Ta = ii[WS(rs, 6)];
                         T7 = W[10];
                         T9 = W[11];
                         Tb = FMA(T7, T8, T9 * Ta);
                         TP = FNMS(T9, T8, T7 * Ta);
                    }
                    TQ = KP866025403 * (TO - TP);
                    T1G = KP866025403 * (Tb - T6);
                    Tc = T6 + Tb;
                    TN = FNMS(KP500000000, Tc, T1);
                    T1A = TO + TP;
                    T1H = FNMS(KP500000000, T1A, T1B);
               }
               /* Twiddled inputs 2, 5 and 8. */
               {
                    E Tz, T19, TE, T14, TJ, T15, TK, T1a;
                    {
                         E Tw, Ty, Tv, Tx;
                         Tw = ri[WS(rs, 2)];
                         Ty = ii[WS(rs, 2)];
                         Tv = W[2];
                         Tx = W[3];
                         Tz = FMA(Tv, Tw, Tx * Ty);
                         T19 = FNMS(Tx, Tw, Tv * Ty);
                    }
                    {
                         E TB, TD, TA, TC;
                         TB = ri[WS(rs, 5)];
                         TD = ii[WS(rs, 5)];
                         TA = W[8];
                         TC = W[9];
                         TE = FMA(TA, TB, TC * TD);
                         T14 = FNMS(TC, TB, TA * TD);
                    }
                    {
                         E TG, TI, TF, TH;
                         TG = ri[WS(rs, 8)];
                         TI = ii[WS(rs, 8)];
                         TF = W[14];
                         TH = W[15];
                         TJ = FMA(TF, TG, TH * TI);
                         T15 = FNMS(TH, TG, TF * TI);
                    }
                    TK = TE + TJ;
                    T1a = T14 + T15;
                    TL = Tz + TK;
                    T1x = T19 + T1a;
                    {
                         E T13, T16, T18, T1b;
                         T13 = FNMS(KP500000000, TK, Tz);
                         T16 = KP866025403 * (T14 - T15);
                         T17 = T13 + T16;
                         T1o = T13 - T16;
                         T18 = KP866025403 * (TJ - TE);
                         T1b = FNMS(KP500000000, T1a, T19);
                         T1c = T18 + T1b;
                         T1n = T1b - T18;
                    }
               }
               /* Twiddled inputs 1, 4 and 7. */
               {
                    E Ti, TY, Tn, TT, Ts, TU, Tt, TZ;
                    {
                         E Tf, Th, Te, Tg;
                         Tf = ri[WS(rs, 1)];
                         Th = ii[WS(rs, 1)];
                         Te = W[0];
                         Tg = W[1];
                         Ti = FMA(Te, Tf, Tg * Th);
                         TY = FNMS(Tg, Tf, Te * Th);
                    }
                    {
                         E Tk, Tm, Tj, Tl;
                         Tk = ri[WS(rs, 4)];
                         Tm = ii[WS(rs, 4)];
                         Tj = W[6];
                         Tl = W[7];
                         Tn = FMA(Tj, Tk, Tl * Tm);
                         TT = FNMS(Tl, Tk, Tj * Tm);
                    }
                    {
                         E Tp, Tr, To, Tq;
                         Tp = ri[WS(rs, 7)];
                         Tr = ii[WS(rs, 7)];
                         To = W[12];
                         Tq = W[13];
                         Ts = FMA(To, Tp, Tq * Tr);
                         TU = FNMS(Tq, Tp, To * Tr);
                    }
                    Tt = Tn + Ts;
                    TZ = TT + TU;
                    Tu = Ti + Tt;
                    T1w = TY + TZ;
                    {
                         E TS, TV, TX, T10;
                         TS = FNMS(KP500000000, Tt, Ti);
                         TV = KP866025403 * (TT - TU);
                         TW = TS + TV;
                         T1k = TS - TV;
                         TX = KP866025403 * (Ts - Tn);
                         T10 = FNMS(KP500000000, TZ, TY);
                         T11 = TX + T10;
                         T1l = T10 - TX;
                    }
               }
               /* ri outputs 0, 3 and 6. */
               {
                    E T1y, Td, TM, T1v;
                    T1y = KP866025403 * (T1w - T1x);
                    Td = T1 + Tc;
                    TM = Tu + TL;
                    T1v = FNMS(KP500000000, TM, Td);
                    ri[0] = Td + TM;
                    ri[WS(rs, 3)] = T1v + T1y;
                    ri[WS(rs, 6)] = T1v - T1y;
               }
               /* ii outputs 0, 3 and 6. */
               {
                    E T1D, T1z, T1C, T1E;
                    T1D = KP866025403 * (TL - Tu);
                    T1z = T1w + T1x;
                    T1C = T1A + T1B;
                    T1E = FNMS(KP500000000, T1z, T1C);
                    ii[0] = T1z + T1C;
                    ii[WS(rs, 6)] = T1E - T1D;
                    ii[WS(rs, 3)] = T1D + T1E;
               }
               /* Outputs 1, 4 and 7. */
               {
                    E TR, T1I, T1e, T1J, T1i, T1F, T1f, T1K;
                    TR = TN + TQ;
                    T1I = T1G + T1H;
                    {
                         E T12, T1d, T1g, T1h;
                         T12 = FMA(KP766044443, TW, KP642787609 * T11);
                         T1d = FMA(KP173648177, T17, KP984807753 * T1c);
                         T1e = T12 + T1d;
                         T1J = KP866025403 * (T1d - T12);
                         T1g = FNMS(KP642787609, TW, KP766044443 * T11);
                         T1h = FNMS(KP984807753, T17, KP173648177 * T1c);
                         T1i = KP866025403 * (T1g - T1h);
                         T1F = T1g + T1h;
                    }
                    ri[WS(rs, 1)] = TR + T1e;
                    ii[WS(rs, 1)] = T1F + T1I;
                    T1f = FNMS(KP500000000, T1e, TR);
                    ri[WS(rs, 7)] = T1f - T1i;
                    ri[WS(rs, 4)] = T1f + T1i;
                    T1K = FNMS(KP500000000, T1F, T1I);
                    ii[WS(rs, 4)] = T1J + T1K;
                    ii[WS(rs, 7)] = T1K - T1J;
               }
               /* Outputs 2, 5 and 8. */
               {
                    E T1j, T1M, T1q, T1N, T1u, T1L, T1r, T1O;
                    T1j = TN - TQ;
                    T1M = T1H - T1G;
                    {
                         E T1m, T1p, T1s, T1t;
                         T1m = FMA(KP173648177, T1k, KP984807753 * T1l);
                         T1p = FNMS(KP939692620, T1o, KP342020143 * T1n);
                         T1q = T1m + T1p;
                         T1N = KP866025403 * (T1p - T1m);
                         T1s = FNMS(KP984807753, T1k, KP173648177 * T1l);
                         T1t = FMA(KP342020143, T1o, KP939692620 * T1n);
                         T1u = KP866025403 * (T1s + T1t);
                         T1L = T1s - T1t;
                    }
                    ri[WS(rs, 2)] = T1j + T1q;
                    ii[WS(rs, 2)] = T1L + T1M;
                    T1r = FNMS(KP500000000, T1q, T1j);
                    ri[WS(rs, 8)] = T1r - T1u;
                    ri[WS(rs, 5)] = T1r + T1u;
                    T1O = FNMS(KP500000000, T1L, T1M);
                    ii[WS(rs, 5)] = T1N + T1O;
                    ii[WS(rs, 8)] = T1O - T1N;
               }
          }
     }
}
476

    
477
static const tw_instr twinstr[] = {
478
     {TW_FULL, 0, 9},
479
     {TW_NEXT, 1, 0}
480
};
481

    
482
/*
 * Descriptor passed to the planner when this codelet is registered.
 * Fields are positional; their names live in the ct_desc declaration,
 * which is not visible in this file.
 */
static const ct_desc desc = {
     9,                  /* presumably the transform size (generator was run with -n 9) */
     "t1_9",             /* codelet name */
     twinstr,            /* twiddle instruction list defined above */
     &GENUS,
     {60, 36, 36, 0},    /* matches the generator's count: 60 adds, 36 muls, 36 fused multiply/adds */
     0,
     0,
     0
};
483

    
484
/*
 * Registration entry point: hands the t1_9 kernel and its descriptor to the
 * planner `p` through kdft_dit_register.  X() is the project's name-mangling
 * macro (defined elsewhere).
 */
void X(codelet_t1_9) (planner *p) {
     X(kdft_dit_register) (p, t1_9, &desc);
}
487
#endif