To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t1_10.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (13.1 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:14 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 10 -name t1_10 -include dft/scalar/t.h */
29

    
30
/*
31
 * This function contains 102 FP additions, 72 FP multiplications,
32
 * (or, 48 additions, 18 multiplications, 54 fused multiply/add),
33
 * 47 stack variables, 4 constants, and 40 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36

    
37
/*
 * t1_10 (FMA-oriented variant, compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined): in-place kernel over 10 complex
 * elements, generated by genfft's gen_twiddle with "-n 10".
 *
 *   ri, ii  real and imaginary data; element k is read from and written
 *           back to ri[WS(rs, k)] / ii[WS(rs, k)], k = 0..9.
 *   W       twiddle table; 18 values are consumed per iteration, the pair
 *           W[2k-2], W[2k-1] being applied to element k (k = 1..9).
 *   rs      stride handed to WS() to locate the 10 elements.
 *   mb, me  the loop runs for m = mb .. me-1; W is first advanced by mb*18.
 *   ms      amount added to ri and ii after each iteration.
 *
 * NOTE(review): comments below assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b (macros defined outside this file) -- confirm.
 * NOTE(review): presumably a size-10 DFT applied after the twiddle
 * multiplication; the file path and registration call suggest this, the
 * code alone does not name it.
 */
static void t1_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     /* Numerically sin(2*pi/5). */
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
40
     /* Numerically sqrt(5)/4. */
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
41
     /* Numerically (sqrt(5)-1)/2. */
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
42
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
43
     {
44
          INT m;
45
          /* One pass per m; each pass advances the data pointers by ms and
             the twiddle pointer by 18.  MAKE_VOLATILE_STRIDE is a project
             macro evaluated in the increment clause (purpose not visible
             here). */
          for (m = mb, W = W + (mb * 18); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) {
46
               /* Values shared between the load stage and the output stages. */
               E T8, T23, T12, T1U, TM, TZ, T10, T1F, T1G, T1P, T16, T17, T18, T1s, T1x;
47
               E T25, Tl, Ty, Tz, T1I, T1J, T1O, T13, T14, T15, T1h, T1m, T24;
48
               /* Element 0 is used untouched; element 5 is multiplied by the
                  twiddle pair W[8], W[9].  T8/T23 hold the real/imag
                  difference, T12/T1U the real/imag sum. */
               {
49
                    E T1, T1T, T3, T6, T4, T1R, T2, T7, T1S, T5;
50
                    T1 = ri[0];
51
                    T1T = ii[0];
52
                    T3 = ri[WS(rs, 5)];
53
                    T6 = ii[WS(rs, 5)];
54
                    T2 = W[8];
55
                    T4 = T2 * T3;
56
                    T1R = T2 * T6;
57
                    T5 = W[9];
58
                    /* T7 = W[8]*re + W[9]*im, T1S = W[8]*im - W[9]*re. */
                    T7 = FMA(T5, T6, T4);
59
                    T1S = FNMS(T5, T3, T1R);
60
                    T8 = T1 - T7;
61
                    T23 = T1T - T1S;
62
                    T12 = T1 + T7;
63
                    T1U = T1S + T1T;
64
               }
65
               /* Twiddle elements 4, 1, 9 and 6 (same multiply pattern as
                  above), then form their pairwise sums and differences. */
               {
66
                    E TF, T1p, TY, T1w, TL, T1r, TS, T1u;
67
                    /* Element 4 with W[6], W[7]. */
                    {
68
                         E TB, TE, TC, T1o, TA, TD;
69
                         TB = ri[WS(rs, 4)];
70
                         TE = ii[WS(rs, 4)];
71
                         TA = W[6];
72
                         TC = TA * TB;
73
                         T1o = TA * TE;
74
                         TD = W[7];
75
                         TF = FMA(TD, TE, TC);
76
                         T1p = FNMS(TD, TB, T1o);
77
                    }
78
                    /* Element 1 with W[0], W[1]. */
                    {
79
                         E TU, TX, TV, T1v, TT, TW;
80
                         TU = ri[WS(rs, 1)];
81
                         TX = ii[WS(rs, 1)];
82
                         TT = W[0];
83
                         TV = TT * TU;
84
                         T1v = TT * TX;
85
                         TW = W[1];
86
                         TY = FMA(TW, TX, TV);
87
                         T1w = FNMS(TW, TU, T1v);
88
                    }
89
                    /* Element 9 with W[16], W[17]. */
                    {
90
                         E TH, TK, TI, T1q, TG, TJ;
91
                         TH = ri[WS(rs, 9)];
92
                         TK = ii[WS(rs, 9)];
93
                         TG = W[16];
94
                         TI = TG * TH;
95
                         T1q = TG * TK;
96
                         TJ = W[17];
97
                         TL = FMA(TJ, TK, TI);
98
                         T1r = FNMS(TJ, TH, T1q);
99
                    }
100
                    /* Element 6 with W[10], W[11]. */
                    {
101
                         E TO, TR, TP, T1t, TN, TQ;
102
                         TO = ri[WS(rs, 6)];
103
                         TR = ii[WS(rs, 6)];
104
                         TN = W[10];
105
                         TP = TN * TO;
106
                         T1t = TN * TR;
107
                         TQ = W[11];
108
                         TS = FMA(TQ, TR, TP);
109
                         T1u = FNMS(TQ, TO, T1t);
110
                    }
111
                    /* Real differences (4-9, 6-1) and their sum. */
                    TM = TF - TL;
112
                    TZ = TS - TY;
113
                    T10 = TM + TZ;
114
                    /* Imaginary sums (4+9, 6+1) and their total. */
                    T1F = T1p + T1r;
115
                    T1G = T1u + T1w;
116
                    T1P = T1F + T1G;
117
                    /* Real sums (4+9, 6+1) and their total. */
                    T16 = TF + TL;
118
                    T17 = TS + TY;
119
                    T18 = T16 + T17;
120
                    /* Imaginary differences (4-9, 6-1) and their sum. */
                    T1s = T1p - T1r;
121
                    T1x = T1u - T1w;
122
                    T25 = T1s + T1x;
123
               }
124
               /* Twiddle elements 2, 3, 7 and 8, then form the same kind of
                  pairwise sums and differences (pairs 2/7 and 8/3). */
               {
125
                    E Te, T1e, Tx, T1l, Tk, T1g, Tr, T1j;
126
                    /* Element 2 with W[2], W[3]. */
                    {
127
                         E Ta, Td, Tb, T1d, T9, Tc;
128
                         Ta = ri[WS(rs, 2)];
129
                         Td = ii[WS(rs, 2)];
130
                         T9 = W[2];
131
                         Tb = T9 * Ta;
132
                         T1d = T9 * Td;
133
                         Tc = W[3];
134
                         Te = FMA(Tc, Td, Tb);
135
                         T1e = FNMS(Tc, Ta, T1d);
136
                    }
137
                    /* Element 3 with W[4], W[5]. */
                    {
138
                         E Tt, Tw, Tu, T1k, Ts, Tv;
139
                         Tt = ri[WS(rs, 3)];
140
                         Tw = ii[WS(rs, 3)];
141
                         Ts = W[4];
142
                         Tu = Ts * Tt;
143
                         T1k = Ts * Tw;
144
                         Tv = W[5];
145
                         Tx = FMA(Tv, Tw, Tu);
146
                         T1l = FNMS(Tv, Tt, T1k);
147
                    }
148
                    /* Element 7 with W[12], W[13]. */
                    {
149
                         E Tg, Tj, Th, T1f, Tf, Ti;
150
                         Tg = ri[WS(rs, 7)];
151
                         Tj = ii[WS(rs, 7)];
152
                         Tf = W[12];
153
                         Th = Tf * Tg;
154
                         T1f = Tf * Tj;
155
                         Ti = W[13];
156
                         Tk = FMA(Ti, Tj, Th);
157
                         T1g = FNMS(Ti, Tg, T1f);
158
                    }
159
                    /* Element 8 with W[14], W[15]. */
                    {
160
                         E Tn, Tq, To, T1i, Tm, Tp;
161
                         Tn = ri[WS(rs, 8)];
162
                         Tq = ii[WS(rs, 8)];
163
                         Tm = W[14];
164
                         To = Tm * Tn;
165
                         T1i = Tm * Tq;
166
                         Tp = W[15];
167
                         Tr = FMA(Tp, Tq, To);
168
                         T1j = FNMS(Tp, Tn, T1i);
169
                    }
170
                    /* Real differences (2-7, 8-3) and their sum. */
                    Tl = Te - Tk;
171
                    Ty = Tr - Tx;
172
                    Tz = Tl + Ty;
173
                    /* Imaginary sums (2+7, 8+3) and their total. */
                    T1I = T1e + T1g;
174
                    T1J = T1j + T1l;
175
                    T1O = T1I + T1J;
176
                    /* Real sums (2+7, 8+3) and their total. */
                    T13 = Te + Tk;
177
                    T14 = Tr + Tx;
178
                    T15 = T13 + T14;
179
                    /* Imaginary differences (2-7, 8-3) and their sum. */
                    T1h = T1e - T1g;
180
                    T1m = T1j - T1l;
181
                    T24 = T1h + T1m;
182
               }
183
               /* Real parts of outputs 5, 7, 3, 9 and 1, built from the
                  difference term T8 and the real/imag difference values. */
               {
184
                    E T1b, T11, T1a, T1z, T1B, T1n, T1y, T1A, T1c;
185
                    T1b = Tz - T10;
186
                    T11 = Tz + T10;
187
                    T1a = FNMS(KP250000000, T11, T8);
188
                    T1n = T1h - T1m;
189
                    T1y = T1s - T1x;
190
                    T1z = FMA(KP618033988, T1y, T1n);
191
                    T1B = FNMS(KP618033988, T1n, T1y);
192
                    ri[WS(rs, 5)] = T8 + T11;
193
                    T1A = FNMS(KP559016994, T1b, T1a);
194
                    ri[WS(rs, 7)] = FNMS(KP951056516, T1B, T1A);
195
                    ri[WS(rs, 3)] = FMA(KP951056516, T1B, T1A);
196
                    T1c = FMA(KP559016994, T1b, T1a);
197
                    ri[WS(rs, 9)] = FNMS(KP951056516, T1z, T1c);
198
                    ri[WS(rs, 1)] = FMA(KP951056516, T1z, T1c);
199
               }
200
               /* Imaginary parts of outputs 5, 3, 7, 1 and 9, built from the
                  difference term T23. */
               {
201
                    E T28, T26, T27, T2c, T2e, T2a, T2b, T2d, T29;
202
                    T28 = T24 - T25;
203
                    T26 = T24 + T25;
204
                    T27 = FNMS(KP250000000, T26, T23);
205
                    T2a = Tl - Ty;
206
                    T2b = TM - TZ;
207
                    T2c = FMA(KP618033988, T2b, T2a);
208
                    T2e = FNMS(KP618033988, T2a, T2b);
209
                    ii[WS(rs, 5)] = T26 + T23;
210
                    T2d = FNMS(KP559016994, T28, T27);
211
                    ii[WS(rs, 3)] = FNMS(KP951056516, T2e, T2d);
212
                    ii[WS(rs, 7)] = FMA(KP951056516, T2e, T2d);
213
                    T29 = FMA(KP559016994, T28, T27);
214
                    ii[WS(rs, 1)] = FNMS(KP951056516, T2c, T29);
215
                    ii[WS(rs, 9)] = FMA(KP951056516, T2c, T29);
216
               }
217
               /* Real parts of outputs 0, 4, 6, 2 and 8, built from the sum
                  term T12. */
               {
218
                    E T1D, T19, T1C, T1L, T1N, T1H, T1K, T1M, T1E;
219
                    T1D = T15 - T18;
220
                    T19 = T15 + T18;
221
                    T1C = FNMS(KP250000000, T19, T12);
222
                    T1H = T1F - T1G;
223
                    T1K = T1I - T1J;
224
                    T1L = FNMS(KP618033988, T1K, T1H);
225
                    T1N = FMA(KP618033988, T1H, T1K);
226
                    ri[0] = T12 + T19;
227
                    T1M = FMA(KP559016994, T1D, T1C);
228
                    ri[WS(rs, 4)] = FNMS(KP951056516, T1N, T1M);
229
                    ri[WS(rs, 6)] = FMA(KP951056516, T1N, T1M);
230
                    T1E = FNMS(KP559016994, T1D, T1C);
231
                    ri[WS(rs, 2)] = FNMS(KP951056516, T1L, T1E);
232
                    ri[WS(rs, 8)] = FMA(KP951056516, T1L, T1E);
233
               }
234
               /* Imaginary parts of outputs 0, 4, 6, 2 and 8, built from the
                  sum term T1U. */
               {
235
                    E T1W, T1Q, T1V, T20, T22, T1Y, T1Z, T21, T1X;
236
                    T1W = T1O - T1P;
237
                    T1Q = T1O + T1P;
238
                    T1V = FNMS(KP250000000, T1Q, T1U);
239
                    T1Y = T16 - T17;
240
                    T1Z = T13 - T14;
241
                    T20 = FNMS(KP618033988, T1Z, T1Y);
242
                    T22 = FMA(KP618033988, T1Y, T1Z);
243
                    ii[0] = T1Q + T1U;
244
                    T21 = FMA(KP559016994, T1W, T1V);
245
                    ii[WS(rs, 4)] = FMA(KP951056516, T22, T21);
246
                    ii[WS(rs, 6)] = FNMS(KP951056516, T22, T21);
247
                    T1X = FNMS(KP559016994, T1W, T1V);
248
                    ii[WS(rs, 2)] = FMA(KP951056516, T20, T1X);
249
                    ii[WS(rs, 8)] = FNMS(KP951056516, T20, T1X);
250
               }
251
          }
252
     }
253
}
254

    
255
/*
 * Twiddle instruction list referenced by the codelet descriptor `desc`.
 * The entries are positional tw_instr initializers; the meaning of TW_FULL,
 * TW_NEXT and of the numeric fields is defined outside this file.
 * NOTE(review): {TW_FULL, 0, 10} presumably requests the complete twiddle
 * set for size 10 and {TW_NEXT, 1, 0} terminates the list -- confirm.
 */
static const tw_instr twinstr[] = {
256
     {TW_FULL, 0, 10},
257
     {TW_NEXT, 1, 0}
258
};
259

    
260
/*
 * Descriptor handed to the registration call for the FMA variant: the value
 * 10, the name "t1_10", the twiddle instruction list, &GENUS, and the
 * operation counts {48, 18, 54, 0}.  The first three counts match the
 * generator's summary comment for this variant (48 additions, 18
 * multiplications, 54 fused multiply/adds).  The meaning of the fourth count
 * and of the three trailing zeros is not visible in this file.
 */
static const ct_desc desc = { 10, "t1_10", twinstr, &GENUS, {48, 18, 54, 0}, 0, 0, 0 };
261

    
262
/*
 * Registration entry point: passes the planner p, the kernel t1_10 and its
 * descriptor to X(kdft_dit_register).  X() is a project macro wrapped around
 * both identifiers (presumably a name-prefixing macro -- confirm).
 */
void X(codelet_t1_10) (planner *p) {
263
     X(kdft_dit_register) (p, t1_10, &desc);
264
}
265
#else
266

    
267
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 10 -name t1_10 -include dft/scalar/t.h */
268

    
269
/*
270
 * This function contains 102 FP additions, 60 FP multiplications,
271
 * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
272
 * 45 stack variables, 4 constants, and 40 memory accesses
273
 */
274
#include "dft/scalar/t.h"
275

    
276
/*
 * t1_10 (non-FMA variant, compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined): in-place kernel over 10 complex
 * elements, generated by genfft's gen_twiddle with "-n 10".
 *
 *   ri, ii  real and imaginary data; element k is read from and written
 *           back to ri[WS(rs, k)] / ii[WS(rs, k)], k = 0..9.
 *   W       twiddle table; 18 values are consumed per iteration, the pair
 *           W[2k-2], W[2k-1] being applied to element k (k = 1..9).
 *   rs      stride handed to WS() to locate the 10 elements.
 *   mb, me  the loop runs for m = mb .. me-1; W is first advanced by mb*18.
 *   ms      amount added to ri and ii after each iteration.
 *
 * NOTE(review): comments below assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b (macros defined outside this file) -- confirm.
 * NOTE(review): presumably a size-10 DFT applied after the twiddle
 * multiplication; the file path and registration call suggest this, the
 * code alone does not name it.
 */
static void t1_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
277
{
278
     /* Numerically sin(pi/5). */
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
279
     /* Numerically sin(2*pi/5). */
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
280
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
281
     /* Numerically sqrt(5)/4. */
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
282
     {
283
          INT m;
284
          /* One pass per m; each pass advances the data pointers by ms and
             the twiddle pointer by 18.  MAKE_VOLATILE_STRIDE is a project
             macro evaluated in the increment clause (purpose not visible
             here). */
          for (m = mb, W = W + (mb * 18); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 18, MAKE_VOLATILE_STRIDE(20, rs)) {
285
               /* Values shared between the load stage and the output stages. */
               E T7, T1O, TT, T1C, TF, TQ, TR, T1o, T1p, T1y, TX, TY, TZ, T1d, T1g;
286
               E T1M, Ti, Tt, Tu, T1r, T1s, T1x, TU, TV, TW, T16, T19, T1L;
287
               /* Element 0 is used untouched; element 5 is multiplied by the
                  twiddle pair W[8], W[9].  T7/T1O hold the real/imag
                  difference, TT/T1C the real/imag sum. */
               {
288
                    E T1, T1B, T6, T1A;
289
                    T1 = ri[0];
290
                    T1B = ii[0];
291
                    {
292
                         E T3, T5, T2, T4;
293
                         T3 = ri[WS(rs, 5)];
294
                         T5 = ii[WS(rs, 5)];
295
                         T2 = W[8];
296
                         T4 = W[9];
297
                         /* T6 = W[8]*re + W[9]*im, T1A = W[8]*im - W[9]*re. */
                         T6 = FMA(T2, T3, T4 * T5);
298
                         T1A = FNMS(T4, T3, T2 * T5);
299
                    }
300
                    T7 = T1 - T6;
301
                    T1O = T1B - T1A;
302
                    TT = T1 + T6;
303
                    T1C = T1A + T1B;
304
               }
305
               /* Twiddle elements 4, 1, 9 and 6 (same multiply pattern as
                  above), then form their pairwise sums and differences. */
               {
306
                    E Tz, T1b, TP, T1f, TE, T1c, TK, T1e;
307
                    /* Element 4 with W[6], W[7]. */
                    {
308
                         E Tw, Ty, Tv, Tx;
309
                         Tw = ri[WS(rs, 4)];
310
                         Ty = ii[WS(rs, 4)];
311
                         Tv = W[6];
312
                         Tx = W[7];
313
                         Tz = FMA(Tv, Tw, Tx * Ty);
314
                         T1b = FNMS(Tx, Tw, Tv * Ty);
315
                    }
316
                    /* Element 1 with W[0], W[1]. */
                    {
317
                         E TM, TO, TL, TN;
318
                         TM = ri[WS(rs, 1)];
319
                         TO = ii[WS(rs, 1)];
320
                         TL = W[0];
321
                         TN = W[1];
322
                         TP = FMA(TL, TM, TN * TO);
323
                         T1f = FNMS(TN, TM, TL * TO);
324
                    }
325
                    /* Element 9 with W[16], W[17]. */
                    {
326
                         E TB, TD, TA, TC;
327
                         TB = ri[WS(rs, 9)];
328
                         TD = ii[WS(rs, 9)];
329
                         TA = W[16];
330
                         TC = W[17];
331
                         TE = FMA(TA, TB, TC * TD);
332
                         T1c = FNMS(TC, TB, TA * TD);
333
                    }
334
                    /* Element 6 with W[10], W[11]. */
                    {
335
                         E TH, TJ, TG, TI;
336
                         TH = ri[WS(rs, 6)];
337
                         TJ = ii[WS(rs, 6)];
338
                         TG = W[10];
339
                         TI = W[11];
340
                         TK = FMA(TG, TH, TI * TJ);
341
                         T1e = FNMS(TI, TH, TG * TJ);
342
                    }
343
                    /* Real differences (4-9, 6-1) and their sum. */
                    TF = Tz - TE;
344
                    TQ = TK - TP;
345
                    TR = TF + TQ;
346
                    /* Imaginary sums (4+9, 6+1) and their total. */
                    T1o = T1b + T1c;
347
                    T1p = T1e + T1f;
348
                    T1y = T1o + T1p;
349
                    /* Real sums (4+9, 6+1) and their total. */
                    TX = Tz + TE;
350
                    TY = TK + TP;
351
                    TZ = TX + TY;
352
                    /* Imaginary differences (4-9, 6-1) and their sum. */
                    T1d = T1b - T1c;
353
                    T1g = T1e - T1f;
354
                    T1M = T1d + T1g;
355
               }
356
               /* Twiddle elements 2, 3, 7 and 8, then form the same kind of
                  pairwise sums and differences (pairs 2/7 and 8/3). */
               {
357
                    E Tc, T14, Ts, T18, Th, T15, Tn, T17;
358
                    /* Element 2 with W[2], W[3]. */
                    {
359
                         E T9, Tb, T8, Ta;
360
                         T9 = ri[WS(rs, 2)];
361
                         Tb = ii[WS(rs, 2)];
362
                         T8 = W[2];
363
                         Ta = W[3];
364
                         Tc = FMA(T8, T9, Ta * Tb);
365
                         T14 = FNMS(Ta, T9, T8 * Tb);
366
                    }
367
                    /* Element 3 with W[4], W[5]. */
                    {
368
                         E Tp, Tr, To, Tq;
369
                         Tp = ri[WS(rs, 3)];
370
                         Tr = ii[WS(rs, 3)];
371
                         To = W[4];
372
                         Tq = W[5];
373
                         Ts = FMA(To, Tp, Tq * Tr);
374
                         T18 = FNMS(Tq, Tp, To * Tr);
375
                    }
376
                    /* Element 7 with W[12], W[13]. */
                    {
377
                         E Te, Tg, Td, Tf;
378
                         Te = ri[WS(rs, 7)];
379
                         Tg = ii[WS(rs, 7)];
380
                         Td = W[12];
381
                         Tf = W[13];
382
                         Th = FMA(Td, Te, Tf * Tg);
383
                         T15 = FNMS(Tf, Te, Td * Tg);
384
                    }
385
                    /* Element 8 with W[14], W[15]. */
                    {
386
                         E Tk, Tm, Tj, Tl;
387
                         Tk = ri[WS(rs, 8)];
388
                         Tm = ii[WS(rs, 8)];
389
                         Tj = W[14];
390
                         Tl = W[15];
391
                         Tn = FMA(Tj, Tk, Tl * Tm);
392
                         T17 = FNMS(Tl, Tk, Tj * Tm);
393
                    }
394
                    /* Real differences (2-7, 8-3) and their sum. */
                    Ti = Tc - Th;
395
                    Tt = Tn - Ts;
396
                    Tu = Ti + Tt;
397
                    /* Imaginary sums (2+7, 8+3) and their total. */
                    T1r = T14 + T15;
398
                    T1s = T17 + T18;
399
                    T1x = T1r + T1s;
400
                    /* Real sums (2+7, 8+3) and their total. */
                    TU = Tc + Th;
401
                    TV = Tn + Ts;
402
                    TW = TU + TV;
403
                    /* Imaginary differences (2-7, 8-3) and their sum. */
                    T16 = T14 - T15;
404
                    T19 = T17 - T18;
405
                    T1L = T16 + T19;
406
               }
407
               /* Real parts of outputs 5, 7, 3, 9 and 1, built from the
                  difference term T7 and the real/imag difference values. */
               {
408
                    E T11, TS, T12, T1i, T1k, T1a, T1h, T1j, T13;
409
                    T11 = KP559016994 * (Tu - TR);
410
                    TS = Tu + TR;
411
                    T12 = FNMS(KP250000000, TS, T7);
412
                    T1a = T16 - T19;
413
                    T1h = T1d - T1g;
414
                    T1i = FMA(KP951056516, T1a, KP587785252 * T1h);
415
                    T1k = FNMS(KP587785252, T1a, KP951056516 * T1h);
416
                    ri[WS(rs, 5)] = T7 + TS;
417
                    T1j = T12 - T11;
418
                    ri[WS(rs, 7)] = T1j - T1k;
419
                    ri[WS(rs, 3)] = T1j + T1k;
420
                    T13 = T11 + T12;
421
                    ri[WS(rs, 9)] = T13 - T1i;
422
                    ri[WS(rs, 1)] = T13 + T1i;
423
               }
424
               /* Imaginary parts of outputs 5, 3, 7, 1 and 9, built from the
                  difference term T1O. */
               {
425
                    E T1N, T1P, T1Q, T1U, T1W, T1S, T1T, T1V, T1R;
426
                    T1N = KP559016994 * (T1L - T1M);
427
                    T1P = T1L + T1M;
428
                    T1Q = FNMS(KP250000000, T1P, T1O);
429
                    T1S = Ti - Tt;
430
                    T1T = TF - TQ;
431
                    T1U = FMA(KP951056516, T1S, KP587785252 * T1T);
432
                    T1W = FNMS(KP587785252, T1S, KP951056516 * T1T);
433
                    ii[WS(rs, 5)] = T1P + T1O;
434
                    T1V = T1Q - T1N;
435
                    ii[WS(rs, 3)] = T1V - T1W;
436
                    ii[WS(rs, 7)] = T1W + T1V;
437
                    T1R = T1N + T1Q;
438
                    ii[WS(rs, 1)] = T1R - T1U;
439
                    ii[WS(rs, 9)] = T1U + T1R;
440
               }
441
               /* Real parts of outputs 0, 4, 6, 2 and 8, built from the sum
                  term TT. */
               {
442
                    E T1m, T10, T1l, T1u, T1w, T1q, T1t, T1v, T1n;
443
                    T1m = KP559016994 * (TW - TZ);
444
                    T10 = TW + TZ;
445
                    T1l = FNMS(KP250000000, T10, TT);
446
                    T1q = T1o - T1p;
447
                    T1t = T1r - T1s;
448
                    T1u = FNMS(KP587785252, T1t, KP951056516 * T1q);
449
                    T1w = FMA(KP951056516, T1t, KP587785252 * T1q);
450
                    ri[0] = TT + T10;
451
                    T1v = T1m + T1l;
452
                    ri[WS(rs, 4)] = T1v - T1w;
453
                    ri[WS(rs, 6)] = T1v + T1w;
454
                    T1n = T1l - T1m;
455
                    ri[WS(rs, 2)] = T1n - T1u;
456
                    ri[WS(rs, 8)] = T1n + T1u;
457
               }
458
               /* Imaginary parts of outputs 0, 4, 6, 2 and 8, built from the
                  sum term T1C. */
               {
459
                    E T1H, T1z, T1G, T1F, T1J, T1D, T1E, T1K, T1I;
460
                    T1H = KP559016994 * (T1x - T1y);
461
                    T1z = T1x + T1y;
462
                    T1G = FNMS(KP250000000, T1z, T1C);
463
                    T1D = TX - TY;
464
                    T1E = TU - TV;
465
                    T1F = FNMS(KP587785252, T1E, KP951056516 * T1D);
466
                    T1J = FMA(KP951056516, T1E, KP587785252 * T1D);
467
                    ii[0] = T1z + T1C;
468
                    T1K = T1H + T1G;
469
                    ii[WS(rs, 4)] = T1J + T1K;
470
                    ii[WS(rs, 6)] = T1K - T1J;
471
                    T1I = T1G - T1H;
472
                    ii[WS(rs, 2)] = T1F + T1I;
473
                    ii[WS(rs, 8)] = T1I - T1F;
474
               }
475
          }
476
     }
477
}
478

    
479
/*
 * Twiddle instruction list referenced by the codelet descriptor `desc`.
 * The entries are positional tw_instr initializers; the meaning of TW_FULL,
 * TW_NEXT and of the numeric fields is defined outside this file.
 * NOTE(review): {TW_FULL, 0, 10} presumably requests the complete twiddle
 * set for size 10 and {TW_NEXT, 1, 0} terminates the list -- confirm.
 */
static const tw_instr twinstr[] = {
480
     {TW_FULL, 0, 10},
481
     {TW_NEXT, 1, 0}
482
};
483

    
484
/*
 * Descriptor handed to the registration call for the non-FMA variant: the
 * value 10, the name "t1_10", the twiddle instruction list, &GENUS, and the
 * operation counts {72, 30, 30, 0}.  The first three counts match the
 * generator's summary comment for this variant (72 additions, 30
 * multiplications, 30 fused multiply/adds).  The meaning of the fourth count
 * and of the three trailing zeros is not visible in this file.
 */
static const ct_desc desc = { 10, "t1_10", twinstr, &GENUS, {72, 30, 30, 0}, 0, 0, 0 };
485

    
486
/*
 * Registration entry point: passes the planner p, the kernel t1_10 and its
 * descriptor to X(kdft_dit_register).  X() is a project macro wrapped around
 * both identifiers (presumably a name-prefixing macro -- confirm).
 */
void X(codelet_t1_10) (planner *p) {
487
     X(kdft_dit_register) (p, t1_10, &desc);
488
}
489
#endif