To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / n1_9.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (11.1 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:10 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 9 -name n1_9 -include dft/scalar/n.h */
29

    
30
/*
31
 * This function contains 80 FP additions, 56 FP multiplications,
32
 * (or, 24 additions, 0 multiplications, 56 fused multiply/add),
33
 * 41 stack variables, 10 constants, and 36 memory accesses
34
 */
35
#include "dft/scalar/n.h"
36

    
37
/*
 * n1_9, FMA-preferring variant (compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * For each of the v loop iterations this routine reads nine values from
 * ri and nine from ii (at indices WS(is, 0) .. WS(is, 8)) and writes nine
 * values to ro and nine to io (at indices WS(os, 0) .. WS(os, 8)).
 * After every iteration ri/ii advance by ivs and ro/io advance by ovs.
 *
 * NOTE(review): ri/ii and ro/io are presumably the real and imaginary
 * parts of the input and output of a size-9 DFT (the generator command
 * line says "-n 9") -- confirm against dft/codelet-dft.h.
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file; presumably
 * FMA(a, b, c) is a*b + c and FNMS(a, b, c) is c - a*b -- confirm.
 */
static void n1_9(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
38
{
39
     /* Numeric constants used below; KP866025403 is numerically sqrt(3)/2
        and KP500000000 is exactly 1/2. */
     DK(KP954188894, +0.954188894138671133499268364187245676532219158);
40
     DK(KP363970234, +0.363970234266202361351047882776834043890471784);
41
     DK(KP852868531, +0.852868531952443209628250963940074071936020296);
42
     DK(KP492403876, +0.492403876506104029683371512294761506835321626);
43
     DK(KP984807753, +0.984807753012208059366743024589523013670643252);
44
     DK(KP777861913, +0.777861913430206160028177977318626690410586096);
45
     DK(KP839099631, +0.839099631177280011763127298123181364687434283);
46
     DK(KP176326980, +0.176326980708464973471090386868618986121633062);
47
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
48
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
49
     {
50
          INT i;
51
          /* One pass per transform: count down from v, stepping the input
             pointers by ivs and the output pointers by ovs. */
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(36, is), MAKE_VOLATILE_STRIDE(36, os)) {
52
               /* Intermediates that are shared between the nested scopes below. */
               E T5, TL, Tm, Tl, T1f, TM, Ta, T1c, TF, TW, TI, TX, Tf, T1d, Ts;
53
               E TZ, Tx, T10;
54
               /* ri at positions 0, 3, 6: sum (T5), half-sum correction (TL), difference (Tm). */
               {
55
                    E T1, T2, T3, T4;
56
                    T1 = ri[0];
57
                    T2 = ri[WS(is, 3)];
58
                    T3 = ri[WS(is, 6)];
59
                    T4 = T2 + T3;
60
                    T5 = T1 + T4;
61
                    TL = FNMS(KP500000000, T4, T1);
62
                    Tm = T3 - T2;
63
               }
64
               /* ii at positions 0, 3, 6: same combination as the ri group above. */
               {
65
                    E Th, Ti, Tj, Tk;
66
                    Th = ii[0];
67
                    Ti = ii[WS(is, 3)];
68
                    Tj = ii[WS(is, 6)];
69
                    Tk = Ti + Tj;
70
                    Tl = FNMS(KP500000000, Tk, Th);
71
                    T1f = Th + Tk;
72
                    TM = Ti - Tj;
73
               }
74
               /* ri and ii at positions 1, 4, 7: sums (Ta, T1c) and the
                  KP866025403-weighted combinations TF, TW, TI, TX. */
               {
75
                    E T6, Tz, T9, TE, TC, TH, TD, TG;
76
                    T6 = ri[WS(is, 1)];
77
                    Tz = ii[WS(is, 1)];
78
                    {
79
                         E T7, T8, TA, TB;
80
                         T7 = ri[WS(is, 4)];
81
                         T8 = ri[WS(is, 7)];
82
                         T9 = T7 + T8;
83
                         TE = T7 - T8;
84
                         TA = ii[WS(is, 4)];
85
                         TB = ii[WS(is, 7)];
86
                         TC = TA + TB;
87
                         TH = TB - TA;
88
                    }
89
                    Ta = T6 + T9;
90
                    T1c = Tz + TC;
91
                    TD = FNMS(KP500000000, TC, Tz);
92
                    TF = FNMS(KP866025403, TE, TD);
93
                    TW = FMA(KP866025403, TE, TD);
94
                    TG = FNMS(KP500000000, T9, T6);
95
                    TI = FNMS(KP866025403, TH, TG);
96
                    TX = FMA(KP866025403, TH, TG);
97
               }
98
               /* ri and ii at positions 2, 5, 8: sums (Tf, T1d) and the
                  KP866025403-weighted combinations Ts, TZ, Tx, T10. */
               {
99
                    E Tb, Tt, Te, Tw, Tr, Tu, To, Tv;
100
                    Tb = ri[WS(is, 2)];
101
                    Tt = ii[WS(is, 2)];
102
                    {
103
                         E Tc, Td, Tp, Tq;
104
                         Tc = ri[WS(is, 5)];
105
                         Td = ri[WS(is, 8)];
106
                         Te = Tc + Td;
107
                         Tw = Td - Tc;
108
                         Tp = ii[WS(is, 5)];
109
                         Tq = ii[WS(is, 8)];
110
                         Tr = Tp - Tq;
111
                         Tu = Tp + Tq;
112
                    }
113
                    Tf = Tb + Te;
114
                    T1d = Tt + Tu;
115
                    To = FNMS(KP500000000, Te, Tb);
116
                    Ts = FMA(KP866025403, Tr, To);
117
                    TZ = FNMS(KP866025403, Tr, To);
118
                    Tv = FNMS(KP500000000, Tu, Tt);
119
                    Tx = FMA(KP866025403, Tw, Tv);
120
                    T10 = FNMS(KP866025403, Tw, Tv);
121
               }
122
               /* Outputs 0, 3, 6 of ro and io, built from the three group sums
                  (T5/Ta/Tf for ro[0], T1f/T1c/T1d for io[0]). */
               {
123
                    E T1e, Tg, T1b, T1i, T1g, T1h;
124
                    T1e = T1c - T1d;
125
                    Tg = Ta + Tf;
126
                    T1b = FNMS(KP500000000, Tg, T5);
127
                    ro[0] = T5 + Tg;
128
                    ro[WS(os, 3)] = FMA(KP866025403, T1e, T1b);
129
                    ro[WS(os, 6)] = FNMS(KP866025403, T1e, T1b);
130
                    T1i = Tf - Ta;
131
                    T1g = T1c + T1d;
132
                    T1h = FNMS(KP500000000, T1g, T1f);
133
                    io[WS(os, 3)] = FMA(KP866025403, T1i, T1h);
134
                    io[0] = T1f + T1g;
135
                    io[WS(os, 6)] = FNMS(KP866025403, T1i, T1h);
136
               }
137
               /* Outputs 1, 4, 7 of ro and io, from Tn/TN and the
                  intermediates Ts, Tx, TF, TI. */
               {
138
                    E Tn, TN, TK, TS, TQ, TU, TR, TT;
139
                    Tn = FMA(KP866025403, Tm, Tl);
140
                    TN = FMA(KP866025403, TM, TL);
141
                    {
142
                         E Ty, TJ, TO, TP;
143
                         Ty = FNMS(KP176326980, Tx, Ts);
144
                         TJ = FNMS(KP839099631, TI, TF);
145
                         TK = FNMS(KP777861913, TJ, Ty);
146
                         TS = FMA(KP777861913, TJ, Ty);
147
                         TO = FMA(KP176326980, Ts, Tx);
148
                         TP = FMA(KP839099631, TF, TI);
149
                         TQ = FMA(KP777861913, TP, TO);
150
                         TU = FNMS(KP777861913, TP, TO);
151
                    }
152
                    io[WS(os, 1)] = FNMS(KP984807753, TK, Tn);
153
                    ro[WS(os, 1)] = FMA(KP984807753, TQ, TN);
154
                    TR = FNMS(KP492403876, TQ, TN);
155
                    ro[WS(os, 4)] = FMA(KP852868531, TS, TR);
156
                    ro[WS(os, 7)] = FNMS(KP852868531, TS, TR);
157
                    TT = FMA(KP492403876, TK, Tn);
158
                    io[WS(os, 7)] = FNMS(KP852868531, TU, TT);
159
                    io[WS(os, 4)] = FMA(KP852868531, TU, TT);
160
               }
161
               /* Outputs 2, 5, 8 of ro and io, from TV/T17 and the
                  intermediates TW, TX, TZ, T10. */
               {
162
                    E TV, T17, T12, T1a, T16, T18, T13, T19;
163
                    TV = FNMS(KP866025403, TM, TL);
164
                    T17 = FNMS(KP866025403, Tm, Tl);
165
                    {
166
                         E TY, T11, T14, T15;
167
                         TY = FMA(KP176326980, TX, TW);
168
                         T11 = FNMS(KP363970234, T10, TZ);
169
                         T12 = FNMS(KP954188894, T11, TY);
170
                         T1a = FMA(KP954188894, T11, TY);
171
                         T14 = FNMS(KP176326980, TW, TX);
172
                         T15 = FMA(KP363970234, TZ, T10);
173
                         T16 = FNMS(KP954188894, T15, T14);
174
                         T18 = FMA(KP954188894, T15, T14);
175
                    }
176
                    ro[WS(os, 2)] = FMA(KP984807753, T12, TV);
177
                    io[WS(os, 2)] = FNMS(KP984807753, T18, T17);
178
                    T13 = FNMS(KP492403876, T12, TV);
179
                    ro[WS(os, 5)] = FNMS(KP852868531, T16, T13);
180
                    ro[WS(os, 8)] = FMA(KP852868531, T16, T13);
181
                    T19 = FMA(KP492403876, T18, T17);
182
                    io[WS(os, 5)] = FNMS(KP852868531, T1a, T19);
183
                    io[WS(os, 8)] = FMA(KP852868531, T1a, T19);
184
               }
185
          }
186
     }
187
}
188

    
189
/* Descriptor handed to X(kdft_register) for the FMA variant: the value 9,
   the name "n1_9", and the counts {24, 0, 56, 0}, which match the
   "24 additions, 0 multiplications, 56 fused multiply/add" figures in the
   generated header comment.  NOTE(review): the meaning of the remaining
   fields (&GENUS and the trailing zeros) is set by kdft_desc, which is
   declared outside this file -- confirm there. */
static const kdft_desc desc = { 9, "n1_9", {24, 0, 56, 0}, &GENUS, 0, 0, 0, 0 };
190

    
191
/* Registration entry point: passes the n1_9 routine and its descriptor
   to X(kdft_register) for planner p. */
void X(codelet_n1_9) (planner *p) {
192
     X(kdft_register) (p, n1_9, &desc);
193
}
194

    
195
#else
196

    
197
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 9 -name n1_9 -include dft/scalar/n.h */
198

    
199
/*
200
 * This function contains 80 FP additions, 40 FP multiplications,
201
 * (or, 60 additions, 20 multiplications, 20 fused multiply/add),
202
 * 39 stack variables, 8 constants, and 36 memory accesses
203
 */
204
#include "dft/scalar/n.h"
205

    
206
/*
 * n1_9, non-FMA variant (compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * For each of the v loop iterations this routine reads nine values from
 * ri and nine from ii (at indices WS(is, 0) .. WS(is, 8)) and writes nine
 * values to ro and nine to io (at indices WS(os, 0) .. WS(os, 8)).
 * After every iteration ri/ii advance by ivs and ro/io advance by ovs.
 *
 * NOTE(review): ri/ii and ro/io are presumably the real and imaginary
 * parts of the input and output of a size-9 DFT (the generator command
 * line says "-n 9") -- confirm against dft/codelet-dft.h.
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file; presumably
 * FMA(a, b, c) is a*b + c and FNMS(a, b, c) is c - a*b -- confirm.
 */
static void n1_9(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
207
{
208
     /* Numeric constants used below; KP866025403 is numerically sqrt(3)/2
        and KP500000000 is exactly 1/2. */
     DK(KP939692620, +0.939692620785908384054109277324731469936208134);
209
     DK(KP342020143, +0.342020143325668733044099614682259580763083368);
210
     DK(KP984807753, +0.984807753012208059366743024589523013670643252);
211
     DK(KP173648177, +0.173648177666930348851716626769314796000375677);
212
     DK(KP642787609, +0.642787609686539326322643409907263432907559884);
213
     DK(KP766044443, +0.766044443118978035202392650555416673935832457);
214
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
215
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
216
     {
217
          INT i;
218
          /* One pass per transform: count down from v, stepping the input
             pointers by ivs and the output pointers by ovs. */
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(36, is), MAKE_VOLATILE_STRIDE(36, os)) {
219
               /* Intermediates that are shared between the nested scopes below. */
               E T5, TO, Th, Tk, T1g, TR, Ta, T1c, Tq, TW, Tv, TX, Tf, T1d, TB;
220
               E T10, TG, TZ;
221
               /* ri at positions 0, 3, 6: sum (T5), scaled difference (TO),
                  half-sum correction (Th). */
               {
222
                    E T1, T2, T3, T4;
223
                    T1 = ri[0];
224
                    T2 = ri[WS(is, 3)];
225
                    T3 = ri[WS(is, 6)];
226
                    T4 = T2 + T3;
227
                    T5 = T1 + T4;
228
                    TO = KP866025403 * (T3 - T2);
229
                    Th = FNMS(KP500000000, T4, T1);
230
               }
231
               /* ii at positions 0, 3, 6: same combination as the ri group above. */
               {
232
                    E TP, Ti, Tj, TQ;
233
                    TP = ii[0];
234
                    Ti = ii[WS(is, 3)];
235
                    Tj = ii[WS(is, 6)];
236
                    TQ = Ti + Tj;
237
                    Tk = KP866025403 * (Ti - Tj);
238
                    T1g = TP + TQ;
239
                    TR = FNMS(KP500000000, TQ, TP);
240
               }
241
               /* ri and ii at positions 1, 4, 7: sums (Ta, T1c) and the
                  combined terms Tq, TW, Tv, TX. */
               {
242
                    E T6, Ts, T9, Tr, Tp, Tt, Tm, Tu;
243
                    T6 = ri[WS(is, 1)];
244
                    Ts = ii[WS(is, 1)];
245
                    {
246
                         E T7, T8, Tn, To;
247
                         T7 = ri[WS(is, 4)];
248
                         T8 = ri[WS(is, 7)];
249
                         T9 = T7 + T8;
250
                         Tr = KP866025403 * (T8 - T7);
251
                         Tn = ii[WS(is, 4)];
252
                         To = ii[WS(is, 7)];
253
                         Tp = KP866025403 * (Tn - To);
254
                         Tt = Tn + To;
255
                    }
256
                    Ta = T6 + T9;
257
                    T1c = Ts + Tt;
258
                    Tm = FNMS(KP500000000, T9, T6);
259
                    Tq = Tm + Tp;
260
                    TW = Tm - Tp;
261
                    Tu = FNMS(KP500000000, Tt, Ts);
262
                    Tv = Tr + Tu;
263
                    TX = Tu - Tr;
264
               }
265
               /* ri and ii at positions 2, 5, 8: sums (Tf, T1d) and the
                  combined terms TB, T10, TG, TZ. */
               {
266
                    E Tb, TD, Te, TC, TA, TE, Tx, TF;
267
                    Tb = ri[WS(is, 2)];
268
                    TD = ii[WS(is, 2)];
269
                    {
270
                         E Tc, Td, Ty, Tz;
271
                         Tc = ri[WS(is, 5)];
272
                         Td = ri[WS(is, 8)];
273
                         Te = Tc + Td;
274
                         TC = KP866025403 * (Td - Tc);
275
                         Ty = ii[WS(is, 5)];
276
                         Tz = ii[WS(is, 8)];
277
                         TA = KP866025403 * (Ty - Tz);
278
                         TE = Ty + Tz;
279
                    }
280
                    Tf = Tb + Te;
281
                    T1d = TD + TE;
282
                    Tx = FNMS(KP500000000, Te, Tb);
283
                    TB = Tx + TA;
284
                    T10 = Tx - TA;
285
                    TF = FNMS(KP500000000, TE, TD);
286
                    TG = TC + TF;
287
                    TZ = TF - TC;
288
               }
289
               /* Outputs 0, 3, 6 of ro and io, built from the three group sums
                  (T5/Ta/Tf for ro[0], T1g/T1c/T1d for io[0]). */
               {
290
                    E T1e, Tg, T1b, T1f, T1h, T1i;
291
                    T1e = KP866025403 * (T1c - T1d);
292
                    Tg = Ta + Tf;
293
                    T1b = FNMS(KP500000000, Tg, T5);
294
                    ro[0] = T5 + Tg;
295
                    ro[WS(os, 3)] = T1b + T1e;
296
                    ro[WS(os, 6)] = T1b - T1e;
297
                    T1f = KP866025403 * (Tf - Ta);
298
                    T1h = T1c + T1d;
299
                    T1i = FNMS(KP500000000, T1h, T1g);
300
                    io[WS(os, 3)] = T1f + T1i;
301
                    io[0] = T1g + T1h;
302
                    io[WS(os, 6)] = T1i - T1f;
303
               }
304
               /* Outputs 1, 4, 7 of ro and io, from Tl/TS and the
                  intermediates Tq, Tv, TB, TG. */
               {
305
                    E Tl, TS, TI, TN, TM, TT, TJ, TU;
306
                    Tl = Th + Tk;
307
                    TS = TO + TR;
308
                    {
309
                         E Tw, TH, TK, TL;
310
                         Tw = FMA(KP766044443, Tq, KP642787609 * Tv);
311
                         TH = FMA(KP173648177, TB, KP984807753 * TG);
312
                         TI = Tw + TH;
313
                         TN = KP866025403 * (TH - Tw);
314
                         TK = FNMS(KP642787609, Tq, KP766044443 * Tv);
315
                         TL = FNMS(KP984807753, TB, KP173648177 * TG);
316
                         TM = KP866025403 * (TK - TL);
317
                         TT = TK + TL;
318
                    }
319
                    ro[WS(os, 1)] = Tl + TI;
320
                    io[WS(os, 1)] = TS + TT;
321
                    TJ = FNMS(KP500000000, TI, Tl);
322
                    ro[WS(os, 7)] = TJ - TM;
323
                    ro[WS(os, 4)] = TJ + TM;
324
                    TU = FNMS(KP500000000, TT, TS);
325
                    io[WS(os, 4)] = TN + TU;
326
                    io[WS(os, 7)] = TU - TN;
327
               }
328
               /* Outputs 2, 5, 8 of ro and io, from TV/T14 and the
                  intermediates TW, TX, T10, TZ. */
               {
329
                    E TV, T14, T12, T13, T17, T1a, T18, T19;
330
                    TV = Th - Tk;
331
                    T14 = TR - TO;
332
                    {
333
                         E TY, T11, T15, T16;
334
                         TY = FMA(KP173648177, TW, KP984807753 * TX);
335
                         T11 = FNMS(KP939692620, T10, KP342020143 * TZ);
336
                         T12 = TY + T11;
337
                         T13 = KP866025403 * (T11 - TY);
338
                         T15 = FNMS(KP984807753, TW, KP173648177 * TX);
339
                         T16 = FMA(KP342020143, T10, KP939692620 * TZ);
340
                         T17 = T15 - T16;
341
                         T1a = KP866025403 * (T15 + T16);
342
                    }
343
                    ro[WS(os, 2)] = TV + T12;
344
                    io[WS(os, 2)] = T14 + T17;
345
                    T18 = FNMS(KP500000000, T17, T14);
346
                    io[WS(os, 5)] = T13 + T18;
347
                    io[WS(os, 8)] = T18 - T13;
348
                    T19 = FNMS(KP500000000, T12, TV);
349
                    ro[WS(os, 8)] = T19 - T1a;
350
                    ro[WS(os, 5)] = T19 + T1a;
351
               }
352
          }
353
     }
354
}
355

    
356
/* Descriptor handed to X(kdft_register) for the non-FMA variant: the
   value 9, the name "n1_9", and the counts {60, 20, 20, 0}, which match
   the "60 additions, 20 multiplications, 20 fused multiply/add" figures in
   the generated header comment.  NOTE(review): the meaning of the
   remaining fields (&GENUS and the trailing zeros) is set by kdft_desc,
   which is declared outside this file -- confirm there. */
static const kdft_desc desc = { 9, "n1_9", {60, 20, 20, 0}, &GENUS, 0, 0, 0, 0 };
357

    
358
/* Registration entry point: passes the n1_9 routine and its descriptor
   to X(kdft_register) for planner p. */
void X(codelet_n1_9) (planner *p) {
359
     X(kdft_register) (p, n1_9, &desc);
360
}
361

    
362
#endif