To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / n1_10.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (10.2 KB)

/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
20

    
/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Thu May 24 08:04:10 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 10 -name n1_10 -include dft/scalar/n.h */

/*
 * This function contains 84 FP additions, 36 FP multiplications,
 * (or, 48 additions, 0 multiplications, 36 fused multiply/add),
 * 41 stack variables, 4 constants, and 40 memory accesses
 */
35
#include "dft/scalar/n.h"
36

    
37
/*
 * n1_10 (FMA variant) -- machine-generated codelet; see the generator
 * command line (gen_notw ... -fma ... -n 10) in the comment preceding it.
 *
 * Each pass of the loop reads ten values from ri[] and ten from ii[] at
 * offsets 0 and WS(is, 1..9), and writes ten values to ro[] and ten to
 * io[] at offsets 0 and WS(os, 1..9).
 *
 * Parameters:
 *   ri, ii   input arrays (read only); presumably the real and imaginary
 *            parts of the input -- confirm against dft/codelet-dft.h
 *   ro, io   output arrays; presumably real and imaginary parts
 *   is, os   strides passed to WS() to index the inputs / outputs
 *   v        number of loop passes (nothing is done when v <= 0)
 *   ivs, ovs amount added to the input / output pointers after each pass
 *
 * E, R, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are
 * declared elsewhere in the project.  NOTE(review): the comments below
 * assume FMA(a, b, c) = a * b + c and FNMS(a, b, c) = c - a * b; confirm
 * against the project headers.
 *
 * The statement order and the exact form of every expression are kept as
 * generated so floating-point results are unchanged.
 */
static void n1_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     /* Numeric constants; the values are close to sin(2*pi/5), sqrt(5)/4,
        1/4 and (sqrt(5) - 1)/2 respectively. */
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
     {
          INT i;
          /* One pass per count in v; the pointers advance by ivs / ovs. */
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) {
               E T3, Tj, TN, T1b, TU, TV, T1j, T1i, Tm, Tp, Tq, Ta, Th, Ti, TA;
               E TH, T17, T14, T1c, T1d, T1e, TO, TP, TQ;
               /* Inputs 0 and 5: difference (T3, TN) and sum (Tj, T1b). */
               {
                    E T1, T2, TL, TM;
                    T1 = ri[0];
                    T2 = ri[WS(is, 5)];
                    T3 = T1 - T2;
                    Tj = T1 + T2;
                    TL = ii[0];
                    TM = ii[WS(is, 5)];
                    TN = TL - TM;
                    T1b = TL + TM;
               }
               /* ri[] pairs (2,7), (6,1), (8,3), (4,9): differences and sums,
                  then combinations of those. */
               {
                    E T6, Tk, Tg, To, T9, Tl, Td, Tn;
                    {
                         E T4, T5, Te, Tf;
                         T4 = ri[WS(is, 2)];
                         T5 = ri[WS(is, 7)];
                         T6 = T4 - T5;
                         Tk = T4 + T5;
                         Te = ri[WS(is, 6)];
                         Tf = ri[WS(is, 1)];
                         Tg = Te - Tf;
                         To = Te + Tf;
                    }
                    {
                         E T7, T8, Tb, Tc;
                         T7 = ri[WS(is, 8)];
                         T8 = ri[WS(is, 3)];
                         T9 = T7 - T8;
                         Tl = T7 + T8;
                         Tb = ri[WS(is, 4)];
                         Tc = ri[WS(is, 9)];
                         Td = Tb - Tc;
                         Tn = Tb + Tc;
                    }
                    TU = T6 - T9;
                    TV = Td - Tg;
                    T1j = Tk - Tl;
                    T1i = Tn - To;
                    Tm = Tk + Tl;
                    Tp = Tn + To;
                    Tq = Tm + Tp;   /* sum of the four pair sums */
                    Ta = T6 + T9;
                    Th = Td + Tg;
                    Ti = Ta + Th;   /* sum of the four pair differences */
               }
               /* Same pairing applied to ii[]. */
               {
                    E Tw, T15, TG, T13, Tz, T16, TD, T12;
                    {
                         E Tu, Tv, TE, TF;
                         Tu = ii[WS(is, 2)];
                         Tv = ii[WS(is, 7)];
                         Tw = Tu - Tv;
                         T15 = Tu + Tv;
                         TE = ii[WS(is, 6)];
                         TF = ii[WS(is, 1)];
                         TG = TE - TF;
                         T13 = TE + TF;
                    }
                    {
                         E Tx, Ty, TB, TC;
                         Tx = ii[WS(is, 8)];
                         Ty = ii[WS(is, 3)];
                         Tz = Tx - Ty;
                         T16 = Tx + Ty;
                         TB = ii[WS(is, 4)];
                         TC = ii[WS(is, 9)];
                         TD = TB - TC;
                         T12 = TB + TC;
                    }
                    TA = Tw - Tz;
                    TH = TD - TG;
                    T17 = T15 - T16;
                    T14 = T12 - T13;
                    T1c = T15 + T16;
                    T1d = T12 + T13;
                    T1e = T1c + T1d;
                    TO = Tw + Tz;
                    TP = TD + TG;
                    TQ = TO + TP;
               }
               /* Output 5: even-indexed inputs minus odd-indexed inputs.
                  Output 0: sum of all ten inputs. */
               ro[WS(os, 5)] = T3 + Ti;
               io[WS(os, 5)] = TN + TQ;
               ro[0] = Tj + Tq;
               io[0] = T1b + T1e;
               /* ro[] at outputs 1, 3, 7, 9: built from the ri[] differences
                  (T3, Ta, Th) and the ii[] differences (TA, TH). */
               {
                    E TI, TK, Tt, TJ, Tr, Ts;
                    TI = FMA(KP618033988, TH, TA);
                    TK = FNMS(KP618033988, TA, TH);
                    Tr = FNMS(KP250000000, Ti, T3);
                    Ts = Ta - Th;
                    Tt = FMA(KP559016994, Ts, Tr);
                    TJ = FNMS(KP559016994, Ts, Tr);
                    ro[WS(os, 9)] = FNMS(KP951056516, TI, Tt);
                    ro[WS(os, 3)] = FMA(KP951056516, TK, TJ);
                    ro[WS(os, 1)] = FMA(KP951056516, TI, Tt);
                    ro[WS(os, 7)] = FNMS(KP951056516, TK, TJ);
               }
               /* io[] at outputs 1, 3, 7, 9: built from the ii[] differences
                  (TN, TO, TP) and the ri[] differences (TU, TV). */
               {
                    E TW, TY, TT, TX, TR, TS;
                    TW = FMA(KP618033988, TV, TU);
                    TY = FNMS(KP618033988, TU, TV);
                    TR = FNMS(KP250000000, TQ, TN);
                    TS = TO - TP;
                    TT = FMA(KP559016994, TS, TR);
                    TX = FNMS(KP559016994, TS, TR);
                    io[WS(os, 1)] = FNMS(KP951056516, TW, TT);
                    io[WS(os, 7)] = FMA(KP951056516, TY, TX);
                    io[WS(os, 9)] = FMA(KP951056516, TW, TT);
                    io[WS(os, 3)] = FNMS(KP951056516, TY, TX);
               }
               /* ro[] at outputs 2, 4, 6, 8: built from the ri[] sums
                  (Tj, Tm, Tp) and the ii[] sum differences (T14, T17). */
               {
                    E T18, T1a, T11, T19, TZ, T10;
                    T18 = FNMS(KP618033988, T17, T14);
                    T1a = FMA(KP618033988, T14, T17);
                    TZ = FNMS(KP250000000, Tq, Tj);
                    T10 = Tm - Tp;
                    T11 = FNMS(KP559016994, T10, TZ);
                    T19 = FMA(KP559016994, T10, TZ);
                    ro[WS(os, 2)] = FNMS(KP951056516, T18, T11);
                    ro[WS(os, 6)] = FMA(KP951056516, T1a, T19);
                    ro[WS(os, 8)] = FMA(KP951056516, T18, T11);
                    ro[WS(os, 4)] = FNMS(KP951056516, T1a, T19);
               }
               /* io[] at outputs 2, 4, 6, 8: built from the ii[] sums
                  (T1b, T1c, T1d) and the ri[] sum differences (T1i, T1j). */
               {
                    E T1k, T1m, T1h, T1l, T1f, T1g;
                    T1k = FNMS(KP618033988, T1j, T1i);
                    T1m = FMA(KP618033988, T1i, T1j);
                    T1f = FNMS(KP250000000, T1e, T1b);
                    T1g = T1c - T1d;
                    T1h = FNMS(KP559016994, T1g, T1f);
                    T1l = FMA(KP559016994, T1g, T1f);
                    io[WS(os, 2)] = FMA(KP951056516, T1k, T1h);
                    io[WS(os, 6)] = FNMS(KP951056516, T1m, T1l);
                    io[WS(os, 8)] = FNMS(KP951056516, T1k, T1h);
                    io[WS(os, 4)] = FMA(KP951056516, T1m, T1l);
               }
          }
     }
}
188

    
189
/*
 * Descriptor handed to kdft_register together with n1_10 (FMA variant).
 * kdft_desc is declared elsewhere, so the field roles noted below are
 * assumptions to confirm against its definition.
 */
static const kdft_desc desc = {
     10,                 /* presumably the transform size (generator ran with -n 10) */
     "n1_10",            /* codelet name */
     { 48, 0, 36, 0 },   /* first three match the op-count comment: 48 adds, 0 muls, 36 fused multiply/add */
     &GENUS,             /* GENUS is defined elsewhere */
     0, 0, 0, 0          /* NOTE(review): meaning not visible here -- confirm */
};
190

    
191
/*
 * Registration entry point for this codelet: passes the n1_10 function
 * and its descriptor to kdft_register for planner p.
 * X() is a project macro applied to both identifiers (presumably a
 * name-prefixing macro -- confirm in the project headers).
 */
void X(codelet_n1_10) (planner *p) {
     X(kdft_register) (p, n1_10, &desc);
}
194

    
195
#else
196

    
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 10 -name n1_10 -include dft/scalar/n.h */

/*
 * This function contains 84 FP additions, 24 FP multiplications,
 * (or, 72 additions, 12 multiplications, 12 fused multiply/add),
 * 41 stack variables, 4 constants, and 40 memory accesses
 */
204
#include "dft/scalar/n.h"
205

    
206
/*
 * n1_10 (non-FMA variant) -- machine-generated codelet; see the generator
 * command line (gen_notw ... -n 10, without -fma) in the comment
 * preceding it.
 *
 * Each pass of the loop reads ten values from ri[] and ten from ii[] at
 * offsets 0 and WS(is, 1..9), and writes ten values to ro[] and ten to
 * io[] at offsets 0 and WS(os, 1..9).
 *
 * Parameters:
 *   ri, ii   input arrays (read only); presumably the real and imaginary
 *            parts of the input -- confirm against dft/codelet-dft.h
 *   ro, io   output arrays; presumably real and imaginary parts
 *   is, os   strides passed to WS() to index the inputs / outputs
 *   v        number of loop passes (nothing is done when v <= 0)
 *   ivs, ovs amount added to the input / output pointers after each pass
 *
 * E, R, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are
 * declared elsewhere in the project.  NOTE(review): the comments below
 * assume FMA(a, b, c) = a * b + c and FNMS(a, b, c) = c - a * b; confirm
 * against the project headers.
 *
 * The statement order and the exact form of every expression are kept as
 * generated so floating-point results are unchanged.
 */
static void n1_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     /* Numeric constants; the values are close to 1/4, sqrt(5)/4,
        sin(pi/5) and sin(2*pi/5) respectively. */
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
     {
          INT i;
          /* One pass per count in v; the pointers advance by ivs / ovs. */
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) {
               E T3, Tj, TQ, T1e, TU, TV, T1c, T1b, Tm, Tp, Tq, Ta, Th, Ti, TA;
               E TH, T17, T14, T1f, T1g, T1h, TL, TM, TR;
               /* Inputs 0 and 5: difference (T3, TQ) and sum (Tj, T1e). */
               {
                    E T1, T2, TO, TP;
                    T1 = ri[0];
                    T2 = ri[WS(is, 5)];
                    T3 = T1 - T2;
                    Tj = T1 + T2;
                    TO = ii[0];
                    TP = ii[WS(is, 5)];
                    TQ = TO - TP;
                    T1e = TO + TP;
               }
               /* ri[] pairs (2,7), (6,1), (8,3), (4,9): differences and sums,
                  then combinations of those. */
               {
                    E T6, Tk, Tg, To, T9, Tl, Td, Tn;
                    {
                         E T4, T5, Te, Tf;
                         T4 = ri[WS(is, 2)];
                         T5 = ri[WS(is, 7)];
                         T6 = T4 - T5;
                         Tk = T4 + T5;
                         Te = ri[WS(is, 6)];
                         Tf = ri[WS(is, 1)];
                         Tg = Te - Tf;
                         To = Te + Tf;
                    }
                    {
                         E T7, T8, Tb, Tc;
                         T7 = ri[WS(is, 8)];
                         T8 = ri[WS(is, 3)];
                         T9 = T7 - T8;
                         Tl = T7 + T8;
                         Tb = ri[WS(is, 4)];
                         Tc = ri[WS(is, 9)];
                         Td = Tb - Tc;
                         Tn = Tb + Tc;
                    }
                    TU = T6 - T9;
                    TV = Td - Tg;
                    T1c = Tk - Tl;
                    T1b = Tn - To;
                    Tm = Tk + Tl;
                    Tp = Tn + To;
                    Tq = Tm + Tp;   /* sum of the four pair sums */
                    Ta = T6 + T9;
                    Th = Td + Tg;
                    Ti = Ta + Th;   /* sum of the four pair differences */
               }
               /* Same pairing applied to ii[]. */
               {
                    E Tw, T15, TG, T13, Tz, T16, TD, T12;
                    {
                         E Tu, Tv, TE, TF;
                         Tu = ii[WS(is, 2)];
                         Tv = ii[WS(is, 7)];
                         Tw = Tu - Tv;
                         T15 = Tu + Tv;
                         TE = ii[WS(is, 6)];
                         TF = ii[WS(is, 1)];
                         TG = TE - TF;
                         T13 = TE + TF;
                    }
                    {
                         E Tx, Ty, TB, TC;
                         Tx = ii[WS(is, 8)];
                         Ty = ii[WS(is, 3)];
                         Tz = Tx - Ty;
                         T16 = Tx + Ty;
                         TB = ii[WS(is, 4)];
                         TC = ii[WS(is, 9)];
                         TD = TB - TC;
                         T12 = TB + TC;
                    }
                    TA = Tw - Tz;
                    TH = TD - TG;
                    T17 = T15 - T16;
                    T14 = T12 - T13;
                    T1f = T15 + T16;
                    T1g = T12 + T13;
                    T1h = T1f + T1g;
                    TL = Tw + Tz;
                    TM = TD + TG;
                    TR = TL + TM;
               }
               /* Output 5: even-indexed inputs minus odd-indexed inputs.
                  Output 0: sum of all ten inputs. */
               ro[WS(os, 5)] = T3 + Ti;
               io[WS(os, 5)] = TQ + TR;
               ro[0] = Tj + Tq;
               io[0] = T1e + T1h;
               /* ro[] at outputs 1, 3, 7, 9: built from the ri[] differences
                  (T3, Ta, Th) and the ii[] differences (TA, TH). */
               {
                    E TI, TK, Tt, TJ, Tr, Ts;
                    TI = FMA(KP951056516, TA, KP587785252 * TH);
                    TK = FNMS(KP587785252, TA, KP951056516 * TH);
                    Tr = KP559016994 * (Ta - Th);
                    Ts = FNMS(KP250000000, Ti, T3);
                    Tt = Tr + Ts;
                    TJ = Ts - Tr;
                    ro[WS(os, 9)] = Tt - TI;
                    ro[WS(os, 3)] = TJ + TK;
                    ro[WS(os, 1)] = Tt + TI;
                    ro[WS(os, 7)] = TJ - TK;
               }
               /* io[] at outputs 1, 3, 7, 9: built from the ii[] differences
                  (TQ, TL, TM) and the ri[] differences (TU, TV). */
               {
                    E TW, TY, TT, TX, TN, TS;
                    TW = FMA(KP951056516, TU, KP587785252 * TV);
                    TY = FNMS(KP587785252, TU, KP951056516 * TV);
                    TN = KP559016994 * (TL - TM);
                    TS = FNMS(KP250000000, TR, TQ);
                    TT = TN + TS;
                    TX = TS - TN;
                    io[WS(os, 1)] = TT - TW;
                    io[WS(os, 7)] = TY + TX;
                    io[WS(os, 9)] = TW + TT;
                    io[WS(os, 3)] = TX - TY;
               }
               /* ro[] at outputs 2, 4, 6, 8: built from the ri[] sums
                  (Tj, Tm, Tp) and the ii[] sum differences (T14, T17). */
               {
                    E T18, T1a, T11, T19, TZ, T10;
                    T18 = FNMS(KP587785252, T17, KP951056516 * T14);
                    T1a = FMA(KP951056516, T17, KP587785252 * T14);
                    TZ = FNMS(KP250000000, Tq, Tj);
                    T10 = KP559016994 * (Tm - Tp);
                    T11 = TZ - T10;
                    T19 = T10 + TZ;
                    ro[WS(os, 2)] = T11 - T18;
                    ro[WS(os, 6)] = T19 + T1a;
                    ro[WS(os, 8)] = T11 + T18;
                    ro[WS(os, 4)] = T19 - T1a;
               }
               /* io[] at outputs 2, 4, 6, 8: built from the ii[] sums
                  (T1e, T1f, T1g) and the ri[] sum differences (T1b, T1c). */
               {
                    E T1d, T1l, T1k, T1m, T1i, T1j;
                    T1d = FNMS(KP587785252, T1c, KP951056516 * T1b);
                    T1l = FMA(KP951056516, T1c, KP587785252 * T1b);
                    T1i = FNMS(KP250000000, T1h, T1e);
                    T1j = KP559016994 * (T1f - T1g);
                    T1k = T1i - T1j;
                    T1m = T1j + T1i;
                    io[WS(os, 2)] = T1d + T1k;
                    io[WS(os, 6)] = T1m - T1l;
                    io[WS(os, 8)] = T1k - T1d;
                    io[WS(os, 4)] = T1l + T1m;
               }
          }
     }
}
357

    
358
/*
 * Descriptor handed to kdft_register together with n1_10 (non-FMA
 * variant).  kdft_desc is declared elsewhere, so the field roles noted
 * below are assumptions to confirm against its definition.
 */
static const kdft_desc desc = {
     10,                  /* presumably the transform size (generator ran with -n 10) */
     "n1_10",             /* codelet name */
     { 72, 12, 12, 0 },   /* first three match the op-count comment: 72 adds, 12 muls, 12 fused multiply/add */
     &GENUS,              /* GENUS is defined elsewhere */
     0, 0, 0, 0           /* NOTE(review): meaning not visible here -- confirm */
};
359

    
360
/*
 * Registration entry point for this codelet: passes the n1_10 function
 * and its descriptor to kdft_register for planner p.
 * X() is a project macro applied to both identifiers (presumably a
 * name-prefixing macro -- confirm in the project headers).
 */
void X(codelet_n1_10) (planner *p) {
     X(kdft_register) (p, n1_10, &desc);
}
363

    
364
#endif