To check out this repository, run `hg clone` on the following URL, or open the URL in EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / n1_12.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (11 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:10 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 12 -name n1_12 -include dft/scalar/n.h */
29

    
30
/*
 * This function contains 96 FP additions, 24 FP multiplications,
 * (or, 72 additions, 0 multiplications, 24 fused multiply/add),
 * 43 stack variables, 2 constants, and 48 memory accesses
 */
35
#include "dft/scalar/n.h"
36

    
37
/*
 * n1_12 (variant compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Machine-generated by genfft (gen_notw, -n 12): regenerate rather than
 * editing the arithmetic by hand.
 *
 * For each of `v` iterations the loop body reads twelve values from ri[]
 * and twelve from ii[] (index 0 and WS(is, k) for k = 1..11) and writes
 * twelve values to ro[] and twelve to io[] (index 0 and WS(os, k) for
 * k = 1..11).  After each iteration ri/ii advance by `ivs` and ro/io
 * advance by `ovs`.
 *
 * NOTE(review): ri/ii and ro/io are presumably the real and imaginary
 * parts of the input and output of a size-12 complex DFT (per the
 * generator's "-n 12" option) -- confirm against the codelet interface.
 * R, E, stride, INT, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE come from
 * project headers not visible here; FMA(a, b, c) / FNMS(a, b, c) are
 * presumably a*b + c and c - a*b -- TODO confirm.
 */
static void n1_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     /* 0.8660254037844386... is numerically sqrt(3)/2. */
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     {
          INT i;
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(48, is), MAKE_VOLATILE_STRIDE(48, os)) {
               E T5, TR, TA, Ts, TS, Tz, Ta, TU, TD, Tx, TV, TC, Tg, T1d, TG;
               E TJ, T1u, T1c, Tl, T1i, TL, TO, T1v, T1h;
               /*
                * Stage 1: eight three-input groups.  Each group loads
                * three inputs, then forms their total, a difference of
                * two of them, and an FNMS term scaled by 0.5.
                */
               {
                    /* ri at indices 0, 4, 8 */
                    E T1, T2, T3, T4;
                    T1 = ri[0];
                    T2 = ri[WS(is, 4)];
                    T3 = ri[WS(is, 8)];
                    T4 = T2 + T3;
                    T5 = T1 + T4;
                    TR = FNMS(KP500000000, T4, T1);
                    TA = T3 - T2;
               }
               {
                    /* ii at indices 0, 4, 8 */
                    E To, Tp, Tq, Tr;
                    To = ii[0];
                    Tp = ii[WS(is, 4)];
                    Tq = ii[WS(is, 8)];
                    Tr = Tp + Tq;
                    Ts = To + Tr;
                    TS = Tp - Tq;
                    Tz = FNMS(KP500000000, Tr, To);
               }
               {
                    /* ri at indices 6, 10, 2 */
                    E T6, T7, T8, T9;
                    T6 = ri[WS(is, 6)];
                    T7 = ri[WS(is, 10)];
                    T8 = ri[WS(is, 2)];
                    T9 = T7 + T8;
                    Ta = T6 + T9;
                    TU = FNMS(KP500000000, T9, T6);
                    TD = T8 - T7;
               }
               {
                    /* ii at indices 6, 10, 2 */
                    E Tt, Tu, Tv, Tw;
                    Tt = ii[WS(is, 6)];
                    Tu = ii[WS(is, 10)];
                    Tv = ii[WS(is, 2)];
                    Tw = Tu + Tv;
                    Tx = Tt + Tw;
                    TV = Tu - Tv;
                    TC = FNMS(KP500000000, Tw, Tt);
               }
               {
                    /* ri at indices 3, 7, 11 */
                    E Tc, Td, Te, Tf;
                    Tc = ri[WS(is, 3)];
                    Td = ri[WS(is, 7)];
                    Te = ri[WS(is, 11)];
                    Tf = Td + Te;
                    Tg = Tc + Tf;
                    T1d = Te - Td;
                    TG = FNMS(KP500000000, Tf, Tc);
               }
               {
                    /* ii at indices 3, 7, 11 */
                    E T1a, TH, TI, T1b;
                    T1a = ii[WS(is, 3)];
                    TH = ii[WS(is, 7)];
                    TI = ii[WS(is, 11)];
                    T1b = TH + TI;
                    TJ = TH - TI;
                    T1u = T1a + T1b;
                    T1c = FNMS(KP500000000, T1b, T1a);
               }
               {
                    /* ri at indices 9, 1, 5 */
                    E Th, Ti, Tj, Tk;
                    Th = ri[WS(is, 9)];
                    Ti = ri[WS(is, 1)];
                    Tj = ri[WS(is, 5)];
                    Tk = Ti + Tj;
                    Tl = Th + Tk;
                    T1i = Tj - Ti;
                    TL = FNMS(KP500000000, Tk, Th);
               }
               {
                    /* ii at indices 9, 1, 5 */
                    E T1f, TM, TN, T1g;
                    T1f = ii[WS(is, 9)];
                    TM = ii[WS(is, 1)];
                    TN = ii[WS(is, 5)];
                    T1g = TM + TN;
                    TO = TM - TN;
                    T1v = T1f + T1g;
                    T1h = FNMS(KP500000000, T1g, T1f);
               }
               /* Stage 2: combine the group results into the 12 outputs. */
               {
                    E Tb, Tm, T1t, T1w;
                    /* Outputs 0, 3, 6, 9 use only the group totals. */
                    Tb = T5 + Ta;
                    Tm = Tg + Tl;
                    ro[WS(os, 6)] = Tb - Tm;
                    ro[0] = Tb + Tm;
                    {
                         E T1x, T1y, Tn, Ty;
                         T1x = Ts + Tx;
                         T1y = T1u + T1v;
                         io[WS(os, 6)] = T1x - T1y;
                         io[0] = T1x + T1y;
                         Tn = Tg - Tl;
                         Ty = Ts - Tx;
                         io[WS(os, 3)] = Tn + Ty;
                         io[WS(os, 9)] = Ty - Tn;
                    }
                    T1t = T5 - Ta;
                    T1w = T1u - T1v;
                    ro[WS(os, 3)] = T1t - T1w;
                    ro[WS(os, 9)] = T1t + T1w;
                    {
                         /* Outputs 1, 4, 7, 10: FMA terms with KP866025403. */
                         E T11, T1l, T1k, T1m, T14, T18, T17, T19;
                         {
                              E TZ, T10, T1e, T1j;
                              TZ = FMA(KP866025403, TA, Tz);
                              T10 = FMA(KP866025403, TD, TC);
                              T11 = TZ - T10;
                              T1l = TZ + T10;
                              T1e = FMA(KP866025403, T1d, T1c);
                              T1j = FMA(KP866025403, T1i, T1h);
                              T1k = T1e - T1j;
                              T1m = T1e + T1j;
                         }
                         {
                              E T12, T13, T15, T16;
                              T12 = FMA(KP866025403, TJ, TG);
                              T13 = FMA(KP866025403, TO, TL);
                              T14 = T12 - T13;
                              T18 = T12 + T13;
                              T15 = FMA(KP866025403, TS, TR);
                              T16 = FMA(KP866025403, TV, TU);
                              T17 = T15 + T16;
                              T19 = T15 - T16;
                         }
                         io[WS(os, 1)] = T11 - T14;
                         ro[WS(os, 1)] = T19 + T1k;
                         io[WS(os, 7)] = T11 + T14;
                         ro[WS(os, 7)] = T19 - T1k;
                         ro[WS(os, 10)] = T17 - T18;
                         io[WS(os, 10)] = T1l - T1m;
                         ro[WS(os, 4)] = T17 + T18;
                         io[WS(os, 4)] = T1l + T1m;
                    }
                    {
                         /* Outputs 2, 5, 8, 11: FNMS terms with KP866025403. */
                         E TF, T1r, T1q, T1s, TQ, TY, TX, T1n;
                         {
                              E TB, TE, T1o, T1p;
                              TB = FNMS(KP866025403, TA, Tz);
                              TE = FNMS(KP866025403, TD, TC);
                              TF = TB - TE;
                              T1r = TB + TE;
                              T1o = FNMS(KP866025403, T1d, T1c);
                              T1p = FNMS(KP866025403, T1i, T1h);
                              T1q = T1o - T1p;
                              T1s = T1o + T1p;
                         }
                         {
                              E TK, TP, TT, TW;
                              TK = FNMS(KP866025403, TJ, TG);
                              TP = FNMS(KP866025403, TO, TL);
                              TQ = TK - TP;
                              TY = TK + TP;
                              TT = FNMS(KP866025403, TS, TR);
                              TW = FNMS(KP866025403, TV, TU);
                              TX = TT + TW;
                              T1n = TT - TW;
                         }
                         io[WS(os, 5)] = TF - TQ;
                         ro[WS(os, 5)] = T1n + T1q;
                         io[WS(os, 11)] = TF + TQ;
                         ro[WS(os, 11)] = T1n - T1q;
                         ro[WS(os, 2)] = TX - TY;
                         io[WS(os, 2)] = T1r - T1s;
                         ro[WS(os, 8)] = TX + TY;
                         io[WS(os, 8)] = T1r + T1s;
                    }
               }
          }
     }
}
217

    
218
/*
 * Descriptor handed to X(kdft_register) for the FMA variant of n1_12.
 * NOTE(review): field meanings are defined by kdft_desc in the project
 * headers (not visible here); the notes below are inferred -- confirm.
 */
static const kdft_desc desc = {
     12,              /* presumably the transform size (generator option -n 12) */
     "n1_12",         /* codelet name string */
     {72, 0, 24, 0},  /* first three match the 72 add / 0 mul / 24 fma counts quoted for this variant */
     &GENUS,
     0, 0, 0, 0
};
219

    
220
/*
 * Registration entry point for this codelet: passes the n1_12 kernel and
 * its descriptor `desc` to X(kdft_register) together with planner `p`.
 * NOTE(review): X() is a project macro (presumably a name-prefixing
 * wrapper) -- confirm in the project headers.
 */
void X(codelet_n1_12) (planner *p) {
     X(kdft_register) (p, n1_12, &desc);
}
223

    
224
#else
225

    
226
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 12 -name n1_12 -include dft/scalar/n.h */
227

    
228
/*
 * This function contains 96 FP additions, 16 FP multiplications,
 * (or, 88 additions, 8 multiplications, 8 fused multiply/add),
 * 43 stack variables, 2 constants, and 48 memory accesses
 */
233
#include "dft/scalar/n.h"
234

    
235
/*
 * n1_12 (variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Machine-generated by genfft (gen_notw, -n 12): regenerate rather than
 * editing the arithmetic by hand.
 *
 * For each of `v` iterations the loop body reads twelve values from ri[]
 * and twelve from ii[] (index 0 and WS(is, k) for k = 1..11) and writes
 * twelve values to ro[] and twelve to io[] (index 0 and WS(os, k) for
 * k = 1..11).  After each iteration ri/ii advance by `ivs` and ro/io
 * advance by `ovs`.
 *
 * In this variant the KP866025403 scaling is applied with an explicit
 * multiply when each input difference is formed, so the combination
 * stage uses only additions and subtractions.
 *
 * NOTE(review): ri/ii and ro/io are presumably the real and imaginary
 * parts of the input and output of a size-12 complex DFT (per the
 * generator's "-n 12" option) -- confirm against the codelet interface.
 * R, E, stride, INT, DK, WS, FNMS and MAKE_VOLATILE_STRIDE come from
 * project headers not visible here; FNMS(a, b, c) is presumably
 * c - a*b -- TODO confirm.
 */
static void n1_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     /* 0.8660254037844386... is numerically sqrt(3)/2. */
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     {
          INT i;
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(48, is), MAKE_VOLATILE_STRIDE(48, os)) {
               E T5, TR, TA, Ts, TS, Tz, Ta, TU, TD, Tx, TV, TC, Tg, T1a, TG;
               E TJ, T1u, T1d, Tl, T1f, TL, TO, T1v, T1i;
               /*
                * Stage 1: eight three-input groups.  Each group loads
                * three inputs, then forms their total, a difference of
                * two of them scaled by KP866025403, and an FNMS term
                * scaled by 0.5.
                */
               {
                    /* ri at indices 0, 4, 8 */
                    E T1, T2, T3, T4;
                    T1 = ri[0];
                    T2 = ri[WS(is, 4)];
                    T3 = ri[WS(is, 8)];
                    T4 = T2 + T3;
                    T5 = T1 + T4;
                    TR = FNMS(KP500000000, T4, T1);
                    TA = KP866025403 * (T3 - T2);
               }
               {
                    /* ii at indices 0, 4, 8 */
                    E To, Tp, Tq, Tr;
                    To = ii[0];
                    Tp = ii[WS(is, 4)];
                    Tq = ii[WS(is, 8)];
                    Tr = Tp + Tq;
                    Ts = To + Tr;
                    TS = KP866025403 * (Tp - Tq);
                    Tz = FNMS(KP500000000, Tr, To);
               }
               {
                    /* ri at indices 6, 10, 2 */
                    E T6, T7, T8, T9;
                    T6 = ri[WS(is, 6)];
                    T7 = ri[WS(is, 10)];
                    T8 = ri[WS(is, 2)];
                    T9 = T7 + T8;
                    Ta = T6 + T9;
                    TU = FNMS(KP500000000, T9, T6);
                    TD = KP866025403 * (T8 - T7);
               }
               {
                    /* ii at indices 6, 10, 2 */
                    E Tt, Tu, Tv, Tw;
                    Tt = ii[WS(is, 6)];
                    Tu = ii[WS(is, 10)];
                    Tv = ii[WS(is, 2)];
                    Tw = Tu + Tv;
                    Tx = Tt + Tw;
                    TV = KP866025403 * (Tu - Tv);
                    TC = FNMS(KP500000000, Tw, Tt);
               }
               {
                    /* ri at indices 3, 7, 11 */
                    E Tc, Td, Te, Tf;
                    Tc = ri[WS(is, 3)];
                    Td = ri[WS(is, 7)];
                    Te = ri[WS(is, 11)];
                    Tf = Td + Te;
                    Tg = Tc + Tf;
                    T1a = KP866025403 * (Te - Td);
                    TG = FNMS(KP500000000, Tf, Tc);
               }
               {
                    /* ii at indices 3, 7, 11 */
                    E T1b, TH, TI, T1c;
                    T1b = ii[WS(is, 3)];
                    TH = ii[WS(is, 7)];
                    TI = ii[WS(is, 11)];
                    T1c = TH + TI;
                    TJ = KP866025403 * (TH - TI);
                    T1u = T1b + T1c;
                    T1d = FNMS(KP500000000, T1c, T1b);
               }
               {
                    /* ri at indices 9, 1, 5 */
                    E Th, Ti, Tj, Tk;
                    Th = ri[WS(is, 9)];
                    Ti = ri[WS(is, 1)];
                    Tj = ri[WS(is, 5)];
                    Tk = Ti + Tj;
                    Tl = Th + Tk;
                    T1f = KP866025403 * (Tj - Ti);
                    TL = FNMS(KP500000000, Tk, Th);
               }
               {
                    /* ii at indices 9, 1, 5 */
                    E T1g, TM, TN, T1h;
                    T1g = ii[WS(is, 9)];
                    TM = ii[WS(is, 1)];
                    TN = ii[WS(is, 5)];
                    T1h = TM + TN;
                    TO = KP866025403 * (TM - TN);
                    T1v = T1g + T1h;
                    T1i = FNMS(KP500000000, T1h, T1g);
               }
               /* Stage 2: combine the group results into the 12 outputs. */
               {
                    E Tb, Tm, T1t, T1w;
                    /* Outputs 0, 3, 6, 9 use only the group totals. */
                    Tb = T5 + Ta;
                    Tm = Tg + Tl;
                    ro[WS(os, 6)] = Tb - Tm;
                    ro[0] = Tb + Tm;
                    {
                         E T1x, T1y, Tn, Ty;
                         T1x = Ts + Tx;
                         T1y = T1u + T1v;
                         io[WS(os, 6)] = T1x - T1y;
                         io[0] = T1x + T1y;
                         Tn = Tg - Tl;
                         Ty = Ts - Tx;
                         io[WS(os, 3)] = Tn + Ty;
                         io[WS(os, 9)] = Ty - Tn;
                    }
                    T1t = T5 - Ta;
                    T1w = T1u - T1v;
                    ro[WS(os, 3)] = T1t - T1w;
                    ro[WS(os, 9)] = T1t + T1w;
                    {
                         /* Outputs 1, 4, 7, 10: scaled differences added. */
                         E T11, T1l, T1k, T1m, T14, T18, T17, T19;
                         {
                              E TZ, T10, T1e, T1j;
                              TZ = TA + Tz;
                              T10 = TD + TC;
                              T11 = TZ - T10;
                              T1l = TZ + T10;
                              T1e = T1a + T1d;
                              T1j = T1f + T1i;
                              T1k = T1e - T1j;
                              T1m = T1e + T1j;
                         }
                         {
                              E T12, T13, T15, T16;
                              T12 = TG + TJ;
                              T13 = TL + TO;
                              T14 = T12 - T13;
                              T18 = T12 + T13;
                              T15 = TR + TS;
                              T16 = TU + TV;
                              T17 = T15 + T16;
                              T19 = T15 - T16;
                         }
                         io[WS(os, 1)] = T11 - T14;
                         ro[WS(os, 1)] = T19 + T1k;
                         io[WS(os, 7)] = T11 + T14;
                         ro[WS(os, 7)] = T19 - T1k;
                         ro[WS(os, 10)] = T17 - T18;
                         io[WS(os, 10)] = T1l - T1m;
                         ro[WS(os, 4)] = T17 + T18;
                         io[WS(os, 4)] = T1l + T1m;
                    }
                    {
                         /* Outputs 2, 5, 8, 11: scaled differences subtracted. */
                         E TF, T1r, T1q, T1s, TQ, TY, TX, T1n;
                         {
                              E TB, TE, T1o, T1p;
                              TB = Tz - TA;
                              TE = TC - TD;
                              TF = TB - TE;
                              T1r = TB + TE;
                              T1o = T1d - T1a;
                              T1p = T1i - T1f;
                              T1q = T1o - T1p;
                              T1s = T1o + T1p;
                         }
                         {
                              E TK, TP, TT, TW;
                              TK = TG - TJ;
                              TP = TL - TO;
                              TQ = TK - TP;
                              TY = TK + TP;
                              TT = TR - TS;
                              TW = TU - TV;
                              TX = TT + TW;
                              T1n = TT - TW;
                         }
                         io[WS(os, 5)] = TF - TQ;
                         ro[WS(os, 5)] = T1n + T1q;
                         io[WS(os, 11)] = TF + TQ;
                         ro[WS(os, 11)] = T1n - T1q;
                         ro[WS(os, 2)] = TX - TY;
                         io[WS(os, 2)] = T1r - T1s;
                         ro[WS(os, 8)] = TX + TY;
                         io[WS(os, 8)] = T1r + T1s;
                    }
               }
          }
     }
}
415

    
416
/*
 * Descriptor handed to X(kdft_register) for the non-FMA variant of n1_12.
 * NOTE(review): field meanings are defined by kdft_desc in the project
 * headers (not visible here); the notes below are inferred -- confirm.
 */
static const kdft_desc desc = {
     12,             /* presumably the transform size (generator option -n 12) */
     "n1_12",        /* codelet name string */
     {88, 8, 8, 0},  /* first three match the 88 add / 8 mul / 8 fma counts quoted for this variant */
     &GENUS,
     0, 0, 0, 0
};
417

    
418
/*
 * Registration entry point for this codelet: passes the n1_12 kernel and
 * its descriptor `desc` to X(kdft_register) together with planner `p`.
 * NOTE(review): X() is a project macro (presumably a name-prefixing
 * wrapper) -- confirm in the project headers.
 */
void X(codelet_n1_12) (planner *p) {
     X(kdft_register) (p, n1_12, &desc);
}
421

    
422
#endif