To check out this repository, run `hg clone` on the following URL, or open the URL in EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / q1_3.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (9.07 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:30 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twidsq.native -fma -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 3 -name q1_3 -include dft/scalar/q.h */
29

    
30
/*
31
 * This function contains 48 FP additions, 42 FP multiplications,
32
 * (or, 18 additions, 12 multiplications, 30 fused multiply/add),
33
 * 35 stack variables, 2 constants, and 36 memory accesses
34
 */
35
#include "dft/scalar/q.h"
36

    
37
/*
 * q1_3, FMA-oriented variant (compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * For each m in [mb, me) the function works in place on a 3x3 group of
 * complex values whose real parts live in rio and imaginary parts in iio,
 * addressed as [WS(vs, j) + WS(rs, k)] with j, k in {0, 1, 2}.
 *
 * For every vs-group j, with inputs (r0, i0), (r1, i1), (r2, i2) taken at
 * rs offsets 0, 1, 2, it forms
 *     sum  : (r0 + r1 + r2,  i0 + i1 + i2)
 *     plus : (r0 - 0.5*(r1 + r2) + K*(i1 - i2),  i0 - 0.5*(i1 + i2) + K*(r2 - r1))
 *     minus: (r0 - 0.5*(r1 + r2) - K*(i1 - i2),  i0 - 0.5*(i1 + i2) - K*(r2 - r1))
 * where K = KP866025403 (numerically sqrt(3)/2).
 *
 * Results are stored with the roles of rs and vs exchanged: group j's sum
 * goes to [WS(rs, j)], its "plus" term to [WS(vs, 1) + WS(rs, j)] after
 * scaling by (W[0], W[1]), and its "minus" term to [WS(vs, 2) + WS(rs, j)]
 * after scaling by (W[2], W[3]).  The scaling of (re, im) by (wa, wb) is
 *     out_re = wa*re + wb*im,   out_im = wa*im - wb*re.
 *
 * NOTE(review): the formulas above assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b; both macros are defined outside this file.
 * The non-FMA variant of this function spells the same expressions with
 * plain +, - and * and agrees with that reading -- confirm against the
 * kernel headers.
 *
 * Parameters (types R, stride, INT are declared elsewhere):
 *   rio, iio - real / imaginary data, read and overwritten in place
 *   W        - twiddle table; 4 entries are consumed per m step
 *   rs, vs   - the two strides passed to WS() for addressing
 *   mb, me   - half-open range of the m loop
 *   ms       - element increment applied to rio and iio per m step
 */
static void q1_3(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
38
{
39
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
40
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
41
     {
42
          INT m;
43
          /* W is first advanced to the entries for m = mb (4 per step), then
             moves by 4 each iteration while rio and iio move by ms. */
          for (m = mb, W = W + (mb * 4); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 4, MAKE_VOLATILE_STRIDE(6, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
44
               E T1, T4, T6, Tg, Td, Te, T9, Tf, Tp, Ts, Tu, TE, TB, TC, Tx;
45
               E TD, TZ, T10, TV, T11, TN, TQ, TS, T12;
46
               /* Load phase: all 18 inputs are read, and the per-group sums,
                  half-sum remainders and differences are formed, before any
                  store below touches rio/iio. */
               {
47
                    E T2, T3, Tv, Tw;
48
                    T1 = rio[0];
49
                    T2 = rio[WS(rs, 1)];
50
                    T3 = rio[WS(rs, 2)];
51
                    T4 = T2 + T3;
52
                    T6 = FNMS(KP500000000, T4, T1);
53
                    Tg = T3 - T2;
54
                    {
55
                         E T7, T8, Tq, Tr;
56
                         Td = iio[0];
57
                         T7 = iio[WS(rs, 1)];
58
                         T8 = iio[WS(rs, 2)];
59
                         Te = T7 + T8;
60
                         T9 = T7 - T8;
61
                         Tf = FNMS(KP500000000, Te, Td);
62
                         Tp = rio[WS(vs, 1)];
63
                         Tq = rio[WS(vs, 1) + WS(rs, 1)];
64
                         Tr = rio[WS(vs, 1) + WS(rs, 2)];
65
                         Ts = Tq + Tr;
66
                         Tu = FNMS(KP500000000, Ts, Tp);
67
                         TE = Tr - Tq;
68
                    }
69
                    TB = iio[WS(vs, 1)];
70
                    Tv = iio[WS(vs, 1) + WS(rs, 1)];
71
                    Tw = iio[WS(vs, 1) + WS(rs, 2)];
72
                    TC = Tv + Tw;
73
                    Tx = Tv - Tw;
74
                    TD = FNMS(KP500000000, TC, TB);
75
                    {
76
                         E TT, TU, TO, TP;
77
                         TZ = iio[WS(vs, 2)];
78
                         TT = iio[WS(vs, 2) + WS(rs, 1)];
79
                         TU = iio[WS(vs, 2) + WS(rs, 2)];
80
                         T10 = TT + TU;
81
                         TV = TT - TU;
82
                         T11 = FNMS(KP500000000, T10, TZ);
83
                         TN = rio[WS(vs, 2)];
84
                         TO = rio[WS(vs, 2) + WS(rs, 1)];
85
                         TP = rio[WS(vs, 2) + WS(rs, 2)];
86
                         TQ = TO + TP;
87
                         TS = FNMS(KP500000000, TQ, TN);
88
                         T12 = TP - TO;
89
                    }
90
               }
               /* Sums of vs-groups 0, 1, 2 go to rs offsets 0, 1, 2; no W
                  factor is applied to these. */
91
               rio[0] = T1 + T4;
92
               iio[0] = Td + Te;
93
               rio[WS(rs, 1)] = Tp + Ts;
94
               iio[WS(rs, 1)] = TB + TC;
95
               iio[WS(rs, 2)] = TZ + T10;
96
               rio[WS(rs, 2)] = TN + TQ;
97
               /* Group 0, "plus" term, scaled by (W[0], W[1]). */
               {
98
                    E Ta, Th, Tb, Ti, T5, Tc;
99
                    Ta = FMA(KP866025403, T9, T6);
100
                    Th = FMA(KP866025403, Tg, Tf);
101
                    T5 = W[0];
102
                    Tb = T5 * Ta;
103
                    Ti = T5 * Th;
104
                    Tc = W[1];
105
                    rio[WS(vs, 1)] = FMA(Tc, Th, Tb);
106
                    iio[WS(vs, 1)] = FNMS(Tc, Ta, Ti);
107
               }
108
               /* Group 2, "minus" term, scaled by (W[2], W[3]). */
               {
109
                    E T16, T19, T17, T1a, T15, T18;
110
                    T16 = FNMS(KP866025403, TV, TS);
111
                    T19 = FNMS(KP866025403, T12, T11);
112
                    T15 = W[2];
113
                    T17 = T15 * T16;
114
                    T1a = T15 * T19;
115
                    T18 = W[3];
116
                    rio[WS(vs, 2) + WS(rs, 2)] = FMA(T18, T19, T17);
117
                    iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T18, T16, T1a);
118
               }
119
               /* Group 1, "minus" term, scaled by (W[2], W[3]). */
               {
120
                    E TI, TL, TJ, TM, TH, TK;
121
                    TI = FNMS(KP866025403, Tx, Tu);
122
                    TL = FNMS(KP866025403, TE, TD);
123
                    TH = W[2];
124
                    TJ = TH * TI;
125
                    TM = TH * TL;
126
                    TK = W[3];
127
                    rio[WS(vs, 2) + WS(rs, 1)] = FMA(TK, TL, TJ);
128
                    iio[WS(vs, 2) + WS(rs, 1)] = FNMS(TK, TI, TM);
129
               }
130
               /* Group 1, "plus" term, scaled by (W[0], W[1]). */
               {
131
                    E Ty, TF, Tz, TG, Tt, TA;
132
                    Ty = FMA(KP866025403, Tx, Tu);
133
                    TF = FMA(KP866025403, TE, TD);
134
                    Tt = W[0];
135
                    Tz = Tt * Ty;
136
                    TG = Tt * TF;
137
                    TA = W[1];
138
                    rio[WS(vs, 1) + WS(rs, 1)] = FMA(TA, TF, Tz);
139
                    iio[WS(vs, 1) + WS(rs, 1)] = FNMS(TA, Ty, TG);
140
               }
141
               /* Group 2, "plus" term, scaled by (W[0], W[1]). */
               {
142
                    E TW, T13, TX, T14, TR, TY;
143
                    TW = FMA(KP866025403, TV, TS);
144
                    T13 = FMA(KP866025403, T12, T11);
145
                    TR = W[0];
146
                    TX = TR * TW;
147
                    T14 = TR * T13;
148
                    TY = W[1];
149
                    rio[WS(vs, 1) + WS(rs, 2)] = FMA(TY, T13, TX);
150
                    iio[WS(vs, 1) + WS(rs, 2)] = FNMS(TY, TW, T14);
151
               }
152
               /* Group 0, "minus" term, scaled by (W[2], W[3]). */
               {
153
                    E Tk, Tn, Tl, To, Tj, Tm;
154
                    Tk = FNMS(KP866025403, T9, T6);
155
                    Tn = FNMS(KP866025403, Tg, Tf);
156
                    Tj = W[2];
157
                    Tl = Tj * Tk;
158
                    To = Tj * Tn;
159
                    Tm = W[3];
160
                    rio[WS(vs, 2)] = FMA(Tm, Tn, Tl);
161
                    iio[WS(vs, 2)] = FNMS(Tm, Tk, To);
162
               }
163
          }
164
     }
165
}
166

    
167
/*
 * Twiddle-instruction table referenced by desc (FMA branch).
 * NOTE(review): TW_FULL / TW_NEXT and the meaning of the numeric fields are
 * defined outside this file.  q1_3 reads W[0..3] and advances W by 4 per
 * m step, which presumably corresponds to the {TW_FULL, 0, 3} entry --
 * confirm against the twiddle headers.
 */
static const tw_instr twinstr[] = {
168
     {TW_FULL, 0, 3},
169
     {TW_NEXT, 1, 0}
170
};
171

    
172
/*
 * Codelet descriptor for the FMA variant: carries the value 3, the name
 * "q1_3", the twiddle-instruction table, &GENUS, and the counts
 * {18, 12, 30, 0}, whose first three entries equal the additions /
 * multiplications / fused multiply-adds listed in this variant's generated
 * header comment.
 * NOTE(review): field meanings come from ct_desc, declared elsewhere; the
 * leading 3 presumably is the transform size (generator option -n 3) --
 * confirm.
 */
static const ct_desc desc = { 3, "q1_3", twinstr, &GENUS, {18, 12, 30, 0}, 0, 0, 0 };
173

    
174
/*
 * Registration entry point (FMA branch): passes the q1_3 function and its
 * descriptor to X(kdft_difsq_register) for planner p.
 */
void X(codelet_q1_3) (planner *p) {
175
     X(kdft_difsq_register) (p, q1_3, &desc);
176
}
177
#else
178

    
179
/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 3 -name q1_3 -include dft/scalar/q.h */
180

    
181
/*
182
 * This function contains 48 FP additions, 36 FP multiplications,
183
 * (or, 30 additions, 18 multiplications, 18 fused multiply/add),
184
 * 35 stack variables, 2 constants, and 36 memory accesses
185
 */
186
#include "dft/scalar/q.h"
187

    
188
/*
 * q1_3, non-FMA variant (compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * For each m in [mb, me) the function works in place on a 3x3 group of
 * complex values whose real parts live in rio and imaginary parts in iio,
 * addressed as [WS(vs, j) + WS(rs, k)] with j, k in {0, 1, 2}.
 *
 * For every vs-group j, with inputs (r0, i0), (r1, i1), (r2, i2) taken at
 * rs offsets 0, 1, 2, it forms
 *     sum  : (r0 + r1 + r2,  i0 + i1 + i2)
 *     plus : (r0 - 0.5*(r1 + r2) + K*(i1 - i2),  i0 - 0.5*(i1 + i2) + K*(r2 - r1))
 *     minus: (r0 - 0.5*(r1 + r2) - K*(i1 - i2),  i0 - 0.5*(i1 + i2) - K*(r2 - r1))
 * where K = KP866025403 (numerically sqrt(3)/2).  Unlike the FMA variant,
 * the K-scaled differences are computed once, during the load phase.
 *
 * Results are stored with the roles of rs and vs exchanged: group j's sum
 * goes to [WS(rs, j)], its "plus" term to [WS(vs, 1) + WS(rs, j)] after
 * scaling by (W[0], W[1]), and its "minus" term to [WS(vs, 2) + WS(rs, j)]
 * after scaling by (W[2], W[3]).  The scaling of (re, im) by (wa, wb) is
 *     out_re = wa*re + wb*im,   out_im = wa*im - wb*re.
 *
 * NOTE(review): the formulas above assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b; both macros are defined outside this file --
 * confirm against the kernel headers.
 *
 * Parameters (types R, stride, INT are declared elsewhere):
 *   rio, iio - real / imaginary data, read and overwritten in place
 *   W        - twiddle table; 4 entries are consumed per m step
 *   rs, vs   - the two strides passed to WS() for addressing
 *   mb, me   - half-open range of the m loop
 *   ms       - element increment applied to rio and iio per m step
 */
static void q1_3(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
189
{
190
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
191
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
192
     {
193
          INT m;
194
          /* W is first advanced to the entries for m = mb (4 per step), then
             moves by 4 each iteration while rio and iio move by ms. */
          for (m = mb, W = W + (mb * 4); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 4, MAKE_VOLATILE_STRIDE(6, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
195
               E T1, T4, T6, Tc, Td, Te, T9, Tf, Tl, To, Tq, Tw, Tx, Ty, Tt;
196
               E Tz, TR, TS, TN, TT, TF, TI, TK, TQ;
197
               /* Load phase: all 18 inputs are read, and the per-group sums,
                  half-sum remainders and K-scaled differences are formed,
                  before any store below touches rio/iio. */
               {
198
                    E T2, T3, Tr, Ts;
199
                    T1 = rio[0];
200
                    T2 = rio[WS(rs, 1)];
201
                    T3 = rio[WS(rs, 2)];
202
                    T4 = T2 + T3;
203
                    T6 = FNMS(KP500000000, T4, T1);
204
                    Tc = KP866025403 * (T3 - T2);
205
                    {
206
                         E T7, T8, Tm, Tn;
207
                         Td = iio[0];
208
                         T7 = iio[WS(rs, 1)];
209
                         T8 = iio[WS(rs, 2)];
210
                         Te = T7 + T8;
211
                         T9 = KP866025403 * (T7 - T8);
212
                         Tf = FNMS(KP500000000, Te, Td);
213
                         Tl = rio[WS(vs, 1)];
214
                         Tm = rio[WS(vs, 1) + WS(rs, 1)];
215
                         Tn = rio[WS(vs, 1) + WS(rs, 2)];
216
                         To = Tm + Tn;
217
                         Tq = FNMS(KP500000000, To, Tl);
218
                         Tw = KP866025403 * (Tn - Tm);
219
                    }
220
                    Tx = iio[WS(vs, 1)];
221
                    Tr = iio[WS(vs, 1) + WS(rs, 1)];
222
                    Ts = iio[WS(vs, 1) + WS(rs, 2)];
223
                    Ty = Tr + Ts;
224
                    Tt = KP866025403 * (Tr - Ts);
225
                    Tz = FNMS(KP500000000, Ty, Tx);
226
                    {
227
                         E TL, TM, TG, TH;
228
                         TR = iio[WS(vs, 2)];
229
                         TL = iio[WS(vs, 2) + WS(rs, 1)];
230
                         TM = iio[WS(vs, 2) + WS(rs, 2)];
231
                         TS = TL + TM;
232
                         TN = KP866025403 * (TL - TM);
233
                         TT = FNMS(KP500000000, TS, TR);
234
                         TF = rio[WS(vs, 2)];
235
                         TG = rio[WS(vs, 2) + WS(rs, 1)];
236
                         TH = rio[WS(vs, 2) + WS(rs, 2)];
237
                         TI = TG + TH;
238
                         TK = FNMS(KP500000000, TI, TF);
239
                         TQ = KP866025403 * (TH - TG);
240
                    }
241
               }
               /* Sums of vs-groups 0, 1, 2 go to rs offsets 0, 1, 2; no W
                  factor is applied to these. */
242
               rio[0] = T1 + T4;
243
               iio[0] = Td + Te;
244
               rio[WS(rs, 1)] = Tl + To;
245
               iio[WS(rs, 1)] = Tx + Ty;
246
               iio[WS(rs, 2)] = TR + TS;
247
               rio[WS(rs, 2)] = TF + TI;
248
               /* Group 0, "plus" term, scaled by (W[0], W[1]). */
               {
249
                    E Ta, Tg, T5, Tb;
250
                    Ta = T6 + T9;
251
                    Tg = Tc + Tf;
252
                    T5 = W[0];
253
                    Tb = W[1];
254
                    rio[WS(vs, 1)] = FMA(T5, Ta, Tb * Tg);
255
                    iio[WS(vs, 1)] = FNMS(Tb, Ta, T5 * Tg);
256
               }
257
               /* Group 2, "minus" term, scaled by (W[2], W[3]). */
               {
258
                    E TW, TY, TV, TX;
259
                    TW = TK - TN;
260
                    TY = TT - TQ;
261
                    TV = W[2];
262
                    TX = W[3];
263
                    rio[WS(vs, 2) + WS(rs, 2)] = FMA(TV, TW, TX * TY);
264
                    iio[WS(vs, 2) + WS(rs, 2)] = FNMS(TX, TW, TV * TY);
265
               }
266
               /* Group 1, "minus" term, scaled by (W[2], W[3]). */
               {
267
                    E TC, TE, TB, TD;
268
                    TC = Tq - Tt;
269
                    TE = Tz - Tw;
270
                    TB = W[2];
271
                    TD = W[3];
272
                    rio[WS(vs, 2) + WS(rs, 1)] = FMA(TB, TC, TD * TE);
273
                    iio[WS(vs, 2) + WS(rs, 1)] = FNMS(TD, TC, TB * TE);
274
               }
275
               /* Group 1, "plus" term, scaled by (W[0], W[1]). */
               {
276
                    E Tu, TA, Tp, Tv;
277
                    Tu = Tq + Tt;
278
                    TA = Tw + Tz;
279
                    Tp = W[0];
280
                    Tv = W[1];
281
                    rio[WS(vs, 1) + WS(rs, 1)] = FMA(Tp, Tu, Tv * TA);
282
                    iio[WS(vs, 1) + WS(rs, 1)] = FNMS(Tv, Tu, Tp * TA);
283
               }
284
               /* Group 2, "plus" term, scaled by (W[0], W[1]). */
               {
285
                    E TO, TU, TJ, TP;
286
                    TO = TK + TN;
287
                    TU = TQ + TT;
288
                    TJ = W[0];
289
                    TP = W[1];
290
                    rio[WS(vs, 1) + WS(rs, 2)] = FMA(TJ, TO, TP * TU);
291
                    iio[WS(vs, 1) + WS(rs, 2)] = FNMS(TP, TO, TJ * TU);
292
               }
293
               /* Group 0, "minus" term, scaled by (W[2], W[3]). */
               {
294
                    E Ti, Tk, Th, Tj;
295
                    Ti = T6 - T9;
296
                    Tk = Tf - Tc;
297
                    Th = W[2];
298
                    Tj = W[3];
299
                    rio[WS(vs, 2)] = FMA(Th, Ti, Tj * Tk);
300
                    iio[WS(vs, 2)] = FNMS(Tj, Ti, Th * Tk);
301
               }
302
          }
303
     }
304
}
305

    
306
/*
 * Twiddle-instruction table referenced by desc (non-FMA branch).
 * NOTE(review): TW_FULL / TW_NEXT and the meaning of the numeric fields are
 * defined outside this file.  q1_3 reads W[0..3] and advances W by 4 per
 * m step, which presumably corresponds to the {TW_FULL, 0, 3} entry --
 * confirm against the twiddle headers.
 */
static const tw_instr twinstr[] = {
307
     {TW_FULL, 0, 3},
308
     {TW_NEXT, 1, 0}
309
};
310

    
311
/*
 * Codelet descriptor for the non-FMA variant: carries the value 3, the name
 * "q1_3", the twiddle-instruction table, &GENUS, and the counts
 * {30, 18, 18, 0}, whose first three entries equal the additions /
 * multiplications / fused multiply-adds listed in this variant's generated
 * header comment.
 * NOTE(review): field meanings come from ct_desc, declared elsewhere; the
 * leading 3 presumably is the transform size (generator option -n 3) --
 * confirm.
 */
static const ct_desc desc = { 3, "q1_3", twinstr, &GENUS, {30, 18, 18, 0}, 0, 0, 0 };
312

    
313
/*
 * Registration entry point (non-FMA branch): passes the q1_3 function and
 * its descriptor to X(kdft_difsq_register) for planner p.
 */
void X(codelet_q1_3) (planner *p) {
314
     X(kdft_difsq_register) (p, q1_3, &desc);
315
}
316
#endif