To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t1_5.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (7.25 KB)

/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
20

    
/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Thu May 24 08:04:12 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 5 -name t1_5 -include dft/scalar/t.h */

/*
 * This function contains 40 FP additions, 34 FP multiplications,
 * (or, 14 additions, 8 multiplications, 26 fused multiply/add),
 * 31 stack variables, 4 constants, and 20 memory accesses
 */
35
#include "dft/scalar/t.h"
36

    
37
/*
 * t1_5 -- in-place size-5 twiddle codelet (generated with "-n 5"), FMA
 * variant selected by the enclosing #if.  It is handed to
 * X(kdft_dit_register) further down in this file.
 *
 * Parameters:
 *   ri, ii  real and imaginary data; element k of the current transform is
 *           read from and written back to ri[WS(rs, k)] / ii[WS(rs, k)],
 *           k = 0..4.
 *   W       twiddle table; 8 values (W[0]..W[7]) are used per iteration,
 *           starting at W + mb * 8 and advancing by 8 each iteration.
 *   rs      stride argument handed to WS().
 *   mb, me  the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms      amount added to ri and ii after every iteration.
 *
 * NOTE(review): the comments below assume FMA(a, b, c) = a * b + c and
 * FNMS(a, b, c) = c - a * b; both macros, as well as DK, WS, E, R, INT and
 * MAKE_VOLATILE_STRIDE, are defined outside this file -- confirm there.
 *
 * The statement order is exactly what the generator emitted; do not
 * reorder, since the floating-point results depend on it.
 */
static void t1_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* Literal values: ~sin(2*pi/5), ~sqrt(5)/4, ~(sqrt(5)-1)/2 and 1/4. */
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     {
          INT m;
          for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs)) {
               E T1, TM, T7, Tx, Td, Tz, Te, TJ, Tk, TC, Tq, TE, Tr, TK;
               /* Element 0 is used as-is (no twiddle is applied to it). */
               T1 = ri[0];
               TM = ii[0];
               {
                    /* Elements 1 and 4 combined with (W[0], W[1]) and (W[6], W[7]). */
                    E T3, T6, T4, Tw, T9, Tc, Ta, Ty, T2, T8, T5, Tb;
                    T3 = ri[WS(rs, 1)];
                    T6 = ii[WS(rs, 1)];
                    T2 = W[0];
                    T4 = T2 * T3;
                    Tw = T2 * T6;
                    T9 = ri[WS(rs, 4)];
                    Tc = ii[WS(rs, 4)];
                    T8 = W[6];
                    Ta = T8 * T9;
                    Ty = T8 * Tc;
                    T5 = W[1];
                    T7 = FMA(T5, T6, T4);
                    Tx = FNMS(T5, T3, Tw);
                    Tb = W[7];
                    Td = FMA(Tb, Tc, Ta);
                    Tz = FNMS(Tb, T9, Ty);
                    /* Sums of the two twiddled elements (real / imaginary). */
                    Te = T7 + Td;
                    TJ = Tx + Tz;
               }
               {
                    /* Elements 2 and 3 combined with (W[2], W[3]) and (W[4], W[5]). */
                    E Tg, Tj, Th, TB, Tm, Tp, Tn, TD, Tf, Tl, Ti, To;
                    Tg = ri[WS(rs, 2)];
                    Tj = ii[WS(rs, 2)];
                    Tf = W[2];
                    Th = Tf * Tg;
                    TB = Tf * Tj;
                    Tm = ri[WS(rs, 3)];
                    Tp = ii[WS(rs, 3)];
                    Tl = W[4];
                    Tn = Tl * Tm;
                    TD = Tl * Tp;
                    Ti = W[3];
                    Tk = FMA(Ti, Tj, Th);
                    TC = FNMS(Ti, Tg, TB);
                    To = W[5];
                    Tq = FMA(To, Tp, Tn);
                    TE = FNMS(To, Tm, TD);
                    /* Sums of the two twiddled elements (real / imaginary). */
                    Tr = Tk + Tq;
                    TK = TC + TE;
               }
               {
                    /* Real outputs ri[0..4]. */
                    E Tu, Ts, Tt, TG, TI, TA, TF, TH, Tv;
                    Tu = Te - Tr;
                    Ts = Te + Tr;
                    Tt = FNMS(KP250000000, Ts, T1);
                    TA = Tx - Tz;
                    TF = TC - TE;
                    TG = FMA(KP618033988, TF, TA);
                    TI = FNMS(KP618033988, TA, TF);
                    ri[0] = T1 + Ts;
                    TH = FNMS(KP559016994, Tu, Tt);
                    ri[WS(rs, 2)] = FNMS(KP951056516, TI, TH);
                    ri[WS(rs, 3)] = FMA(KP951056516, TI, TH);
                    Tv = FMA(KP559016994, Tu, Tt);
                    ri[WS(rs, 4)] = FNMS(KP951056516, TG, Tv);
                    ri[WS(rs, 1)] = FMA(KP951056516, TG, Tv);
               }
               {
                    /* Imaginary outputs ii[0..4]. */
                    E TO, TL, TN, TS, TU, TQ, TR, TT, TP;
                    TO = TJ - TK;
                    TL = TJ + TK;
                    TN = FNMS(KP250000000, TL, TM);
                    TQ = T7 - Td;
                    TR = Tk - Tq;
                    TS = FMA(KP618033988, TR, TQ);
                    TU = FNMS(KP618033988, TQ, TR);
                    ii[0] = TL + TM;
                    TT = FNMS(KP559016994, TO, TN);
                    ii[WS(rs, 2)] = FMA(KP951056516, TU, TT);
                    ii[WS(rs, 3)] = FNMS(KP951056516, TU, TT);
                    TP = FMA(KP559016994, TO, TN);
                    ii[WS(rs, 1)] = FNMS(KP951056516, TS, TP);
                    ii[WS(rs, 4)] = FMA(KP951056516, TS, TP);
               }
          }
     }
}
128

    
129
static const tw_instr twinstr[] = {
130
     {TW_FULL, 0, 5},
131
     {TW_NEXT, 1, 0}
132
};
133

    
134
/*
 * Descriptor passed to X(kdft_dit_register) together with t1_5.
 * The {14, 8, 26, 0} group matches the operation counts quoted in the
 * generator comment for this variant (14 additions, 8 multiplications,
 * 26 fused multiply/adds).  NOTE(review): the field layout of ct_desc,
 * including the trailing zeros, is declared outside this file.
 */
static const ct_desc desc = {
     5,
     "t1_5",
     twinstr,
     &GENUS,
     {14, 8, 26, 0},
     0,
     0,
     0
};
135

    
136
/*
 * Registration entry point: hands the t1_5 codelet and its descriptor to
 * the planner `p` through X(kdft_dit_register).
 */
void X(codelet_t1_5) (planner *p) {
     X(kdft_dit_register) (p, t1_5, &desc);
}
139
#else
140

    
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 5 -name t1_5 -include dft/scalar/t.h */

/*
 * This function contains 40 FP additions, 28 FP multiplications,
 * (or, 26 additions, 14 multiplications, 14 fused multiply/add),
 * 29 stack variables, 4 constants, and 20 memory accesses
 */
148
#include "dft/scalar/t.h"
149

    
150
/*
 * t1_5 -- in-place size-5 twiddle codelet (generated with "-n 5"), variant
 * used when the enclosing #if does not select the FMA-oriented version.
 * It is handed to X(kdft_dit_register) further down in this file.
 *
 * Parameters:
 *   ri, ii  real and imaginary data; element k of the current transform is
 *           read from and written back to ri[WS(rs, k)] / ii[WS(rs, k)],
 *           k = 0..4.
 *   W       twiddle table; 8 values (W[0]..W[7]) are used per iteration,
 *           starting at W + mb * 8 and advancing by 8 each iteration.
 *   rs      stride argument handed to WS().
 *   mb, me  the loop runs for m = mb, mb + 1, ..., me - 1.
 *   ms      amount added to ri and ii after every iteration.
 *
 * NOTE(review): the comments below assume FMA(a, b, c) = a * b + c and
 * FNMS(a, b, c) = c - a * b; both macros, as well as DK, WS, E, R, INT and
 * MAKE_VOLATILE_STRIDE, are defined outside this file -- confirm there.
 *
 * The statement order is exactly what the generator emitted; do not
 * reorder, since the floating-point results depend on it.
 */
static void t1_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* Literal values: 1/4, ~sqrt(5)/4, ~sin(pi/5) and ~sin(2*pi/5). */
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
     {
          INT m;
          for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(10, rs)) {
               E T1, TE, Tu, Tx, TJ, TI, TB, TC, TD, Tc, Tn, To;
               /* Element 0 is used as-is (no twiddle is applied to it). */
               T1 = ri[0];
               TE = ii[0];
               {
                    E T6, Ts, Tm, Tw, Tb, Tt, Th, Tv;
                    {
                         /* Element 1 combined with (W[0], W[1]). */
                         E T3, T5, T2, T4;
                         T3 = ri[WS(rs, 1)];
                         T5 = ii[WS(rs, 1)];
                         T2 = W[0];
                         T4 = W[1];
                         T6 = FMA(T2, T3, T4 * T5);
                         Ts = FNMS(T4, T3, T2 * T5);
                    }
                    {
                         /* Element 3 combined with (W[4], W[5]). */
                         E Tj, Tl, Ti, Tk;
                         Tj = ri[WS(rs, 3)];
                         Tl = ii[WS(rs, 3)];
                         Ti = W[4];
                         Tk = W[5];
                         Tm = FMA(Ti, Tj, Tk * Tl);
                         Tw = FNMS(Tk, Tj, Ti * Tl);
                    }
                    {
                         /* Element 4 combined with (W[6], W[7]). */
                         E T8, Ta, T7, T9;
                         T8 = ri[WS(rs, 4)];
                         Ta = ii[WS(rs, 4)];
                         T7 = W[6];
                         T9 = W[7];
                         Tb = FMA(T7, T8, T9 * Ta);
                         Tt = FNMS(T9, T8, T7 * Ta);
                    }
                    {
                         /* Element 2 combined with (W[2], W[3]). */
                         E Te, Tg, Td, Tf;
                         Te = ri[WS(rs, 2)];
                         Tg = ii[WS(rs, 2)];
                         Td = W[2];
                         Tf = W[3];
                         Th = FMA(Td, Te, Tf * Tg);
                         Tv = FNMS(Tf, Te, Td * Tg);
                    }
                    /* Pairwise differences and sums of the twiddled elements. */
                    Tu = Ts - Tt;
                    Tx = Tv - Tw;
                    TJ = Th - Tm;
                    TI = T6 - Tb;
                    TB = Ts + Tt;
                    TC = Tv + Tw;
                    TD = TB + TC;
                    Tc = T6 + Tb;
                    Tn = Th + Tm;
                    To = Tc + Tn;
               }
               /* Output 0: element 0 plus the sum of the four twiddled elements. */
               ri[0] = T1 + To;
               ii[0] = TD + TE;
               {
                    /* Real outputs ri[1..4]. */
                    E Ty, TA, Tr, Tz, Tp, Tq;
                    Ty = FMA(KP951056516, Tu, KP587785252 * Tx);
                    TA = FNMS(KP587785252, Tu, KP951056516 * Tx);
                    Tp = KP559016994 * (Tc - Tn);
                    Tq = FNMS(KP250000000, To, T1);
                    Tr = Tp + Tq;
                    Tz = Tq - Tp;
                    ri[WS(rs, 4)] = Tr - Ty;
                    ri[WS(rs, 3)] = Tz + TA;
                    ri[WS(rs, 1)] = Tr + Ty;
                    ri[WS(rs, 2)] = Tz - TA;
               }
               {
                    /* Imaginary outputs ii[1..4]. */
                    E TK, TL, TH, TM, TF, TG;
                    TK = FMA(KP951056516, TI, KP587785252 * TJ);
                    TL = FNMS(KP587785252, TI, KP951056516 * TJ);
                    TF = KP559016994 * (TB - TC);
                    TG = FNMS(KP250000000, TD, TE);
                    TH = TF + TG;
                    TM = TG - TF;
                    ii[WS(rs, 1)] = TH - TK;
                    ii[WS(rs, 3)] = TM - TL;
                    ii[WS(rs, 4)] = TK + TH;
                    ii[WS(rs, 2)] = TL + TM;
               }
          }
     }
}
242

    
243
static const tw_instr twinstr[] = {
244
     {TW_FULL, 0, 5},
245
     {TW_NEXT, 1, 0}
246
};
247

    
248
/*
 * Descriptor passed to X(kdft_dit_register) together with t1_5.
 * The {26, 14, 14, 0} group matches the operation counts quoted in the
 * generator comment for this variant (26 additions, 14 multiplications,
 * 14 fused multiply/adds).  NOTE(review): the field layout of ct_desc,
 * including the trailing zeros, is declared outside this file.
 */
static const ct_desc desc = {
     5,
     "t1_5",
     twinstr,
     &GENUS,
     {26, 14, 14, 0},
     0,
     0,
     0
};
249

    
250
/*
 * Registration entry point: hands the t1_5 codelet and its descriptor to
 * the planner `p` through X(kdft_dit_register).
 */
void X(codelet_t1_5) (planner *p) {
     X(kdft_dit_register) (p, t1_5, &desc);
}
253
#endif