To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t2_5.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (7.5 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:25 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 5 -name t2_5 -include dft/scalar/t.h */
29

    
30
/*
31
 * This function contains 44 FP additions, 40 FP multiplications,
32
 * (or, 14 additions, 10 multiplications, 30 fused multiply/add),
33
 * 38 stack variables, 4 constants, and 20 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36

    
37
/*
 * t2_5 (FMA flavour): size-5 twiddle codelet (generator options "-n 5
 * -twiddle-log3 -precompute-twiddles -fma", see the "Generated by" comment).
 *
 * For each m in [mb, me) it reads five complex values, stored as separate
 * real/imaginary arrays at ri[0], ri[WS(rs, 1..4)] and ii[0], ii[WS(rs, 1..4)],
 * combines inputs 1..4 with twiddle factors taken from W, and writes five
 * complex results back to the same locations.
 *
 *   ri, ii  real / imaginary data, read and overwritten; both advance by ms
 *           after every iteration.
 *   W       twiddle table; four values W[0..3] are consumed per iteration and
 *           the pointer starts at W + mb * 4.
 *   rs      stride handed to WS() to address elements 1..4.
 *   mb, me  half-open iteration range.
 *   ms      per-iteration step applied to ri and ii.
 *
 * NOTE(review): comments below assume FMA(a, b, c) == a * b + c and
 * FNMS(a, b, c) == c - a * b; the macros are defined outside this file, so
 * confirm against the project headers.
 */
static void t2_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     /* Numeric constants: 0.951056516... ~= sin(2*pi/5), 0.559016994... ~= sqrt(5)/4,
        0.618033988... ~= (sqrt(5) - 1)/2, and 1/4. */
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
40
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
41
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
42
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
43
     {
44
          INT m;
45
          /* One size-5 butterfly per iteration; W is pre-offset by mb * 4 and then steps by 4. */
          for (m = mb, W = W + (mb * 4); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) {
46
               E T2, Ta, T8, T5, Tb, Tm, Tf, Tj, T9, Te;
47
               /* Only two twiddle pairs are stored: (T2, T5) = (W[0], W[1]) and
                  (T8, Ta) = (W[2], W[3]).  The other two pairs are derived here:
                    (Tb, Tf) = (W0*W2 - W1*W3, W0*W3 + W1*W2)   -- used with input 4
                    (Tj, Tm) = (W0*W2 + W1*W3, W0*W3 - W1*W2)   -- used with input 2 */
               T2 = W[0];
48
               Ta = W[3];
49
               T8 = W[2];
50
               T9 = T2 * T8;
51
               Te = T2 * Ta;
52
               T5 = W[1];
53
               Tb = FNMS(T5, Ta, T9);
54
               Tm = FNMS(T5, T8, Te);
55
               Tf = FMA(T5, T8, Te);
56
               Tj = FMA(T5, Ta, T9);
57
               {
58
                    E T1, TO, T7, Th, Ti, Tz, TB, TL, To, Ts, Tt, TE, TG, TM;
59
                    /* Input 0 is used without a twiddle. */
                    T1 = ri[0];
60
                    TO = ii[0];
61
                    {
62
                         E T3, T4, T6, Ty, Tc, Td, Tg, TA;
63
                         /* Twiddle inputs 1 and 4: (T7, Tz) is input 1 combined with (T2, T5),
                            (Th, TB) is input 4 combined with (Tb, Tf).  Ti and TL are the sums
                            of their real and imaginary parts respectively. */
                         T3 = ri[WS(rs, 1)];
64
                         T4 = T2 * T3;
65
                         T6 = ii[WS(rs, 1)];
66
                         Ty = T2 * T6;
67
                         Tc = ri[WS(rs, 4)];
68
                         Td = Tb * Tc;
69
                         Tg = ii[WS(rs, 4)];
70
                         TA = Tb * Tg;
71
                         T7 = FMA(T5, T6, T4);
72
                         Th = FMA(Tf, Tg, Td);
73
                         Ti = T7 + Th;
74
                         Tz = FNMS(T5, T3, Ty);
75
                         TB = FNMS(Tf, Tc, TA);
76
                         TL = Tz + TB;
77
                    }
78
                    {
79
                         E Tk, Tl, Tn, TD, Tp, Tq, Tr, TF;
80
                         /* Twiddle inputs 2 and 3: (To, TE) is input 2 combined with (Tj, Tm),
                            (Ts, TG) is input 3 combined with (T8, Ta).  Tt and TM are the sums
                            of their real and imaginary parts respectively. */
                         Tk = ri[WS(rs, 2)];
81
                         Tl = Tj * Tk;
82
                         Tn = ii[WS(rs, 2)];
83
                         TD = Tj * Tn;
84
                         Tp = ri[WS(rs, 3)];
85
                         Tq = T8 * Tp;
86
                         Tr = ii[WS(rs, 3)];
87
                         TF = T8 * Tr;
88
                         To = FMA(Tm, Tn, Tl);
89
                         Ts = FMA(Ta, Tr, Tq);
90
                         Tt = To + Ts;
91
                         TE = FNMS(Tm, Tk, TD);
92
                         TG = FNMS(Ta, Tp, TF);
93
                         TM = TE + TG;
94
                    }
95
                    {
96
                         E Tw, Tu, Tv, TI, TK, TC, TH, TJ, Tx;
97
                         /* Real outputs.  ri[0] is the plain sum of all real parts; outputs
                            1..4 are built from the real-part sum/difference (Tu, Tw) and the
                            imaginary-part differences (TC, TH). */
                         Tw = Ti - Tt;
98
                         Tu = Ti + Tt;
99
                         Tv = FNMS(KP250000000, Tu, T1);
100
                         TC = Tz - TB;
101
                         TH = TE - TG;
102
                         TI = FMA(KP618033988, TH, TC);
103
                         TK = FNMS(KP618033988, TC, TH);
104
                         ri[0] = T1 + Tu;
105
                         TJ = FNMS(KP559016994, Tw, Tv);
106
                         ri[WS(rs, 2)] = FNMS(KP951056516, TK, TJ);
107
                         ri[WS(rs, 3)] = FMA(KP951056516, TK, TJ);
108
                         Tx = FMA(KP559016994, Tw, Tv);
109
                         ri[WS(rs, 4)] = FNMS(KP951056516, TI, Tx);
110
                         ri[WS(rs, 1)] = FMA(KP951056516, TI, Tx);
111
                    }
112
                    {
113
                         E TQ, TN, TP, TU, TW, TS, TT, TV, TR;
114
                         /* Imaginary outputs.  Same structure as the real outputs with the
                            roles of real and imaginary parts swapped; note the FMA/FNMS
                            signs are mirrored relative to the ri[] stores above. */
                         TQ = TL - TM;
115
                         TN = TL + TM;
116
                         TP = FNMS(KP250000000, TN, TO);
117
                         TS = T7 - Th;
118
                         TT = To - Ts;
119
                         TU = FMA(KP618033988, TT, TS);
120
                         TW = FNMS(KP618033988, TS, TT);
121
                         ii[0] = TN + TO;
122
                         TV = FNMS(KP559016994, TQ, TP);
123
                         ii[WS(rs, 2)] = FMA(KP951056516, TW, TV);
124
                         ii[WS(rs, 3)] = FNMS(KP951056516, TW, TV);
125
                         TR = FMA(KP559016994, TQ, TP);
126
                         ii[WS(rs, 1)] = FNMS(KP951056516, TU, TR);
127
                         ii[WS(rs, 4)] = FMA(KP951056516, TU, TR);
128
                    }
129
               }
130
          }
131
     }
132
}
133

    
134
/*
 * Twiddle instruction list referenced by desc below.  It has two TW_CEXP
 * entries (last fields 1 and 3) followed by a TW_NEXT terminator entry;
 * t2_5 reads four values W[0..3] per iteration.
 * NOTE(review): presumably each TW_CEXP entry requests one complex twiddle
 * (two reals) for the given exponent -- confirm against the tw_instr
 * definition, which is outside this file.
 */
static const tw_instr twinstr[] = {
135
     {TW_CEXP, 0, 1},
136
     {TW_CEXP, 0, 3},
137
     {TW_NEXT, 1, 0}
138
};
139

    
140
/*
 * Codelet descriptor passed to the registration call below.  The leading 5
 * and the name "t2_5" match the generator options "-n 5 -name t2_5"; the
 * {14, 10, 30, 0} group matches the operation counts quoted in the header
 * comment of this variant (14 additions, 10 multiplications, 30 fused
 * multiply/add).
 * NOTE(review): the meaning of the trailing 0, 0, 0 fields is defined by
 * ct_desc, which is not visible here.
 */
static const ct_desc desc = { 5, "t2_5", twinstr, &GENUS, {14, 10, 30, 0}, 0, 0, 0 };
141

    
142
/*
 * Registration entry point: hands the t2_5 kernel and its descriptor to the
 * planner p through X(kdft_dit_register).  X() is a project macro applied to
 * both the function name and the callee name.
 */
void X(codelet_t2_5) (planner *p) {
143
     X(kdft_dit_register) (p, t2_5, &desc);
144
}
145
#else
146

    
147
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 5 -name t2_5 -include dft/scalar/t.h */
148

    
149
/*
150
 * This function contains 44 FP additions, 32 FP multiplications,
151
 * (or, 30 additions, 18 multiplications, 14 fused multiply/add),
152
 * 37 stack variables, 4 constants, and 20 memory accesses
153
 */
154
#include "dft/scalar/t.h"
155

    
156
/*
 * t2_5 (non-FMA flavour): size-5 twiddle codelet (generator options "-n 5
 * -twiddle-log3 -precompute-twiddles", see the "Generated by" comment).
 * This is the variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * For each m in [mb, me) it reads five complex values, stored as separate
 * real/imaginary arrays at ri[0], ri[WS(rs, 1..4)] and ii[0], ii[WS(rs, 1..4)],
 * combines inputs 1..4 with twiddle factors taken from W, and writes five
 * complex results back to the same locations.
 *
 *   ri, ii  real / imaginary data, read and overwritten; both advance by ms
 *           after every iteration.
 *   W       twiddle table; four values W[0..3] are consumed per iteration and
 *           the pointer starts at W + mb * 4.
 *   rs      stride handed to WS() to address elements 1..4.
 *   mb, me  half-open iteration range.
 *   ms      per-iteration step applied to ri and ii.
 *
 * NOTE(review): comments below assume FMA(a, b, c) == a * b + c and
 * FNMS(a, b, c) == c - a * b; the macros are defined outside this file, so
 * confirm against the project headers.
 */
static void t2_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
157
{
158
     /* Numeric constants: 1/4, 0.559016994... ~= sqrt(5)/4,
        0.587785252... ~= sin(pi/5), 0.951056516... ~= sin(2*pi/5). */
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
159
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
160
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
161
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
162
     {
163
          INT m;
164
          /* One size-5 butterfly per iteration; W is pre-offset by mb * 4 and then steps by 4. */
          for (m = mb, W = W + (mb * 4); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 4, MAKE_VOLATILE_STRIDE(10, rs)) {
165
               E T2, T4, T7, T9, Tb, Tl, Tf, Tj;
166
               {
167
                    E T8, Te, Ta, Td;
168
                    /* Only two twiddle pairs are stored: (T2, T4) = (W[0], W[1]) and
                       (T7, T9) = (W[2], W[3]).  The other two pairs are derived here:
                         (Tb, Tf) = (W0*W2 - W1*W3, W0*W3 + W1*W2)   -- used with input 4
                         (Tj, Tl) = (W0*W2 + W1*W3, W0*W3 - W1*W2)   -- used with input 2 */
                    T2 = W[0];
169
                    T4 = W[1];
170
                    T7 = W[2];
171
                    T9 = W[3];
172
                    T8 = T2 * T7;
173
                    Te = T4 * T7;
174
                    Ta = T4 * T9;
175
                    Td = T2 * T9;
176
                    Tb = T8 - Ta;
177
                    Tl = Td - Te;
178
                    Tf = Td + Te;
179
                    Tj = T8 + Ta;
180
               }
181
               {
182
                    E T1, TI, Ty, TB, TN, TM, TF, TG, TH, Ti, Tr, Ts;
183
                    /* Input 0 is used without a twiddle. */
                    T1 = ri[0];
184
                    TI = ii[0];
185
                    {
186
                         E T6, Tw, Tq, TA, Th, Tx, Tn, Tz;
187
                         {
188
                              E T3, T5, To, Tp;
189
                              /* Twiddle inputs 1 and 3: (T6, Tw) is input 1 combined with
                                 (T2, T4); (Tq, TA) is input 3 combined with (T7, T9). */
                              T3 = ri[WS(rs, 1)];
190
                              T5 = ii[WS(rs, 1)];
191
                              T6 = FMA(T2, T3, T4 * T5);
192
                              Tw = FNMS(T4, T3, T2 * T5);
193
                              To = ri[WS(rs, 3)];
194
                              Tp = ii[WS(rs, 3)];
195
                              Tq = FMA(T7, To, T9 * Tp);
196
                              TA = FNMS(T9, To, T7 * Tp);
197
                         }
198
                         {
199
                              E Tc, Tg, Tk, Tm;
200
                              /* Twiddle inputs 4 and 2: (Th, Tx) is input 4 combined with
                                 (Tb, Tf); (Tn, Tz) is input 2 combined with (Tj, Tl). */
                              Tc = ri[WS(rs, 4)];
201
                              Tg = ii[WS(rs, 4)];
202
                              Th = FMA(Tb, Tc, Tf * Tg);
203
                              Tx = FNMS(Tf, Tc, Tb * Tg);
204
                              Tk = ri[WS(rs, 2)];
205
                              Tm = ii[WS(rs, 2)];
206
                              Tn = FMA(Tj, Tk, Tl * Tm);
207
                              Tz = FNMS(Tl, Tk, Tj * Tm);
208
                         }
209
                         /* Pairwise sums and differences of the twiddled inputs:
                            inputs 1 and 4 are paired, as are inputs 2 and 3. */
                         Ty = Tw - Tx;
210
                         TB = Tz - TA;
211
                         TN = Tn - Tq;
212
                         TM = T6 - Th;
213
                         TF = Tw + Tx;
214
                         TG = Tz + TA;
215
                         TH = TF + TG;
216
                         Ti = T6 + Th;
217
                         Tr = Tn + Tq;
218
                         Ts = Ti + Tr;
219
                    }
220
                    /* Output 0 is the plain sum of all five (twiddled) inputs. */
                    ri[0] = T1 + Ts;
221
                    ii[0] = TH + TI;
222
                    {
223
                         E TC, TE, Tv, TD, Tt, Tu;
224
                         /* Real parts of outputs 1..4, built from the real-part sums
                            (Ti, Tr, Ts) and the imaginary-part differences (Ty, TB). */
                         TC = FMA(KP951056516, Ty, KP587785252 * TB);
225
                         TE = FNMS(KP587785252, Ty, KP951056516 * TB);
226
                         Tt = KP559016994 * (Ti - Tr);
227
                         Tu = FNMS(KP250000000, Ts, T1);
228
                         Tv = Tt + Tu;
229
                         TD = Tu - Tt;
230
                         ri[WS(rs, 4)] = Tv - TC;
231
                         ri[WS(rs, 3)] = TD + TE;
232
                         ri[WS(rs, 1)] = Tv + TC;
233
                         ri[WS(rs, 2)] = TD - TE;
234
                    }
235
                    {
236
                         E TO, TP, TL, TQ, TJ, TK;
237
                         /* Imaginary parts of outputs 1..4, built from the imaginary-part
                            sums (TF, TG, TH) and the real-part differences (TM, TN); the
                            add/subtract signs are mirrored relative to the ri[] stores. */
                         TO = FMA(KP951056516, TM, KP587785252 * TN);
238
                         TP = FNMS(KP587785252, TM, KP951056516 * TN);
239
                         TJ = KP559016994 * (TF - TG);
240
                         TK = FNMS(KP250000000, TH, TI);
241
                         TL = TJ + TK;
242
                         TQ = TK - TJ;
243
                         ii[WS(rs, 1)] = TL - TO;
244
                         ii[WS(rs, 3)] = TQ - TP;
245
                         ii[WS(rs, 4)] = TO + TL;
246
                         ii[WS(rs, 2)] = TP + TQ;
247
                    }
248
               }
249
          }
250
     }
251
}
252

    
253
/*
 * Twiddle instruction list referenced by desc below.  It has two TW_CEXP
 * entries (last fields 1 and 3) followed by a TW_NEXT terminator entry;
 * t2_5 reads four values W[0..3] per iteration.
 * NOTE(review): presumably each TW_CEXP entry requests one complex twiddle
 * (two reals) for the given exponent -- confirm against the tw_instr
 * definition, which is outside this file.
 */
static const tw_instr twinstr[] = {
254
     {TW_CEXP, 0, 1},
255
     {TW_CEXP, 0, 3},
256
     {TW_NEXT, 1, 0}
257
};
258

    
259
/*
 * Codelet descriptor passed to the registration call below.  The leading 5
 * and the name "t2_5" match the generator options "-n 5 -name t2_5"; the
 * {30, 18, 14, 0} group matches the operation counts quoted in the header
 * comment of this variant (30 additions, 18 multiplications, 14 fused
 * multiply/add).
 * NOTE(review): the meaning of the trailing 0, 0, 0 fields is defined by
 * ct_desc, which is not visible here.
 */
static const ct_desc desc = { 5, "t2_5", twinstr, &GENUS, {30, 18, 14, 0}, 0, 0, 0 };
260

    
261
/*
 * Registration entry point: hands the t2_5 kernel and its descriptor to the
 * planner p through X(kdft_dit_register).  X() is a project macro applied to
 * both the function name and the callee name.
 */
void X(codelet_t2_5) (planner *p) {
262
     X(kdft_dit_register) (p, t2_5, &desc);
263
}
264
#endif