To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / n1_7.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (8.38 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:10 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 7 -name n1_7 -include dft/scalar/n.h */
29

    
30
/*
31
 * This function contains 60 FP additions, 42 FP multiplications,
32
 * (or, 18 additions, 0 multiplications, 42 fused multiply/add),
33
 * 41 stack variables, 6 constants, and 28 memory accesses
34
 */
35
#include "dft/scalar/n.h"
36

    
37
/*
 * n1_7, variant built when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA
 * is defined.
 *
 * Each of the v loop iterations reads seven values from ri and seven from
 * ii (index 0 and WS(is, 1..6)), writes seven values to ro and seven to io
 * (index 0 and WS(os, 1..6)), then advances ri/ii by ivs and ro/io by ovs.
 * The generator command line recorded with this variant is
 * "gen_notw ... -n 7", i.e. this is the size-7 kernel.
 *
 * As in the generated original, every input of an iteration is loaded
 * before the first store of that iteration, each arithmetic step is its
 * own full expression, and the stores are issued in the original order.
 *
 * NOTE(review): the comments below assume FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b; both macros are defined outside this file, so
 * confirm against the project header that provides them.
 */
static void n1_7(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     DK(KP974927912, +0.974927912181823607018131682993931217232785801); /* numerically sin(3*pi/7) */
     DK(KP900968867, +0.900968867902419126236102319507445051165919162); /* numerically cos(pi/7) */
     DK(KP692021471, +0.692021471630095869627814897002069140197260599); /* numerically cos(2*pi/7) / cos(pi/7) */
     DK(KP801937735, +0.801937735804838252472204639014890102331838324); /* numerically sin(2*pi/7) / sin(3*pi/7) */
     DK(KP554958132, +0.554958132087371191422194871006410481067288862); /* numerically sin(pi/7) / sin(2*pi/7) */
     DK(KP356895867, +0.356895867892209443894399510021300583399127187); /* numerically cos(3*pi/7) / cos(2*pi/7) */
     {
          INT i;
          for (i = v; i > 0; --i, ri += ivs, ii += ivs, ro += ovs, io += ovs, MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) {
               /* Inputs, named by their index k in WS(is, k). */
               const E re0 = ri[0];
               const E im0 = ii[0];
               const E re1 = ri[WS(is, 1)];
               const E re6 = ri[WS(is, 6)];
               const E re3 = ri[WS(is, 3)];
               const E re4 = ri[WS(is, 4)];
               const E re2 = ri[WS(is, 2)];
               const E re5 = ri[WS(is, 5)];
               const E im2 = ii[WS(is, 2)];
               const E im5 = ii[WS(is, 5)];
               const E im3 = ii[WS(is, 3)];
               const E im4 = ii[WS(is, 4)];
               const E im1 = ii[WS(is, 1)];
               const E im6 = ii[WS(is, 6)];

               /* Sums and differences of the index pairs (1,6), (2,5), (3,4).
                  The operand order of each difference is significant. */
               const E re_sum16 = re1 + re6;
               const E re_dif61 = re6 - re1;
               const E re_sum34 = re3 + re4;
               const E re_dif43 = re4 - re3;
               const E re_sum25 = re2 + re5;
               const E re_dif52 = re5 - re2;
               const E im_dif25 = im2 - im5;
               const E im_sum25 = im2 + im5;
               const E im_dif34 = im3 - im4;
               const E im_sum34 = im3 + im4;
               const E im_dif16 = im1 - im6;
               const E im_sum16 = im1 + im6;

               /* Output 0: plain sum of all seven inputs, added left to right. */
               ro[0] = re0 + re_sum16 + re_sum25 + re_sum34;
               io[0] = im0 + im_sum16 + im_sum25 + im_sum34;

               /*
                * In each block below the "even" value is built from the pair
                * sums and the "odd" value from the pair differences; the two
                * outputs of a pair combine the same even/odd values, one
                * through FMA and the other through FNMS.
                */

               /* Outputs 1 and 6. */
               {
                    const E ro_even_a = FNMS(KP356895867, re_sum25, re_sum16);
                    const E ro_even_b = FNMS(KP692021471, ro_even_a, re_sum34);
                    const E ro_even = FNMS(KP900968867, ro_even_b, re0);
                    const E ro_odd_a = FMA(KP554958132, im_dif34, im_dif16);
                    const E ro_odd = FMA(KP801937735, ro_odd_a, im_dif25);
                    const E io_even_a = FNMS(KP356895867, im_sum25, im_sum16);
                    const E io_even_b = FNMS(KP692021471, io_even_a, im_sum34);
                    const E io_even = FNMS(KP900968867, io_even_b, im0);
                    const E io_odd_a = FMA(KP554958132, re_dif43, re_dif61);
                    const E io_odd = FMA(KP801937735, io_odd_a, re_dif52);

                    ro[WS(os, 6)] = FNMS(KP974927912, ro_odd, ro_even);
                    ro[WS(os, 1)] = FMA(KP974927912, ro_odd, ro_even);
                    io[WS(os, 1)] = FMA(KP974927912, io_odd, io_even);
                    io[WS(os, 6)] = FNMS(KP974927912, io_odd, io_even);
               }

               /* Outputs 2 and 5. */
               {
                    const E ro_even_a = FNMS(KP356895867, re_sum16, re_sum34);
                    const E ro_even_b = FNMS(KP692021471, ro_even_a, re_sum25);
                    const E ro_even = FNMS(KP900968867, ro_even_b, re0);
                    const E ro_odd_a = FMA(KP554958132, im_dif25, im_dif34);
                    const E ro_odd = FNMS(KP801937735, ro_odd_a, im_dif16);
                    const E io_even_a = FNMS(KP356895867, im_sum16, im_sum34);
                    const E io_even_b = FNMS(KP692021471, io_even_a, im_sum25);
                    const E io_even = FNMS(KP900968867, io_even_b, im0);
                    const E io_odd_a = FMA(KP554958132, re_dif52, re_dif43);
                    const E io_odd = FNMS(KP801937735, io_odd_a, re_dif61);

                    ro[WS(os, 5)] = FNMS(KP974927912, ro_odd, ro_even);
                    ro[WS(os, 2)] = FMA(KP974927912, ro_odd, ro_even);
                    io[WS(os, 2)] = FMA(KP974927912, io_odd, io_even);
                    io[WS(os, 5)] = FNMS(KP974927912, io_odd, io_even);
               }

               /* Outputs 3 and 4. */
               {
                    const E ro_even_a = FNMS(KP356895867, re_sum34, re_sum25);
                    const E ro_even_b = FNMS(KP692021471, ro_even_a, re_sum16);
                    const E ro_even = FNMS(KP900968867, ro_even_b, re0);
                    const E ro_odd_a = FNMS(KP554958132, im_dif16, im_dif25);
                    const E ro_odd = FNMS(KP801937735, ro_odd_a, im_dif34);
                    const E io_even_a = FNMS(KP356895867, im_sum34, im_sum25);
                    const E io_even_b = FNMS(KP692021471, io_even_a, im_sum16);
                    const E io_even = FNMS(KP900968867, io_even_b, im0);
                    const E io_odd_a = FNMS(KP554958132, re_dif61, re_dif52);
                    const E io_odd = FNMS(KP801937735, io_odd_a, re_dif43);

                    ro[WS(os, 4)] = FNMS(KP974927912, ro_odd, ro_even);
                    ro[WS(os, 3)] = FMA(KP974927912, ro_odd, ro_even);
                    io[WS(os, 3)] = FMA(KP974927912, io_odd, io_even);
                    io[WS(os, 4)] = FNMS(KP974927912, io_odd, io_even);
               }
          }
     }
}
141

    
142
/*
 * Descriptor passed to kdft_register together with the FMA-variant n1_7.
 * Initializers are positional; only the fields whose meaning can be read
 * off this file are annotated as fact.
 */
static const kdft_desc desc = {
     7,                  /* same 7 as "-n 7" on the generator command line */
     "n1_7",             /* same string as the kernel's name */
     {18, 0, 42, 0},     /* 18 additions, 0 multiplications, 42 fused multiply/adds, as
                            quoted in this variant's header comment; the fourth
                            entry is not described there -- TODO confirm */
     &GENUS,
     0, 0, 0, 0          /* NOTE(review): meaning of these four fields is not visible
                            here; confirm against the kdft_desc declaration */
};
143

    
144
/*
 * Registration entry point for this codelet: passes the planner p, the
 * n1_7 kernel and its descriptor to kdft_register.
 */
void X(codelet_n1_7) (planner *p)
{
     X(kdft_register) (p, n1_7, &desc);
}
147

    
148
#else
149

    
150
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 7 -name n1_7 -include dft/scalar/n.h */
151

    
152
/*
153
 * This function contains 60 FP additions, 36 FP multiplications,
154
 * (or, 36 additions, 12 multiplications, 24 fused multiply/add),
155
 * 25 stack variables, 6 constants, and 28 memory accesses
156
 */
157
#include "dft/scalar/n.h"
158

    
159
/*
 * n1_7, variant built when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * Each of the v loop iterations reads seven values from ri and seven from
 * ii (index 0 and WS(is, 1..6)), writes seven values to ro and seven to io
 * (index 0 and WS(os, 1..6)), then advances ri/ii by ivs and ro/io by ovs.
 * The generator command line recorded with this variant is
 * "gen_notw ... -n 7", i.e. this is the size-7 kernel.
 *
 * As in the generated original, every input of an iteration is loaded
 * before the first store of that iteration, the arithmetic expressions are
 * kept term for term, and the stores are issued in the original order.
 *
 * NOTE(review): FMA, FNMS and FNMA are defined outside this file; the
 * usual reading is a*b + c, c - a*b and -(a*b) - c respectively -- confirm
 * against the project header that provides them.
 */
static void n1_7(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     DK(KP222520933, +0.222520933956314404288902564496794759466355569); /* numerically cos(3*pi/7) */
     DK(KP900968867, +0.900968867902419126236102319507445051165919162); /* numerically cos(pi/7) */
     DK(KP623489801, +0.623489801858733530525004884004239810632274731); /* numerically cos(2*pi/7) */
     DK(KP433883739, +0.433883739117558120475768332848358754609990728); /* numerically sin(pi/7) */
     DK(KP781831482, +0.781831482468029808708444526674057750232334519); /* numerically sin(2*pi/7) */
     DK(KP974927912, +0.974927912181823607018131682993931217232785801); /* numerically sin(3*pi/7) */
     {
          INT i;
          for (i = v; i > 0; --i, ri += ivs, ii += ivs, ro += ovs, io += ovs, MAKE_VOLATILE_STRIDE(28, is), MAKE_VOLATILE_STRIDE(28, os)) {
               /* Inputs, named by their index k in WS(is, k). */
               const E re0 = ri[0];
               const E im0 = ii[0];
               const E re1 = ri[WS(is, 1)];
               const E re6 = ri[WS(is, 6)];
               const E im1 = ii[WS(is, 1)];
               const E im6 = ii[WS(is, 6)];
               const E re2 = ri[WS(is, 2)];
               const E re5 = ri[WS(is, 5)];
               const E im2 = ii[WS(is, 2)];
               const E im5 = ii[WS(is, 5)];
               const E re3 = ri[WS(is, 3)];
               const E re4 = ri[WS(is, 4)];
               const E im3 = ii[WS(is, 3)];
               const E im4 = ii[WS(is, 4)];

               /* Sums and differences of the index pairs (1,6), (2,5), (3,4).
                  The operand order of each difference is significant. */
               const E re_sum16 = re1 + re6;
               const E re_dif61 = re6 - re1;
               const E im_dif16 = im1 - im6;
               const E im_sum16 = im1 + im6;
               const E re_sum25 = re2 + re5;
               const E re_dif52 = re5 - re2;
               const E im_dif25 = im2 - im5;
               const E im_sum25 = im2 + im5;
               const E re_sum34 = re3 + re4;
               const E re_dif43 = re4 - re3;
               const E im_dif34 = im3 - im4;
               const E im_sum34 = im3 + im4;

               /* Output 0: plain sum of all seven inputs, added left to right. */
               ro[0] = re0 + re_sum16 + re_sum25 + re_sum34;
               io[0] = im0 + im_sum16 + im_sum25 + im_sum34;

               /*
                * In each block below the "even" value is built from the pair
                * sums and the "odd" value from the pair differences; the two
                * outputs of a pair are even + odd and even - odd.
                */

               /* Outputs 2 and 5. */
               {
                    const E ro_odd = FNMS(KP781831482, im_dif34, KP974927912 * im_dif16) - (KP433883739 * im_dif25);
                    const E ro_even = FMA(KP623489801, re_sum34, re0) + FNMA(KP900968867, re_sum25, KP222520933 * re_sum16);
                    const E io_odd = FNMS(KP781831482, re_dif43, KP974927912 * re_dif61) - (KP433883739 * re_dif52);
                    const E io_even = FMA(KP623489801, im_sum34, im0) + FNMA(KP900968867, im_sum25, KP222520933 * im_sum16);

                    ro[WS(os, 5)] = ro_even - ro_odd;
                    ro[WS(os, 2)] = ro_even + ro_odd;
                    io[WS(os, 2)] = io_odd + io_even;
                    io[WS(os, 5)] = io_even - io_odd;
               }

               /* Outputs 1 and 6. */
               {
                    const E ro_odd = FMA(KP781831482, im_dif16, KP974927912 * im_dif25) + (KP433883739 * im_dif34);
                    const E ro_even = FMA(KP623489801, re_sum16, re0) + FNMA(KP900968867, re_sum34, KP222520933 * re_sum25);
                    const E io_odd = FMA(KP781831482, re_dif61, KP974927912 * re_dif52) + (KP433883739 * re_dif43);
                    const E io_even = FMA(KP623489801, im_sum16, im0) + FNMA(KP900968867, im_sum34, KP222520933 * im_sum25);

                    ro[WS(os, 6)] = ro_even - ro_odd;
                    ro[WS(os, 1)] = ro_even + ro_odd;
                    io[WS(os, 1)] = io_odd + io_even;
                    io[WS(os, 6)] = io_even - io_odd;
               }

               /* Outputs 3 and 4. */
               {
                    const E ro_odd = FMA(KP433883739, im_dif16, KP974927912 * im_dif34) - (KP781831482 * im_dif25);
                    const E ro_even = FMA(KP623489801, re_sum25, re0) + FNMA(KP222520933, re_sum34, KP900968867 * re_sum16);
                    const E io_odd = FMA(KP433883739, re_dif61, KP974927912 * re_dif43) - (KP781831482 * re_dif52);
                    const E io_even = FMA(KP623489801, im_sum25, im0) + FNMA(KP222520933, im_sum34, KP900968867 * im_sum16);

                    ro[WS(os, 4)] = ro_even - ro_odd;
                    ro[WS(os, 3)] = ro_even + ro_odd;
                    io[WS(os, 3)] = io_odd + io_even;
                    io[WS(os, 4)] = io_even - io_odd;
               }
          }
     }
}
244

    
245
/*
 * Descriptor passed to kdft_register together with the non-FMA n1_7.
 * Initializers are positional; only the fields whose meaning can be read
 * off this file are annotated as fact.
 */
static const kdft_desc desc = {
     7,                  /* same 7 as "-n 7" on the generator command line */
     "n1_7",             /* same string as the kernel's name */
     {36, 12, 24, 0},    /* 36 additions, 12 multiplications, 24 fused multiply/adds, as
                            quoted in this variant's header comment; the fourth
                            entry is not described there -- TODO confirm */
     &GENUS,
     0, 0, 0, 0          /* NOTE(review): meaning of these four fields is not visible
                            here; confirm against the kdft_desc declaration */
};
246

    
247
/*
 * Registration entry point for this codelet: passes the planner p, the
 * n1_7 kernel and its descriptor to kdft_register.
 */
void X(codelet_n1_7) (planner *p)
{
     X(kdft_register) (p, n1_7, &desc);
}
250

    
251
#endif