To check out this repository, please run hg clone on the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / n1_8.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (6.83 KB)

/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
20

    
/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Thu May 24 08:04:10 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 8 -name n1_8 -include dft/scalar/n.h */
29

    
/*
 * This function contains 52 FP additions, 8 FP multiplications,
 * (or, 44 additions, 0 multiplications, 8 fused multiply/add),
 * 28 stack variables, 1 constant, and 32 memory accesses
 */
35
#include "dft/scalar/n.h"
36

    
37
/*
 * n1_8 (FMA build): per the generator command line (gen_notw, -n 8), this is
 * the size-8 "no twiddle" codelet.
 *
 * For each of `v` iterations it loads eight values from `ri` and eight from
 * `ii` at offsets WS(is, 0..7), combines them with additions, subtractions
 * and FMA/FNMS against the single constant KP707106781, and stores eight
 * values to `ro` and eight to `io` at offsets WS(os, 0..7).
 *
 *   ri, ii  input arrays (read only); presumably the real and imaginary
 *           parts of the input -- confirm against dft/codelet-dft.h
 *   ro, io  output arrays; presumably real and imaginary parts of the output
 *   is, os  strides passed to WS() to locate element k of input / output
 *   v       iteration count; the loop body does not run when v <= 0
 *   ivs     added to ri and ii after every iteration
 *   ovs     added to ro and io after every iteration
 *
 * NOTE(review): DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are project macros
 * defined outside this file.  Presumably FMA(a, b, c) = a * b + c and
 * FNMS(a, b, c) = c - a * b -- confirm in the codelet headers.
 */
static void n1_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     /* 0.7071067811865475... is numerically 1/sqrt(2). */
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
          INT i;
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) {
               E T3, Tn, Ti, TC, T6, TB, Tl, To, Td, TN, Tz, TH, Ta, TM, Tu;
               E TG;
               /* Stage 1: load all 16 inputs and form sums and differences of
                * the elements that sit four positions apart (0/4, 2/6, 7/3, 1/5). */
               {
                    E T1, T2, Tj, Tk;
                    /* Pair 0/4 of ri. */
                    T1 = ri[0];
                    T2 = ri[WS(is, 4)];
                    T3 = T1 + T2;
                    Tn = T1 - T2;
                    {
                         E Tg, Th, T4, T5;
                         /* Pair 0/4 of ii. */
                         Tg = ii[0];
                         Th = ii[WS(is, 4)];
                         Ti = Tg + Th;
                         TC = Tg - Th;
                         /* Pair 2/6 of ri. */
                         T4 = ri[WS(is, 2)];
                         T5 = ri[WS(is, 6)];
                         T6 = T4 + T5;
                         TB = T4 - T5;
                    }
                    /* Pair 2/6 of ii. */
                    Tj = ii[WS(is, 2)];
                    Tk = ii[WS(is, 6)];
                    Tl = Tj + Tk;
                    To = Tj - Tk;
                    {
                         E Tb, Tc, Tv, Tw, Tx, Ty;
                         /* Pair 7/3: differences are element 7 minus element 3. */
                         Tb = ri[WS(is, 7)];
                         Tc = ri[WS(is, 3)];
                         Tv = Tb - Tc;
                         Tw = ii[WS(is, 7)];
                         Tx = ii[WS(is, 3)];
                         Ty = Tw - Tx;
                         Td = Tb + Tc;
                         TN = Tw + Tx;
                         /* Mix the ri and ii differences; used only by the odd outputs. */
                         Tz = Tv - Ty;
                         TH = Tv + Ty;
                    }
                    {
                         E T8, T9, Tq, Tr, Ts, Tt;
                         /* Pair 1/5: differences are element 1 minus element 5. */
                         T8 = ri[WS(is, 1)];
                         T9 = ri[WS(is, 5)];
                         Tq = T8 - T9;
                         Tr = ii[WS(is, 1)];
                         Ts = ii[WS(is, 5)];
                         Tt = Tr - Ts;
                         Ta = T8 + T9;
                         TM = Tr + Ts;
                         /* Mix the ri and ii differences; used only by the odd outputs. */
                         Tu = Tq + Tt;
                         TG = Tt - Tq;
                    }
               }
               /* Outputs 0 and 4: built from the pairwise sums only.
                * ro[0] / io[0] end up as the plain sum of all eight inputs. */
               {
                    E T7, Te, TP, TQ;
                    T7 = T3 + T6;
                    Te = Ta + Td;
                    ro[WS(os, 4)] = T7 - Te;
                    ro[0] = T7 + Te;
                    TP = Ti + Tl;
                    TQ = TM + TN;
                    io[WS(os, 4)] = TP - TQ;
                    io[0] = TP + TQ;
               }
               /* Outputs 2 and 6: also built from the pairwise sums only. */
               {
                    E Tf, Tm, TL, TO;
                    Tf = Td - Ta;
                    Tm = Ti - Tl;
                    io[WS(os, 2)] = Tf + Tm;
                    io[WS(os, 6)] = Tm - Tf;
                    TL = T3 - T6;
                    TO = TM - TN;
                    ro[WS(os, 6)] = TL - TO;
                    ro[WS(os, 2)] = TL + TO;
               }
               /* Outputs 1 and 5: pairwise differences, scaled by KP707106781. */
               {
                    E Tp, TA, TJ, TK;
                    Tp = Tn + To;
                    TA = Tu + Tz;
                    ro[WS(os, 5)] = FNMS(KP707106781, TA, Tp);
                    ro[WS(os, 1)] = FMA(KP707106781, TA, Tp);
                    TJ = TC - TB;
                    TK = TG + TH;
                    io[WS(os, 5)] = FNMS(KP707106781, TK, TJ);
                    io[WS(os, 1)] = FMA(KP707106781, TK, TJ);
               }
               /* Outputs 3 and 7: pairwise differences, scaled by KP707106781. */
               {
                    E TD, TE, TF, TI;
                    TD = TB + TC;
                    TE = Tz - Tu;
                    io[WS(os, 7)] = FNMS(KP707106781, TE, TD);
                    io[WS(os, 3)] = FMA(KP707106781, TE, TD);
                    TF = Tn - To;
                    TI = TG - TH;
                    ro[WS(os, 7)] = FNMS(KP707106781, TI, TF);
                    ro[WS(os, 3)] = FMA(KP707106781, TI, TF);
               }
          }
     }
}
140

    
141
/* Descriptor passed to X(kdft_register) together with n1_8.  The leading 8
 * and the "n1_8" string match the generator options -n 8 -name n1_8, and
 * {44, 0, 8, 0} matches the counts stated in this branch's header comment
 * (44 additions, 0 multiplications, 8 fused multiply/add).
 * NOTE(review): the meaning of the fourth count, of GENUS and of the trailing
 * zero fields is defined by kdft_desc, which is not visible in this file. */
static const kdft_desc desc = { 8, "n1_8", {44, 0, 8, 0}, &GENUS, 0, 0, 0, 0 };
142

    
143
/* Hands the n1_8 kernel and its descriptor `desc` to X(kdft_register) for
 * planner `p`.  Does nothing else. */
void X(codelet_n1_8) (planner *p) {
     X(kdft_register) (p, n1_8, &desc);
}
146

    
147
#else
148

    
149
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 8 -name n1_8 -include dft/scalar/n.h */
150

    
/*
 * This function contains 52 FP additions, 4 FP multiplications,
 * (or, 52 additions, 4 multiplications, 0 fused multiply/add),
 * 28 stack variables, 1 constant, and 32 memory accesses
 */
156
#include "dft/scalar/n.h"
157

    
158
/*
 * n1_8 (non-FMA build): per the generator command line (gen_notw, -n 8), this
 * is the size-8 "no twiddle" codelet.
 *
 * For each of `v` iterations it loads eight values from `ri` and eight from
 * `ii` at offsets WS(is, 0..7), combines them with additions, subtractions
 * and four multiplications by the single constant KP707106781, and stores
 * eight values to `ro` and eight to `io` at offsets WS(os, 0..7).
 *
 *   ri, ii  input arrays (read only); presumably the real and imaginary
 *           parts of the input -- confirm against dft/codelet-dft.h
 *   ro, io  output arrays; presumably real and imaginary parts of the output
 *   is, os  strides passed to WS() to locate element k of input / output
 *   v       iteration count; the loop body does not run when v <= 0
 *   ivs     added to ri and ii after every iteration
 *   ovs     added to ro and io after every iteration
 *
 * NOTE(review): DK, WS and MAKE_VOLATILE_STRIDE are project macros defined
 * outside this file; their exact expansion is not visible here.
 */
static void n1_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     /* 0.7071067811865475... is numerically 1/sqrt(2). */
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
          INT i;
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(32, is), MAKE_VOLATILE_STRIDE(32, os)) {
               E T3, Tn, Ti, TC, T6, TB, Tl, To, Td, TN, Tz, TH, Ta, TM, Tu;
               E TG;
               /* Stage 1: load all 16 inputs and form sums and differences of
                * the elements that sit four positions apart (0/4, 2/6, 7/3, 1/5). */
               {
                    E T1, T2, Tj, Tk;
                    /* Pair 0/4 of ri. */
                    T1 = ri[0];
                    T2 = ri[WS(is, 4)];
                    T3 = T1 + T2;
                    Tn = T1 - T2;
                    {
                         E Tg, Th, T4, T5;
                         /* Pair 0/4 of ii. */
                         Tg = ii[0];
                         Th = ii[WS(is, 4)];
                         Ti = Tg + Th;
                         TC = Tg - Th;
                         /* Pair 2/6 of ri. */
                         T4 = ri[WS(is, 2)];
                         T5 = ri[WS(is, 6)];
                         T6 = T4 + T5;
                         TB = T4 - T5;
                    }
                    /* Pair 2/6 of ii. */
                    Tj = ii[WS(is, 2)];
                    Tk = ii[WS(is, 6)];
                    Tl = Tj + Tk;
                    To = Tj - Tk;
                    {
                         E Tb, Tc, Tv, Tw, Tx, Ty;
                         /* Pair 7/3: differences are element 7 minus element 3. */
                         Tb = ri[WS(is, 7)];
                         Tc = ri[WS(is, 3)];
                         Tv = Tb - Tc;
                         Tw = ii[WS(is, 7)];
                         Tx = ii[WS(is, 3)];
                         Ty = Tw - Tx;
                         Td = Tb + Tc;
                         TN = Tw + Tx;
                         /* Mix the ri and ii differences; used only by the odd outputs. */
                         Tz = Tv - Ty;
                         TH = Tv + Ty;
                    }
                    {
                         E T8, T9, Tq, Tr, Ts, Tt;
                         /* Pair 1/5: differences are element 1 minus element 5. */
                         T8 = ri[WS(is, 1)];
                         T9 = ri[WS(is, 5)];
                         Tq = T8 - T9;
                         Tr = ii[WS(is, 1)];
                         Ts = ii[WS(is, 5)];
                         Tt = Tr - Ts;
                         Ta = T8 + T9;
                         TM = Tr + Ts;
                         /* Mix the ri and ii differences; used only by the odd outputs. */
                         Tu = Tq + Tt;
                         TG = Tt - Tq;
                    }
               }
               /* Outputs 0 and 4: built from the pairwise sums only.
                * ro[0] / io[0] end up as the plain sum of all eight inputs. */
               {
                    E T7, Te, TP, TQ;
                    T7 = T3 + T6;
                    Te = Ta + Td;
                    ro[WS(os, 4)] = T7 - Te;
                    ro[0] = T7 + Te;
                    TP = Ti + Tl;
                    TQ = TM + TN;
                    io[WS(os, 4)] = TP - TQ;
                    io[0] = TP + TQ;
               }
               /* Outputs 2 and 6: also built from the pairwise sums only. */
               {
                    E Tf, Tm, TL, TO;
                    Tf = Td - Ta;
                    Tm = Ti - Tl;
                    io[WS(os, 2)] = Tf + Tm;
                    io[WS(os, 6)] = Tm - Tf;
                    TL = T3 - T6;
                    TO = TM - TN;
                    ro[WS(os, 6)] = TL - TO;
                    ro[WS(os, 2)] = TL + TO;
               }
               /* Outputs 1 and 5: pairwise differences, with one term scaled
                * by KP707106781 and then added / subtracted. */
               {
                    E Tp, TA, TJ, TK;
                    Tp = Tn + To;
                    TA = KP707106781 * (Tu + Tz);
                    ro[WS(os, 5)] = Tp - TA;
                    ro[WS(os, 1)] = Tp + TA;
                    TJ = TC - TB;
                    TK = KP707106781 * (TG + TH);
                    io[WS(os, 5)] = TJ - TK;
                    io[WS(os, 1)] = TJ + TK;
               }
               /* Outputs 3 and 7: pairwise differences, with one term scaled
                * by KP707106781 and then added / subtracted. */
               {
                    E TD, TE, TF, TI;
                    TD = TB + TC;
                    TE = KP707106781 * (Tz - Tu);
                    io[WS(os, 7)] = TD - TE;
                    io[WS(os, 3)] = TD + TE;
                    TF = Tn - To;
                    TI = KP707106781 * (TG - TH);
                    ro[WS(os, 7)] = TF - TI;
                    ro[WS(os, 3)] = TF + TI;
               }
          }
     }
}
261

    
262
/* Descriptor passed to X(kdft_register) together with n1_8.  The leading 8
 * and the "n1_8" string match the generator options -n 8 -name n1_8, and
 * {52, 4, 0, 0} matches the counts stated in this branch's header comment
 * (52 additions, 4 multiplications, 0 fused multiply/add).
 * NOTE(review): the meaning of the fourth count, of GENUS and of the trailing
 * zero fields is defined by kdft_desc, which is not visible in this file. */
static const kdft_desc desc = { 8, "n1_8", {52, 4, 0, 0}, &GENUS, 0, 0, 0, 0 };
263

    
264
/* Hands the n1_8 kernel and its descriptor `desc` to X(kdft_register) for
 * planner `p`.  Does nothing else. */
void X(codelet_n1_8) (planner *p) {
     X(kdft_register) (p, n1_8, &desc);
}
267

    
268
#endif