To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / n1_6.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (6.09 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:10 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 6 -name n1_6 -include dft/scalar/n.h */
29

    
30
/*
31
 * This function contains 36 FP additions, 12 FP multiplications,
32
 * (or, 24 additions, 0 multiplications, 12 fused multiply/add),
33
 * 23 stack variables, 2 constants, and 24 memory accesses
34
 */
35
#include "dft/scalar/n.h"
36

    
37
/*
 * 6-point complex DFT kernel (variant built around the FMA/FNMS macros).
 *
 * For each of the v iterations, six complex inputs are read: real parts
 * from ri[WS(is, k)] and imaginary parts from ii[WS(is, k)], k = 0..5.
 * Six complex outputs are stored to ro[WS(os, k)] / io[WS(os, k)], where
 * output k is sum_j x[j] * exp(-2*pi*i*j*k/6).  After every iteration the
 * input pointers advance by ivs and the output pointers by ovs.
 *
 * Within one iteration every input is loaded before any output is stored.
 */
static void n1_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     INT i;
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);

     for (i = v; i > 0; --i, ri += ivs, ii += ivs, ro += ovs, io += ovs, MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) {
          /* Fetch the six complex samples. */
          E re0 = ri[0];
          E re3 = ri[WS(is, 3)];
          E im0 = ii[0];
          E im3 = ii[WS(is, 3)];
          E re2 = ri[WS(is, 2)];
          E re5 = ri[WS(is, 5)];
          E re4 = ri[WS(is, 4)];
          E re1 = ri[WS(is, 1)];
          E im2 = ii[WS(is, 2)];
          E im5 = ii[WS(is, 5)];
          E im4 = ii[WS(is, 4)];
          E im1 = ii[WS(is, 1)];

          /* Sum/difference of the sample pairs (0,3), (2,5) and (4,1). */
          E re_dif03 = re0 - re3;
          E re_sum03 = re0 + re3;
          E im_dif03 = im0 - im3;
          E im_sum03 = im0 + im3;
          E re_dif25 = re2 - re5;
          E re_sum25 = re2 + re5;
          E re_dif41 = re4 - re1;
          E re_sum41 = re4 + re1;
          E im_dif25 = im2 - im5;
          E im_sum25 = im2 + im5;
          E im_dif41 = im4 - im1;
          E im_sum41 = im4 + im1;

          /* Combine the (2,5) and (4,1) pairs. */
          E re_dif_all = re_dif25 + re_dif41;
          E re_sum_all = re_sum25 + re_sum41;
          E im_dif_all = im_dif25 + im_dif41;
          E im_sum_all = im_sum25 + im_sum41;

          /* Shared terms of outputs 1 and 5 (built from the differences). */
          E re_odd_base = FNMS(KP500000000, re_dif_all, re_dif03);
          E re_odd_twist = im_dif25 - im_dif41;
          E im_odd_base = FNMS(KP500000000, im_dif_all, im_dif03);
          E im_odd_twist = re_dif41 - re_dif25;

          /* Shared terms of outputs 2 and 4 (built from the sums). */
          E re_even_base = FNMS(KP500000000, re_sum_all, re_sum03);
          E re_even_twist = im_sum25 - im_sum41;
          E im_even_base = FNMS(KP500000000, im_sum_all, im_sum03);
          E im_even_twist = re_sum41 - re_sum25;

          /* Outputs 3 and 0 need no multiplication. */
          ro[WS(os, 3)] = re_dif03 + re_dif_all;
          io[WS(os, 3)] = im_dif03 + im_dif_all;
          ro[0] = re_sum03 + re_sum_all;
          io[0] = im_sum03 + im_sum_all;

          /* Outputs 5 and 1. */
          ro[WS(os, 5)] = FNMS(KP866025403, re_odd_twist, re_odd_base);
          ro[WS(os, 1)] = FMA(KP866025403, re_odd_twist, re_odd_base);
          io[WS(os, 1)] = FMA(KP866025403, im_odd_twist, im_odd_base);
          io[WS(os, 5)] = FNMS(KP866025403, im_odd_twist, im_odd_base);

          /* Outputs 2 and 4. */
          ro[WS(os, 2)] = FNMS(KP866025403, re_even_twist, re_even_base);
          ro[WS(os, 4)] = FMA(KP866025403, re_even_twist, re_even_base);
          io[WS(os, 2)] = FNMS(KP866025403, im_even_twist, im_even_base);
          io[WS(os, 4)] = FMA(KP866025403, im_even_twist, im_even_base);
     }
}
112

    
113
/*
 * Descriptor passed to kdft_register alongside n1_6 (FMA variant).
 * The literal 6 and the string "n1_6" match the generator's "-n 6 -name n1_6"
 * options, and {24, 0, 12, 0} mirrors this variant's op-count comment
 * (24 additions, 0 multiplications, 12 fused multiply/add).
 * NOTE(review): the exact field meanings, including &GENUS and the four
 * trailing zeros, are defined by kdft_desc, which is declared elsewhere --
 * confirm against that declaration.
 */
static const kdft_desc desc = { 6, "n1_6", {24, 0, 12, 0}, &GENUS, 0, 0, 0, 0 };
114

    
115
/*
 * Registration entry point: hands the n1_6 kernel and its descriptor to
 * planner p through kdft_register.
 */
void X(codelet_n1_6) (planner *p)
{
     X(kdft_register) (p, n1_6, &desc);
}
118

    
119
#else
120

    
121
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 6 -name n1_6 -include dft/scalar/n.h */
122

    
123
/*
124
 * This function contains 36 FP additions, 8 FP multiplications,
125
 * (or, 32 additions, 4 multiplications, 4 fused multiply/add),
126
 * 23 stack variables, 2 constants, and 24 memory accesses
127
 */
128
#include "dft/scalar/n.h"
129

    
130
/*
 * 6-point complex DFT kernel (variant using explicit multiplies for the
 * KP866025403 factor; only the KP500000000 terms go through FNMS).
 *
 * For each of the v iterations, six complex inputs are read: real parts
 * from ri[WS(is, k)] and imaginary parts from ii[WS(is, k)], k = 0..5.
 * Six complex outputs are stored to ro[WS(os, k)] / io[WS(os, k)], where
 * output k is sum_j x[j] * exp(-2*pi*i*j*k/6).  After every iteration the
 * input pointers advance by ivs and the output pointers by ovs.
 *
 * Within one iteration every input is loaded before any output is stored.
 */
static void n1_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     INT i;
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);

     for (i = v; i > 0; --i, ri += ivs, ii += ivs, ro += ovs, io += ovs, MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) {
          /* Fetch the six complex samples. */
          E re0 = ri[0];
          E re3 = ri[WS(is, 3)];
          E im0 = ii[0];
          E im3 = ii[WS(is, 3)];
          E re2 = ri[WS(is, 2)];
          E re5 = ri[WS(is, 5)];
          E re4 = ri[WS(is, 4)];
          E re1 = ri[WS(is, 1)];
          E im2 = ii[WS(is, 2)];
          E im5 = ii[WS(is, 5)];
          E im4 = ii[WS(is, 4)];
          E im1 = ii[WS(is, 1)];

          /* Sum/difference of the sample pairs (0,3), (2,5) and (4,1). */
          E re_dif03 = re0 - re3;
          E re_sum03 = re0 + re3;
          E im_dif03 = im0 - im3;
          E im_sum03 = im0 + im3;
          E re_dif25 = re2 - re5;
          E re_sum25 = re2 + re5;
          E re_dif41 = re4 - re1;
          E re_sum41 = re4 + re1;
          E im_dif25 = im2 - im5;
          E im_sum25 = im2 + im5;
          E im_dif41 = im4 - im1;
          E im_sum41 = im4 + im1;

          /* Combine the (2,5) and (4,1) pairs. */
          E re_dif_all = re_dif25 + re_dif41;
          E re_sum_all = re_sum25 + re_sum41;
          E im_dif_all = im_dif25 + im_dif41;
          E im_sum_all = im_sum25 + im_sum41;

          /* Shared terms of outputs 1 and 5 (built from the differences). */
          E re_odd_base = FNMS(KP500000000, re_dif_all, re_dif03);
          E re_odd_twist = KP866025403 * (im_dif25 - im_dif41);
          E im_odd_twist = KP866025403 * (re_dif41 - re_dif25);
          E im_odd_base = FNMS(KP500000000, im_dif_all, im_dif03);

          /* Shared terms of outputs 2 and 4 (built from the sums). */
          E re_even_base = FNMS(KP500000000, re_sum_all, re_sum03);
          E re_even_twist = KP866025403 * (im_sum25 - im_sum41);
          E im_even_base = FNMS(KP500000000, im_sum_all, im_sum03);
          E im_even_twist = KP866025403 * (re_sum41 - re_sum25);

          /* Outputs 3 and 0 need no multiplication. */
          ro[WS(os, 3)] = re_dif03 + re_dif_all;
          io[WS(os, 3)] = im_dif03 + im_dif_all;
          ro[0] = re_sum03 + re_sum_all;
          io[0] = im_sum03 + im_sum_all;

          /* Outputs 5 and 1. */
          ro[WS(os, 5)] = re_odd_base - re_odd_twist;
          ro[WS(os, 1)] = re_odd_base + re_odd_twist;
          io[WS(os, 1)] = im_odd_twist + im_odd_base;
          io[WS(os, 5)] = im_odd_base - im_odd_twist;

          /* Outputs 2 and 4. */
          ro[WS(os, 2)] = re_even_base - re_even_twist;
          ro[WS(os, 4)] = re_even_base + re_even_twist;
          io[WS(os, 2)] = im_even_base - im_even_twist;
          io[WS(os, 4)] = im_even_twist + im_even_base;
     }
}
205

    
206
/*
 * Descriptor passed to kdft_register alongside n1_6 (non-FMA variant).
 * The literal 6 and the string "n1_6" match the generator's "-n 6 -name n1_6"
 * options, and {32, 4, 4, 0} mirrors this variant's op-count comment
 * (32 additions, 4 multiplications, 4 fused multiply/add).
 * NOTE(review): the exact field meanings, including &GENUS and the four
 * trailing zeros, are defined by kdft_desc, which is declared elsewhere --
 * confirm against that declaration.
 */
static const kdft_desc desc = { 6, "n1_6", {32, 4, 4, 0}, &GENUS, 0, 0, 0, 0 };
207

    
208
/*
 * Registration entry point: hands the n1_6 kernel and its descriptor to
 * planner p through kdft_register.
 */
void X(codelet_n1_6) (planner *p)
{
     X(kdft_register) (p, n1_6, &desc);
}
211

    
212
#endif