To check out this repository, run `hg clone` on the following URL, or open the URL in EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds.
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / n1_5.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (6.11 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:10 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 5 -name n1_5 -include dft/scalar/n.h */
29

    
30
/*
31
 * This function contains 32 FP additions, 18 FP multiplications,
32
 * (or, 14 additions, 0 multiplications, 18 fused multiply/add),
33
 * 21 stack variables, 4 constants, and 20 memory accesses
34
 */
35
#include "dft/scalar/n.h"
36

    
37
/*
 * n1_5 (variant selected when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA
 * is defined): size-5 kernel produced by genfft's gen_notw with -fma.
 *
 * ri, ii   input arrays (read only); elements 0 and WS(is, 1..4) are read
 * ro, io   output arrays; elements 0 and WS(os, 1..4) are written
 * is, os   strides handed to WS() to locate input / output elements
 * v        number of loop iterations
 * ivs, ovs amounts added to the input / output pointers after each iteration
 *
 * Within one iteration all ten inputs are loaded into locals before the
 * first output is stored.  FMA, FNMS, DK, WS and MAKE_VOLATILE_STRIDE are
 * project macros whose definitions are not visible in this file; they are
 * used here exactly as the generator emitted them.
 */
static void n1_5(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DK(KP618033988, +0.618033988749894848204586834365638117720309180);
     {
          INT left;
          for (left = v; left > 0;
               left -= 1, ri += ivs, ii += ivs, ro += ovs, io += ovs,
               MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) {
               E re0 = ri[0];
               E im0 = ii[0];
               E re_sum, re_d23, re_gap, re_d14;
               E im_d14, im_gap, im_d23, im_sum;

               /* Real inputs 1..4: pair 1 with 4 and 2 with 3. */
               {
                    E re1 = ri[WS(is, 1)];
                    E re4 = ri[WS(is, 4)];
                    E re_s14 = re1 + re4;
                    E re2 = ri[WS(is, 2)];
                    E re3 = ri[WS(is, 3)];
                    E re_s23 = re2 + re3;

                    re_sum = re_s14 + re_s23;
                    re_d23 = re2 - re3;
                    re_gap = re_s14 - re_s23;
                    re_d14 = re1 - re4;
               }

               /* Imaginary inputs 1..4, paired the same way. */
               {
                    E im1 = ii[WS(is, 1)];
                    E im4 = ii[WS(is, 4)];
                    E im_s14 = im1 + im4;
                    E im2 = ii[WS(is, 2)];
                    E im3 = ii[WS(is, 3)];
                    E im_s23 = im2 + im3;

                    im_d14 = im1 - im4;
                    im_gap = im_s14 - im_s23;
                    im_d23 = im2 - im3;
                    im_sum = im_s14 + im_s23;
               }

               /* Output 0 is the plain sum of all five inputs. */
               ro[0] = re0 + re_sum;
               io[0] = im0 + im_sum;

               /* Real outputs 1..4: built from the real sums and the
                  imaginary differences. */
               {
                    E cross_a = FMA(KP618033988, im_d23, im_d14);
                    E cross_b = FNMS(KP618033988, im_d14, im_d23);
                    E centre = FNMS(KP250000000, re_sum, re0);
                    E upper = FMA(KP559016994, re_gap, centre);
                    E lower = FNMS(KP559016994, re_gap, centre);

                    ro[WS(os, 4)] = FNMS(KP951056516, cross_a, upper);
                    ro[WS(os, 3)] = FMA(KP951056516, cross_b, lower);
                    ro[WS(os, 1)] = FMA(KP951056516, cross_a, upper);
                    ro[WS(os, 2)] = FNMS(KP951056516, cross_b, lower);
               }

               /* Imaginary outputs 1..4: built from the imaginary sums and
                  the real differences. */
               {
                    E cross_a = FMA(KP618033988, re_d23, re_d14);
                    E cross_b = FNMS(KP618033988, re_d14, re_d23);
                    E centre = FNMS(KP250000000, im_sum, im0);
                    E upper = FMA(KP559016994, im_gap, centre);
                    E lower = FNMS(KP559016994, im_gap, centre);

                    io[WS(os, 1)] = FNMS(KP951056516, cross_a, upper);
                    io[WS(os, 3)] = FNMS(KP951056516, cross_b, lower);
                    io[WS(os, 4)] = FMA(KP951056516, cross_a, upper);
                    io[WS(os, 2)] = FMA(KP951056516, cross_b, lower);
               }
          }
     }
}
104

    
105
/* Descriptor handed to kdft_register together with the FMA-variant n1_5.
 * It carries the value 5 (presumably the transform size -- confirm against
 * the kdft_desc declaration), the name "n1_5", and the counts {14, 0, 18, 0},
 * whose first three entries match the "14 additions, 0 multiplications,
 * 18 fused multiply/add" totals quoted in the generator comment for this
 * variant.  NOTE(review): the meaning of the remaining fields is defined by
 * kdft_desc, which is not visible in this file. */
static const kdft_desc desc = { 5, "n1_5", {14, 0, 18, 0}, &GENUS, 0, 0, 0, 0 };
106

    
107
/* Entry point for this codelet: passes the n1_5 kernel and its descriptor
 * to kdft_register for planner p. */
void X(codelet_n1_5)(planner *p)
{
     X(kdft_register)(p, n1_5, &desc);
}
110

    
111
#else
112

    
113
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 5 -name n1_5 -include dft/scalar/n.h */
114

    
115
/*
116
 * This function contains 32 FP additions, 12 FP multiplications,
117
 * (or, 26 additions, 6 multiplications, 6 fused multiply/add),
118
 * 21 stack variables, 4 constants, and 20 memory accesses
119
 */
120
#include "dft/scalar/n.h"
121

    
122
/*
 * n1_5 (variant selected when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined): size-5 kernel produced by genfft's
 * gen_notw without -fma.
 *
 * ri, ii   input arrays (read only); elements 0 and WS(is, 1..4) are read
 * ro, io   output arrays; elements 0 and WS(os, 1..4) are written
 * is, os   strides handed to WS() to locate input / output elements
 * v        number of loop iterations
 * ivs, ovs amounts added to the input / output pointers after each iteration
 *
 * Within one iteration all ten inputs are loaded into locals before the
 * first output is stored.  FMA, FNMS, DK, WS and MAKE_VOLATILE_STRIDE are
 * project macros whose definitions are not visible in this file; they are
 * used here exactly as the generator emitted them.
 */
static void n1_5(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DK(KP587785252, +0.587785252292473129168705954639072768597652438);
     DK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DK(KP559016994, +0.559016994374947424102293417182819058860154590);
     {
          INT left;
          for (left = v; left > 0;
               left -= 1, ri += ivs, ii += ivs, ro += ovs, io += ovs,
               MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) {
               E re0 = ri[0];
               E im0 = ii[0];
               E re_sum, re_d23, re_spread, re_d14;
               E im_d14, im_sum, im_d23, im_spread;

               /* Real inputs 1..4: pair 1 with 4 and 2 with 3. */
               {
                    E re1 = ri[WS(is, 1)];
                    E re4 = ri[WS(is, 4)];
                    E re_s14 = re1 + re4;
                    E re2 = ri[WS(is, 2)];
                    E re3 = ri[WS(is, 3)];
                    E re_s23 = re2 + re3;

                    re_sum = re_s14 + re_s23;
                    re_d23 = re2 - re3;
                    re_spread = KP559016994 * (re_s14 - re_s23);
                    re_d14 = re1 - re4;
               }

               /* Imaginary inputs 1..4, paired the same way. */
               {
                    E im1 = ii[WS(is, 1)];
                    E im4 = ii[WS(is, 4)];
                    E im_s14 = im1 + im4;
                    E im2 = ii[WS(is, 2)];
                    E im3 = ii[WS(is, 3)];
                    E im_s23 = im2 + im3;

                    im_d14 = im1 - im4;
                    im_sum = im_s14 + im_s23;
                    im_d23 = im2 - im3;
                    im_spread = KP559016994 * (im_s14 - im_s23);
               }

               /* Output 0 is the plain sum of all five inputs. */
               ro[0] = re0 + re_sum;
               io[0] = im0 + im_sum;

               /* Real outputs 1..4: built from the real sums and the
                  imaginary differences. */
               {
                    E cross_a = FMA(KP951056516, im_d14, KP587785252 * im_d23);
                    E cross_b = FNMS(KP587785252, im_d14, KP951056516 * im_d23);
                    E centre = FNMS(KP250000000, re_sum, re0);
                    E upper = re_spread + centre;
                    E lower = centre - re_spread;

                    ro[WS(os, 4)] = upper - cross_a;
                    ro[WS(os, 3)] = lower + cross_b;
                    ro[WS(os, 1)] = upper + cross_a;
                    ro[WS(os, 2)] = lower - cross_b;
               }

               /* Imaginary outputs 1..4: built from the imaginary sums and
                  the real differences. */
               {
                    E cross_a = FMA(KP951056516, re_d14, KP587785252 * re_d23);
                    E cross_b = FNMS(KP587785252, re_d14, KP951056516 * re_d23);
                    E centre = FNMS(KP250000000, im_sum, im0);
                    E upper = im_spread + centre;
                    E lower = centre - im_spread;

                    io[WS(os, 1)] = upper - cross_a;
                    io[WS(os, 3)] = lower - cross_b;
                    io[WS(os, 4)] = cross_a + upper;
                    io[WS(os, 2)] = cross_b + lower;
               }
          }
     }
}
189

    
190
/* Descriptor handed to kdft_register together with the non-FMA n1_5.
 * It carries the value 5 (presumably the transform size -- confirm against
 * the kdft_desc declaration), the name "n1_5", and the counts {26, 6, 6, 0},
 * whose first three entries match the "26 additions, 6 multiplications,
 * 6 fused multiply/add" totals quoted in the generator comment for this
 * variant.  NOTE(review): the meaning of the remaining fields is defined by
 * kdft_desc, which is not visible in this file. */
static const kdft_desc desc = { 5, "n1_5", {26, 6, 6, 0}, &GENUS, 0, 0, 0, 0 };
191

    
192
/* Entry point for this codelet: passes the n1_5 kernel and its descriptor
 * to kdft_register for planner p. */
void X(codelet_n1_5)(planner *p)
{
     X(kdft_register)(p, n1_5, &desc);
}
195

    
196
#endif