To check out this repository, please `hg clone` the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t1_4.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (5.06 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:12 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 4 -name t1_4 -include dft/scalar/t.h */
29

    
30
/*
31
 * This function contains 22 FP additions, 12 FP multiplications,
32
 * (or, 16 additions, 6 multiplications, 6 fused multiply/add),
33
 * 15 stack variables, 0 constants, and 16 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36

    
37
/*
 * t1_4 (FMA-preferring variant): in-place 4-point twiddle butterfly.
 *
 * For each m in [mb, me) the loop reads the four (ri, ii) pairs at offsets
 * 0, WS(rs, 1), WS(rs, 2) and WS(rs, 3), scales pairs 1..3 by the twiddle
 * pairs (W[0], W[1]), (W[2], W[3]) and (W[4], W[5]) respectively, combines
 * the four results using additions and subtractions only, and stores them
 * back to the same eight locations.  All eight loads of an iteration are
 * issued before the first store.
 *
 * Assuming FMA(a, b, c) evaluates a * b + c and FNMS(a, b, c) evaluates
 * c - a * b (both come from project headers -- confirm there), a scaled
 * pair is
 *      re' = Wre * re + Wim * im
 *      im' = Wre * im - Wim * re
 *
 * ri, ii  bases of the two arrays updated in place; each is advanced by ms
 *         per iteration (presumably real and imaginary parts -- confirm)
 * W       twiddle table, six values per iteration; the set used by
 *         iteration m starts at W[6 * m] relative to the pointer passed in
 * rs      stride handed to WS() to address the four points
 * mb, me  half-open iteration range
 * ms      step applied to ri and ii between iterations
 */
static void t1_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     /* Skip the twiddle sets that belong to iterations before mb. */
     W += mb * 6;

     for (m = mb; m < me; ++m, ri += ms, ii += ms, W += 6, MAKE_VOLATILE_STRIDE(8, rs)) {
          E re0, im0, re1, im1, re2, im2, re3, im3;

          /* Point 0 is used as loaded, without a twiddle. */
          re0 = ri[0];
          im0 = ii[0];

          /* Point 2, scaled by (W[2], W[3]). */
          {
               const E xr = ri[WS(rs, 2)];
               const E xi = ii[WS(rs, 2)];
               const E wr = W[2];
               const E wr_xr = wr * xr;
               const E wr_xi = wr * xi;
               const E wi = W[3];

               re2 = FMA(wi, xi, wr_xr);
               im2 = FNMS(wi, xr, wr_xi);
          }

          /* Point 1, scaled by (W[0], W[1]). */
          {
               const E xr = ri[WS(rs, 1)];
               const E xi = ii[WS(rs, 1)];
               const E wr = W[0];
               const E wr_xr = wr * xr;
               const E wr_xi = wr * xi;
               const E wi = W[1];

               re1 = FMA(wi, xi, wr_xr);
               im1 = FNMS(wi, xr, wr_xi);
          }

          /* Point 3, scaled by (W[4], W[5]). */
          {
               const E xr = ri[WS(rs, 3)];
               const E xi = ii[WS(rs, 3)];
               const E wr = W[4];
               const E wr_xr = wr * xr;
               const E wr_xi = wr * xi;
               const E wi = W[5];

               re3 = FMA(wi, xi, wr_xr);
               im3 = FNMS(wi, xr, wr_xi);
          }

          /* Slots 0 and 2 of ri: sum and difference of the (0,2) and (1,3) sums. */
          {
               const E even = re0 + re2;
               const E odd = re1 + re3;

               ri[WS(rs, 2)] = even - odd;
               ri[0] = even + odd;
          }

          /* Slots 0 and 2 of ii. */
          {
               const E odd = im1 + im3;
               const E even = im2 + im0;

               ii[0] = odd + even;
               ii[WS(rs, 2)] = even - odd;
          }

          /* Slots 1 and 3 of ri: the (0,2) difference of ri combined with the (1,3) difference of ii. */
          {
               const E diff = re0 - re2;
               const E cross = im1 - im3;

               ri[WS(rs, 3)] = diff - cross;
               ri[WS(rs, 1)] = diff + cross;
          }

          /* Slots 1 and 3 of ii: the (0,2) difference of ii combined with the (1,3) difference of ri. */
          {
               const E diff = im0 - im2;
               const E cross = re1 - re3;

               ii[WS(rs, 1)] = diff - cross;
               ii[WS(rs, 3)] = cross + diff;
          }
     }
}
103

    
104
static const tw_instr twinstr[] = {
105
     {TW_FULL, 0, 4},
106
     {TW_NEXT, 1, 0}
107
};
108

    
109
/*
 * Descriptor passed to the registration call for this codelet.
 * NOTE(review): field meanings are inferred from the initializer order and
 * should be confirmed against the ct_desc declaration.
 */
static const ct_desc desc = {
     4,                 /* presumably the transform size (generator was run with "-n 4") */
     "t1_4",            /* codelet name */
     twinstr,           /* twiddle instruction table */
     &GENUS,
     { 16, 6, 6, 0 },   /* presumably op counts: 16 additions, 6 multiplications, 6 fused multiply/adds, 0 other */
     0, 0, 0            /* remaining fields are zero */
};
110

    
111
/*
 * Registers the t1_4 kernel together with its descriptor on planner p by
 * calling X(kdft_dit_register).
 */
void X(codelet_t1_4)(planner *p)
{
     X(kdft_dit_register)(p, t1_4, &desc);
}
114
#else
115

    
116
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 4 -name t1_4 -include dft/scalar/t.h */
117

    
118
/*
119
 * This function contains 22 FP additions, 12 FP multiplications,
120
 * (or, 16 additions, 6 multiplications, 6 fused multiply/add),
121
 * 13 stack variables, 0 constants, and 16 memory accesses
122
 */
123
#include "dft/scalar/t.h"
124

    
125
/*
 * t1_4 (variant for targets that do not prefer FMA): in-place 4-point
 * twiddle butterfly.
 *
 * For each m in [mb, me) the loop reads the four (ri, ii) pairs at offsets
 * 0, WS(rs, 1), WS(rs, 2) and WS(rs, 3), scales pairs 1..3 by the twiddle
 * pairs (W[0], W[1]), (W[2], W[3]) and (W[4], W[5]) respectively, combines
 * the four results using additions and subtractions only, and stores them
 * back to the same eight locations.  All eight loads of an iteration are
 * issued before the first store.
 *
 * Assuming FMA(a, b, c) evaluates a * b + c and FNMS(a, b, c) evaluates
 * c - a * b (both come from project headers -- confirm there), a scaled
 * pair is
 *      re' = Wre * re + Wim * im
 *      im' = Wre * im - Wim * re
 *
 * ri, ii  bases of the two arrays updated in place; each is advanced by ms
 *         per iteration (presumably real and imaginary parts -- confirm)
 * W       twiddle table, six values per iteration; the set used by
 *         iteration m starts at W[6 * m] relative to the pointer passed in
 * rs      stride handed to WS() to address the four points
 * mb, me  half-open iteration range
 * ms      step applied to ri and ii between iterations
 */
static void t1_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     /* Skip the twiddle sets that belong to iterations before mb. */
     W += mb * 6;

     for (m = mb; m < me; ++m, ri += ms, ii += ms, W += 6, MAKE_VOLATILE_STRIDE(8, rs)) {
          E re0, im0, re1, im1, re2, im2, re3, im3;

          /* Point 0 is used as loaded, without a twiddle. */
          re0 = ri[0];
          im0 = ii[0];

          /* Point 2, scaled by (W[2], W[3]). */
          {
               const E xr = ri[WS(rs, 2)];
               const E xi = ii[WS(rs, 2)];
               const E wr = W[2];
               const E wi = W[3];

               re2 = FMA(wr, xr, wi * xi);
               im2 = FNMS(wi, xr, wr * xi);
          }

          /* Point 1, scaled by (W[0], W[1]). */
          {
               const E xr = ri[WS(rs, 1)];
               const E xi = ii[WS(rs, 1)];
               const E wr = W[0];
               const E wi = W[1];

               re1 = FMA(wr, xr, wi * xi);
               im1 = FNMS(wi, xr, wr * xi);
          }

          /* Point 3, scaled by (W[4], W[5]). */
          {
               const E xr = ri[WS(rs, 3)];
               const E xi = ii[WS(rs, 3)];
               const E wr = W[4];
               const E wi = W[5];

               re3 = FMA(wr, xr, wi * xi);
               im3 = FNMS(wi, xr, wr * xi);
          }

          /* Slots 0 and 2 of ri: sum and difference of the (0,2) and (1,3) sums. */
          {
               const E even = re0 + re2;
               const E odd = re1 + re3;

               ri[WS(rs, 2)] = even - odd;
               ri[0] = even + odd;
          }

          /* Slots 0 and 2 of ii. */
          {
               const E odd = im1 + im3;
               const E even = im2 + im0;

               ii[0] = odd + even;
               ii[WS(rs, 2)] = even - odd;
          }

          /* Slots 1 and 3 of ri: the (0,2) difference of ri combined with the (1,3) difference of ii. */
          {
               const E diff = re0 - re2;
               const E cross = im1 - im3;

               ri[WS(rs, 3)] = diff - cross;
               ri[WS(rs, 1)] = diff + cross;
          }

          /* Slots 1 and 3 of ii: the (0,2) difference of ii combined with the (1,3) difference of ri. */
          {
               const E diff = im0 - im2;
               const E cross = re1 - re3;

               ii[WS(rs, 1)] = diff - cross;
               ii[WS(rs, 3)] = cross + diff;
          }
     }
}
185

    
186
static const tw_instr twinstr[] = {
187
     {TW_FULL, 0, 4},
188
     {TW_NEXT, 1, 0}
189
};
190

    
191
/*
 * Descriptor passed to the registration call for this codelet.
 * NOTE(review): field meanings are inferred from the initializer order and
 * should be confirmed against the ct_desc declaration.
 */
static const ct_desc desc = {
     4,                 /* presumably the transform size (generator was run with "-n 4") */
     "t1_4",            /* codelet name */
     twinstr,           /* twiddle instruction table */
     &GENUS,
     { 16, 6, 6, 0 },   /* presumably op counts: 16 additions, 6 multiplications, 6 fused multiply/adds, 0 other */
     0, 0, 0            /* remaining fields are zero */
};
192

    
193
/*
 * Registers the t1_4 kernel together with its descriptor on planner p by
 * calling X(kdft_dit_register).
 */
void X(codelet_t1_4)(planner *p)
{
     X(kdft_dit_register)(p, t1_4, &desc);
}
196
#endif