To check out this repository, please `hg clone` the following URL, or open the URL in EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t2_4.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (5.13 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:19 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -name t2_4 -include dft/scalar/t.h */
29

    
30
/*
31
 * This function contains 24 FP additions, 16 FP multiplications,
32
 * (or, 16 additions, 8 multiplications, 8 fused multiply/add),
33
 * 21 stack variables, 0 constants, and 16 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36

    
37
/*
 * t2_4 (variant built when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA
 * is defined): in-place 4-point twiddle butterfly on split real/imaginary
 * data.
 *
 * For every m in [mb, me):
 *   - four reals are read from W; (W[0], W[1]) and (W[2], W[3]) are used as
 *     the (re, im) twiddle pairs for inputs 1 and 3, and a third pair for
 *     input 2 is derived from them;
 *   - inputs 1..3, located at ri/ii[WS(rs, k)], are each multiplied by the
 *     complex conjugate of their twiddle pair; input 0 is used as is;
 *   - the four results are combined with sums and differences and written
 *     back over the same eight locations.
 * After each iteration ri and ii advance by ms and W advances by 4.
 *
 * MAKE_VOLATILE_STRIDE is defined outside this file; it is kept in the loop
 * increment exactly where the generator placed it.
 */
static void t2_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     /* Four twiddle reals are consumed per iteration; skip to block mb. */
     W += mb * 4;

     for (m = mb; m < me; ++m, ri += ms, ii += ms, W += 4, MAKE_VOLATILE_STRIDE(8, rs)) {
          /* Stored twiddle pairs. */
          const E w1r = W[0];
          const E w1i = W[1];
          const E w3r = W[2];
          const E w3i = W[3];

          /*
           * Derived pair for input 2:
           *   w2r = w1r*w3r + w1i*w3i,  w2i = w1r*w3i - w1i*w3r
           * i.e. conj(w1) * w3 when the pairs are read as complex numbers.
           */
          const E w2r_seed = w1r * w3r;
          const E w2i_seed = w1r * w3i;
          const E w2r = FMA(w1i, w3i, w2r_seed);
          const E w2i = FNMS(w1i, w3r, w2i_seed);

          /* Input 0 is not multiplied by anything. */
          const E x0r = ri[0];
          const E x0i = ii[0];

          /* y2 = conj(w2) * x2 */
          const E x2r = ri[WS(rs, 2)];
          const E x2i = ii[WS(rs, 2)];
          const E y2r_seed = w2r * x2r;
          const E y2i_seed = w2r * x2i;
          const E y2r = FMA(w2i, x2i, y2r_seed);
          const E y2i = FNMS(w2i, x2r, y2i_seed);

          /* y1 = conj(w1) * x1 */
          const E x1r = ri[WS(rs, 1)];
          const E x1i = ii[WS(rs, 1)];
          const E y1r_seed = w1r * x1r;
          const E y1i_seed = w1r * x1i;
          const E y1r = FMA(w1i, x1i, y1r_seed);
          const E y1i = FNMS(w1i, x1r, y1i_seed);

          /* y3 = conj(w3) * x3 */
          const E x3r = ri[WS(rs, 3)];
          const E x3i = ii[WS(rs, 3)];
          const E y3r_seed = w3r * x3r;
          const E y3i_seed = w3r * x3i;
          const E y3r = FMA(w3i, x3i, y3r_seed);
          const E y3i = FNMS(w3i, x3r, y3i_seed);

          /* Pairwise sums and differences feeding the outputs. */
          const E sum02r = x0r + y2r;
          const E sum13r = y1r + y3r;
          const E sum13i = y1i + y3i;
          const E sum02i = y2i + x0i;
          const E dif02r = x0r - y2r;
          const E dif13i = y1i - y3i;
          const E dif02i = x0i - y2i;
          const E dif13r = y1r - y3r;

          /*
           * Every load above happens before the first store below, so the
           * in-place update never reads a value it has already overwritten.
           */
          ri[WS(rs, 2)] = sum02r - sum13r;
          ri[0] = sum02r + sum13r;
          ii[0] = sum13i + sum02i;
          ii[WS(rs, 2)] = sum02i - sum13i;

          ri[WS(rs, 3)] = dif02r - dif13i;
          ri[WS(rs, 1)] = dif02r + dif13i;
          ii[WS(rs, 1)] = dif02i - dif13r;
          ii[WS(rs, 3)] = dif13r + dif02i;
     }
}
108

    
109
static const tw_instr twinstr[] = {
110
     {TW_CEXP, 0, 1},
111
     {TW_CEXP, 0, 3},
112
     {TW_NEXT, 1, 0}
113
};
114

    
115
/*
 * Descriptor passed to the planner together with t2_4.
 * NOTE(review): field meanings are inferred from the values and should be
 * confirmed against the ct_desc definition.
 */
static const ct_desc desc = {
     4,                  /* presumably the transform size handled by t2_4 */
     "t2_4",             /* codelet name */
     twinstr,            /* twiddle-instruction table defined in this file */
     &GENUS,
     { 16, 8, 8, 0 },    /* first three match the add/mul/fma counts quoted
                            in the generator's comment for this codelet */
     0,
     0,
     0
};
116

    
117
/*
 * Registration entry point: passes the t2_4 kernel and its descriptor to
 * the planner `p` through X(kdft_dit_register).
 */
void X(codelet_t2_4)(planner *p)
{
     X(kdft_dit_register)(p, t2_4, &desc);
}
120
#else
121

    
122
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 4 -name t2_4 -include dft/scalar/t.h */
123

    
124
/*
125
 * This function contains 24 FP additions, 16 FP multiplications,
126
 * (or, 16 additions, 8 multiplications, 8 fused multiply/add),
127
 * 21 stack variables, 0 constants, and 16 memory accesses
128
 */
129
#include "dft/scalar/t.h"
130

    
131
/*
 * t2_4 (variant built when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined): in-place 4-point twiddle butterfly
 * on split real/imaginary data.
 *
 * For every m in [mb, me):
 *   - four reals are read from W; (W[0], W[1]) and (W[2], W[3]) are used as
 *     the (re, im) twiddle pairs for inputs 1 and 3, and a third pair for
 *     input 2 is derived from them;
 *   - inputs 1..3, located at ri/ii[WS(rs, k)], are each multiplied by the
 *     complex conjugate of their twiddle pair; input 0 is used as is;
 *   - the four results are combined with sums and differences and written
 *     back over the same eight locations.
 * After each iteration ri and ii advance by ms and W advances by 4.
 *
 * MAKE_VOLATILE_STRIDE is defined outside this file; it is kept in the loop
 * increment exactly where the generator placed it.
 */
static void t2_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     /* Four twiddle reals are consumed per iteration; skip to block mb. */
     W += mb * 4;

     for (m = mb; m < me; ++m, ri += ms, ii += ms, W += 4, MAKE_VOLATILE_STRIDE(8, rs)) {
          /* Stored twiddle pairs. */
          const E w1r = W[0];
          const E w1i = W[1];
          const E w3r = W[2];
          const E w3i = W[3];

          /*
           * Derived pair for input 2:
           *   w2r = w1r*w3r + w1i*w3i,  w2i = w1r*w3i - w1i*w3r
           * i.e. conj(w1) * w3 when the pairs are read as complex numbers.
           */
          const E w2r = FMA(w1r, w3r, w1i * w3i);
          const E w2i = FNMS(w1i, w3r, w1r * w3i);

          /* Input 0 is not multiplied by anything. */
          const E x0r = ri[0];
          const E x0i = ii[0];

          /* y2 = conj(w2) * x2 */
          const E x2r = ri[WS(rs, 2)];
          const E x2i = ii[WS(rs, 2)];
          const E y2r = FMA(w2r, x2r, w2i * x2i);
          const E y2i = FNMS(w2i, x2r, w2r * x2i);

          /* y1 = conj(w1) * x1 */
          const E x1r = ri[WS(rs, 1)];
          const E x1i = ii[WS(rs, 1)];
          const E y1r = FMA(w1r, x1r, w1i * x1i);
          const E y1i = FNMS(w1i, x1r, w1r * x1i);

          /* y3 = conj(w3) * x3 */
          const E x3r = ri[WS(rs, 3)];
          const E x3i = ii[WS(rs, 3)];
          const E y3r = FMA(w3r, x3r, w3i * x3i);
          const E y3i = FNMS(w3i, x3r, w3r * x3i);

          /* Pairwise sums and differences feeding the outputs. */
          const E sum02r = x0r + y2r;
          const E sum13r = y1r + y3r;
          const E sum13i = y1i + y3i;
          const E sum02i = y2i + x0i;
          const E dif02r = x0r - y2r;
          const E dif13i = y1i - y3i;
          const E dif02i = x0i - y2i;
          const E dif13r = y1r - y3r;

          /*
           * Every load above happens before the first store below, so the
           * in-place update never reads a value it has already overwritten.
           */
          ri[WS(rs, 2)] = sum02r - sum13r;
          ri[0] = sum02r + sum13r;
          ii[0] = sum13i + sum02i;
          ii[WS(rs, 2)] = sum02i - sum13i;

          ri[WS(rs, 3)] = dif02r - dif13i;
          ri[WS(rs, 1)] = dif02r + dif13i;
          ii[WS(rs, 1)] = dif02i - dif13r;
          ii[WS(rs, 3)] = dif13r + dif02i;
     }
}
188

    
189
static const tw_instr twinstr[] = {
190
     {TW_CEXP, 0, 1},
191
     {TW_CEXP, 0, 3},
192
     {TW_NEXT, 1, 0}
193
};
194

    
195
/*
 * Descriptor passed to the planner together with t2_4.
 * NOTE(review): field meanings are inferred from the values and should be
 * confirmed against the ct_desc definition.
 */
static const ct_desc desc = {
     4,                  /* presumably the transform size handled by t2_4 */
     "t2_4",             /* codelet name */
     twinstr,            /* twiddle-instruction table defined in this file */
     &GENUS,
     { 16, 8, 8, 0 },    /* first three match the add/mul/fma counts quoted
                            in the generator's comment for this codelet */
     0,
     0,
     0
};
196

    
197
/*
 * Registration entry point: passes the t2_4 kernel and its descriptor to
 * the planner `p` through X(kdft_dit_register).
 */
void X(codelet_t2_4)(planner *p)
{
     X(kdft_dit_register)(p, t2_4, &desc);
}
200
#endif