To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t1_3.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (4.81 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:12 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 3 -name t1_3 -include dft/scalar/t.h */
29

    
30
/*
31
 * This function contains 16 FP additions, 14 FP multiplications,
32
 * (or, 6 additions, 4 multiplications, 10 fused multiply/add),
33
 * 15 stack variables, 2 constants, and 12 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36

    
37
/*
 * t1_3, FMA-oriented variant (see the "Generated by" comment above: genfft
 * gen_twiddle, -n 3, -fma).
 *
 * For each m in [mb, me) the routine works in place on three complex values
 * stored as (ri[k*rs], ii[k*rs]) for k = 0, 1, 2 (indexing goes through WS):
 *
 *   1. Inputs 1 and 2 are each combined with a pair of values from W,
 *      producing  re*W[2k-2] + im*W[2k-1]  and  im*W[2k-2] - re*W[2k-1],
 *      i.e. the input times the complex conjugate of (W[2k-2] + i*W[2k-1]).
 *   2. The three values are then mixed with the constants 1/2 and
 *      0.866025... and written back over the inputs.
 *
 * Per iteration ri and ii advance by ms and W advances by 4; W is first
 * offset by mb * 4.  All loads of an iteration precede its stores.
 *
 * Every arithmetic statement keeps the operand order and statement
 * boundaries of the generated code so floating-point results are unchanged.
 */
static void t1_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     INT m;

     for (m = mb, W += mb * 4; m < me; ++m, ri += ms, ii += ms, W += 4, MAKE_VOLATILE_STRIDE(6, rs)) {
          E x0_re, x0_im;               /* element 0, used as loaded */
          E x1_re, x1_im, x2_re, x2_im; /* raw elements 1 and 2 */
          E w_re, w_im;                 /* current pair read from W */
          E part_re, part_im;           /* first product of each pair */
          E y1_re, y1_im, y2_re, y2_im; /* elements 1 and 2 after step 1 */
          E sum_re, sum_im;             /* y1 + y2 */
          E diff_im, diff_re;           /* y1_im - y2_im and y2_re - y1_re */
          E base_re, base_im;           /* x0 - (y1 + y2) / 2 */

          x0_re = ri[0];
          x0_im = ii[0];

          /* Element 1 combined with W[0], W[1]. */
          x1_re = ri[WS(rs, 1)];
          x1_im = ii[WS(rs, 1)];
          w_re = W[0];
          part_re = w_re * x1_re;
          part_im = w_re * x1_im;
          w_im = W[1];
          y1_re = FMA(w_im, x1_im, part_re);
          y1_im = FNMS(w_im, x1_re, part_im);

          /* Element 2 combined with W[2], W[3]. */
          x2_re = ri[WS(rs, 2)];
          x2_im = ii[WS(rs, 2)];
          w_re = W[2];
          part_re = w_re * x2_re;
          part_im = w_re * x2_im;
          w_im = W[3];
          y2_re = FMA(w_im, x2_im, part_re);
          y2_im = FNMS(w_im, x2_re, part_im);

          /* Real outputs. */
          diff_im = y1_im - y2_im;
          sum_re = y1_re + y2_re;
          base_re = FNMS(KP500000000, sum_re, x0_re);
          ri[0] = x0_re + sum_re;
          ri[WS(rs, 1)] = FMA(KP866025403, diff_im, base_re);
          ri[WS(rs, 2)] = FNMS(KP866025403, diff_im, base_re);

          /* Imaginary outputs. */
          diff_re = y2_re - y1_re;
          sum_im = y1_im + y2_im;
          base_im = FNMS(KP500000000, sum_im, x0_im);
          ii[0] = sum_im + x0_im;
          ii[WS(rs, 2)] = FNMS(KP866025403, diff_re, base_im);
          ii[WS(rs, 1)] = FMA(KP866025403, diff_re, base_im);
     }
}
87

    
88
static const tw_instr twinstr[] = {
89
     {TW_FULL, 0, 3},
90
     {TW_NEXT, 1, 0}
91
};
92

    
93
/*
 * Descriptor passed to kdft_dit_register together with t1_3.
 * The inner {6, 4, 10, 0} matches the generator's operation summary for this
 * variant: 6 additions, 4 multiplications, 10 fused multiply/adds.
 * NOTE(review): the meaning of the leading 3 (presumably the radix, cf. "-n 3")
 * and of the trailing zeros should be confirmed against the ct_desc definition.
 */
static const ct_desc desc = {
     3,
     "t1_3",
     twinstr,
     &GENUS,
     { 6, 4, 10, 0 },
     0,
     0,
     0
};
94

    
95
/*
 * Registration entry point: passes the t1_3 kernel and its descriptor to
 * kdft_dit_register for the given planner.
 */
void X(codelet_t1_3)(planner *p)
{
     X(kdft_dit_register)(p, t1_3, &desc);
}
98
#else
99

    
100
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 3 -name t1_3 -include dft/scalar/t.h */
101

    
102
/*
103
 * This function contains 16 FP additions, 12 FP multiplications,
104
 * (or, 10 additions, 6 multiplications, 6 fused multiply/add),
105
 * 15 stack variables, 2 constants, and 12 memory accesses
106
 */
107
#include "dft/scalar/t.h"
108

    
109
/*
 * t1_3, variant generated without -fma (see the "Generated by" comment
 * above: genfft gen_twiddle, -n 3).
 *
 * For each m in [mb, me) the routine works in place on three complex values
 * stored as (ri[k*rs], ii[k*rs]) for k = 0, 1, 2 (indexing goes through WS):
 *
 *   1. Inputs 1 and 2 are each combined with a pair of values from W,
 *      producing  re*W[2k-2] + im*W[2k-1]  and  im*W[2k-2] - re*W[2k-1],
 *      i.e. the input times the complex conjugate of (W[2k-2] + i*W[2k-1]).
 *   2. The three values are then mixed with the constants 1/2 and
 *      0.866025... and written back over the inputs.
 *
 * Per iteration ri and ii advance by ms and W advances by 4; W is first
 * offset by mb * 4.  All loads of an iteration precede its stores.
 *
 * Every arithmetic statement keeps the operand order and statement
 * boundaries of the generated code so floating-point results are unchanged.
 */
static void t1_3(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
     INT m;

     for (m = mb, W += mb * 4; m < me; ++m, ri += ms, ii += ms, W += 4, MAKE_VOLATILE_STRIDE(6, rs)) {
          E x0_re, x0_im;               /* element 0, used as loaded */
          E x1_re, x1_im, x2_re, x2_im; /* raw elements 1 and 2 */
          E w_re, w_im;                 /* current pair read from W */
          E y1_re, y1_im, y2_re, y2_im; /* elements 1 and 2 after step 1 */
          E sum_re, sum_im;             /* y1 + y2 */
          E base_re, base_im;           /* x0 - (y1 + y2) / 2 */
          E rot_re, rot_im;             /* 0.866... times a difference */

          x0_re = ri[0];
          x0_im = ii[0];

          /* Element 1 combined with W[0], W[1]. */
          x1_re = ri[WS(rs, 1)];
          x1_im = ii[WS(rs, 1)];
          w_re = W[0];
          w_im = W[1];
          y1_re = FMA(w_re, x1_re, w_im * x1_im);
          y1_im = FNMS(w_im, x1_re, w_re * x1_im);

          /* Element 2 combined with W[2], W[3]. */
          x2_re = ri[WS(rs, 2)];
          x2_im = ii[WS(rs, 2)];
          w_re = W[2];
          w_im = W[3];
          y2_re = FMA(w_re, x2_re, w_im * x2_im);
          y2_im = FNMS(w_im, x2_re, w_re * x2_im);

          /* Output 0 is the plain sum of the three values. */
          sum_re = y1_re + y2_re;
          sum_im = y1_im + y2_im;
          ri[0] = x0_re + sum_re;
          ii[0] = sum_im + x0_im;

          /* Real parts of outputs 1 and 2. */
          base_re = FNMS(KP500000000, sum_re, x0_re);
          rot_re = KP866025403 * (y1_im - y2_im);
          ri[WS(rs, 2)] = base_re - rot_re;
          ri[WS(rs, 1)] = base_re + rot_re;

          /* Imaginary parts of outputs 1 and 2. */
          rot_im = KP866025403 * (y2_re - y1_re);
          base_im = FNMS(KP500000000, sum_im, x0_im);
          ii[WS(rs, 1)] = rot_im + base_im;
          ii[WS(rs, 2)] = base_im - rot_im;
     }
}
155

    
156
static const tw_instr twinstr[] = {
157
     {TW_FULL, 0, 3},
158
     {TW_NEXT, 1, 0}
159
};
160

    
161
/*
 * Descriptor passed to kdft_dit_register together with t1_3.
 * The inner {10, 6, 6, 0} matches the generator's operation summary for this
 * variant: 10 additions, 6 multiplications, 6 fused multiply/adds.
 * NOTE(review): the meaning of the leading 3 (presumably the radix, cf. "-n 3")
 * and of the trailing zeros should be confirmed against the ct_desc definition.
 */
static const ct_desc desc = {
     3,
     "t1_3",
     twinstr,
     &GENUS,
     { 10, 6, 6, 0 },
     0,
     0,
     0
};
162

    
163
/*
 * Registration entry point: passes the t1_3 kernel and its descriptor to
 * kdft_dit_register for the given planner.
 */
void X(codelet_t1_3)(planner *p)
{
     X(kdft_dit_register)(p, t1_3, &desc);
}
166
#endif