To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / q1_2.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (4.54 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:29 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twidsq.native -fma -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 2 -name q1_2 -include dft/scalar/q.h */
29

    
30
/*
31
 * This function contains 12 FP additions, 8 FP multiplications,
32
 * (or, 8 additions, 4 multiplications, 4 fused multiply/add),
33
 * 17 stack variables, 0 constants, and 16 memory accesses
34
 */
35
#include "dft/scalar/q.h"
36

    
37
/*
 * q1_2 (variant compiled when ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA
 * is defined).
 *
 * For every m in [mb, me) the loop body works on a 2x2 group of complex
 * values whose real parts live in rio and imaginary parts in iio, addressed
 * by the four offset combinations of WS(rs, 1) and WS(vs, 1):
 *
 *   - all eight inputs are loaded before any store, because the results are
 *     written back over the same locations;
 *   - the sum of the pair at vs-offset 0 is stored at offset 0, and the sum
 *     of the pair at vs-offset WS(vs, 1) is stored at offset WS(rs, 1);
 *   - the difference of each pair is combined with the twiddle W[0], W[1]
 *     and stored at WS(vs, 1) and WS(vs, 1) + WS(rs, 1) respectively.
 *     Assuming FMA(a, b, c) is a*b + c and FNMS(a, b, c) is c - a*b (the
 *     macros are defined outside this file -- confirm), that is
 *         re' = W[0]*re + W[1]*im,   im' = W[0]*im - W[1]*re.
 *
 * W is first offset by mb * 2; after each iteration rio and iio advance by
 * ms and W advances by 2.
 */
static void q1_2(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
{
     INT m;

     W += mb * 2;
     for (m = mb; m < me; ++m, rio += ms, iio += ms, W += 2, MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
          /* Inputs, named <part><vs index><rs index>. */
          E re00 = rio[0];
          E re01 = rio[WS(rs, 1)];
          E im00 = iio[0];
          E im01 = iio[WS(rs, 1)];
          E re10 = rio[WS(vs, 1)];
          E re11 = rio[WS(vs, 1) + WS(rs, 1)];
          E im10 = iio[WS(vs, 1)];
          E im11 = iio[WS(vs, 1) + WS(rs, 1)];

          /* Pairwise differences along rs, one per vs index. */
          E diff_re0 = re00 - re01;
          E diff_im0 = im00 - im01;
          E diff_re1 = re10 - re11;
          E diff_im1 = im10 - im11;

          /* Pairwise sums; the vs-index-1 sum lands at rs offset 1. */
          rio[0] = re00 + re01;
          iio[0] = im00 + im01;
          rio[WS(rs, 1)] = re10 + re11;
          iio[WS(rs, 1)] = im10 + im11;

          /* Twiddled difference of the vs-index-1 pair. */
          {
               E w_re, w_im, prod_re, prod_im;

               w_re = W[0];
               prod_re = w_re * diff_re1;
               prod_im = w_re * diff_im1;
               w_im = W[1];
               rio[WS(vs, 1) + WS(rs, 1)] = FMA(w_im, diff_im1, prod_re);
               iio[WS(vs, 1) + WS(rs, 1)] = FNMS(w_im, diff_re1, prod_im);
          }

          /*
           * Twiddled difference of the vs-index-0 pair.  The twiddle is
           * deliberately read from W again rather than reused (the generator
           * was invoked with -reload-twiddle).
           */
          {
               E w_re, w_im, prod_re, prod_im;

               w_re = W[0];
               prod_re = w_re * diff_re0;
               prod_im = w_re * diff_im0;
               w_im = W[1];
               rio[WS(vs, 1)] = FMA(w_im, diff_im0, prod_re);
               iio[WS(vs, 1)] = FNMS(w_im, diff_re0, prod_im);
          }
     }
}
80

    
81
static const tw_instr twinstr[] = {
82
     {TW_FULL, 0, 2},
83
     {TW_NEXT, 1, 0}
84
};
85

    
86
/* Descriptor passed to the planner together with q1_2 at registration. */
static const ct_desc desc = {
     2,                 /* same value as the generator's "-n 2" option */
     "q1_2",            /* codelet name */
     twinstr,           /* twiddle-instruction table */
     &GENUS,
     { 8, 4, 4, 0 },    /* presumably adds, muls, fmas, other (matches the
                           operation counts in the header comment) -- confirm
                           against ct_desc */
     0,
     0,
     0
};
87

    
88
/* Registers the q1_2 codelet and its descriptor with planner p. */
void X(codelet_q1_2)(planner *p)
{
     X(kdft_difsq_register)(p, q1_2, &desc);
}
91
#else
92

    
93
/* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 2 -name q1_2 -include dft/scalar/q.h */
94

    
95
/*
96
 * This function contains 12 FP additions, 8 FP multiplications,
97
 * (or, 8 additions, 4 multiplications, 4 fused multiply/add),
98
 * 17 stack variables, 0 constants, and 16 memory accesses
99
 */
100
#include "dft/scalar/q.h"
101

    
102
/*
 * q1_2 (variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * For every m in [mb, me) the loop body works on a 2x2 group of complex
 * values whose real parts live in rio and imaginary parts in iio, addressed
 * by the four offset combinations of WS(rs, 1) and WS(vs, 1):
 *
 *   - all eight inputs are loaded before any store, because the results are
 *     written back over the same locations;
 *   - the sum of the pair at vs-offset 0 is stored at offset 0, and the sum
 *     of the pair at vs-offset WS(vs, 1) is stored at offset WS(rs, 1);
 *   - the difference of each pair is combined with the twiddle W[0], W[1]
 *     and stored at WS(vs, 1) and WS(vs, 1) + WS(rs, 1) respectively.
 *     Assuming FMA(a, b, c) is a*b + c and FNMS(a, b, c) is c - a*b (the
 *     macros are defined outside this file -- confirm), that is
 *         re' = W[0]*re + W[1]*im,   im' = W[0]*im - W[1]*re.
 *
 * W is first offset by mb * 2; after each iteration rio and iio advance by
 * ms and W advances by 2.
 */
static void q1_2(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
{
     INT m;

     W += mb * 2;
     for (m = mb; m < me; ++m, rio += ms, iio += ms, W += 2, MAKE_VOLATILE_STRIDE(4, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
          /* Inputs, named <part><vs index><rs index>. */
          E re00 = rio[0];
          E re01 = rio[WS(rs, 1)];
          E im00 = iio[0];
          E im01 = iio[WS(rs, 1)];
          E re10 = rio[WS(vs, 1)];
          E re11 = rio[WS(vs, 1) + WS(rs, 1)];
          E im10 = iio[WS(vs, 1)];
          E im11 = iio[WS(vs, 1) + WS(rs, 1)];

          /* Pairwise differences along rs, one per vs index. */
          E diff_re0 = re00 - re01;
          E diff_im0 = im00 - im01;
          E diff_re1 = re10 - re11;
          E diff_im1 = im10 - im11;

          /* Pairwise sums; the vs-index-1 sum lands at rs offset 1. */
          rio[0] = re00 + re01;
          iio[0] = im00 + im01;
          rio[WS(rs, 1)] = re10 + re11;
          iio[WS(rs, 1)] = im10 + im11;

          /* Twiddled difference of the vs-index-1 pair. */
          {
               E w_re = W[0];
               E w_im = W[1];

               rio[WS(vs, 1) + WS(rs, 1)] = FMA(w_re, diff_re1, w_im * diff_im1);
               iio[WS(vs, 1) + WS(rs, 1)] = FNMS(w_im, diff_re1, w_re * diff_im1);
          }

          /*
           * Twiddled difference of the vs-index-0 pair.  The twiddle is
           * deliberately read from W again rather than reused (the generator
           * was invoked with -reload-twiddle).
           */
          {
               E w_re = W[0];
               E w_im = W[1];

               rio[WS(vs, 1)] = FMA(w_re, diff_re0, w_im * diff_im0);
               iio[WS(vs, 1)] = FNMS(w_im, diff_re0, w_re * diff_im0);
          }
     }
}
138

    
139
static const tw_instr twinstr[] = {
140
     {TW_FULL, 0, 2},
141
     {TW_NEXT, 1, 0}
142
};
143

    
144
/* Descriptor passed to the planner together with q1_2 at registration. */
static const ct_desc desc = {
     2,                 /* same value as the generator's "-n 2" option */
     "q1_2",            /* codelet name */
     twinstr,           /* twiddle-instruction table */
     &GENUS,
     { 8, 4, 4, 0 },    /* presumably adds, muls, fmas, other (matches the
                           operation counts in the header comment) -- confirm
                           against ct_desc */
     0,
     0,
     0
};
145

    
146
/* Registers the q1_2 codelet and its descriptor with planner p. */
void X(codelet_q1_2)(planner *p)
{
     X(kdft_difsq_register)(p, q1_2, &desc);
}
149
#endif