Mercurial > hg > sv-dependency-builds
comparison src/fftw-3.3.8/dft/scalar/codelets/n1_14.c @ 82:d0c2a83c1364
Add FFTW 3.3.8 source, and a Linux build
author | Chris Cannam |
---|---|
date | Tue, 19 Nov 2019 14:52:55 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
81:7029a4916348 | 82:d0c2a83c1364 |
---|---|
1 /* | |
2 * Copyright (c) 2003, 2007-14 Matteo Frigo | |
3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License as published by | |
7 * the Free Software Foundation; either version 2 of the License, or | |
8 * (at your option) any later version. | |
9 * | |
10 * This program is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 * GNU General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU General Public License | |
16 * along with this program; if not, write to the Free Software | |
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 * | |
19 */ | |
20 | |
21 /* This file was automatically generated --- DO NOT EDIT */ | |
22 /* Generated on Thu May 24 08:04:10 EDT 2018 */ | |
23 | |
24 #include "dft/codelet-dft.h" | |
25 | |
26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA) | |
27 | |
28 /* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 14 -name n1_14 -include dft/scalar/n.h */ | |
29 | |
30 /* | |
31 * This function contains 148 FP additions, 84 FP multiplications, | |
32 * (or, 64 additions, 0 multiplications, 84 fused multiply/add), | |
33 * 67 stack variables, 6 constants, and 56 memory accesses | |
34 */ | |
35 #include "dft/scalar/n.h" | |
36 | |
/*
 * n1_14 -- variant compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined. Per the generator command line
 * above, this is the genfft "notw" codelet produced with -n 14.
 *
 * Runs v iterations. Each iteration reads 14 values from ri and 14 from ii
 * (element 0 and WS(is, k) for k = 1..13) and writes 14 values to ro and
 * 14 to io (element 0 and WS(os, k) for k = 1..13). After every iteration
 * ri and ii are advanced by ivs, and ro and io by ovs.
 *
 * ri, ii   - input arrays (presumably real and imaginary parts -- confirm
 *            against dft/codelet-dft.h)
 * ro, io   - output arrays (presumably real and imaginary parts -- confirm)
 * is, os   - strides handed to WS() for input and output indexing
 * v        - iteration count; the loop body never runs when v <= 0
 * ivs, ovs - per-iteration pointer increments for inputs and outputs
 *
 * DK, E, R, INT, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined outside
 * this file. FMA/FNMS are presumably fused multiply-add forms -- confirm
 * their operand order in the project headers before touching any expression.
 */
37 static void n1_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) | |
38 { | |
/*
 * Numerically: KP974927912 = sin(3*pi/7), KP900968867 = cos(pi/7),
 * KP801937735 = sin(2*pi/7)/sin(3*pi/7), KP554958132 = sin(pi/7)/sin(2*pi/7),
 * KP692021471 = cos(2*pi/7)/cos(pi/7), KP356895867 = cos(3*pi/7)/cos(2*pi/7).
 */
39 DK(KP974927912, +0.974927912181823607018131682993931217232785801); | |
40 DK(KP801937735, +0.801937735804838252472204639014890102331838324); | |
41 DK(KP554958132, +0.554958132087371191422194871006410481067288862); | |
42 DK(KP900968867, +0.900968867902419126236102319507445051165919162); | |
43 DK(KP692021471, +0.692021471630095869627814897002069140197260599); | |
44 DK(KP356895867, +0.356895867892209443894399510021300583399127187); | |
45 { | |
46 INT i; | |
/* One pass of the loop body per iteration; all four pointers step in the loop header. */
47 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(56, is), MAKE_VOLATILE_STRIDE(56, os)) { | |
48 E T3, Tp, T1b, T1x, T1i, T1L, T1M, T1j, T1k, T1K, Ta, To, Th, Tz, T14; | |
49 E TZ, Ts, Ty, Tv, T1Z, T2c, T27, TI, T23, T24, TP, TW, T22, T1c, T1e; | |
50 E T1d, T1f, T1s, T1n, T1A, T1G, T1D, T1H, T1U, T1P; | |
/* Inputs 0 and 7: difference (T3 for ri, T1b for ii) and sum (Tp, T1x). */
51 { | |
52 E T1, T2, T19, T1a; | |
53 T1 = ri[0]; | |
54 T2 = ri[WS(is, 7)]; | |
55 T3 = T1 - T2; | |
56 Tp = T1 + T2; | |
57 T19 = ii[0]; | |
58 T1a = ii[WS(is, 7)]; | |
59 T1b = T19 - T1a; | |
60 T1x = T19 + T1a; | |
61 } | |
/*
 * Remaining ri inputs, paired k with k+7 (mod 14): (2,9), (12,5), (8,1),
 * (6,13), (10,3), (4,11). Each pair yields a difference and a sum, which
 * are then combined into the intermediates used by the output stages.
 */
62 { | |
63 E T6, Tq, T9, Tr, Tn, Tx, Tk, Tw, Tg, Tu, Td, Tt; | |
64 { | |
65 E T4, T5, Ti, Tj; | |
66 T4 = ri[WS(is, 2)]; | |
67 T5 = ri[WS(is, 9)]; | |
68 T6 = T4 - T5; | |
69 Tq = T4 + T5; | |
70 { | |
71 E T7, T8, Tl, Tm; | |
72 T7 = ri[WS(is, 12)]; | |
73 T8 = ri[WS(is, 5)]; | |
74 T9 = T7 - T8; | |
75 Tr = T7 + T8; | |
76 Tl = ri[WS(is, 8)]; | |
77 Tm = ri[WS(is, 1)]; | |
78 Tn = Tl - Tm; | |
79 Tx = Tl + Tm; | |
80 } | |
81 Ti = ri[WS(is, 6)]; | |
82 Tj = ri[WS(is, 13)]; | |
83 Tk = Ti - Tj; | |
84 Tw = Ti + Tj; | |
85 { | |
86 E Te, Tf, Tb, Tc; | |
87 Te = ri[WS(is, 10)]; | |
88 Tf = ri[WS(is, 3)]; | |
89 Tg = Te - Tf; | |
90 Tu = Te + Tf; | |
91 Tb = ri[WS(is, 4)]; | |
92 Tc = ri[WS(is, 11)]; | |
93 Td = Tb - Tc; | |
94 Tt = Tb + Tc; | |
95 } | |
96 } | |
97 T1i = Tn - Tk; | |
98 T1L = Tt - Tu; | |
99 T1M = Tr - Tq; | |
100 T1j = Tg - Td; | |
101 T1k = T9 - T6; | |
102 T1K = Tw - Tx; | |
103 Ta = T6 + T9; | |
104 To = Tk + Tn; | |
105 Th = Td + Tg; | |
106 Tz = FNMS(KP356895867, Th, Ta); | |
107 T14 = FNMS(KP356895867, To, Th); | |
108 TZ = FNMS(KP356895867, Ta, To); | |
109 Ts = Tq + Tr; | |
110 Ty = Tw + Tx; | |
111 Tv = Tt + Tu; | |
112 T1Z = FNMS(KP356895867, Ts, Ty); | |
113 T2c = FNMS(KP356895867, Ty, Tv); | |
114 T27 = FNMS(KP356895867, Tv, Ts); | |
115 } | |
/* Same pairing and combination steps applied to the ii inputs. */
116 { | |
117 E TE, T1B, TH, T1C, TV, T1F, TS, T1E, TO, T1z, TL, T1y; | |
118 { | |
119 E TC, TD, TQ, TR; | |
120 TC = ii[WS(is, 4)]; | |
121 TD = ii[WS(is, 11)]; | |
122 TE = TC - TD; | |
123 T1B = TC + TD; | |
124 { | |
125 E TF, TG, TT, TU; | |
126 TF = ii[WS(is, 10)]; | |
127 TG = ii[WS(is, 3)]; | |
128 TH = TF - TG; | |
129 T1C = TF + TG; | |
130 TT = ii[WS(is, 8)]; | |
131 TU = ii[WS(is, 1)]; | |
132 TV = TT - TU; | |
133 T1F = TT + TU; | |
134 } | |
135 TQ = ii[WS(is, 6)]; | |
136 TR = ii[WS(is, 13)]; | |
137 TS = TQ - TR; | |
138 T1E = TQ + TR; | |
139 { | |
140 E TM, TN, TJ, TK; | |
141 TM = ii[WS(is, 12)]; | |
142 TN = ii[WS(is, 5)]; | |
143 TO = TM - TN; | |
144 T1z = TM + TN; | |
145 TJ = ii[WS(is, 2)]; | |
146 TK = ii[WS(is, 9)]; | |
147 TL = TJ - TK; | |
148 T1y = TJ + TK; | |
149 } | |
150 } | |
151 TI = TE - TH; | |
152 T23 = T1F - T1E; | |
153 T24 = T1C - T1B; | |
154 TP = TL - TO; | |
155 TW = TS - TV; | |
156 T22 = T1y - T1z; | |
157 T1c = TL + TO; | |
158 T1e = TS + TV; | |
159 T1d = TE + TH; | |
160 T1f = FNMS(KP356895867, T1e, T1d); | |
161 T1s = FNMS(KP356895867, T1d, T1c); | |
162 T1n = FNMS(KP356895867, T1c, T1e); | |
163 T1A = T1y + T1z; | |
164 T1G = T1E + T1F; | |
165 T1D = T1B + T1C; | |
166 T1H = FNMS(KP356895867, T1G, T1D); | |
167 T1U = FNMS(KP356895867, T1D, T1A); | |
168 T1P = FNMS(KP356895867, T1A, T1G); | |
169 } | |
/* Output 7 is the plain sum of the pairwise differences; output 0 is the plain sum of the pairwise sums. */
170 ro[WS(os, 7)] = T3 + Ta + Th + To; | |
171 io[WS(os, 7)] = T1b + T1c + T1d + T1e; | |
172 ro[0] = Tp + Ts + Tv + Ty; | |
173 io[0] = T1x + T1A + T1D + T1G; | |
/*
 * Odd-indexed outputs (1, 3, 5, 9, 11, 13): built only from the pairwise
 * differences. Each block below writes one pair of outputs k and 14-k from
 * the same two intermediates, one through FMA and one through FNMS with
 * KP974927912.
 */
174 { | |
175 E TB, TY, TA, TX; | |
176 TA = FNMS(KP692021471, Tz, To); | |
177 TB = FNMS(KP900968867, TA, T3); | |
178 TX = FMA(KP554958132, TW, TP); | |
179 TY = FMA(KP801937735, TX, TI); | |
180 ro[WS(os, 13)] = FNMS(KP974927912, TY, TB); | |
181 ro[WS(os, 1)] = FMA(KP974927912, TY, TB); | |
182 } | |
183 { | |
184 E T1u, T1w, T1t, T1v; | |
185 T1t = FNMS(KP692021471, T1s, T1e); | |
186 T1u = FNMS(KP900968867, T1t, T1b); | |
187 T1v = FMA(KP554958132, T1i, T1k); | |
188 T1w = FMA(KP801937735, T1v, T1j); | |
189 io[WS(os, 1)] = FMA(KP974927912, T1w, T1u); | |
190 io[WS(os, 13)] = FNMS(KP974927912, T1w, T1u); | |
191 } | |
192 { | |
193 E T11, T13, T10, T12; | |
194 T10 = FNMS(KP692021471, TZ, Th); | |
195 T11 = FNMS(KP900968867, T10, T3); | |
196 T12 = FMA(KP554958132, TI, TW); | |
197 T13 = FNMS(KP801937735, T12, TP); | |
198 ro[WS(os, 5)] = FNMS(KP974927912, T13, T11); | |
199 ro[WS(os, 9)] = FMA(KP974927912, T13, T11); | |
200 } | |
201 { | |
202 E T1p, T1r, T1o, T1q; | |
203 T1o = FNMS(KP692021471, T1n, T1d); | |
204 T1p = FNMS(KP900968867, T1o, T1b); | |
205 T1q = FMA(KP554958132, T1j, T1i); | |
206 T1r = FNMS(KP801937735, T1q, T1k); | |
207 io[WS(os, 5)] = FNMS(KP974927912, T1r, T1p); | |
208 io[WS(os, 9)] = FMA(KP974927912, T1r, T1p); | |
209 } | |
210 { | |
211 E T16, T18, T15, T17; | |
212 T15 = FNMS(KP692021471, T14, Ta); | |
213 T16 = FNMS(KP900968867, T15, T3); | |
214 T17 = FNMS(KP554958132, TP, TI); | |
215 T18 = FNMS(KP801937735, T17, TW); | |
216 ro[WS(os, 11)] = FNMS(KP974927912, T18, T16); | |
217 ro[WS(os, 3)] = FMA(KP974927912, T18, T16); | |
218 } | |
219 { | |
220 E T1h, T1m, T1g, T1l; | |
221 T1g = FNMS(KP692021471, T1f, T1c); | |
222 T1h = FNMS(KP900968867, T1g, T1b); | |
223 T1l = FNMS(KP554958132, T1k, T1j); | |
224 T1m = FNMS(KP801937735, T1l, T1i); | |
225 io[WS(os, 3)] = FMA(KP974927912, T1m, T1h); | |
226 io[WS(os, 11)] = FNMS(KP974927912, T1m, T1h); | |
227 } | |
/*
 * Even-indexed outputs (2, 4, 6, 8, 10, 12): built only from the pairwise
 * sums (and differences between those sums), again one k / 14-k pair per
 * block.
 */
228 { | |
229 E T1J, T1O, T1I, T1N; | |
230 T1I = FNMS(KP692021471, T1H, T1A); | |
231 T1J = FNMS(KP900968867, T1I, T1x); | |
232 T1N = FMA(KP554958132, T1M, T1L); | |
233 T1O = FNMS(KP801937735, T1N, T1K); | |
234 io[WS(os, 4)] = FMA(KP974927912, T1O, T1J); | |
235 io[WS(os, 10)] = FNMS(KP974927912, T1O, T1J); | |
236 } | |
237 { | |
238 E T2e, T2g, T2d, T2f; | |
239 T2d = FNMS(KP692021471, T2c, Ts); | |
240 T2e = FNMS(KP900968867, T2d, Tp); | |
241 T2f = FMA(KP554958132, T22, T24); | |
242 T2g = FNMS(KP801937735, T2f, T23); | |
243 ro[WS(os, 10)] = FNMS(KP974927912, T2g, T2e); | |
244 ro[WS(os, 4)] = FMA(KP974927912, T2g, T2e); | |
245 } | |
246 { | |
247 E T1R, T1T, T1Q, T1S; | |
248 T1Q = FNMS(KP692021471, T1P, T1D); | |
249 T1R = FNMS(KP900968867, T1Q, T1x); | |
250 T1S = FMA(KP554958132, T1L, T1K); | |
251 T1T = FMA(KP801937735, T1S, T1M); | |
252 io[WS(os, 2)] = FMA(KP974927912, T1T, T1R); | |
253 io[WS(os, 12)] = FNMS(KP974927912, T1T, T1R); | |
254 } | |
255 { | |
256 E T21, T26, T20, T25; | |
257 T20 = FNMS(KP692021471, T1Z, Tv); | |
258 T21 = FNMS(KP900968867, T20, Tp); | |
259 T25 = FMA(KP554958132, T24, T23); | |
260 T26 = FMA(KP801937735, T25, T22); | |
261 ro[WS(os, 12)] = FNMS(KP974927912, T26, T21); | |
262 ro[WS(os, 2)] = FMA(KP974927912, T26, T21); | |
263 } | |
264 { | |
265 E T1W, T1Y, T1V, T1X; | |
266 T1V = FNMS(KP692021471, T1U, T1G); | |
267 T1W = FNMS(KP900968867, T1V, T1x); | |
268 T1X = FNMS(KP554958132, T1K, T1M); | |
269 T1Y = FNMS(KP801937735, T1X, T1L); | |
270 io[WS(os, 6)] = FMA(KP974927912, T1Y, T1W); | |
271 io[WS(os, 8)] = FNMS(KP974927912, T1Y, T1W); | |
272 } | |
273 { | |
274 E T29, T2b, T28, T2a; | |
275 T28 = FNMS(KP692021471, T27, Ty); | |
276 T29 = FNMS(KP900968867, T28, Tp); | |
277 T2a = FNMS(KP554958132, T23, T22); | |
278 T2b = FNMS(KP801937735, T2a, T24); | |
279 ro[WS(os, 8)] = FNMS(KP974927912, T2b, T29); | |
280 ro[WS(os, 6)] = FMA(KP974927912, T2b, T29); | |
281 } | |
282 } | |
283 } | |
284 } | |
285 | |
/*
 * Descriptor passed to X(kdft_register) together with n1_14 (FMA variant).
 * Holds the value 14, the name string "n1_14", and the counts {64, 0, 84, 0},
 * whose first three entries match the "64 additions, 0 multiplications,
 * 84 fused multiply/add" figures in the generated comment for this variant.
 * The meaning of &GENUS and of the four trailing zeros is set by the
 * kdft_desc declaration, which is not in this file -- confirm there.
 */
286 static const kdft_desc desc = { 14, "n1_14", {64, 0, 84, 0}, &GENUS, 0, 0, 0, 0 }; | |
287 | |
/*
 * Registration entry point (FMA variant): hands the n1_14 function and its
 * descriptor to X(kdft_register) for planner p. X() is a macro defined
 * outside this file (presumably a symbol-name prefix -- confirm).
 */
288 void X(codelet_n1_14) (planner *p) { | |
289 X(kdft_register) (p, n1_14, &desc); | |
290 } | |
291 | |
292 #else | |
293 | |
294 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 14 -name n1_14 -include dft/scalar/n.h */ | |
295 | |
296 /* | |
297 * This function contains 148 FP additions, 72 FP multiplications, | |
298 * (or, 100 additions, 24 multiplications, 48 fused multiply/add), | |
299 * 43 stack variables, 6 constants, and 56 memory accesses | |
300 */ | |
301 #include "dft/scalar/n.h" | |
302 | |
/*
 * n1_14 -- variant compiled when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined. Per the generator command line
 * above, this is the genfft "notw" codelet produced with -n 14 (generated
 * without the -fma option).
 *
 * Runs v iterations. Each iteration reads 14 values from ri and 14 from ii
 * (element 0 and WS(is, k) for k = 1..13) and writes 14 values to ro and
 * 14 to io (element 0 and WS(os, k) for k = 1..13). After every iteration
 * ri and ii are advanced by ivs, and ro and io by ovs.
 *
 * ri, ii   - input arrays (presumably real and imaginary parts -- confirm
 *            against dft/codelet-dft.h)
 * ro, io   - output arrays (presumably real and imaginary parts -- confirm)
 * is, os   - strides handed to WS() for input and output indexing
 * v        - iteration count; the loop body never runs when v <= 0
 * ivs, ovs - per-iteration pointer increments for inputs and outputs
 *
 * DK, E, R, INT, WS, FMA, FNMS, FNMA and MAKE_VOLATILE_STRIDE are defined
 * outside this file. FMA/FNMS/FNMA are presumably multiply-add forms --
 * confirm their operand order and signs in the project headers before
 * touching any expression.
 */
303 static void n1_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs) | |
304 { | |
/*
 * Numerically: KP222520933 = cos(3*pi/7), KP900968867 = cos(pi/7),
 * KP623489801 = cos(2*pi/7), KP433883739 = sin(pi/7),
 * KP781831482 = sin(2*pi/7), KP974927912 = sin(3*pi/7).
 */
305 DK(KP222520933, +0.222520933956314404288902564496794759466355569); | |
306 DK(KP900968867, +0.900968867902419126236102319507445051165919162); | |
307 DK(KP623489801, +0.623489801858733530525004884004239810632274731); | |
308 DK(KP433883739, +0.433883739117558120475768332848358754609990728); | |
309 DK(KP781831482, +0.781831482468029808708444526674057750232334519); | |
310 DK(KP974927912, +0.974927912181823607018131682993931217232785801); | |
311 { | |
312 INT i; | |
/* One pass of the loop body per iteration; all four pointers step in the loop header. */
313 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(56, is), MAKE_VOLATILE_STRIDE(56, os)) { | |
314 E T3, Tp, T16, T1f, Ta, T1q, Ts, T10, TG, T1z, T19, T1i, Th, T1s, Tv; | |
315 E T12, TU, T1B, T17, T1o, To, T1r, Ty, T11, TN, T1A, T18, T1l; | |
/* Inputs 0 and 7: difference (T3 for ri, T16 for ii) and sum (Tp, T1f). */
316 { | |
317 E T1, T2, T14, T15; | |
318 T1 = ri[0]; | |
319 T2 = ri[WS(is, 7)]; | |
320 T3 = T1 - T2; | |
321 Tp = T1 + T2; | |
322 T14 = ii[0]; | |
323 T15 = ii[WS(is, 7)]; | |
324 T16 = T14 - T15; | |
325 T1f = T14 + T15; | |
326 } | |
/*
 * The next six blocks each load two input pairs (k paired with k+7 mod 14)
 * from ri or ii, form per-pair differences and sums, and keep four
 * combinations of them for the output stages.
 */
/* ri pairs (2,9) and (12,5). */
327 { | |
328 E T6, Tq, T9, Tr; | |
329 { | |
330 E T4, T5, T7, T8; | |
331 T4 = ri[WS(is, 2)]; | |
332 T5 = ri[WS(is, 9)]; | |
333 T6 = T4 - T5; | |
334 Tq = T4 + T5; | |
335 T7 = ri[WS(is, 12)]; | |
336 T8 = ri[WS(is, 5)]; | |
337 T9 = T7 - T8; | |
338 Tr = T7 + T8; | |
339 } | |
340 Ta = T6 + T9; | |
341 T1q = Tr - Tq; | |
342 Ts = Tq + Tr; | |
343 T10 = T9 - T6; | |
344 } | |
/* ii pairs (2,9) and (12,5). */
345 { | |
346 E TC, T1g, TF, T1h; | |
347 { | |
348 E TA, TB, TD, TE; | |
349 TA = ii[WS(is, 2)]; | |
350 TB = ii[WS(is, 9)]; | |
351 TC = TA - TB; | |
352 T1g = TA + TB; | |
353 TD = ii[WS(is, 12)]; | |
354 TE = ii[WS(is, 5)]; | |
355 TF = TD - TE; | |
356 T1h = TD + TE; | |
357 } | |
358 TG = TC - TF; | |
359 T1z = T1g - T1h; | |
360 T19 = TC + TF; | |
361 T1i = T1g + T1h; | |
362 } | |
/* ri pairs (4,11) and (10,3). */
363 { | |
364 E Td, Tt, Tg, Tu; | |
365 { | |
366 E Tb, Tc, Te, Tf; | |
367 Tb = ri[WS(is, 4)]; | |
368 Tc = ri[WS(is, 11)]; | |
369 Td = Tb - Tc; | |
370 Tt = Tb + Tc; | |
371 Te = ri[WS(is, 10)]; | |
372 Tf = ri[WS(is, 3)]; | |
373 Tg = Te - Tf; | |
374 Tu = Te + Tf; | |
375 } | |
376 Th = Td + Tg; | |
377 T1s = Tt - Tu; | |
378 Tv = Tt + Tu; | |
379 T12 = Tg - Td; | |
380 } | |
/* ii pairs (4,11) and (10,3). */
381 { | |
382 E TQ, T1m, TT, T1n; | |
383 { | |
384 E TO, TP, TR, TS; | |
385 TO = ii[WS(is, 4)]; | |
386 TP = ii[WS(is, 11)]; | |
387 TQ = TO - TP; | |
388 T1m = TO + TP; | |
389 TR = ii[WS(is, 10)]; | |
390 TS = ii[WS(is, 3)]; | |
391 TT = TR - TS; | |
392 T1n = TR + TS; | |
393 } | |
394 TU = TQ - TT; | |
395 T1B = T1n - T1m; | |
396 T17 = TQ + TT; | |
397 T1o = T1m + T1n; | |
398 } | |
/* ri pairs (6,13) and (8,1). */
399 { | |
400 E Tk, Tw, Tn, Tx; | |
401 { | |
402 E Ti, Tj, Tl, Tm; | |
403 Ti = ri[WS(is, 6)]; | |
404 Tj = ri[WS(is, 13)]; | |
405 Tk = Ti - Tj; | |
406 Tw = Ti + Tj; | |
407 Tl = ri[WS(is, 8)]; | |
408 Tm = ri[WS(is, 1)]; | |
409 Tn = Tl - Tm; | |
410 Tx = Tl + Tm; | |
411 } | |
412 To = Tk + Tn; | |
413 T1r = Tw - Tx; | |
414 Ty = Tw + Tx; | |
415 T11 = Tn - Tk; | |
416 } | |
/* ii pairs (6,13) and (8,1). */
417 { | |
418 E TJ, T1j, TM, T1k; | |
419 { | |
420 E TH, TI, TK, TL; | |
421 TH = ii[WS(is, 6)]; | |
422 TI = ii[WS(is, 13)]; | |
423 TJ = TH - TI; | |
424 T1j = TH + TI; | |
425 TK = ii[WS(is, 8)]; | |
426 TL = ii[WS(is, 1)]; | |
427 TM = TK - TL; | |
428 T1k = TK + TL; | |
429 } | |
430 TN = TJ - TM; | |
431 T1A = T1k - T1j; | |
432 T18 = TJ + TM; | |
433 T1l = T1j + T1k; | |
434 } | |
/* Output 7 is the plain sum of the pairwise differences; output 0 is the plain sum of the pairwise sums. */
435 ro[WS(os, 7)] = T3 + Ta + Th + To; | |
436 io[WS(os, 7)] = T16 + T19 + T17 + T18; | |
437 ro[0] = Tp + Ts + Tv + Ty; | |
438 io[0] = T1f + T1i + T1o + T1l; | |
/*
 * Odd-indexed outputs: each of the next three blocks writes ro and io for
 * one pair k / 14-k -- (5,9), (13,1), (11,3) -- as the sum and difference
 * of two intermediates built only from the pairwise differences.
 */
439 { | |
440 E TV, Tz, T1e, T1d; | |
441 TV = FNMS(KP781831482, TN, KP974927912 * TG) - (KP433883739 * TU); | |
442 Tz = FMA(KP623489801, To, T3) + FNMA(KP900968867, Th, KP222520933 * Ta); | |
443 ro[WS(os, 5)] = Tz - TV; | |
444 ro[WS(os, 9)] = Tz + TV; | |
445 T1e = FNMS(KP781831482, T11, KP974927912 * T10) - (KP433883739 * T12); | |
446 T1d = FMA(KP623489801, T18, T16) + FNMA(KP900968867, T17, KP222520933 * T19); | |
447 io[WS(os, 5)] = T1d - T1e; | |
448 io[WS(os, 9)] = T1e + T1d; | |
449 } | |
450 { | |
451 E TX, TW, T1b, T1c; | |
452 TX = FMA(KP781831482, TG, KP974927912 * TU) + (KP433883739 * TN); | |
453 TW = FMA(KP623489801, Ta, T3) + FNMA(KP900968867, To, KP222520933 * Th); | |
454 ro[WS(os, 13)] = TW - TX; | |
455 ro[WS(os, 1)] = TW + TX; | |
456 T1b = FMA(KP781831482, T10, KP974927912 * T12) + (KP433883739 * T11); | |
457 T1c = FMA(KP623489801, T19, T16) + FNMA(KP900968867, T18, KP222520933 * T17); | |
458 io[WS(os, 1)] = T1b + T1c; | |
459 io[WS(os, 13)] = T1c - T1b; | |
460 } | |
461 { | |
462 E TZ, TY, T13, T1a; | |
463 TZ = FMA(KP433883739, TG, KP974927912 * TN) - (KP781831482 * TU); | |
464 TY = FMA(KP623489801, Th, T3) + FNMA(KP222520933, To, KP900968867 * Ta); | |
465 ro[WS(os, 11)] = TY - TZ; | |
466 ro[WS(os, 3)] = TY + TZ; | |
467 T13 = FMA(KP433883739, T10, KP974927912 * T11) - (KP781831482 * T12); | |
468 T1a = FMA(KP623489801, T17, T16) + FNMA(KP222520933, T18, KP900968867 * T19); | |
469 io[WS(os, 3)] = T13 + T1a; | |
470 io[WS(os, 11)] = T1a - T13; | |
471 } | |
/*
 * Even-indexed outputs: each of the last three blocks writes io and ro for
 * one pair k / 14-k -- (6,8), (4,10), (2,12) -- from intermediates built
 * only from the pairwise sums (and differences between those sums).
 */
472 { | |
473 E T1t, T1p, T1C, T1y; | |
474 T1t = FNMS(KP433883739, T1r, KP781831482 * T1q) - (KP974927912 * T1s); | |
475 T1p = FMA(KP623489801, T1i, T1f) + FNMA(KP900968867, T1l, KP222520933 * T1o); | |
476 io[WS(os, 6)] = T1p - T1t; | |
477 io[WS(os, 8)] = T1t + T1p; | |
478 T1C = FNMS(KP433883739, T1A, KP781831482 * T1z) - (KP974927912 * T1B); | |
479 T1y = FMA(KP623489801, Ts, Tp) + FNMA(KP900968867, Ty, KP222520933 * Tv); | |
480 ro[WS(os, 6)] = T1y - T1C; | |
481 ro[WS(os, 8)] = T1y + T1C; | |
482 } | |
483 { | |
484 E T1v, T1u, T1E, T1D; | |
485 T1v = FMA(KP433883739, T1q, KP781831482 * T1s) - (KP974927912 * T1r); | |
486 T1u = FMA(KP623489801, T1o, T1f) + FNMA(KP222520933, T1l, KP900968867 * T1i); | |
487 io[WS(os, 4)] = T1u - T1v; | |
488 io[WS(os, 10)] = T1v + T1u; | |
489 T1E = FMA(KP433883739, T1z, KP781831482 * T1B) - (KP974927912 * T1A); | |
490 T1D = FMA(KP623489801, Tv, Tp) + FNMA(KP222520933, Ty, KP900968867 * Ts); | |
491 ro[WS(os, 4)] = T1D - T1E; | |
492 ro[WS(os, 10)] = T1D + T1E; | |
493 } | |
494 { | |
495 E T1w, T1x, T1G, T1F; | |
496 T1w = FMA(KP974927912, T1q, KP433883739 * T1s) + (KP781831482 * T1r); | |
497 T1x = FMA(KP623489801, T1l, T1f) + FNMA(KP900968867, T1o, KP222520933 * T1i); | |
498 io[WS(os, 2)] = T1w + T1x; | |
499 io[WS(os, 12)] = T1x - T1w; | |
500 T1G = FMA(KP974927912, T1z, KP433883739 * T1B) + (KP781831482 * T1A); | |
501 T1F = FMA(KP623489801, Ty, Tp) + FNMA(KP900968867, Tv, KP222520933 * Ts); | |
502 ro[WS(os, 12)] = T1F - T1G; | |
503 ro[WS(os, 2)] = T1F + T1G; | |
504 } | |
505 } | |
506 } | |
507 } | |
508 | |
/*
 * Descriptor passed to X(kdft_register) together with n1_14 (non-FMA
 * variant). Holds the value 14, the name string "n1_14", and the counts
 * {100, 24, 48, 0}, whose first three entries match the "100 additions,
 * 24 multiplications, 48 fused multiply/add" figures in the generated
 * comment for this variant. The meaning of &GENUS and of the four trailing
 * zeros is set by the kdft_desc declaration, which is not in this file --
 * confirm there.
 */
509 static const kdft_desc desc = { 14, "n1_14", {100, 24, 48, 0}, &GENUS, 0, 0, 0, 0 }; | |
510 | |
/*
 * Registration entry point (non-FMA variant): hands the n1_14 function and
 * its descriptor to X(kdft_register) for planner p. X() is a macro defined
 * outside this file (presumably a symbol-name prefix -- confirm).
 */
511 void X(codelet_n1_14) (planner *p) { | |
512 X(kdft_register) (p, n1_14, &desc); | |
513 } | |
514 | |
515 #endif |