comparison src/fftw-3.3.8/dft/simd/common/t2fv_25.c @ 167:bd3cc4d1df30

Add FFTW 3.3.8 source, and a Linux build
author Chris Cannam <cannam@all-day-breakfast.com>
date Tue, 19 Nov 2019 14:52:55 +0000
parents
children
comparison
equal deleted inserted replaced
166:cbd6d7e562c7 167:bd3cc4d1df30
1 /*
2 * Copyright (c) 2003, 2007-14 Matteo Frigo
3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 */
20
21 /* This file was automatically generated --- DO NOT EDIT */
22 /* Generated on Thu May 24 08:05:47 EDT 2018 */
23
24 #include "dft/codelet-dft.h"
25
26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27
28 /* Generated by: ../../../genfft/gen_twiddle_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t2fv_25 -include dft/simd/t2f.h */
29
30 /*
31 * This function contains 248 FP additions, 241 FP multiplications,
32 * (or, 67 additions, 60 multiplications, 181 fused multiply/add),
33 * 147 stack variables, 67 constants, and 50 memory accesses
34 */
35 #include "dft/simd/t2f.h"
36
/*
 * t2fv_25 (FMA variant): generated SIMD twiddle codelet operating in place
 * on 25 strided elements of x (x[WS(rs, 0)] .. x[WS(rs, 24)]).
 *
 * Parameters, as used by the body below:
 *   ri  - base pointer of the data; copied into x and advanced per iteration.
 *   ii  - not referenced anywhere in this body.
 *   W   - twiddle table; offset on entry by mb * ((TWVL / VL) * 48) and
 *         advanced by TWVL * 48 per iteration.
 *   rs  - stride handed to WS() to address the 25 elements.
 *   mb, me - loop bounds; m runs from mb while m < me in steps of VL.
 *   ms  - per-iteration stride; x advances by VL * ms each pass and ms is
 *         also forwarded to every LD()/ST().
 *
 * Element 0 is loaded without a twiddle; elements 1..24 are each passed
 * through BYTWJ() with W[TWVL * 2*(k-1)] for element k. All 25 results are
 * stored back to the same 25 locations.
 *
 * NOTE(review): DVK, LDK, LD, ST, BYTWJ, VADD, VSUB, VMUL, VFMA, VFNMS,
 * VFMS, VFMAI, VFNMSI, WS, MAKE_VOLATILE_STRIDE and VLEAVE come from headers
 * that are not visible in this file; their exact arithmetic/sign conventions
 * should be confirmed there before touching any expression.
 */
37 static void t2fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
38 {
/* Named constants used by the butterflies; each KPnnnnnnnnn name spells out
 * the leading digits of the value it is declared with. */
39 DVK(KP617882369, +0.617882369114440893914546919006756321695042882);
40 DVK(KP792626838, +0.792626838241819413632131824093538848057784557);
41 DVK(KP876091699, +0.876091699473550838204498029706869638173524346);
42 DVK(KP803003575, +0.803003575438660414833440593570376004635464850);
43 DVK(KP999544308, +0.999544308746292983948881682379742149196758193);
44 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
45 DVK(KP242145790, +0.242145790282157779872542093866183953459003101);
46 DVK(KP916574801, +0.916574801383451584742370439148878693530976769);
47 DVK(KP269969613, +0.269969613759572083574752974412347470060951301);
48 DVK(KP904730450, +0.904730450839922351881287709692877908104763647);
49 DVK(KP809385824, +0.809385824416008241660603814668679683846476688);
50 DVK(KP894834959, +0.894834959464455102997960030820114611498661386);
51 DVK(KP447417479, +0.447417479732227551498980015410057305749330693);
52 DVK(KP867381224, +0.867381224396525206773171885031575671309956167);
53 DVK(KP958953096, +0.958953096729998668045963838399037225970891871);
54 DVK(KP683113946, +0.683113946453479238701949862233725244439656928);
55 DVK(KP559154169, +0.559154169276087864842202529084232643714075927);
56 DVK(KP831864738, +0.831864738706457140726048799369896829771167132);
57 DVK(KP829049696, +0.829049696159252993975487806364305442437946767);
58 DVK(KP912575812, +0.912575812670962425556968549836277086778922727);
59 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
60 DVK(KP262346850, +0.262346850930607871785420028382979691334784273);
61 DVK(KP860541664, +0.860541664367944677098261680920518816412804187);
62 DVK(KP681693190, +0.681693190061530575150324149145440022633095390);
63 DVK(KP560319534, +0.560319534973832390111614715371676131169633784);
64 DVK(KP897376177, +0.897376177523557693138608077137219684419427330);
65 DVK(KP855719849, +0.855719849902058969314654733608091555096772472);
66 DVK(KP949179823, +0.949179823508441261575555465843363271711583843);
67 DVK(KP952936919, +0.952936919628306576880750665357914584765951388);
68 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
69 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
70 DVK(KP997675361, +0.997675361079556513670859573984492383596555031);
71 DVK(KP237294955, +0.237294955877110315393888866460840817927895961);
72 DVK(KP904508497, +0.904508497187473712051146708591409529430077295);
73 DVK(KP906616052, +0.906616052148196230441134447086066874408359177);
74 DVK(KP923225144, +0.923225144846402650453449441572664695995209956);
75 DVK(KP921078979, +0.921078979742360627699756128143719920817673854);
76 DVK(KP578046249, +0.578046249379945007321754579646815604023525655);
77 DVK(KP763932022, +0.763932022500210303590826331268723764559381640);
78 DVK(KP956723877, +0.956723877038460305821989399535483155872969262);
79 DVK(KP690983005, +0.690983005625052575897706582817180941139845410);
80 DVK(KP945422727, +0.945422727388575946270360266328811958657216298);
81 DVK(KP522616830, +0.522616830205754336872861364785224694908468440);
82 DVK(KP772036680, +0.772036680810363904029489473607579825330539880);
83 DVK(KP669429328, +0.669429328479476605641803240971985825917022098);
84 DVK(KP570584518, +0.570584518783621657366766175430996792655723863);
85 DVK(KP982009705, +0.982009705009746369461829878184175962711969869);
86 DVK(KP845997307, +0.845997307939530944175097360758058292389769300);
87 DVK(KP734762448, +0.734762448793050413546343770063151342619912334);
88 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
89 DVK(KP447533225, +0.447533225982656890041886979663652563063114397);
90 DVK(KP059835404, +0.059835404262124915169548397419498386427871950);
91 DVK(KP494780565, +0.494780565770515410344588413655324772219443730);
92 DVK(KP603558818, +0.603558818296015001454675132653458027918768137);
93 DVK(KP987388751, +0.987388751065621252324603216482382109400433949);
94 DVK(KP522847744, +0.522847744331509716623755382187077770911012542);
95 DVK(KP667278218, +0.667278218140296670899089292254759909713898805);
96 DVK(KP244189809, +0.244189809627953270309879511234821255780225091);
97 DVK(KP132830569, +0.132830569247582714407653942074819768844536507);
98 DVK(KP869845200, +0.869845200362138853122720822420327157933056305);
99 DVK(KP786782374, +0.786782374965295178365099601674911834788448471);
100 DVK(KP066152395, +0.066152395967733048213034281011006031460903353);
101 DVK(KP120146378, +0.120146378570687701782758537356596213647956445);
102 DVK(KP893101515, +0.893101515366181661711202267938416198338079437);
103 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
104 DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
105 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
106 {
107 INT m;
108 R *x;
109 x = ri;
/* Main loop: one 25-point pass per iteration. The comma-separated update
 * clause steps m by VL, x by VL * ms and W by TWVL * 48, and also invokes
 * MAKE_VOLATILE_STRIDE(25, rs) (macro defined elsewhere). */
110 for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) {
/* Values declared here are produced by the five input stages below and
 * consumed by the three output stages that follow them. */
111 V T1, Te, Tc, Td, T1O, T2X, T3Q, T1x, T2K, T1u, T2L, T1y, T27, T3b, T2R;
112 V T2M, T2f, T3M, Ty, T2E, Tv, T2D, Tz, T2a, T3e, T2U, T2F, T2i, T3N, TK;
113 V T2B, TS, T2A, TT, T2b, T3f, T2T, T2C, T2j, T3P, T1d, T2H, T1a, T2I, T1e;
114 V T28, T3c, T2Q, T2J, T2g;
/* Input stage A: element 0 (loaded as-is into T1) plus twiddled elements
 * 5, 10, 15, 20. Produces Tc, Td, Te, T1O, T2X. */
115 {
116 V T8, Ta, Tb, T3, T5, T6, T1M, T1N;
117 T1 = LD(&(x[0]), ms, &(x[0]));
118 {
119 V T7, T9, T2, T4;
120 T7 = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
121 T8 = BYTWJ(&(W[TWVL * 18]), T7);
122 T9 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
123 Ta = BYTWJ(&(W[TWVL * 28]), T9);
124 Tb = VADD(T8, Ta);
125 T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
126 T3 = BYTWJ(&(W[TWVL * 8]), T2);
127 T4 = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
128 T5 = BYTWJ(&(W[TWVL * 38]), T4);
129 T6 = VADD(T3, T5);
130 }
131 Te = VSUB(T6, Tb);
132 Tc = VADD(T6, Tb);
133 Td = VFNMS(LDK(KP250000000), Tc, T1);
134 T1M = VSUB(T3, T5);
135 T1N = VSUB(T8, Ta);
136 T1O = VFMA(LDK(KP618033988), T1N, T1M);
137 T2X = VFNMS(LDK(KP618033988), T1M, T1N);
138 }
/* Input stage B: twiddled elements 3, 8, 13, 18, 23. T3Q is the plain sum
 * of the five; the remaining outputs feed the later output stages. */
139 {
140 V T1g, T1v, T1w, T1l, T1q, T1r, T1f, T1s, T1t;
141 T1f = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
142 T1g = BYTWJ(&(W[TWVL * 4]), T1f);
143 {
144 V T1i, T1p, T1k, T1n;
145 {
146 V T1h, T1o, T1j, T1m;
147 T1h = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
148 T1i = BYTWJ(&(W[TWVL * 14]), T1h);
149 T1o = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
150 T1p = BYTWJ(&(W[TWVL * 34]), T1o);
151 T1j = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
152 T1k = BYTWJ(&(W[TWVL * 44]), T1j);
153 T1m = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
154 T1n = BYTWJ(&(W[TWVL * 24]), T1m);
155 }
156 T1v = VSUB(T1i, T1k);
157 T1w = VSUB(T1n, T1p);
158 T1l = VADD(T1i, T1k);
159 T1q = VADD(T1n, T1p);
160 T1r = VADD(T1l, T1q);
161 }
162 T3Q = VADD(T1g, T1r);
163 T1x = VFMA(LDK(KP618033988), T1w, T1v);
164 T2K = VFNMS(LDK(KP618033988), T1v, T1w);
165 T1s = VFNMS(LDK(KP250000000), T1r, T1g);
166 T1t = VSUB(T1q, T1l);
167 T1u = VFNMS(LDK(KP559016994), T1t, T1s);
168 T2L = VFMA(LDK(KP559016994), T1t, T1s);
169 T1y = VFNMS(LDK(KP893101515), T1x, T1u);
170 T27 = VFNMS(LDK(KP120146378), T1x, T1u);
171 T3b = VFMA(LDK(KP066152395), T2L, T2K);
172 T2R = VFNMS(LDK(KP786782374), T2K, T2L);
173 T2M = VFMA(LDK(KP869845200), T2L, T2K);
174 T2f = VFMA(LDK(KP132830569), T1u, T1x);
175 }
/* Input stage C: twiddled elements 1, 6, 11, 16, 21. T3M is the plain sum
 * of the five. */
176 {
177 V Th, Tw, Tx, Tm, Tr, Ts, Tg, Tt, Tu;
178 Tg = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
179 Th = BYTWJ(&(W[0]), Tg);
180 {
181 V Tj, Tq, Tl, To;
182 {
183 V Ti, Tp, Tk, Tn;
184 Ti = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
185 Tj = BYTWJ(&(W[TWVL * 10]), Ti);
186 Tp = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
187 Tq = BYTWJ(&(W[TWVL * 30]), Tp);
188 Tk = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
189 Tl = BYTWJ(&(W[TWVL * 40]), Tk);
190 Tn = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
191 To = BYTWJ(&(W[TWVL * 20]), Tn);
192 }
193 Tw = VSUB(Tj, Tl);
194 Tx = VSUB(Tq, To);
195 Tm = VADD(Tj, Tl);
196 Tr = VADD(To, Tq);
197 Ts = VADD(Tm, Tr);
198 }
199 T3M = VADD(Th, Ts);
200 Ty = VFNMS(LDK(KP618033988), Tx, Tw);
201 T2E = VFMA(LDK(KP618033988), Tw, Tx);
202 Tt = VFNMS(LDK(KP250000000), Ts, Th);
203 Tu = VSUB(Tm, Tr);
204 Tv = VFMA(LDK(KP559016994), Tu, Tt);
205 T2D = VFNMS(LDK(KP559016994), Tu, Tt);
206 Tz = VFNMS(LDK(KP244189809), Ty, Tv);
207 T2a = VFMA(LDK(KP667278218), Tv, Ty);
208 T3e = VFNMS(LDK(KP522847744), T2E, T2D);
209 T2U = VFNMS(LDK(KP987388751), T2D, T2E);
210 T2F = VFMA(LDK(KP893101515), T2E, T2D);
211 T2i = VFNMS(LDK(KP603558818), Ty, Tv);
212 }
/* Input stage D: twiddled elements 4, 9, 14, 19, 24. T3N is the plain sum
 * of the five. NOTE(review): this stage uses VFMS for TQ where the sibling
 * stages use VFNMS, and subtracts in the opposite order for TE/TJ; this
 * looks like a deliberate generator sign convention - do not "normalize". */
213 {
214 V TM, TE, TJ, TN, TO, TP, TL, TQ, TR;
215 TL = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
216 TM = BYTWJ(&(W[TWVL * 6]), TL);
217 {
218 V TB, TI, TD, TG;
219 {
220 V TA, TH, TC, TF;
221 TA = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
222 TB = BYTWJ(&(W[TWVL * 46]), TA);
223 TH = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
224 TI = BYTWJ(&(W[TWVL * 26]), TH);
225 TC = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
226 TD = BYTWJ(&(W[TWVL * 16]), TC);
227 TF = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
228 TG = BYTWJ(&(W[TWVL * 36]), TF);
229 }
230 TE = VSUB(TB, TD);
231 TJ = VSUB(TG, TI);
232 TN = VADD(TD, TB);
233 TO = VADD(TI, TG);
234 TP = VADD(TN, TO);
235 }
236 T3N = VADD(TM, TP);
237 TK = VFMA(LDK(KP618033988), TJ, TE);
238 T2B = VFNMS(LDK(KP618033988), TE, TJ);
239 TQ = VFMS(LDK(KP250000000), TP, TM);
240 TR = VSUB(TN, TO);
241 TS = VFNMS(LDK(KP559016994), TR, TQ);
242 T2A = VFMA(LDK(KP559016994), TR, TQ);
243 TT = VFNMS(LDK(KP667278218), TS, TK);
244 T2b = VFMA(LDK(KP869845200), TS, TK);
245 T3f = VFNMS(LDK(KP494780565), T2A, T2B);
246 T2T = VFNMS(LDK(KP132830569), T2A, T2B);
247 T2C = VFMA(LDK(KP120146378), T2B, T2A);
248 T2j = VFNMS(LDK(KP786782374), TK, TS);
249 }
/* Input stage E: twiddled elements 2, 7, 12, 17, 22. T3P is the plain sum
 * of the five. */
250 {
251 V TW, T1b, T1c, T11, T16, T17, TV, T18, T19;
252 TV = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
253 TW = BYTWJ(&(W[TWVL * 2]), TV);
254 {
255 V TY, T15, T10, T13;
256 {
257 V TX, T14, TZ, T12;
258 TX = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
259 TY = BYTWJ(&(W[TWVL * 12]), TX);
260 T14 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
261 T15 = BYTWJ(&(W[TWVL * 32]), T14);
262 TZ = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
263 T10 = BYTWJ(&(W[TWVL * 42]), TZ);
264 T12 = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
265 T13 = BYTWJ(&(W[TWVL * 22]), T12);
266 }
267 T1b = VSUB(TY, T10);
268 T1c = VSUB(T15, T13);
269 T11 = VADD(TY, T10);
270 T16 = VADD(T13, T15);
271 T17 = VADD(T11, T16);
272 }
273 T3P = VADD(TW, T17);
274 T1d = VFNMS(LDK(KP618033988), T1c, T1b);
275 T2H = VFMA(LDK(KP618033988), T1b, T1c);
276 T18 = VFNMS(LDK(KP250000000), T17, TW);
277 T19 = VSUB(T16, T11);
278 T1a = VFNMS(LDK(KP559016994), T19, T18);
279 T2I = VFMA(LDK(KP559016994), T19, T18);
280 T1e = VFNMS(LDK(KP522847744), T1d, T1a);
281 T28 = VFNMS(LDK(KP494780565), T1a, T1d);
282 T3c = VFNMS(LDK(KP667278218), T2I, T2H);
283 T2Q = VFNMS(LDK(KP059835404), T2H, T2I);
284 T2J = VFMA(LDK(KP066152395), T2I, T2H);
285 T2g = VFMA(LDK(KP447533225), T1d, T1a);
286 }
/* Output stage 1: combines the five group sums (T1 + Tc, T3M, T3N, T3P,
 * T3Q) and stores elements 0, 5, 10, 15, 20. */
287 {
288 V T3Y, T40, T3L, T3S, T3T, T3U, T3Z, T3V;
289 {
290 V T3W, T3X, T3O, T3R;
291 T3W = VSUB(T3M, T3N);
292 T3X = VSUB(T3P, T3Q);
293 T3Y = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3X, T3W));
294 T40 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3W, T3X));
295 T3L = VADD(T1, Tc);
296 T3O = VADD(T3M, T3N);
297 T3R = VADD(T3P, T3Q);
298 T3S = VADD(T3O, T3R);
299 T3T = VFNMS(LDK(KP250000000), T3S, T3L);
300 T3U = VSUB(T3O, T3R);
301 }
302 ST(&(x[0]), VADD(T3S, T3L), ms, &(x[0]));
303 T3Z = VFNMS(LDK(KP559016994), T3U, T3T);
304 ST(&(x[WS(rs, 10)]), VFMAI(T40, T3Z), ms, &(x[0]));
305 ST(&(x[WS(rs, 15)]), VFNMSI(T40, T3Z), ms, &(x[WS(rs, 1)]));
306 T3V = VFMA(LDK(KP559016994), T3U, T3T);
307 ST(&(x[WS(rs, 5)]), VFNMSI(T3Y, T3V), ms, &(x[WS(rs, 1)]));
308 ST(&(x[WS(rs, 20)]), VFMAI(T3Y, T3V), ms, &(x[0]));
309 }
/* Output stage 2: built on T2z (from Td, Te) and T2X together with the
 * T2x/T3x intermediates of stages B-E; stores elements 2, 3, 7, 8, 12, 13,
 * 17, 18, 22, 23. Each VFNMSI/VFMAI pair writes two elements whose indices
 * sum to 25. */
310 {
311 V T2Z, T35, T3B, T3I, T2W, T38, T2O, T32, T2z, T3t, T3h, T3s, T3p, T3F, T3r;
312 V T3v, T3C, T3z, T3A;
313 T2Z = VFMA(LDK(KP734762448), T2U, T2T);
314 T35 = VFNMS(LDK(KP734762448), T2F, T2C);
315 T3z = VFMA(LDK(KP845997307), T3c, T3b);
316 T3A = VFMA(LDK(KP982009705), T3f, T3e);
317 T3B = VFMA(LDK(KP570584518), T3A, T3z);
318 T3I = VFNMS(LDK(KP669429328), T3z, T3A);
319 {
320 V T2S, T2V, T37, T36;
321 T2S = VFMA(LDK(KP772036680), T2R, T2Q);
322 T2V = VFNMS(LDK(KP734762448), T2U, T2T);
323 T36 = VFMA(LDK(KP772036680), T2M, T2J);
324 T37 = VFMA(LDK(KP522616830), T2V, T36);
325 T2W = VFMA(LDK(KP945422727), T2V, T2S);
326 T38 = VFNMS(LDK(KP690983005), T37, T2S);
327 }
328 {
329 V T2N, T2G, T31, T30;
330 T2N = VFNMS(LDK(KP772036680), T2M, T2J);
331 T2G = VFMA(LDK(KP734762448), T2F, T2C);
332 T30 = VFNMS(LDK(KP772036680), T2R, T2Q);
333 T31 = VFNMS(LDK(KP522616830), T2G, T30);
334 T2O = VFMA(LDK(KP956723877), T2N, T2G);
335 T32 = VFMA(LDK(KP763932022), T31, T2N);
336 }
337 {
338 V T3o, T3u, T3l, T3m, T3n;
339 T2z = VFNMS(LDK(KP559016994), Te, Td);
340 T3m = VFMA(LDK(KP447533225), T2B, T2A);
341 T3n = VFMA(LDK(KP578046249), T2D, T2E);
342 T3o = VFNMS(LDK(KP921078979), T3n, T3m);
343 T3t = VFMA(LDK(KP921078979), T3n, T3m);
344 {
345 V T3d, T3g, T3j, T3k;
346 T3d = VFNMS(LDK(KP845997307), T3c, T3b);
347 T3g = VFNMS(LDK(KP982009705), T3f, T3e);
348 T3h = VFMA(LDK(KP923225144), T3g, T3d);
349 T3u = VFNMS(LDK(KP923225144), T3g, T3d);
350 T3j = VFNMS(LDK(KP059835404), T2K, T2L);
351 T3k = VFMA(LDK(KP603558818), T2H, T2I);
352 T3l = VFMA(LDK(KP845997307), T3k, T3j);
353 T3s = VFNMS(LDK(KP845997307), T3k, T3j);
354 }
355 T3p = VFNMS(LDK(KP906616052), T3o, T3l);
356 T3F = VFNMS(LDK(KP904508497), T3u, T3s);
357 T3r = VFNMS(LDK(KP237294955), T3h, T2z);
358 T3v = VFNMS(LDK(KP997675361), T3u, T3t);
359 T3C = VFMA(LDK(KP906616052), T3o, T3l);
360 }
/* Stores elements 3/22 and 2/23. */
361 {
362 V T2P, T2Y, T3i, T3q;
363 T2P = VFMA(LDK(KP992114701), T2O, T2z);
364 T2Y = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2X, T2W));
365 ST(&(x[WS(rs, 3)]), VFNMSI(T2Y, T2P), ms, &(x[WS(rs, 1)]));
366 ST(&(x[WS(rs, 22)]), VFMAI(T2Y, T2P), ms, &(x[0]));
367 T3i = VFMA(LDK(KP949179823), T3h, T2z);
368 T3q = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2X, T3p));
369 ST(&(x[WS(rs, 2)]), VFNMSI(T3q, T3i), ms, &(x[0]));
370 ST(&(x[WS(rs, 23)]), VFMAI(T3q, T3i), ms, &(x[WS(rs, 1)]));
371 }
/* Stores elements 8/17. */
372 {
373 V T34, T3a, T33, T39;
374 T33 = VFNMS(LDK(KP855719849), T32, T2Z);
375 T34 = VFMA(LDK(KP897376177), T33, T2z);
376 T39 = VFMA(LDK(KP855719849), T38, T35);
377 T3a = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T39, T2X));
378 ST(&(x[WS(rs, 8)]), VFNMSI(T3a, T34), ms, &(x[0]));
379 ST(&(x[WS(rs, 17)]), VFMAI(T3a, T34), ms, &(x[WS(rs, 1)]));
380 }
/* Stores elements 13/12 and 7/18. */
381 {
382 V T3x, T3H, T3E, T3K, T3w;
383 T3w = VFMA(LDK(KP560319534), T3v, T3s);
384 T3x = VFNMS(LDK(KP949179823), T3w, T3r);
385 {
386 V T3G, T3y, T3J, T3D;
387 T3G = VFNMS(LDK(KP681693190), T3F, T3t);
388 T3H = VFNMS(LDK(KP860541664), T3G, T3r);
389 T3y = VFMA(LDK(KP262346850), T3p, T2X);
390 T3J = VFNMS(LDK(KP669429328), T3C, T3I);
391 T3D = VFMA(LDK(KP618033988), T3C, T3B);
392 T3E = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3D, T3y));
393 T3K = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3J, T3y));
394 }
395 ST(&(x[WS(rs, 13)]), VFNMSI(T3E, T3x), ms, &(x[WS(rs, 1)]));
396 ST(&(x[WS(rs, 7)]), VFMAI(T3K, T3H), ms, &(x[WS(rs, 1)]));
397 ST(&(x[WS(rs, 12)]), VFMAI(T3E, T3x), ms, &(x[0]));
398 ST(&(x[WS(rs, 18)]), VFNMSI(T3K, T3H), ms, &(x[0]));
399 }
400 }
/* Output stage 3: built on Tf (from Td, Te) and T1O together with the
 * remaining intermediates of stages B-E; stores elements 1, 4, 6, 9, 11, 14,
 * 16, 19, 21, 24, again in pairs whose indices sum to 25. */
401 {
402 V T2n, T2t, T1V, T22, T2l, T2w, T2d, T2q, Tf, T1I, T1A, T1E, T1B, T1Z, T1J;
403 V T1R, T1W, T1T, T1U;
404 T2n = VFNMS(LDK(KP912575812), T2j, T2i);
405 T2t = VFNMS(LDK(KP912575812), T2b, T2a);
406 T1T = VFNMS(LDK(KP829049696), TT, Tz);
407 T1U = VFNMS(LDK(KP831864738), T1y, T1e);
408 T1V = VFMA(LDK(KP559154169), T1U, T1T);
409 T22 = VFNMS(LDK(KP683113946), T1T, T1U);
410 {
411 V T2h, T2k, T2v, T2u;
412 T2h = VFMA(LDK(KP958953096), T2g, T2f);
413 T2k = VFMA(LDK(KP912575812), T2j, T2i);
414 T2u = VFMA(LDK(KP867381224), T28, T27);
415 T2v = VFMA(LDK(KP447417479), T2k, T2u);
416 T2l = VFMA(LDK(KP894834959), T2k, T2h);
417 T2w = VFNMS(LDK(KP763932022), T2v, T2h);
418 }
419 {
420 V T29, T2c, T2p, T2o;
421 T29 = VFNMS(LDK(KP867381224), T28, T27);
422 T2c = VFMA(LDK(KP912575812), T2b, T2a);
423 T2o = VFNMS(LDK(KP958953096), T2g, T2f);
424 T2p = VFMA(LDK(KP447417479), T2c, T2o);
425 T2d = VFNMS(LDK(KP809385824), T2c, T29);
426 T2q = VFMA(LDK(KP690983005), T2p, T29);
427 }
428 {
429 V T1Q, T1F, T1P, T1G, T1H;
430 Tf = VFMA(LDK(KP559016994), Te, Td);
431 T1G = VFMA(LDK(KP578046249), T1a, T1d);
432 T1H = VFMA(LDK(KP987388751), T1u, T1x);
433 T1I = VFNMS(LDK(KP831864738), T1H, T1G);
434 T1Q = VFMA(LDK(KP831864738), T1H, T1G);
435 {
436 V TU, T1z, T1C, T1D;
437 TU = VFMA(LDK(KP829049696), TT, Tz);
438 T1z = VFMA(LDK(KP831864738), T1y, T1e);
439 T1A = VFMA(LDK(KP904730450), T1z, TU);
440 T1F = VFNMS(LDK(KP904730450), T1z, TU);
441 T1C = VFMA(LDK(KP269969613), Tv, Ty);
442 T1D = VFMA(LDK(KP603558818), TK, TS);
443 T1E = VFMA(LDK(KP916574801), T1D, T1C);
444 T1P = VFNMS(LDK(KP916574801), T1D, T1C);
445 }
446 T1B = VFNMS(LDK(KP242145790), T1A, Tf);
447 T1Z = VADD(T1E, T1F);
448 T1J = VFNMS(LDK(KP904730450), T1I, T1F);
449 T1R = VFMA(LDK(KP904730450), T1Q, T1P);
450 T1W = VFNMS(LDK(KP904730450), T1Q, T1P);
451 }
/* Stores elements 1/24 and 4/21. */
452 {
453 V T25, T26, T2e, T2m;
454 T25 = VFMA(LDK(KP968583161), T1A, Tf);
455 T26 = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1R, T1O));
456 ST(&(x[WS(rs, 1)]), VFNMSI(T26, T25), ms, &(x[WS(rs, 1)]));
457 ST(&(x[WS(rs, 24)]), VFMAI(T26, T25), ms, &(x[0]));
458 T2e = VFNMS(LDK(KP992114701), T2d, Tf);
459 T2m = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2l, T1O));
460 ST(&(x[WS(rs, 4)]), VFMAI(T2m, T2e), ms, &(x[0]));
461 ST(&(x[WS(rs, 21)]), VFNMSI(T2m, T2e), ms, &(x[WS(rs, 1)]));
462 }
/* Stores elements 16/9. */
463 {
464 V T2s, T2y, T2r, T2x;
465 T2r = VFNMS(LDK(KP999544308), T2q, T2n);
466 T2s = VFNMS(LDK(KP803003575), T2r, Tf);
467 T2x = VFNMS(LDK(KP999544308), T2w, T2t);
468 T2y = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2x, T1O));
469 ST(&(x[WS(rs, 16)]), VFNMSI(T2y, T2s), ms, &(x[0]));
470 ST(&(x[WS(rs, 9)]), VFMAI(T2y, T2s), ms, &(x[WS(rs, 1)]));
471 }
/* Stores elements 6/19 and 14/11. */
472 {
473 V T1L, T21, T1Y, T24, T1K;
474 T1K = VFNMS(LDK(KP618033988), T1J, T1E);
475 T1L = VFNMS(LDK(KP876091699), T1K, T1B);
476 {
477 V T20, T1S, T23, T1X;
478 T20 = VFNMS(LDK(KP683113946), T1Z, T1I);
479 T21 = VFMA(LDK(KP792626838), T20, T1B);
480 T1S = VFNMS(LDK(KP242145790), T1R, T1O);
481 T23 = VFMA(LDK(KP617882369), T1W, T22);
482 T1X = VFMA(LDK(KP559016994), T1W, T1V);
483 T1Y = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1X, T1S));
484 T24 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T23, T1S));
485 }
486 ST(&(x[WS(rs, 6)]), VFNMSI(T1Y, T1L), ms, &(x[0]));
487 ST(&(x[WS(rs, 14)]), VFMAI(T24, T21), ms, &(x[0]));
488 ST(&(x[WS(rs, 19)]), VFMAI(T1Y, T1L), ms, &(x[WS(rs, 1)]));
489 ST(&(x[WS(rs, 11)]), VFNMSI(T24, T21), ms, &(x[WS(rs, 1)]));
490 }
491 }
492 }
493 }
/* VLEAVE() is invoked once after the loop; it is a macro defined elsewhere
 * (presumably SIMD state cleanup - confirm in the SIMD headers). */
494 VLEAVE();
495 }
496
/*
 * Twiddle-instruction table referenced by the codelet descriptor below.
 * It holds 24 VTW entries whose second argument runs 1..24 (one per
 * twiddled element of the 25-point codelet; element 0 takes no twiddle),
 * followed by a terminating {TW_NEXT, VL, 0} entry.
 * NOTE(review): VTW and TW_NEXT are defined in headers not shown here.
 */
497 static const tw_instr twinstr[] = {
498 VTW(0, 1),
499 VTW(0, 2),
500 VTW(0, 3),
501 VTW(0, 4),
502 VTW(0, 5),
503 VTW(0, 6),
504 VTW(0, 7),
505 VTW(0, 8),
506 VTW(0, 9),
507 VTW(0, 10),
508 VTW(0, 11),
509 VTW(0, 12),
510 VTW(0, 13),
511 VTW(0, 14),
512 VTW(0, 15),
513 VTW(0, 16),
514 VTW(0, 17),
515 VTW(0, 18),
516 VTW(0, 19),
517 VTW(0, 20),
518 VTW(0, 21),
519 VTW(0, 22),
520 VTW(0, 23),
521 VTW(0, 24),
522 {TW_NEXT, VL, 0}
523 };
524
/*
 * Codelet descriptor handed to the planner at registration time.
 * Positional fields: 25 (presumably the radix - ct_desc is declared
 * elsewhere), the codelet name string, the twiddle table, &GENUS, and the
 * operation counts {67, 60, 181, 0}, which match the "67 additions,
 * 60 multiplications, 181 fused multiply/add" figures in the generator's
 * header comment for this variant. The meaning of the three trailing zeros
 * is not visible in this file.
 */
525 static const ct_desc desc = { 25, XSIMD_STRING("t2fv_25"), twinstr, &GENUS, {67, 60, 181, 0}, 0, 0, 0 };
526
/*
 * Registration entry point: hands t2fv_25 and its descriptor to planner p
 * through X(kdft_dit_register). XSIMD() and X() are name-mangling macros
 * defined elsewhere, so the exported symbol name is not visible here.
 */
527 void XSIMD(codelet_t2fv_25) (planner *p) {
528 X(kdft_dit_register) (p, t2fv_25, &desc);
529 }
530 #else
531
532 /* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t2fv_25 -include dft/simd/t2f.h */
533
534 /*
535 * This function contains 248 FP additions, 188 FP multiplications,
536 * (or, 170 additions, 110 multiplications, 78 fused multiply/add),
537 * 99 stack variables, 40 constants, and 50 memory accesses
538 */
539 #include "dft/simd/t2f.h"
540
541 static void t2fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
542 {
543 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
544 DVK(KP125581039, +0.125581039058626752152356449131262266244969664);
545 DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
546 DVK(KP062790519, +0.062790519529313376076178224565631133122484832);
547 DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
548 DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
549 DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
550 DVK(KP728968627, +0.728968627421411523146730319055259111372571664);
551 DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
552 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
553 DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
554 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
555 DVK(KP684547105, +0.684547105928688673732283357621209269889519233);
556 DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
557 DVK(KP481753674, +0.481753674101715274987191502872129653528542010);
558 DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
559 DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
560 DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
561 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
562 DVK(KP250666467, +0.250666467128608490746237519633017587885836494);
563 DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
564 DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
565 DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
566 DVK(KP770513242, +0.770513242775789230803009636396177847271667672);
567 DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
568 DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
569 DVK(KP125333233, +0.125333233564304245373118759816508793942918247);
570 DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
571 DVK(KP904827052, +0.904827052466019527713668647932697593970413911);
572 DVK(KP851558583, +0.851558583130145297725004891488503407959946084);
573 DVK(KP637423989, +0.637423989748689710176712811676016195434917298);
574 DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
575 DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
576 DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
577 DVK(KP293892626, +0.293892626146236564584352977319536384298826219);
578 DVK(KP475528258, +0.475528258147576786058219666689691071702849317);
579 DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
580 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
581 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
582 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
583 {
584 INT m;
585 R *x;
586 x = ri;
587 for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) {
588 V Tc, Tb, Td, Te, T1C, T2t, T1E, T1x, T2m, T1u, T3c, T2n, Ty, T2i, Tv;
589 V T38, T2j, TS, T2f, TP, T39, T2g, T1d, T2p, T1a, T3b, T2q;
590 {
591 V T7, T9, Ta, T2, T4, T5, T1D;
592 Tc = LD(&(x[0]), ms, &(x[0]));
593 {
594 V T6, T8, T1, T3;
595 T6 = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
596 T7 = BYTWJ(&(W[TWVL * 18]), T6);
597 T8 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
598 T9 = BYTWJ(&(W[TWVL * 28]), T8);
599 Ta = VADD(T7, T9);
600 T1 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
601 T2 = BYTWJ(&(W[TWVL * 8]), T1);
602 T3 = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
603 T4 = BYTWJ(&(W[TWVL * 38]), T3);
604 T5 = VADD(T2, T4);
605 }
606 Tb = VMUL(LDK(KP559016994), VSUB(T5, Ta));
607 Td = VADD(T5, Ta);
608 Te = VFNMS(LDK(KP250000000), Td, Tc);
609 T1C = VSUB(T2, T4);
610 T1D = VSUB(T7, T9);
611 T2t = VMUL(LDK(KP951056516), T1D);
612 T1E = VFMA(LDK(KP951056516), T1C, VMUL(LDK(KP587785252), T1D));
613 }
614 {
615 V T1r, T1l, T1n, T1o, T1g, T1i, T1j, T1q;
616 T1q = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
617 T1r = BYTWJ(&(W[TWVL * 4]), T1q);
618 {
619 V T1k, T1m, T1f, T1h;
620 T1k = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
621 T1l = BYTWJ(&(W[TWVL * 24]), T1k);
622 T1m = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
623 T1n = BYTWJ(&(W[TWVL * 34]), T1m);
624 T1o = VADD(T1l, T1n);
625 T1f = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
626 T1g = BYTWJ(&(W[TWVL * 14]), T1f);
627 T1h = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
628 T1i = BYTWJ(&(W[TWVL * 44]), T1h);
629 T1j = VADD(T1g, T1i);
630 }
631 {
632 V T1v, T1w, T1p, T1s, T1t;
633 T1v = VSUB(T1g, T1i);
634 T1w = VSUB(T1l, T1n);
635 T1x = VFMA(LDK(KP475528258), T1v, VMUL(LDK(KP293892626), T1w));
636 T2m = VFNMS(LDK(KP293892626), T1v, VMUL(LDK(KP475528258), T1w));
637 T1p = VMUL(LDK(KP559016994), VSUB(T1j, T1o));
638 T1s = VADD(T1j, T1o);
639 T1t = VFNMS(LDK(KP250000000), T1s, T1r);
640 T1u = VADD(T1p, T1t);
641 T3c = VADD(T1r, T1s);
642 T2n = VSUB(T1t, T1p);
643 }
644 }
645 {
646 V Ts, Tm, To, Tp, Th, Tj, Tk, Tr;
647 Tr = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
648 Ts = BYTWJ(&(W[0]), Tr);
649 {
650 V Tl, Tn, Tg, Ti;
651 Tl = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
652 Tm = BYTWJ(&(W[TWVL * 20]), Tl);
653 Tn = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
654 To = BYTWJ(&(W[TWVL * 30]), Tn);
655 Tp = VADD(Tm, To);
656 Tg = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
657 Th = BYTWJ(&(W[TWVL * 10]), Tg);
658 Ti = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
659 Tj = BYTWJ(&(W[TWVL * 40]), Ti);
660 Tk = VADD(Th, Tj);
661 }
662 {
663 V Tw, Tx, Tq, Tt, Tu;
664 Tw = VSUB(Th, Tj);
665 Tx = VSUB(Tm, To);
666 Ty = VFMA(LDK(KP475528258), Tw, VMUL(LDK(KP293892626), Tx));
667 T2i = VFNMS(LDK(KP293892626), Tw, VMUL(LDK(KP475528258), Tx));
668 Tq = VMUL(LDK(KP559016994), VSUB(Tk, Tp));
669 Tt = VADD(Tk, Tp);
670 Tu = VFNMS(LDK(KP250000000), Tt, Ts);
671 Tv = VADD(Tq, Tu);
672 T38 = VADD(Ts, Tt);
673 T2j = VSUB(Tu, Tq);
674 }
675 }
676 {
677 V TM, TG, TI, TJ, TB, TD, TE, TL;
678 TL = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
679 TM = BYTWJ(&(W[TWVL * 6]), TL);
680 {
681 V TF, TH, TA, TC;
682 TF = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
683 TG = BYTWJ(&(W[TWVL * 26]), TF);
684 TH = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
685 TI = BYTWJ(&(W[TWVL * 36]), TH);
686 TJ = VADD(TG, TI);
687 TA = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
688 TB = BYTWJ(&(W[TWVL * 16]), TA);
689 TC = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
690 TD = BYTWJ(&(W[TWVL * 46]), TC);
691 TE = VADD(TB, TD);
692 }
693 {
694 V TQ, TR, TK, TN, TO;
695 TQ = VSUB(TB, TD);
696 TR = VSUB(TG, TI);
697 TS = VFMA(LDK(KP475528258), TQ, VMUL(LDK(KP293892626), TR));
698 T2f = VFNMS(LDK(KP293892626), TQ, VMUL(LDK(KP475528258), TR));
699 TK = VMUL(LDK(KP559016994), VSUB(TE, TJ));
700 TN = VADD(TE, TJ);
701 TO = VFNMS(LDK(KP250000000), TN, TM);
702 TP = VADD(TK, TO);
703 T39 = VADD(TM, TN);
704 T2g = VSUB(TO, TK);
705 }
706 }
707 {
708 V T17, T11, T13, T14, TW, TY, TZ, T16;
709 T16 = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
710 T17 = BYTWJ(&(W[TWVL * 2]), T16);
711 {
712 V T10, T12, TV, TX;
713 T10 = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
714 T11 = BYTWJ(&(W[TWVL * 22]), T10);
715 T12 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
716 T13 = BYTWJ(&(W[TWVL * 32]), T12);
717 T14 = VADD(T11, T13);
718 TV = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
719 TW = BYTWJ(&(W[TWVL * 12]), TV);
720 TX = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
721 TY = BYTWJ(&(W[TWVL * 42]), TX);
722 TZ = VADD(TW, TY);
723 }
724 {
725 V T1b, T1c, T15, T18, T19;
726 T1b = VSUB(TW, TY);
727 T1c = VSUB(T11, T13);
728 T1d = VFMA(LDK(KP475528258), T1b, VMUL(LDK(KP293892626), T1c));
729 T2p = VFNMS(LDK(KP293892626), T1b, VMUL(LDK(KP475528258), T1c));
730 T15 = VMUL(LDK(KP559016994), VSUB(TZ, T14));
731 T18 = VADD(TZ, T14);
732 T19 = VFNMS(LDK(KP250000000), T18, T17);
733 T1a = VADD(T15, T19);
734 T3b = VADD(T17, T18);
735 T2q = VSUB(T19, T15);
736 }
737 }
738 {
739 V T3l, T3m, T3f, T3g, T3e, T3h, T3n, T3i;
740 {
741 V T3j, T3k, T3a, T3d;
742 T3j = VSUB(T38, T39);
743 T3k = VSUB(T3b, T3c);
744 T3l = VBYI(VFMA(LDK(KP951056516), T3j, VMUL(LDK(KP587785252), T3k)));
745 T3m = VBYI(VFNMS(LDK(KP587785252), T3j, VMUL(LDK(KP951056516), T3k)));
746 T3f = VADD(Tc, Td);
747 T3a = VADD(T38, T39);
748 T3d = VADD(T3b, T3c);
749 T3g = VADD(T3a, T3d);
750 T3e = VMUL(LDK(KP559016994), VSUB(T3a, T3d));
751 T3h = VFNMS(LDK(KP250000000), T3g, T3f);
752 }
753 ST(&(x[0]), VADD(T3f, T3g), ms, &(x[0]));
754 T3n = VSUB(T3h, T3e);
755 ST(&(x[WS(rs, 10)]), VADD(T3m, T3n), ms, &(x[0]));
756 ST(&(x[WS(rs, 15)]), VSUB(T3n, T3m), ms, &(x[WS(rs, 1)]));
757 T3i = VADD(T3e, T3h);
758 ST(&(x[WS(rs, 5)]), VSUB(T3i, T3l), ms, &(x[WS(rs, 1)]));
759 ST(&(x[WS(rs, 20)]), VADD(T3l, T3i), ms, &(x[0]));
760 }
761 {
762 V Tf, T1Z, T20, T21, T29, T2a, T2b, T26, T27, T28, T22, T23, T24, T1L, T1U;
763 V T1Q, T1S, T1A, T1V, T1N, T1O, T2d, T2e;
764 Tf = VADD(Tb, Te);
765 T1Z = VFMA(LDK(KP1_688655851), Ty, VMUL(LDK(KP535826794), Tv));
766 T20 = VFMA(LDK(KP1_541026485), TS, VMUL(LDK(KP637423989), TP));
767 T21 = VSUB(T1Z, T20);
768 T29 = VFMA(LDK(KP851558583), T1d, VMUL(LDK(KP904827052), T1a));
769 T2a = VFMA(LDK(KP1_984229402), T1x, VMUL(LDK(KP125333233), T1u));
770 T2b = VADD(T29, T2a);
771 T26 = VFNMS(LDK(KP844327925), Tv, VMUL(LDK(KP1_071653589), Ty));
772 T27 = VFNMS(LDK(KP1_274847979), TS, VMUL(LDK(KP770513242), TP));
773 T28 = VADD(T26, T27);
774 T22 = VFNMS(LDK(KP425779291), T1a, VMUL(LDK(KP1_809654104), T1d));
775 T23 = VFNMS(LDK(KP992114701), T1u, VMUL(LDK(KP250666467), T1x));
776 T24 = VADD(T22, T23);
777 {
778 V T1F, T1G, T1H, T1I, T1J, T1K;
779 T1F = VFMA(LDK(KP1_937166322), Ty, VMUL(LDK(KP248689887), Tv));
780 T1G = VFMA(LDK(KP1_071653589), TS, VMUL(LDK(KP844327925), TP));
781 T1H = VADD(T1F, T1G);
782 T1I = VFMA(LDK(KP1_752613360), T1d, VMUL(LDK(KP481753674), T1a));
783 T1J = VFMA(LDK(KP1_457937254), T1x, VMUL(LDK(KP684547105), T1u));
784 T1K = VADD(T1I, T1J);
785 T1L = VADD(T1H, T1K);
786 T1U = VSUB(T1J, T1I);
787 T1Q = VMUL(LDK(KP559016994), VSUB(T1K, T1H));
788 T1S = VSUB(T1G, T1F);
789 }
790 {
791 V Tz, TT, TU, T1e, T1y, T1z;
792 Tz = VFNMS(LDK(KP497379774), Ty, VMUL(LDK(KP968583161), Tv));
793 TT = VFNMS(LDK(KP1_688655851), TS, VMUL(LDK(KP535826794), TP));
794 TU = VADD(Tz, TT);
795 T1e = VFNMS(LDK(KP963507348), T1d, VMUL(LDK(KP876306680), T1a));
796 T1y = VFNMS(LDK(KP1_369094211), T1x, VMUL(LDK(KP728968627), T1u));
797 T1z = VADD(T1e, T1y);
798 T1A = VADD(TU, T1z);
799 T1V = VMUL(LDK(KP559016994), VSUB(TU, T1z));
800 T1N = VSUB(TT, Tz);
801 T1O = VSUB(T1e, T1y);
802 }
803 {
804 V T1B, T1M, T25, T2c;
805 T1B = VADD(Tf, T1A);
806 T1M = VBYI(VADD(T1E, T1L));
807 ST(&(x[WS(rs, 1)]), VSUB(T1B, T1M), ms, &(x[WS(rs, 1)]));
808 ST(&(x[WS(rs, 24)]), VADD(T1B, T1M), ms, &(x[0]));
809 T25 = VADD(Tf, VADD(T21, T24));
810 T2c = VBYI(VADD(T1E, VSUB(T28, T2b)));
811 ST(&(x[WS(rs, 21)]), VSUB(T25, T2c), ms, &(x[WS(rs, 1)]));
812 ST(&(x[WS(rs, 4)]), VADD(T25, T2c), ms, &(x[0]));
813 }
814 T2d = VBYI(VADD(T1E, VFMA(LDK(KP309016994), T28, VFMA(LDK(KP587785252), VSUB(T23, T22), VFNMS(LDK(KP951056516), VADD(T1Z, T20), VMUL(LDK(KP809016994), T2b))))));
815 T2e = VFMA(LDK(KP309016994), T21, VFMA(LDK(KP951056516), VSUB(T26, T27), VFMA(LDK(KP587785252), VSUB(T2a, T29), VFNMS(LDK(KP809016994), T24, Tf))));
816 ST(&(x[WS(rs, 9)]), VADD(T2d, T2e), ms, &(x[WS(rs, 1)]));
817 ST(&(x[WS(rs, 16)]), VSUB(T2e, T2d), ms, &(x[0]));
818 {
819 V T1R, T1X, T1W, T1Y, T1P, T1T;
820 T1P = VFMS(LDK(KP250000000), T1L, T1E);
821 T1R = VBYI(VADD(VFMA(LDK(KP587785252), T1N, VMUL(LDK(KP951056516), T1O)), VSUB(T1P, T1Q)));
822 T1X = VBYI(VADD(VFNMS(LDK(KP587785252), T1O, VMUL(LDK(KP951056516), T1N)), VADD(T1P, T1Q)));
823 T1T = VFNMS(LDK(KP250000000), T1A, Tf);
824 T1W = VFMA(LDK(KP587785252), T1S, VFNMS(LDK(KP951056516), T1U, VSUB(T1T, T1V)));
825 T1Y = VFMA(LDK(KP951056516), T1S, VADD(T1V, VFMA(LDK(KP587785252), T1U, T1T)));
826 ST(&(x[WS(rs, 11)]), VADD(T1R, T1W), ms, &(x[WS(rs, 1)]));
827 ST(&(x[WS(rs, 19)]), VSUB(T1Y, T1X), ms, &(x[WS(rs, 1)]));
828 ST(&(x[WS(rs, 14)]), VSUB(T1W, T1R), ms, &(x[0]));
829 ST(&(x[WS(rs, 6)]), VADD(T1X, T1Y), ms, &(x[0]));
830 }
831 }
832 {
833 V T2u, T2w, T2h, T2k, T2l, T2A, T2B, T2C, T2o, T2r, T2s, T2x, T2y, T2z, T2M;
834 V T2X, T2N, T2W, T2R, T31, T2U, T30, T2E, T2F;
835 T2u = VFNMS(LDK(KP587785252), T1C, T2t);
836 T2w = VSUB(Te, Tb);
837 T2h = VFNMS(LDK(KP125333233), T2g, VMUL(LDK(KP1_984229402), T2f));
838 T2k = VFMA(LDK(KP1_457937254), T2i, VMUL(LDK(KP684547105), T2j));
839 T2l = VSUB(T2h, T2k);
840 T2A = VFNMS(LDK(KP1_996053456), T2p, VMUL(LDK(KP062790519), T2q));
841 T2B = VFMA(LDK(KP1_541026485), T2m, VMUL(LDK(KP637423989), T2n));
842 T2C = VSUB(T2A, T2B);
843 T2o = VFNMS(LDK(KP770513242), T2n, VMUL(LDK(KP1_274847979), T2m));
844 T2r = VFMA(LDK(KP125581039), T2p, VMUL(LDK(KP998026728), T2q));
845 T2s = VSUB(T2o, T2r);
846 T2x = VFNMS(LDK(KP1_369094211), T2i, VMUL(LDK(KP728968627), T2j));
847 T2y = VFMA(LDK(KP250666467), T2f, VMUL(LDK(KP992114701), T2g));
848 T2z = VSUB(T2x, T2y);
849 {
850 V T2G, T2H, T2I, T2J, T2K, T2L;
851 T2G = VFNMS(LDK(KP481753674), T2j, VMUL(LDK(KP1_752613360), T2i));
852 T2H = VFMA(LDK(KP851558583), T2f, VMUL(LDK(KP904827052), T2g));
853 T2I = VSUB(T2G, T2H);
854 T2J = VFNMS(LDK(KP844327925), T2q, VMUL(LDK(KP1_071653589), T2p));
855 T2K = VFNMS(LDK(KP998026728), T2n, VMUL(LDK(KP125581039), T2m));
856 T2L = VADD(T2J, T2K);
857 T2M = VMUL(LDK(KP559016994), VSUB(T2I, T2L));
858 T2X = VSUB(T2J, T2K);
859 T2N = VADD(T2I, T2L);
860 T2W = VADD(T2G, T2H);
861 }
862 {
863 V T2P, T2Q, T2Y, T2S, T2T, T2Z;
864 T2P = VFNMS(LDK(KP425779291), T2g, VMUL(LDK(KP1_809654104), T2f));
865 T2Q = VFMA(LDK(KP963507348), T2i, VMUL(LDK(KP876306680), T2j));
866 T2Y = VADD(T2Q, T2P);
867 T2S = VFMA(LDK(KP1_688655851), T2p, VMUL(LDK(KP535826794), T2q));
868 T2T = VFMA(LDK(KP1_996053456), T2m, VMUL(LDK(KP062790519), T2n));
869 T2Z = VADD(T2S, T2T);
870 T2R = VSUB(T2P, T2Q);
871 T31 = VADD(T2Y, T2Z);
872 T2U = VSUB(T2S, T2T);
873 T30 = VMUL(LDK(KP559016994), VSUB(T2Y, T2Z));
874 }
875 {
876 V T36, T37, T2v, T2D;
877 T36 = VBYI(VADD(T2u, T2N));
878 T37 = VADD(T2w, T31);
879 ST(&(x[WS(rs, 2)]), VADD(T36, T37), ms, &(x[0]));
880 ST(&(x[WS(rs, 23)]), VSUB(T37, T36), ms, &(x[WS(rs, 1)]));
881 T2v = VBYI(VSUB(VADD(T2l, T2s), T2u));
882 T2D = VADD(T2w, VADD(T2z, T2C));
883 ST(&(x[WS(rs, 3)]), VADD(T2v, T2D), ms, &(x[WS(rs, 1)]));
884 ST(&(x[WS(rs, 22)]), VSUB(T2D, T2v), ms, &(x[0]));
885 }
886 T2E = VFMA(LDK(KP309016994), T2z, VFNMS(LDK(KP809016994), T2C, VFNMS(LDK(KP587785252), VADD(T2r, T2o), VFNMS(LDK(KP951056516), VADD(T2k, T2h), T2w))));
887 T2F = VBYI(VSUB(VFNMS(LDK(KP587785252), VADD(T2A, T2B), VFNMS(LDK(KP809016994), T2s, VFNMS(LDK(KP951056516), VADD(T2x, T2y), VMUL(LDK(KP309016994), T2l)))), T2u));
888 ST(&(x[WS(rs, 17)]), VSUB(T2E, T2F), ms, &(x[WS(rs, 1)]));
889 ST(&(x[WS(rs, 8)]), VADD(T2E, T2F), ms, &(x[0]));
890 {
891 V T2V, T34, T33, T35, T2O, T32;
892 T2O = VFNMS(LDK(KP250000000), T2N, T2u);
893 T2V = VBYI(VADD(T2M, VADD(T2O, VFNMS(LDK(KP587785252), T2U, VMUL(LDK(KP951056516), T2R)))));
894 T34 = VBYI(VADD(T2O, VSUB(VFMA(LDK(KP587785252), T2R, VMUL(LDK(KP951056516), T2U)), T2M)));
895 T32 = VFNMS(LDK(KP250000000), T31, T2w);
896 T33 = VFMA(LDK(KP951056516), T2W, VFMA(LDK(KP587785252), T2X, VADD(T30, T32)));
897 T35 = VFMA(LDK(KP587785252), T2W, VSUB(VFNMS(LDK(KP951056516), T2X, T32), T30));
898 ST(&(x[WS(rs, 7)]), VADD(T2V, T33), ms, &(x[WS(rs, 1)]));
899 ST(&(x[WS(rs, 13)]), VSUB(T35, T34), ms, &(x[WS(rs, 1)]));
900 ST(&(x[WS(rs, 18)]), VSUB(T33, T2V), ms, &(x[0]));
901 ST(&(x[WS(rs, 12)]), VADD(T34, T35), ms, &(x[0]));
902 }
903 }
904 }
905 }
906 VLEAVE();
907 }
908
/* Twiddle-instruction table for this codelet: one VTW entry for each index
 * 1 through 24, followed by a closing TW_NEXT record. The table is referenced
 * by the `desc` descriptor defined after it.
 * NOTE(review): presumably VTW(0, k) requests the twiddle factor applied to
 * input k of the size-25 transform -- confirm against the VTW macro in the
 * SIMD support headers. */
909 static const tw_instr twinstr[] = {
910 VTW(0, 1),
911 VTW(0, 2),
912 VTW(0, 3),
913 VTW(0, 4),
914 VTW(0, 5),
915 VTW(0, 6),
916 VTW(0, 7),
917 VTW(0, 8),
918 VTW(0, 9),
919 VTW(0, 10),
920 VTW(0, 11),
921 VTW(0, 12),
922 VTW(0, 13),
923 VTW(0, 14),
924 VTW(0, 15),
925 VTW(0, 16),
926 VTW(0, 17),
927 VTW(0, 18),
928 VTW(0, 19),
929 VTW(0, 20),
930 VTW(0, 21),
931 VTW(0, 22),
932 VTW(0, 23),
933 VTW(0, 24),
934 {TW_NEXT, VL, 0} /* last entry of the table; VL comes from the SIMD headers */
935 };
936
/* Descriptor handed to the registration call below. It bundles the value 25
 * (matching the "-n 25" in the generator command line), the codelet name
 * string, the twinstr table and the address of GENUS.
 * NOTE(review): the {170, 110, 78, 0} group presumably holds the operation
 * counts (additions, multiplications, fused multiply/adds, other) and the
 * three trailing zeros are further ct_desc fields -- confirm against the
 * ct_desc declaration. */
937 static const ct_desc desc = { 25, XSIMD_STRING("t2fv_25"), twinstr, &GENUS, {170, 110, 78, 0}, 0, 0, 0 };
938
/* Registration entry point for this codelet: passes the t2fv_25 kernel and
 * its descriptor `desc` to kdft_dit_register together with the planner `p`
 * supplied by the caller. Returns nothing. */
939 void XSIMD(codelet_t2fv_25) (planner *p) {
940 X(kdft_dit_register) (p, t2fv_25, &desc);
941 }
942 #endif