comparison src/fftw-3.3.3/dft/simd/common/t2fv_25.c @ 10:37bf6b4a2645

Add FFTW3
author Chris Cannam
date Wed, 20 Mar 2013 15:35:50 +0000
parents
children
comparison
equal deleted inserted replaced
9:c0fb53affa76 10:37bf6b4a2645
1 /*
2 * Copyright (c) 2003, 2007-11 Matteo Frigo
3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 */
20
21 /* This file was automatically generated --- DO NOT EDIT */
22 /* Generated on Sun Nov 25 07:38:42 EST 2012 */
23
24 #include "codelet-dft.h"
25
26 #ifdef HAVE_FMA
27
28 /* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t2fv_25 -include t2f.h */
29
30 /*
31 * This function contains 248 FP additions, 241 FP multiplications,
32 * (or, 67 additions, 60 multiplications, 181 fused multiply/add),
33 * 208 stack variables, 67 constants, and 50 memory accesses
34 */
35 #include "t2f.h"
36
/*
 * t2fv_25 (HAVE_FMA variant): in-place codelet over 25 strided slots of x.
 *
 * x is set to ri; the ii argument is not referenced in this body.
 * Each loop iteration loads the 25 values x[WS(rs, 0)] .. x[WS(rs, 24)],
 * passes values 1..24 through BYTWJ together with twiddle data read from W
 * (value 0 is used as loaded), combines them with the DVK constants below,
 * and stores 25 results back into the same 25 slots of x.
 *
 * Parameters as used here:
 *   ri         - base pointer of the data that is read and overwritten
 *   ii         - unused in this function
 *   W          - twiddle data; 48 * TWVL entries are consumed per iteration
 *   rs         - stride handed to WS() to address the 25 slots
 *   mb, me, ms - loop start, loop end (exclusive) and per-step multiplier
 *                for the advance of x
 */
37 static void t2fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
38 {
/* Numeric constants used by the arithmetic below; each is declared with DVK and read with LDK. */
39 DVK(KP792626838, +0.792626838241819413632131824093538848057784557);
40 DVK(KP876091699, +0.876091699473550838204498029706869638173524346);
41 DVK(KP617882369, +0.617882369114440893914546919006756321695042882);
42 DVK(KP803003575, +0.803003575438660414833440593570376004635464850);
43 DVK(KP242145790, +0.242145790282157779872542093866183953459003101);
44 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
45 DVK(KP999544308, +0.999544308746292983948881682379742149196758193);
46 DVK(KP916574801, +0.916574801383451584742370439148878693530976769);
47 DVK(KP904730450, +0.904730450839922351881287709692877908104763647);
48 DVK(KP809385824, +0.809385824416008241660603814668679683846476688);
49 DVK(KP447417479, +0.447417479732227551498980015410057305749330693);
50 DVK(KP894834959, +0.894834959464455102997960030820114611498661386);
51 DVK(KP867381224, +0.867381224396525206773171885031575671309956167);
52 DVK(KP683113946, +0.683113946453479238701949862233725244439656928);
53 DVK(KP559154169, +0.559154169276087864842202529084232643714075927);
54 DVK(KP958953096, +0.958953096729998668045963838399037225970891871);
55 DVK(KP831864738, +0.831864738706457140726048799369896829771167132);
56 DVK(KP829049696, +0.829049696159252993975487806364305442437946767);
57 DVK(KP860541664, +0.860541664367944677098261680920518816412804187);
58 DVK(KP897376177, +0.897376177523557693138608077137219684419427330);
59 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
60 DVK(KP681693190, +0.681693190061530575150324149145440022633095390);
61 DVK(KP560319534, +0.560319534973832390111614715371676131169633784);
62 DVK(KP855719849, +0.855719849902058969314654733608091555096772472);
63 DVK(KP237294955, +0.237294955877110315393888866460840817927895961);
64 DVK(KP949179823, +0.949179823508441261575555465843363271711583843);
65 DVK(KP904508497, +0.904508497187473712051146708591409529430077295);
66 DVK(KP997675361, +0.997675361079556513670859573984492383596555031);
67 DVK(KP763932022, +0.763932022500210303590826331268723764559381640);
68 DVK(KP690983005, +0.690983005625052575897706582817180941139845410);
69 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
70 DVK(KP952936919, +0.952936919628306576880750665357914584765951388);
71 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
72 DVK(KP262346850, +0.262346850930607871785420028382979691334784273);
73 DVK(KP570584518, +0.570584518783621657366766175430996792655723863);
74 DVK(KP669429328, +0.669429328479476605641803240971985825917022098);
75 DVK(KP923225144, +0.923225144846402650453449441572664695995209956);
76 DVK(KP945422727, +0.945422727388575946270360266328811958657216298);
77 DVK(KP522616830, +0.522616830205754336872861364785224694908468440);
78 DVK(KP956723877, +0.956723877038460305821989399535483155872969262);
79 DVK(KP906616052, +0.906616052148196230441134447086066874408359177);
80 DVK(KP772036680, +0.772036680810363904029489473607579825330539880);
81 DVK(KP845997307, +0.845997307939530944175097360758058292389769300);
82 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
83 DVK(KP921078979, +0.921078979742360627699756128143719920817673854);
84 DVK(KP912575812, +0.912575812670962425556968549836277086778922727);
85 DVK(KP982009705, +0.982009705009746369461829878184175962711969869);
86 DVK(KP734762448, +0.734762448793050413546343770063151342619912334);
87 DVK(KP494780565, +0.494780565770515410344588413655324772219443730);
88 DVK(KP447533225, +0.447533225982656890041886979663652563063114397);
89 DVK(KP269969613, +0.269969613759572083574752974412347470060951301);
90 DVK(KP244189809, +0.244189809627953270309879511234821255780225091);
91 DVK(KP667278218, +0.667278218140296670899089292254759909713898805);
92 DVK(KP603558818, +0.603558818296015001454675132653458027918768137);
93 DVK(KP522847744, +0.522847744331509716623755382187077770911012542);
94 DVK(KP578046249, +0.578046249379945007321754579646815604023525655);
95 DVK(KP987388751, +0.987388751065621252324603216482382109400433949);
96 DVK(KP893101515, +0.893101515366181661711202267938416198338079437);
97 DVK(KP120146378, +0.120146378570687701782758537356596213647956445);
98 DVK(KP132830569, +0.132830569247582714407653942074819768844536507);
99 DVK(KP869845200, +0.869845200362138853122720822420327157933056305);
100 DVK(KP786782374, +0.786782374965295178365099601674911834788448471);
101 DVK(KP066152395, +0.066152395967733048213034281011006031460903353);
102 DVK(KP059835404, +0.059835404262124915169548397419498386427871950);
103 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
104 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
105 DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
106 {
107 INT m;
108 R *x;
109 x = ri;
/* m runs from mb up to (but excluding) me in steps of VL. W is first offset by
   mb * ((TWVL / VL) * 48); after every iteration x advances by VL * ms and W by TWVL * 48. */
110 for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) {
/* Intermediates assigned inside the nested scopes below and consumed by the last group of stores. */
111 V T25, T1B, T2y, T1K, T2s, T23, T1S, T26, T20, T1X;
112 {
113 V T1O, T2X, Te, T3L, Td, T3Q, T3j, T3b, T2R, T2M, T2f, T27, T1y, T1H, T3M;
114 V TW, TR, TK, T2B, T3n, T3e, T2U, T2F, T2i, T2a, Tz, T1C, T3N, TQ, T11;
115 V T1b, T1c, T16;
116 {
117 V T1, T1g, T1i, T1p, T1k, T1m, Tb, T1N, T6, T1M;
118 {
119 V T7, T9, T2, T4, T1f, T1h, T1o;
/* Load slot 0 (T1, the only input not passed through BYTWJ) and slots 10, 15, 5, 20, 3, 8, 18. */
120 T1 = LD(&(x[0]), ms, &(x[0]));
121 T7 = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
122 T9 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
123 T2 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
124 T4 = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
125 T1f = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
126 T1h = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
127 T1o = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
128 {
129 V T8, Ta, T3, T5, T1j;
130 T1j = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
/* Throughout this function the value loaded from slot k is passed to BYTWJ together with
   &(W[TWVL * 2 * (k - 1)]); BYTWJ itself is defined by the SIMD headers, not in this file. */
131 T8 = BYTWJ(&(W[TWVL * 18]), T7);
132 Ta = BYTWJ(&(W[TWVL * 28]), T9);
133 T3 = BYTWJ(&(W[TWVL * 8]), T2);
134 T5 = BYTWJ(&(W[TWVL * 38]), T4);
135 T1g = BYTWJ(&(W[TWVL * 4]), T1f);
136 T1i = BYTWJ(&(W[TWVL * 14]), T1h);
137 T1p = BYTWJ(&(W[TWVL * 34]), T1o);
138 T1k = BYTWJ(&(W[TWVL * 44]), T1j);
139 T1m = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
/* Sums and differences of the twiddled slots 10/15 (T8, Ta) and 5/20 (T3, T5). */
140 Tb = VADD(T8, Ta);
141 T1N = VSUB(T8, Ta);
142 T6 = VADD(T3, T5);
143 T1M = VSUB(T3, T5);
144 }
145 }
146 {
147 V T1v, T1l, Th, Tj, T1w, T1q, Tq, Tk, Tn, Tg;
148 Tg = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
149 {
150 V Tc, Ti, T1n, Tp;
151 Ti = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
152 T1v = VSUB(T1i, T1k);
153 T1l = VADD(T1i, T1k);
154 T1n = BYTWJ(&(W[TWVL * 24]), T1m);
155 Tp = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
156 T1O = VFMA(LDK(KP618033988), T1N, T1M);
157 T2X = VFNMS(LDK(KP618033988), T1M, T1N);
158 Te = VSUB(T6, Tb);
159 Tc = VADD(T6, Tb);
160 Th = BYTWJ(&(W[0]), Tg);
161 Tj = BYTWJ(&(W[TWVL * 10]), Ti);
162 T1w = VSUB(T1n, T1p);
163 T1q = VADD(T1n, T1p);
164 Tq = BYTWJ(&(W[TWVL * 30]), Tp);
165 Tk = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
/* Tc is the sum of the twiddled slots 5, 10, 15, 20; T3L adds slot 0 to it. */
166 T3L = VADD(T1, Tc);
167 Td = VFNMS(LDK(KP250000000), Tc, T1);
168 Tn = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
169 }
170 {
171 V T1x, T2K, TM, TB, Tw, Tm, Tx, Tr, TI, T2L, T1u, TD, TF, TL;
172 TL = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
173 {
174 V T1t, Tl, To, TH, T1s, T1r, TA, TC;
175 TA = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
176 T1r = VADD(T1l, T1q);
177 T1t = VSUB(T1q, T1l);
178 T1x = VFMA(LDK(KP618033988), T1w, T1v);
179 T2K = VFNMS(LDK(KP618033988), T1v, T1w);
180 Tl = BYTWJ(&(W[TWVL * 40]), Tk);
181 To = BYTWJ(&(W[TWVL * 20]), Tn);
182 TM = BYTWJ(&(W[TWVL * 6]), TL);
183 TB = BYTWJ(&(W[TWVL * 46]), TA);
184 TH = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
185 T1s = VFNMS(LDK(KP250000000), T1r, T1g);
/* T3Q: twiddled slot 3 plus the sum of twiddled slots 8, 13, 18, 23. */
186 T3Q = VADD(T1g, T1r);
187 TC = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
188 Tw = VSUB(Tj, Tl);
189 Tm = VADD(Tj, Tl);
190 Tx = VSUB(Tq, To);
191 Tr = VADD(To, Tq);
192 TI = BYTWJ(&(W[TWVL * 26]), TH);
193 T2L = VFMA(LDK(KP559016994), T1t, T1s);
194 T1u = VFNMS(LDK(KP559016994), T1t, T1s);
195 TD = BYTWJ(&(W[TWVL * 16]), TC);
196 TF = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
197 }
198 {
199 V Tu, Ty, T2E, TE, TN, TG, Tt, TV, Ts;
200 TV = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
201 Ts = VADD(Tm, Tr);
202 Tu = VSUB(Tm, Tr);
203 Ty = VFNMS(LDK(KP618033988), Tx, Tw);
204 T2E = VFMA(LDK(KP618033988), Tw, Tx);
205 T3j = VFNMS(LDK(KP059835404), T2K, T2L);
206 T3b = VFMA(LDK(KP066152395), T2L, T2K);
207 T2R = VFNMS(LDK(KP786782374), T2K, T2L);
208 T2M = VFMA(LDK(KP869845200), T2L, T2K);
209 T2f = VFMA(LDK(KP132830569), T1u, T1x);
210 T27 = VFNMS(LDK(KP120146378), T1x, T1u);
211 T1y = VFNMS(LDK(KP893101515), T1x, T1u);
212 T1H = VFMA(LDK(KP987388751), T1u, T1x);
213 TE = VSUB(TB, TD);
214 TN = VADD(TD, TB);
215 TG = BYTWJ(&(W[TWVL * 36]), TF);
216 Tt = VFNMS(LDK(KP250000000), Ts, Th);
/* T3M: twiddled slot 1 plus the sum of twiddled slots 6, 11, 16, 21. */
217 T3M = VADD(Th, Ts);
218 TW = BYTWJ(&(W[TWVL * 2]), TV);
219 {
220 V TJ, TO, Tv, T2D, TY, T15, T10, T13, TP;
221 {
222 V TX, T14, TZ, T12;
223 TX = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
224 T14 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
225 TZ = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
226 T12 = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
227 TJ = VSUB(TG, TI);
228 TO = VADD(TI, TG);
229 Tv = VFMA(LDK(KP559016994), Tu, Tt);
230 T2D = VFNMS(LDK(KP559016994), Tu, Tt);
231 TY = BYTWJ(&(W[TWVL * 12]), TX);
232 T15 = BYTWJ(&(W[TWVL * 32]), T14);
233 T10 = BYTWJ(&(W[TWVL * 42]), TZ);
234 T13 = BYTWJ(&(W[TWVL * 22]), T12);
235 }
236 TP = VADD(TN, TO);
237 TR = VSUB(TN, TO);
238 TK = VFMA(LDK(KP618033988), TJ, TE);
239 T2B = VFNMS(LDK(KP618033988), TE, TJ);
240 T3n = VFMA(LDK(KP578046249), T2D, T2E);
241 T3e = VFNMS(LDK(KP522847744), T2E, T2D);
242 T2U = VFNMS(LDK(KP987388751), T2D, T2E);
243 T2F = VFMA(LDK(KP893101515), T2E, T2D);
244 T2i = VFNMS(LDK(KP603558818), Ty, Tv);
245 T2a = VFMA(LDK(KP667278218), Tv, Ty);
246 Tz = VFNMS(LDK(KP244189809), Ty, Tv);
247 T1C = VFMA(LDK(KP269969613), Tv, Ty);
/* T3N: twiddled slot 4 plus the sum of twiddled slots 9, 14, 19, 24. */
248 T3N = VADD(TM, TP);
/* NOTE(review): TQ is formed with VFMS, whereas the analogous Td, T1s, Tt and T18 use VFNMS.
   This is as emitted by the generator; TE and TJ in this group are also taken in the opposite
   operand order to the other groups, so this looks like a deliberately sign-flipped
   intermediate -- confirm against the VFMS/VFNMS macro definitions before changing it. */
249 TQ = VFMS(LDK(KP250000000), TP, TM);
250 T11 = VADD(TY, T10);
251 T1b = VSUB(TY, T10);
252 T1c = VSUB(T15, T13);
253 T16 = VADD(T13, T15);
254 }
255 }
256 }
257 }
258 }
259 {
260 V T2z, Tf, T3W, T3O, T1d, T2H, T3m, T2j, T2b, TT, T1D, T2G, T35, T2V, T2Z;
261 V T3A, T3g, T2I, T1a, T3R, T3X;
262 T2z = VFNMS(LDK(KP559016994), Te, Td);
263 Tf = VFMA(LDK(KP559016994), Te, Td);
264 {
265 V TS, T2A, T17, T19;
266 TS = VFNMS(LDK(KP559016994), TR, TQ);
267 T2A = VFMA(LDK(KP559016994), TR, TQ);
268 T3W = VSUB(T3M, T3N);
269 T3O = VADD(T3M, T3N);
270 T1d = VFNMS(LDK(KP618033988), T1c, T1b);
271 T2H = VFMA(LDK(KP618033988), T1b, T1c);
272 T17 = VADD(T11, T16);
273 T19 = VSUB(T16, T11);
274 {
275 V T3f, T2T, T2C, T18, T3P;
276 T3m = VFMA(LDK(KP447533225), T2B, T2A);
277 T3f = VFNMS(LDK(KP494780565), T2A, T2B);
278 T2T = VFNMS(LDK(KP132830569), T2A, T2B);
279 T2C = VFMA(LDK(KP120146378), T2B, T2A);
280 T2j = VFNMS(LDK(KP786782374), TK, TS);
281 T2b = VFMA(LDK(KP869845200), TS, TK);
282 TT = VFNMS(LDK(KP667278218), TS, TK);
283 T1D = VFMA(LDK(KP603558818), TK, TS);
284 T18 = VFNMS(LDK(KP250000000), T17, TW);
/* T3P: twiddled slot 2 plus the sum of twiddled slots 7, 12, 17, 22. */
285 T3P = VADD(TW, T17);
286 T2G = VFMA(LDK(KP734762448), T2F, T2C);
287 T35 = VFNMS(LDK(KP734762448), T2F, T2C);
288 T2V = VFNMS(LDK(KP734762448), T2U, T2T);
289 T2Z = VFMA(LDK(KP734762448), T2U, T2T);
290 T3A = VFMA(LDK(KP982009705), T3f, T3e);
291 T3g = VFNMS(LDK(KP982009705), T3f, T3e);
292 T2I = VFMA(LDK(KP559016994), T19, T18);
293 T1a = VFNMS(LDK(KP559016994), T19, T18);
294 T3R = VADD(T3P, T3Q);
295 T3X = VSUB(T3P, T3Q);
296 }
297 }
298 {
299 V T2n, T2t, T1V, T22, T2l, T2d, T1Q, T1I, T2w, T1A, T1F, T2q;
300 {
301 V T2k, T1G, T28, T2g, T3K, T3E, T3a, T34, T3x, T3H, T2c, TU, T1T, T1U, T1z;
302 V T3o, T3t;
303 T2n = VFNMS(LDK(KP912575812), T2j, T2i);
304 T2k = VFMA(LDK(KP912575812), T2j, T2i);
305 T3o = VFNMS(LDK(KP921078979), T3n, T3m);
306 T3t = VFMA(LDK(KP921078979), T3n, T3m);
307 {
308 V T3c, T2Q, T2J, T3k, T1e;
309 T3c = VFNMS(LDK(KP667278218), T2I, T2H);
310 T2Q = VFNMS(LDK(KP059835404), T2H, T2I);
311 T2J = VFMA(LDK(KP066152395), T2I, T2H);
312 T3k = VFMA(LDK(KP603558818), T2H, T2I);
313 T1G = VFMA(LDK(KP578046249), T1a, T1d);
314 T1e = VFNMS(LDK(KP522847744), T1d, T1a);
315 T28 = VFNMS(LDK(KP494780565), T1a, T1d);
316 T2g = VFMA(LDK(KP447533225), T1d, T1a);
317 {
318 V T3U, T3S, T40, T3Y;
319 T3U = VSUB(T3O, T3R);
320 T3S = VADD(T3O, T3R);
321 T40 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3W, T3X));
322 T3Y = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3X, T3W));
323 {
324 V T3s, T3l, T2N, T36;
325 T3s = VFNMS(LDK(KP845997307), T3k, T3j);
326 T3l = VFMA(LDK(KP845997307), T3k, T3j);
327 T2N = VFNMS(LDK(KP772036680), T2M, T2J);
328 T36 = VFMA(LDK(KP772036680), T2M, T2J);
329 {
330 V T30, T2S, T3d, T3z, T3T;
331 T30 = VFNMS(LDK(KP772036680), T2R, T2Q);
332 T2S = VFMA(LDK(KP772036680), T2R, T2Q);
333 T3d = VFNMS(LDK(KP845997307), T3c, T3b);
334 T3z = VFMA(LDK(KP845997307), T3c, T3b);
/* Slot 0 output: T3S + T3L, i.e. slot 0 plus the sum of all 24 twiddled inputs. */
335 ST(&(x[0]), VADD(T3S, T3L), ms, &(x[0]));
336 T3T = VFNMS(LDK(KP250000000), T3S, T3L);
337 {
338 V T3C, T3p, T2O, T37;
339 T3C = VFMA(LDK(KP906616052), T3o, T3l);
340 T3p = VFNMS(LDK(KP906616052), T3o, T3l);
341 T2O = VFMA(LDK(KP956723877), T2N, T2G);
342 T37 = VFMA(LDK(KP522616830), T2V, T36);
343 {
344 V T31, T2W, T3u, T3h;
345 T31 = VFNMS(LDK(KP522616830), T2G, T30);
346 T2W = VFMA(LDK(KP945422727), T2V, T2S);
347 T3u = VFNMS(LDK(KP923225144), T3g, T3d);
348 T3h = VFMA(LDK(KP923225144), T3g, T3d);
349 {
350 V T3I, T3B, T3V, T3Z;
351 T3I = VFNMS(LDK(KP669429328), T3z, T3A);
352 T3B = VFMA(LDK(KP570584518), T3A, T3z);
353 T3V = VFMA(LDK(KP559016994), T3U, T3T);
354 T3Z = VFNMS(LDK(KP559016994), T3U, T3T);
355 {
356 V T3y, T3q, T2P, T38;
357 T3y = VFMA(LDK(KP262346850), T3p, T2X);
358 T3q = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2X, T3p));
359 T2P = VFMA(LDK(KP992114701), T2O, T2z);
360 T38 = VFNMS(LDK(KP690983005), T37, T2S);
361 {
362 V T32, T2Y, T3v, T3F;
363 T32 = VFMA(LDK(KP763932022), T31, T2N);
364 T2Y = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2X, T2W));
365 T3v = VFNMS(LDK(KP997675361), T3u, T3t);
366 T3F = VFNMS(LDK(KP904508497), T3u, T3s);
367 {
368 V T3i, T3r, T3J, T3D;
369 T3i = VFMA(LDK(KP949179823), T3h, T2z);
370 T3r = VFNMS(LDK(KP237294955), T3h, T2z);
371 T3J = VFNMS(LDK(KP669429328), T3C, T3I);
372 T3D = VFMA(LDK(KP618033988), T3C, T3B);
/* Store slots 20, 5, 15 and 10; each pair shares its two operands and differs only in VFMAI versus VFNMSI. */
373 ST(&(x[WS(rs, 20)]), VFMAI(T3Y, T3V), ms, &(x[0]));
374 ST(&(x[WS(rs, 5)]), VFNMSI(T3Y, T3V), ms, &(x[WS(rs, 1)]));
375 ST(&(x[WS(rs, 15)]), VFNMSI(T40, T3Z), ms, &(x[WS(rs, 1)]));
376 ST(&(x[WS(rs, 10)]), VFMAI(T40, T3Z), ms, &(x[0]));
377 {
378 V T39, T33, T3w, T3G;
379 T39 = VFMA(LDK(KP855719849), T38, T35);
380 T33 = VFNMS(LDK(KP855719849), T32, T2Z);
/* Store slots 22, 3, 23 and 2. */
381 ST(&(x[WS(rs, 22)]), VFMAI(T2Y, T2P), ms, &(x[0]));
382 ST(&(x[WS(rs, 3)]), VFNMSI(T2Y, T2P), ms, &(x[WS(rs, 1)]));
383 T3w = VFMA(LDK(KP560319534), T3v, T3s);
384 T3G = VFNMS(LDK(KP681693190), T3F, T3t);
385 ST(&(x[WS(rs, 23)]), VFMAI(T3q, T3i), ms, &(x[WS(rs, 1)]));
386 ST(&(x[WS(rs, 2)]), VFNMSI(T3q, T3i), ms, &(x[0]));
387 T3K = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3J, T3y));
388 T3E = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3D, T3y));
389 T3a = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T39, T2X));
390 T34 = VFMA(LDK(KP897376177), T33, T2z);
391 T3x = VFNMS(LDK(KP949179823), T3w, T3r);
392 T3H = VFNMS(LDK(KP860541664), T3G, T3r);
393 T2t = VFNMS(LDK(KP912575812), T2b, T2a);
394 T2c = VFMA(LDK(KP912575812), T2b, T2a);
395 TU = VFMA(LDK(KP829049696), TT, Tz);
396 T1T = VFNMS(LDK(KP829049696), TT, Tz);
397 T1U = VFNMS(LDK(KP831864738), T1y, T1e);
398 T1z = VFMA(LDK(KP831864738), T1y, T1e);
399 }
400 }
401 }
402 }
403 }
404 }
405 }
406 }
407 }
408 }
409 }
410 {
411 V T2o, T2h, T29, T2u, T2v, T2p;
412 T2o = VFNMS(LDK(KP958953096), T2g, T2f);
413 T2h = VFMA(LDK(KP958953096), T2g, T2f);
/* Store slots 17, 8, 12, 13, 18 and 7 from the values prepared in the preceding scope. */
414 ST(&(x[WS(rs, 17)]), VFMAI(T3a, T34), ms, &(x[WS(rs, 1)]));
415 ST(&(x[WS(rs, 8)]), VFNMSI(T3a, T34), ms, &(x[0]));
416 ST(&(x[WS(rs, 12)]), VFMAI(T3E, T3x), ms, &(x[0]));
417 ST(&(x[WS(rs, 13)]), VFNMSI(T3E, T3x), ms, &(x[WS(rs, 1)]));
418 ST(&(x[WS(rs, 18)]), VFNMSI(T3K, T3H), ms, &(x[0]));
419 ST(&(x[WS(rs, 7)]), VFMAI(T3K, T3H), ms, &(x[WS(rs, 1)]));
420 T1V = VFMA(LDK(KP559154169), T1U, T1T);
421 T22 = VFNMS(LDK(KP683113946), T1T, T1U);
422 T29 = VFNMS(LDK(KP867381224), T28, T27);
423 T2u = VFMA(LDK(KP867381224), T28, T27);
424 T2l = VFMA(LDK(KP894834959), T2k, T2h);
425 T2v = VFMA(LDK(KP447417479), T2k, T2u);
426 T2d = VFNMS(LDK(KP809385824), T2c, T29);
427 T2p = VFMA(LDK(KP447417479), T2c, T2o);
428 T1Q = VFMA(LDK(KP831864738), T1H, T1G);
429 T1I = VFNMS(LDK(KP831864738), T1H, T1G);
430 T2w = VFNMS(LDK(KP763932022), T2v, T2h);
431 T1A = VFMA(LDK(KP904730450), T1z, TU);
432 T1F = VFNMS(LDK(KP904730450), T1z, TU);
433 T2q = VFMA(LDK(KP690983005), T2p, T29);
434 }
435 }
436 {
437 V T2e, T1E, T1P, T2m;
438 T2e = VFNMS(LDK(KP992114701), T2d, Tf);
439 T1E = VFMA(LDK(KP916574801), T1D, T1C);
440 T1P = VFNMS(LDK(KP916574801), T1D, T1C);
441 T2m = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2l, T1O));
442 {
443 V T1J, T2r, T1R, T1W, T1Z, T2x;
444 T2x = VFNMS(LDK(KP999544308), T2w, T2t);
445 T1J = VFNMS(LDK(KP904730450), T1I, T1F);
446 T25 = VFMA(LDK(KP968583161), T1A, Tf);
447 T1B = VFNMS(LDK(KP242145790), T1A, Tf);
448 T2r = VFNMS(LDK(KP999544308), T2q, T2n);
449 T1R = VFMA(LDK(KP904730450), T1Q, T1P);
450 T1W = VFNMS(LDK(KP904730450), T1Q, T1P);
451 T1Z = VADD(T1E, T1F);
/* Store slots 21 and 4. */
452 ST(&(x[WS(rs, 21)]), VFNMSI(T2m, T2e), ms, &(x[WS(rs, 1)]));
453 ST(&(x[WS(rs, 4)]), VFMAI(T2m, T2e), ms, &(x[0]));
454 T2y = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2x, T1O));
455 T1K = VFNMS(LDK(KP618033988), T1J, T1E);
456 T2s = VFNMS(LDK(KP803003575), T2r, Tf);
457 T23 = VFMA(LDK(KP617882369), T1W, T22);
458 T1S = VFNMS(LDK(KP242145790), T1R, T1O);
459 T26 = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1R, T1O));
460 T20 = VFNMS(LDK(KP683113946), T1Z, T1I);
461 T1X = VFMA(LDK(KP559016994), T1W, T1V);
462 }
463 }
464 }
465 }
466 }
467 {
/* Final combinations of the loop-scope intermediates and the remaining stores
   (slots 9, 16, 24, 1, 11, 14, 19 and 6). */
468 V T1L, T24, T21, T1Y;
469 T1L = VFNMS(LDK(KP876091699), T1K, T1B);
470 ST(&(x[WS(rs, 9)]), VFMAI(T2y, T2s), ms, &(x[WS(rs, 1)]));
471 ST(&(x[WS(rs, 16)]), VFNMSI(T2y, T2s), ms, &(x[0]));
472 T24 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T23, T1S));
473 ST(&(x[WS(rs, 24)]), VFMAI(T26, T25), ms, &(x[0]));
474 ST(&(x[WS(rs, 1)]), VFNMSI(T26, T25), ms, &(x[WS(rs, 1)]));
475 T21 = VFMA(LDK(KP792626838), T20, T1B);
476 T1Y = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1X, T1S));
477 ST(&(x[WS(rs, 11)]), VFNMSI(T24, T21), ms, &(x[WS(rs, 1)]));
478 ST(&(x[WS(rs, 14)]), VFMAI(T24, T21), ms, &(x[0]));
479 ST(&(x[WS(rs, 19)]), VFMAI(T1Y, T1L), ms, &(x[WS(rs, 1)]));
480 ST(&(x[WS(rs, 6)]), VFNMSI(T1Y, T1L), ms, &(x[0]));
481 }
482 }
483 }
/* VLEAVE is invoked once after the loop has finished; it is defined by the SIMD headers, not in this file. */
484 VLEAVE();
485 }
486
/* Twiddle instruction table handed to the descriptor below: one VTW(0, k) entry
   for each k = 1..24, in ascending order, terminated by a {TW_NEXT, VL, 0} entry. */
487 static const tw_instr twinstr[] = {
488 VTW(0, 1),
489 VTW(0, 2),
490 VTW(0, 3),
491 VTW(0, 4),
492 VTW(0, 5),
493 VTW(0, 6),
494 VTW(0, 7),
495 VTW(0, 8),
496 VTW(0, 9),
497 VTW(0, 10),
498 VTW(0, 11),
499 VTW(0, 12),
500 VTW(0, 13),
501 VTW(0, 14),
502 VTW(0, 15),
503 VTW(0, 16),
504 VTW(0, 17),
505 VTW(0, 18),
506 VTW(0, 19),
507 VTW(0, 20),
508 VTW(0, 21),
509 VTW(0, 22),
510 VTW(0, 23),
511 VTW(0, 24),
512 {TW_NEXT, VL, 0}
513 };
514
/* Codelet descriptor: 25 (matching the generator's -n 25), the name string "t2fv_25",
   the twinstr table, &GENUS, and the counts {67, 60, 181, 0}, whose first three values match
   the additions / multiplications / fused multiply-add figures in the generated header comment.
   The three trailing zeros are left as generated; their meaning is defined by ct_desc, not here. */
515 static const ct_desc desc = { 25, XSIMD_STRING("t2fv_25"), twinstr, &GENUS, {67, 60, 181, 0}, 0, 0, 0 };
516
/* Registration entry point: hands the t2fv_25 function and its descriptor to
   X(kdft_dit_register) for the planner p. */
517 void XSIMD(codelet_t2fv_25) (planner *p) {
518 X(kdft_dit_register) (p, t2fv_25, &desc);
519 }
520 #else /* HAVE_FMA */
521
522 /* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t2fv_25 -include t2f.h */
523
524 /*
525 * This function contains 248 FP additions, 188 FP multiplications,
526 * (or, 170 additions, 110 multiplications, 78 fused multiply/add),
527 * 99 stack variables, 40 constants, and 50 memory accesses
528 */
529 #include "t2f.h"
530
531 static void t2fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
532 {
533 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
534 DVK(KP125581039, +0.125581039058626752152356449131262266244969664);
535 DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
536 DVK(KP062790519, +0.062790519529313376076178224565631133122484832);
537 DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
538 DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
539 DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
540 DVK(KP728968627, +0.728968627421411523146730319055259111372571664);
541 DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
542 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
543 DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
544 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
545 DVK(KP684547105, +0.684547105928688673732283357621209269889519233);
546 DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
547 DVK(KP481753674, +0.481753674101715274987191502872129653528542010);
548 DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
549 DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
550 DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
551 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
552 DVK(KP250666467, +0.250666467128608490746237519633017587885836494);
553 DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
554 DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
555 DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
556 DVK(KP770513242, +0.770513242775789230803009636396177847271667672);
557 DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
558 DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
559 DVK(KP125333233, +0.125333233564304245373118759816508793942918247);
560 DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
561 DVK(KP904827052, +0.904827052466019527713668647932697593970413911);
562 DVK(KP851558583, +0.851558583130145297725004891488503407959946084);
563 DVK(KP637423989, +0.637423989748689710176712811676016195434917298);
564 DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
565 DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
566 DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
567 DVK(KP293892626, +0.293892626146236564584352977319536384298826219);
568 DVK(KP475528258, +0.475528258147576786058219666689691071702849317);
569 DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
570 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
571 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
572 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
573 {
574 INT m;
575 R *x;
576 x = ri;
577 for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(25, rs)) {
578 V Tc, Tb, Td, Te, T1C, T2t, T1E, T1x, T2m, T1u, T3c, T2n, Ty, T2i, Tv;
579 V T38, T2j, TS, T2f, TP, T39, T2g, T1d, T2p, T1a, T3b, T2q;
580 {
581 V T7, T9, Ta, T2, T4, T5, T1D;
582 Tc = LD(&(x[0]), ms, &(x[0]));
583 {
584 V T6, T8, T1, T3;
585 T6 = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
586 T7 = BYTWJ(&(W[TWVL * 18]), T6);
587 T8 = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
588 T9 = BYTWJ(&(W[TWVL * 28]), T8);
589 Ta = VADD(T7, T9);
590 T1 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
591 T2 = BYTWJ(&(W[TWVL * 8]), T1);
592 T3 = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
593 T4 = BYTWJ(&(W[TWVL * 38]), T3);
594 T5 = VADD(T2, T4);
595 }
596 Tb = VMUL(LDK(KP559016994), VSUB(T5, Ta));
597 Td = VADD(T5, Ta);
598 Te = VFNMS(LDK(KP250000000), Td, Tc);
599 T1C = VSUB(T2, T4);
600 T1D = VSUB(T7, T9);
601 T2t = VMUL(LDK(KP951056516), T1D);
602 T1E = VFMA(LDK(KP951056516), T1C, VMUL(LDK(KP587785252), T1D));
603 }
604 {
605 V T1r, T1l, T1n, T1o, T1g, T1i, T1j, T1q;
606 T1q = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
607 T1r = BYTWJ(&(W[TWVL * 4]), T1q);
608 {
609 V T1k, T1m, T1f, T1h;
610 T1k = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
611 T1l = BYTWJ(&(W[TWVL * 24]), T1k);
612 T1m = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
613 T1n = BYTWJ(&(W[TWVL * 34]), T1m);
614 T1o = VADD(T1l, T1n);
615 T1f = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
616 T1g = BYTWJ(&(W[TWVL * 14]), T1f);
617 T1h = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
618 T1i = BYTWJ(&(W[TWVL * 44]), T1h);
619 T1j = VADD(T1g, T1i);
620 }
621 {
622 V T1v, T1w, T1p, T1s, T1t;
623 T1v = VSUB(T1g, T1i);
624 T1w = VSUB(T1l, T1n);
625 T1x = VFMA(LDK(KP475528258), T1v, VMUL(LDK(KP293892626), T1w));
626 T2m = VFNMS(LDK(KP293892626), T1v, VMUL(LDK(KP475528258), T1w));
627 T1p = VMUL(LDK(KP559016994), VSUB(T1j, T1o));
628 T1s = VADD(T1j, T1o);
629 T1t = VFNMS(LDK(KP250000000), T1s, T1r);
630 T1u = VADD(T1p, T1t);
631 T3c = VADD(T1r, T1s);
632 T2n = VSUB(T1t, T1p);
633 }
634 }
635 {
636 V Ts, Tm, To, Tp, Th, Tj, Tk, Tr;
637 Tr = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
638 Ts = BYTWJ(&(W[0]), Tr);
639 {
640 V Tl, Tn, Tg, Ti;
641 Tl = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
642 Tm = BYTWJ(&(W[TWVL * 20]), Tl);
643 Tn = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
644 To = BYTWJ(&(W[TWVL * 30]), Tn);
645 Tp = VADD(Tm, To);
646 Tg = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
647 Th = BYTWJ(&(W[TWVL * 10]), Tg);
648 Ti = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
649 Tj = BYTWJ(&(W[TWVL * 40]), Ti);
650 Tk = VADD(Th, Tj);
651 }
652 {
653 V Tw, Tx, Tq, Tt, Tu;
654 Tw = VSUB(Th, Tj);
655 Tx = VSUB(Tm, To);
656 Ty = VFMA(LDK(KP475528258), Tw, VMUL(LDK(KP293892626), Tx));
657 T2i = VFNMS(LDK(KP293892626), Tw, VMUL(LDK(KP475528258), Tx));
658 Tq = VMUL(LDK(KP559016994), VSUB(Tk, Tp));
659 Tt = VADD(Tk, Tp);
660 Tu = VFNMS(LDK(KP250000000), Tt, Ts);
661 Tv = VADD(Tq, Tu);
662 T38 = VADD(Ts, Tt);
663 T2j = VSUB(Tu, Tq);
664 }
665 }
666 {
667 V TM, TG, TI, TJ, TB, TD, TE, TL;
668 TL = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
669 TM = BYTWJ(&(W[TWVL * 6]), TL);
670 {
671 V TF, TH, TA, TC;
672 TF = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
673 TG = BYTWJ(&(W[TWVL * 26]), TF);
674 TH = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
675 TI = BYTWJ(&(W[TWVL * 36]), TH);
676 TJ = VADD(TG, TI);
677 TA = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
678 TB = BYTWJ(&(W[TWVL * 16]), TA);
679 TC = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
680 TD = BYTWJ(&(W[TWVL * 46]), TC);
681 TE = VADD(TB, TD);
682 }
683 {
684 V TQ, TR, TK, TN, TO;
685 TQ = VSUB(TB, TD);
686 TR = VSUB(TG, TI);
687 TS = VFMA(LDK(KP475528258), TQ, VMUL(LDK(KP293892626), TR));
688 T2f = VFNMS(LDK(KP293892626), TQ, VMUL(LDK(KP475528258), TR));
689 TK = VMUL(LDK(KP559016994), VSUB(TE, TJ));
690 TN = VADD(TE, TJ);
691 TO = VFNMS(LDK(KP250000000), TN, TM);
692 TP = VADD(TK, TO);
693 T39 = VADD(TM, TN);
694 T2g = VSUB(TO, TK);
695 }
696 }
697 {
698 V T17, T11, T13, T14, TW, TY, TZ, T16;
699 T16 = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
700 T17 = BYTWJ(&(W[TWVL * 2]), T16);
701 {
702 V T10, T12, TV, TX;
703 T10 = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
704 T11 = BYTWJ(&(W[TWVL * 22]), T10);
705 T12 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
706 T13 = BYTWJ(&(W[TWVL * 32]), T12);
707 T14 = VADD(T11, T13);
708 TV = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
709 TW = BYTWJ(&(W[TWVL * 12]), TV);
710 TX = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
711 TY = BYTWJ(&(W[TWVL * 42]), TX);
712 TZ = VADD(TW, TY);
713 }
714 {
715 V T1b, T1c, T15, T18, T19;
716 T1b = VSUB(TW, TY);
717 T1c = VSUB(T11, T13);
718 T1d = VFMA(LDK(KP475528258), T1b, VMUL(LDK(KP293892626), T1c));
719 T2p = VFNMS(LDK(KP293892626), T1b, VMUL(LDK(KP475528258), T1c));
720 T15 = VMUL(LDK(KP559016994), VSUB(TZ, T14));
721 T18 = VADD(TZ, T14);
722 T19 = VFNMS(LDK(KP250000000), T18, T17);
723 T1a = VADD(T15, T19);
724 T3b = VADD(T17, T18);
725 T2q = VSUB(T19, T15);
726 }
727 }
728 {
729 V T3l, T3m, T3f, T3g, T3e, T3h, T3n, T3i;
730 {
731 V T3j, T3k, T3a, T3d;
732 T3j = VSUB(T38, T39);
733 T3k = VSUB(T3b, T3c);
734 T3l = VBYI(VFMA(LDK(KP951056516), T3j, VMUL(LDK(KP587785252), T3k)));
735 T3m = VBYI(VFNMS(LDK(KP587785252), T3j, VMUL(LDK(KP951056516), T3k)));
736 T3f = VADD(Tc, Td);
737 T3a = VADD(T38, T39);
738 T3d = VADD(T3b, T3c);
739 T3g = VADD(T3a, T3d);
740 T3e = VMUL(LDK(KP559016994), VSUB(T3a, T3d));
741 T3h = VFNMS(LDK(KP250000000), T3g, T3f);
742 }
743 ST(&(x[0]), VADD(T3f, T3g), ms, &(x[0]));
744 T3n = VSUB(T3h, T3e);
745 ST(&(x[WS(rs, 10)]), VADD(T3m, T3n), ms, &(x[0]));
746 ST(&(x[WS(rs, 15)]), VSUB(T3n, T3m), ms, &(x[WS(rs, 1)]));
747 T3i = VADD(T3e, T3h);
748 ST(&(x[WS(rs, 5)]), VSUB(T3i, T3l), ms, &(x[WS(rs, 1)]));
749 ST(&(x[WS(rs, 20)]), VADD(T3l, T3i), ms, &(x[0]));
750 }
751 {
752 V Tf, T1Z, T20, T21, T29, T2a, T2b, T26, T27, T28, T22, T23, T24, T1L, T1U;
753 V T1Q, T1S, T1A, T1V, T1N, T1O, T2d, T2e;
754 Tf = VADD(Tb, Te);
755 T1Z = VFMA(LDK(KP1_688655851), Ty, VMUL(LDK(KP535826794), Tv));
756 T20 = VFMA(LDK(KP1_541026485), TS, VMUL(LDK(KP637423989), TP));
757 T21 = VSUB(T1Z, T20);
758 T29 = VFMA(LDK(KP851558583), T1d, VMUL(LDK(KP904827052), T1a));
759 T2a = VFMA(LDK(KP1_984229402), T1x, VMUL(LDK(KP125333233), T1u));
760 T2b = VADD(T29, T2a);
761 T26 = VFNMS(LDK(KP844327925), Tv, VMUL(LDK(KP1_071653589), Ty));
762 T27 = VFNMS(LDK(KP1_274847979), TS, VMUL(LDK(KP770513242), TP));
763 T28 = VADD(T26, T27);
764 T22 = VFNMS(LDK(KP425779291), T1a, VMUL(LDK(KP1_809654104), T1d));
765 T23 = VFNMS(LDK(KP992114701), T1u, VMUL(LDK(KP250666467), T1x));
766 T24 = VADD(T22, T23);
767 {
768 V T1F, T1G, T1H, T1I, T1J, T1K;
769 T1F = VFMA(LDK(KP1_937166322), Ty, VMUL(LDK(KP248689887), Tv));
770 T1G = VFMA(LDK(KP1_071653589), TS, VMUL(LDK(KP844327925), TP));
771 T1H = VADD(T1F, T1G);
772 T1I = VFMA(LDK(KP1_752613360), T1d, VMUL(LDK(KP481753674), T1a));
773 T1J = VFMA(LDK(KP1_457937254), T1x, VMUL(LDK(KP684547105), T1u));
774 T1K = VADD(T1I, T1J);
775 T1L = VADD(T1H, T1K);
776 T1U = VSUB(T1J, T1I);
777 T1Q = VMUL(LDK(KP559016994), VSUB(T1K, T1H));
778 T1S = VSUB(T1G, T1F);
779 }
780 {
781 V Tz, TT, TU, T1e, T1y, T1z;
782 Tz = VFNMS(LDK(KP497379774), Ty, VMUL(LDK(KP968583161), Tv));
783 TT = VFNMS(LDK(KP1_688655851), TS, VMUL(LDK(KP535826794), TP));
784 TU = VADD(Tz, TT);
785 T1e = VFNMS(LDK(KP963507348), T1d, VMUL(LDK(KP876306680), T1a));
786 T1y = VFNMS(LDK(KP1_369094211), T1x, VMUL(LDK(KP728968627), T1u));
787 T1z = VADD(T1e, T1y);
788 T1A = VADD(TU, T1z);
789 T1V = VMUL(LDK(KP559016994), VSUB(TU, T1z));
790 T1N = VSUB(TT, Tz);
791 T1O = VSUB(T1e, T1y);
792 }
793 {
794 V T1B, T1M, T25, T2c;
795 T1B = VADD(Tf, T1A);
796 T1M = VBYI(VADD(T1E, T1L));
797 ST(&(x[WS(rs, 1)]), VSUB(T1B, T1M), ms, &(x[WS(rs, 1)]));
798 ST(&(x[WS(rs, 24)]), VADD(T1B, T1M), ms, &(x[0]));
799 T25 = VADD(Tf, VADD(T21, T24));
800 T2c = VBYI(VADD(T1E, VSUB(T28, T2b)));
801 ST(&(x[WS(rs, 21)]), VSUB(T25, T2c), ms, &(x[WS(rs, 1)]));
802 ST(&(x[WS(rs, 4)]), VADD(T25, T2c), ms, &(x[0]));
803 }
804 T2d = VBYI(VADD(T1E, VFMA(LDK(KP309016994), T28, VFMA(LDK(KP587785252), VSUB(T23, T22), VFNMS(LDK(KP951056516), VADD(T1Z, T20), VMUL(LDK(KP809016994), T2b))))));
805 T2e = VFMA(LDK(KP309016994), T21, VFMA(LDK(KP951056516), VSUB(T26, T27), VFMA(LDK(KP587785252), VSUB(T2a, T29), VFNMS(LDK(KP809016994), T24, Tf))));
806 ST(&(x[WS(rs, 9)]), VADD(T2d, T2e), ms, &(x[WS(rs, 1)]));
807 ST(&(x[WS(rs, 16)]), VSUB(T2e, T2d), ms, &(x[0]));
808 {
809 V T1R, T1X, T1W, T1Y, T1P, T1T;
810 T1P = VFMS(LDK(KP250000000), T1L, T1E);
811 T1R = VBYI(VADD(VFMA(LDK(KP587785252), T1N, VMUL(LDK(KP951056516), T1O)), VSUB(T1P, T1Q)));
812 T1X = VBYI(VADD(VFNMS(LDK(KP587785252), T1O, VMUL(LDK(KP951056516), T1N)), VADD(T1P, T1Q)));
813 T1T = VFNMS(LDK(KP250000000), T1A, Tf);
814 T1W = VFMA(LDK(KP587785252), T1S, VFNMS(LDK(KP951056516), T1U, VSUB(T1T, T1V)));
815 T1Y = VFMA(LDK(KP951056516), T1S, VADD(T1V, VFMA(LDK(KP587785252), T1U, T1T)));
816 ST(&(x[WS(rs, 11)]), VADD(T1R, T1W), ms, &(x[WS(rs, 1)]));
817 ST(&(x[WS(rs, 19)]), VSUB(T1Y, T1X), ms, &(x[WS(rs, 1)]));
818 ST(&(x[WS(rs, 14)]), VSUB(T1W, T1R), ms, &(x[0]));
819 ST(&(x[WS(rs, 6)]), VADD(T1X, T1Y), ms, &(x[0]));
820 }
821 }
822 {
823 V T2u, T2w, T2h, T2k, T2l, T2A, T2B, T2C, T2o, T2r, T2s, T2x, T2y, T2z, T2M;
824 V T2X, T2N, T2W, T2R, T31, T2U, T30, T2E, T2F;
825 T2u = VFNMS(LDK(KP587785252), T1C, T2t);
826 T2w = VSUB(Te, Tb);
827 T2h = VFNMS(LDK(KP125333233), T2g, VMUL(LDK(KP1_984229402), T2f));
828 T2k = VFMA(LDK(KP1_457937254), T2i, VMUL(LDK(KP684547105), T2j));
829 T2l = VSUB(T2h, T2k);
830 T2A = VFNMS(LDK(KP1_996053456), T2p, VMUL(LDK(KP062790519), T2q));
831 T2B = VFMA(LDK(KP1_541026485), T2m, VMUL(LDK(KP637423989), T2n));
832 T2C = VSUB(T2A, T2B);
833 T2o = VFNMS(LDK(KP770513242), T2n, VMUL(LDK(KP1_274847979), T2m));
834 T2r = VFMA(LDK(KP125581039), T2p, VMUL(LDK(KP998026728), T2q));
835 T2s = VSUB(T2o, T2r);
836 T2x = VFNMS(LDK(KP1_369094211), T2i, VMUL(LDK(KP728968627), T2j));
837 T2y = VFMA(LDK(KP250666467), T2f, VMUL(LDK(KP992114701), T2g));
838 T2z = VSUB(T2x, T2y);
839 {
840 V T2G, T2H, T2I, T2J, T2K, T2L;
841 T2G = VFNMS(LDK(KP481753674), T2j, VMUL(LDK(KP1_752613360), T2i));
842 T2H = VFMA(LDK(KP851558583), T2f, VMUL(LDK(KP904827052), T2g));
843 T2I = VSUB(T2G, T2H);
844 T2J = VFNMS(LDK(KP844327925), T2q, VMUL(LDK(KP1_071653589), T2p));
845 T2K = VFNMS(LDK(KP998026728), T2n, VMUL(LDK(KP125581039), T2m));
846 T2L = VADD(T2J, T2K);
847 T2M = VMUL(LDK(KP559016994), VSUB(T2I, T2L));
848 T2X = VSUB(T2J, T2K);
849 T2N = VADD(T2I, T2L);
850 T2W = VADD(T2G, T2H);
851 }
852 {
853 V T2P, T2Q, T2Y, T2S, T2T, T2Z;
854 T2P = VFNMS(LDK(KP425779291), T2g, VMUL(LDK(KP1_809654104), T2f));
855 T2Q = VFMA(LDK(KP963507348), T2i, VMUL(LDK(KP876306680), T2j));
856 T2Y = VADD(T2Q, T2P);
857 T2S = VFMA(LDK(KP1_688655851), T2p, VMUL(LDK(KP535826794), T2q));
858 T2T = VFMA(LDK(KP1_996053456), T2m, VMUL(LDK(KP062790519), T2n));
859 T2Z = VADD(T2S, T2T);
860 T2R = VSUB(T2P, T2Q);
861 T31 = VADD(T2Y, T2Z);
862 T2U = VSUB(T2S, T2T);
863 T30 = VMUL(LDK(KP559016994), VSUB(T2Y, T2Z));
864 }
865 {
866 V T36, T37, T2v, T2D;
867 T36 = VBYI(VADD(T2u, T2N));
868 T37 = VADD(T2w, T31);
869 ST(&(x[WS(rs, 2)]), VADD(T36, T37), ms, &(x[0]));
870 ST(&(x[WS(rs, 23)]), VSUB(T37, T36), ms, &(x[WS(rs, 1)]));
871 T2v = VBYI(VSUB(VADD(T2l, T2s), T2u));
872 T2D = VADD(T2w, VADD(T2z, T2C));
873 ST(&(x[WS(rs, 3)]), VADD(T2v, T2D), ms, &(x[WS(rs, 1)]));
874 ST(&(x[WS(rs, 22)]), VSUB(T2D, T2v), ms, &(x[0]));
875 }
876 T2E = VFMA(LDK(KP309016994), T2z, VFNMS(LDK(KP809016994), T2C, VFNMS(LDK(KP587785252), VADD(T2r, T2o), VFNMS(LDK(KP951056516), VADD(T2k, T2h), T2w))));
877 T2F = VBYI(VSUB(VFNMS(LDK(KP587785252), VADD(T2A, T2B), VFNMS(LDK(KP809016994), T2s, VFNMS(LDK(KP951056516), VADD(T2x, T2y), VMUL(LDK(KP309016994), T2l)))), T2u));
878 ST(&(x[WS(rs, 17)]), VSUB(T2E, T2F), ms, &(x[WS(rs, 1)]));
879 ST(&(x[WS(rs, 8)]), VADD(T2E, T2F), ms, &(x[0]));
880 {
881 V T2V, T34, T33, T35, T2O, T32;
882 T2O = VFNMS(LDK(KP250000000), T2N, T2u);
883 T2V = VBYI(VADD(T2M, VADD(T2O, VFNMS(LDK(KP587785252), T2U, VMUL(LDK(KP951056516), T2R)))));
884 T34 = VBYI(VADD(T2O, VSUB(VFMA(LDK(KP587785252), T2R, VMUL(LDK(KP951056516), T2U)), T2M)));
885 T32 = VFNMS(LDK(KP250000000), T31, T2w);
886 T33 = VFMA(LDK(KP951056516), T2W, VFMA(LDK(KP587785252), T2X, VADD(T30, T32)));
887 T35 = VFMA(LDK(KP587785252), T2W, VSUB(VFNMS(LDK(KP951056516), T2X, T32), T30));
888 ST(&(x[WS(rs, 7)]), VADD(T2V, T33), ms, &(x[WS(rs, 1)]));
889 ST(&(x[WS(rs, 13)]), VSUB(T35, T34), ms, &(x[WS(rs, 1)]));
890 ST(&(x[WS(rs, 18)]), VSUB(T33, T2V), ms, &(x[0]));
891 ST(&(x[WS(rs, 12)]), VADD(T34, T35), ms, &(x[0]));
892 }
893 }
894 }
895 }
896 VLEAVE();
897 }
898
/*
 * Twiddle instruction table for this codelet; the codelet descriptor `desc`
 * stores a pointer to it. The table holds one VTW(0, k) entry for every
 * k = 1..24, in ascending order, and is closed by a {TW_NEXT, VL, 0} record.
 * NOTE(review): presumably each VTW(0, k) asks for the k-th twiddle factor of
 * the size-25 transform, expanded for a SIMD vector of VL elements, and the
 * closing TW_NEXT record tells the consumer how far to advance per step --
 * confirm against the VTW / TW_NEXT definitions in the project headers.
 */
899 static const tw_instr twinstr[] = {
900 VTW(0, 1),
901 VTW(0, 2),
902 VTW(0, 3),
903 VTW(0, 4),
904 VTW(0, 5),
905 VTW(0, 6),
906 VTW(0, 7),
907 VTW(0, 8),
908 VTW(0, 9),
909 VTW(0, 10),
910 VTW(0, 11),
911 VTW(0, 12),
912 VTW(0, 13),
913 VTW(0, 14),
914 VTW(0, 15),
915 VTW(0, 16),
916 VTW(0, 17),
917 VTW(0, 18),
918 VTW(0, 19),
919 VTW(0, 20),
920 VTW(0, 21),
921 VTW(0, 22),
922 VTW(0, 23),
923 VTW(0, 24),
924 {TW_NEXT, VL, 0} /* closing record; must stay last in the table */
925 };
926
/*
 * Descriptor passed to X(kdft_dit_register) together with the t2fv_25 kernel.
 * Positional fields, in order: 25 (matches the "-n 25" the generator was run
 * with), the codelet name string, the twinstr table, a pointer to GENUS, a
 * four-element brace list, and three trailing zeros.
 * NOTE(review): {170, 110, 78, 0} is presumably the operation-count record
 * (additions, multiplications, fused multiply-adds, other) and the trailing
 * zeros are presumably unused/default fields -- confirm against the ct_desc
 * declaration, which is not visible in this file.
 */
927 static const ct_desc desc = { 25, XSIMD_STRING("t2fv_25"), twinstr, &GENUS, {170, 110, 78, 0}, 0, 0, 0 };
928
/*
 * Registration entry point for this codelet. It forwards the planner `p`,
 * the t2fv_25 kernel function and the address of its descriptor `desc` to
 * X(kdft_dit_register); it does nothing else and returns no value.
 * NOTE(review): XSIMD() and X() are presumably name-decorating macros that
 * produce the final exported symbol names -- confirm in the project headers.
 */
929 void XSIMD(codelet_t2fv_25) (planner *p) {
930 X(kdft_dit_register) (p, t2fv_25, &desc);
931 }
932 #endif /* HAVE_FMA */