annotate src/fftw-3.3.3/dft/simd/common/t3fv_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@10 22 /* Generated on Sun Nov 25 07:38:56 EST 2012 */
Chris@10 23
Chris@10 24 #include "codelet-dft.h"
Chris@10 25
Chris@10 26 #ifdef HAVE_FMA
Chris@10 27
Chris@10 28 /* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 25 -name t3fv_25 -include t3f.h */
Chris@10 29
Chris@10 30 /*
Chris@10 31 * This function contains 268 FP additions, 281 FP multiplications,
Chris@10 32 * (or, 87 additions, 100 multiplications, 181 fused multiply/add),
Chris@10 33 * 223 stack variables, 67 constants, and 50 memory accesses
Chris@10 34 */
Chris@10 35 #include "t3f.h"
Chris@10 36
Chris@10 37 static void t3fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@10 38 { /* FMA variant of the size-25 twiddle kernel (generator options: -n 25, -twiddle-log3). Each loop pass reads the 25 elements x[WS(rs, 0..24)], applies VZMULJ twiddle products to elements 1..24, and stores 25 results back into the same x slots. ii is not referenced in this body. */
Chris@10 39 DVK(KP792626838, +0.792626838241819413632131824093538848057784557); /* each KP<digits> name spells out the leading decimals of its constant */
Chris@10 40 DVK(KP876091699, +0.876091699473550838204498029706869638173524346);
Chris@10 41 DVK(KP617882369, +0.617882369114440893914546919006756321695042882);
Chris@10 42 DVK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@10 43 DVK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@10 44 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@10 45 DVK(KP999544308, +0.999544308746292983948881682379742149196758193);
Chris@10 46 DVK(KP916574801, +0.916574801383451584742370439148878693530976769);
Chris@10 47 DVK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@10 48 DVK(KP809385824, +0.809385824416008241660603814668679683846476688);
Chris@10 49 DVK(KP447417479, +0.447417479732227551498980015410057305749330693);
Chris@10 50 DVK(KP894834959, +0.894834959464455102997960030820114611498661386);
Chris@10 51 DVK(KP867381224, +0.867381224396525206773171885031575671309956167);
Chris@10 52 DVK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@10 53 DVK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@10 54 DVK(KP958953096, +0.958953096729998668045963838399037225970891871);
Chris@10 55 DVK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@10 56 DVK(KP829049696, +0.829049696159252993975487806364305442437946767);
Chris@10 57 DVK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@10 58 DVK(KP897376177, +0.897376177523557693138608077137219684419427330);
Chris@10 59 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@10 60 DVK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@10 61 DVK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@10 62 DVK(KP855719849, +0.855719849902058969314654733608091555096772472);
Chris@10 63 DVK(KP237294955, +0.237294955877110315393888866460840817927895961);
Chris@10 64 DVK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@10 65 DVK(KP904508497, +0.904508497187473712051146708591409529430077295);
Chris@10 66 DVK(KP997675361, +0.997675361079556513670859573984492383596555031);
Chris@10 67 DVK(KP763932022, +0.763932022500210303590826331268723764559381640);
Chris@10 68 DVK(KP690983005, +0.690983005625052575897706582817180941139845410);
Chris@10 69 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@10 70 DVK(KP952936919, +0.952936919628306576880750665357914584765951388);
Chris@10 71 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@10 72 DVK(KP262346850, +0.262346850930607871785420028382979691334784273);
Chris@10 73 DVK(KP570584518, +0.570584518783621657366766175430996792655723863);
Chris@10 74 DVK(KP669429328, +0.669429328479476605641803240971985825917022098);
Chris@10 75 DVK(KP923225144, +0.923225144846402650453449441572664695995209956);
Chris@10 76 DVK(KP945422727, +0.945422727388575946270360266328811958657216298);
Chris@10 77 DVK(KP522616830, +0.522616830205754336872861364785224694908468440);
Chris@10 78 DVK(KP956723877, +0.956723877038460305821989399535483155872969262);
Chris@10 79 DVK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@10 80 DVK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@10 81 DVK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@10 82 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@10 83 DVK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@10 84 DVK(KP921078979, +0.921078979742360627699756128143719920817673854);
Chris@10 85 DVK(KP982009705, +0.982009705009746369461829878184175962711969869);
Chris@10 86 DVK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@10 87 DVK(KP494780565, +0.494780565770515410344588413655324772219443730);
Chris@10 88 DVK(KP447533225, +0.447533225982656890041886979663652563063114397);
Chris@10 89 DVK(KP603558818, +0.603558818296015001454675132653458027918768137);
Chris@10 90 DVK(KP667278218, +0.667278218140296670899089292254759909713898805);
Chris@10 91 DVK(KP244189809, +0.244189809627953270309879511234821255780225091);
Chris@10 92 DVK(KP269969613, +0.269969613759572083574752974412347470060951301);
Chris@10 93 DVK(KP578046249, +0.578046249379945007321754579646815604023525655);
Chris@10 94 DVK(KP522847744, +0.522847744331509716623755382187077770911012542);
Chris@10 95 DVK(KP132830569, +0.132830569247582714407653942074819768844536507);
Chris@10 96 DVK(KP120146378, +0.120146378570687701782758537356596213647956445);
Chris@10 97 DVK(KP893101515, +0.893101515366181661711202267938416198338079437);
Chris@10 98 DVK(KP987388751, +0.987388751065621252324603216482382109400433949);
Chris@10 99 DVK(KP059835404, +0.059835404262124915169548397419498386427871950);
Chris@10 100 DVK(KP066152395, +0.066152395967733048213034281011006031460903353);
Chris@10 101 DVK(KP786782374, +0.786782374965295178365099601674911834788448471);
Chris@10 102 DVK(KP869845200, +0.869845200362138853122720822420327157933056305);
Chris@10 103 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@10 104 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@10 105 DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@10 106 {
Chris@10 107 INT m;
Chris@10 108 R *x;
Chris@10 109 x = ri; /* every load and store below goes through x */
Chris@10 110 for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(25, rs)) { /* W is pre-advanced by mb*(TWVL/VL)*8; each pass then steps m by VL, x by VL*ms and W by TWVL*8 */
Chris@10 111 V T2t, T1Z, T2W, T28, T2Q, T2r, T2g, T2u, T2o, T2l;
Chris@10 112 {
Chris@10 113 V T2, T5, T3, T9;
Chris@10 114 T2 = LDW(&(W[0])); /* T2, T3, T5, T9 are the only twiddle loads; every other factor is a VZMUL/VZMULJ product of these four */
Chris@10 115 T5 = LDW(&(W[TWVL * 4]));
Chris@10 116 T3 = LDW(&(W[TWVL * 2]));
Chris@10 117 T9 = LDW(&(W[TWVL * 6]));
Chris@10 118 {
Chris@10 119 V T2c, T3l, Tn, T49, Tm, T4e, TN, T32, T1d, T3a, T3f, T3z, T3H, T25, T1W;
Chris@10 120 V T2v, T2D, T4a, T1g, T18, T2Z, T11, T31, TK, T1q, T1j, T1n, T4b, T17;
Chris@10 121 {
Chris@10 122 V T1, T1l, Tr, T4, Ty, T1E, Tu, TX, TD, T1h, Tz, T1e, T1I, T1o, TU;
Chris@10 123 V Tk, T2b, T1B, T1D, T1N, T1F, Td, T2a, T1J;
Chris@10 124 {
Chris@10 125 V T7, Tb, TC, Tg, T1L, Ta, T6, Tj, T1A;
Chris@10 126 T1 = LD(&(x[0]), ms, &(x[0])); /* element 0 is used as loaded, with no twiddle product */
Chris@10 127 {
Chris@10 128 V Tf, Ti, Te, Th;
Chris@10 129 Tf = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
Chris@10 130 Ti = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
Chris@10 131 T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
Chris@10 132 Tb = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
Chris@10 133 Te = VZMUL(T2, T5);
Chris@10 134 TC = VZMULJ(T2, T5);
Chris@10 135 T1l = VZMUL(T3, T5);
Chris@10 136 Tr = VZMULJ(T3, T5);
Chris@10 137 T4 = VZMUL(T2, T3);
Chris@10 138 Ty = VZMULJ(T2, T3);
Chris@10 139 T1E = VZMULJ(T2, T9);
Chris@10 140 Th = VZMULJ(T5, T9);
Chris@10 141 Tu = VZMULJ(T3, T9);
Chris@10 142 Tg = VZMULJ(Te, Tf);
Chris@10 143 TX = VZMULJ(Te, T9);
Chris@10 144 TD = VZMULJ(TC, T9);
Chris@10 145 T1h = VZMULJ(Ty, T9);
Chris@10 146 Tz = VZMUL(Ty, T5);
Chris@10 147 T1e = VZMULJ(Ty, T5);
Chris@10 148 T1L = VZMULJ(Tr, T9);
Chris@10 149 Ta = VZMULJ(T4, T9);
Chris@10 150 T1I = VZMUL(T4, T5);
Chris@10 151 T6 = VZMULJ(T4, T5);
Chris@10 152 Tj = VZMULJ(Th, Ti);
Chris@10 153 }
Chris@10 154 T1A = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Chris@10 155 T1o = VZMULJ(T1e, T9);
Chris@10 156 {
Chris@10 157 V Tc, T8, T1C, T1M;
Chris@10 158 T1C = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
Chris@10 159 T1M = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
Chris@10 160 Tc = VZMULJ(Ta, Tb);
Chris@10 161 T8 = VZMULJ(T6, T7);
Chris@10 162 TU = VZMULJ(T6, T9);
Chris@10 163 Tk = VADD(Tg, Tj);
Chris@10 164 T2b = VSUB(Tg, Tj);
Chris@10 165 T1B = VZMULJ(T3, T1A);
Chris@10 166 T1D = VZMULJ(TC, T1C);
Chris@10 167 T1N = VZMULJ(T1L, T1M);
Chris@10 168 T1F = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
Chris@10 169 Td = VADD(T8, Tc);
Chris@10 170 T2a = VSUB(T8, Tc);
Chris@10 171 T1J = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
Chris@10 172 }
Chris@10 173 }
Chris@10 174 {
Chris@10 175 V Tq, Tt, TF, T1T, T1H, Tw, T1U, T1O, TA, Tp, Ts, TE;
Chris@10 176 Tp = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
Chris@10 177 Ts = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Chris@10 178 TE = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
Chris@10 179 {
Chris@10 180 V T1K, Tv, T1G, Tl;
Chris@10 181 Tv = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
Chris@10 182 T1G = VZMULJ(T1E, T1F);
Chris@10 183 T2c = VFMA(LDK(KP618033988), T2b, T2a);
Chris@10 184 T3l = VFNMS(LDK(KP618033988), T2a, T2b);
Chris@10 185 Tn = VSUB(Td, Tk);
Chris@10 186 Tl = VADD(Td, Tk);
Chris@10 187 T1K = VZMULJ(T1I, T1J);
Chris@10 188 Tq = VZMULJ(T2, Tp);
Chris@10 189 Tt = VZMULJ(Tr, Ts);
Chris@10 190 TF = VZMULJ(TD, TE);
Chris@10 191 T1T = VSUB(T1D, T1G);
Chris@10 192 T1H = VADD(T1D, T1G);
Chris@10 193 T49 = VADD(T1, Tl);
Chris@10 194 Tm = VFNMS(LDK(KP250000000), Tl, T1);
Chris@10 195 Tw = VZMULJ(Tu, Tv);
Chris@10 196 T1U = VSUB(T1K, T1N);
Chris@10 197 T1O = VADD(T1K, T1N);
Chris@10 198 TA = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
Chris@10 199 }
Chris@10 200 {
Chris@10 201 V Tx, TL, T1R, T38, T1V, T13, TQ, TZ, TS, T1Q, TV, TG, TM, T12, T1c;
Chris@10 202 V T16;
Chris@10 203 T12 = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Chris@10 204 {
Chris@10 205 V TP, TY, T1P, TB, TR;
Chris@10 206 TP = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
Chris@10 207 TY = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
Chris@10 208 TR = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
Chris@10 209 Tx = VADD(Tt, Tw);
Chris@10 210 TL = VSUB(Tt, Tw);
Chris@10 211 T1R = VSUB(T1O, T1H);
Chris@10 212 T1P = VADD(T1H, T1O);
Chris@10 213 T38 = VFNMS(LDK(KP618033988), T1T, T1U);
Chris@10 214 T1V = VFMA(LDK(KP618033988), T1U, T1T);
Chris@10 215 TB = VZMULJ(Tz, TA);
Chris@10 216 T13 = VZMULJ(T4, T12);
Chris@10 217 TQ = VZMULJ(T9, TP);
Chris@10 218 TZ = VZMULJ(TX, TY);
Chris@10 219 TS = VZMULJ(T5, TR);
Chris@10 220 T4e = VADD(T1B, T1P);
Chris@10 221 T1Q = VFNMS(LDK(KP250000000), T1P, T1B);
Chris@10 222 TV = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
Chris@10 223 TG = VADD(TB, TF);
Chris@10 224 TM = VSUB(TF, TB);
Chris@10 225 }
Chris@10 226 T1c = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Chris@10 227 {
Chris@10 228 V T14, TT, TJ, T15, T10, TI, T1p, T1f, T1i, T1m;
Chris@10 229 T1f = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
Chris@10 230 T14 = VADD(TS, TQ);
Chris@10 231 TT = VSUB(TQ, TS);
Chris@10 232 {
Chris@10 233 V T39, T1S, TW, TH;
Chris@10 234 T39 = VFMA(LDK(KP559016994), T1R, T1Q);
Chris@10 235 T1S = VFNMS(LDK(KP559016994), T1R, T1Q);
Chris@10 236 TW = VZMULJ(TU, TV);
Chris@10 237 TH = VADD(Tx, TG);
Chris@10 238 TJ = VSUB(Tx, TG);
Chris@10 239 TN = VFNMS(LDK(KP618033988), TM, TL);
Chris@10 240 T32 = VFMA(LDK(KP618033988), TL, TM);
Chris@10 241 T1d = VZMULJ(Ty, T1c);
Chris@10 242 T3a = VFMA(LDK(KP869845200), T39, T38);
Chris@10 243 T3f = VFNMS(LDK(KP786782374), T38, T39);
Chris@10 244 T3z = VFMA(LDK(KP066152395), T39, T38);
Chris@10 245 T3H = VFNMS(LDK(KP059835404), T38, T39);
Chris@10 246 T25 = VFMA(LDK(KP987388751), T1S, T1V);
Chris@10 247 T1W = VFNMS(LDK(KP893101515), T1V, T1S);
Chris@10 248 T2v = VFNMS(LDK(KP120146378), T1V, T1S);
Chris@10 249 T2D = VFMA(LDK(KP132830569), T1S, T1V);
Chris@10 250 T15 = VADD(TZ, TW);
Chris@10 251 T10 = VSUB(TW, TZ);
Chris@10 252 TI = VFNMS(LDK(KP250000000), TH, Tq);
Chris@10 253 T4a = VADD(Tq, TH);
Chris@10 254 T1g = VZMULJ(T1e, T1f);
Chris@10 255 }
Chris@10 256 T1p = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
Chris@10 257 T1i = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
Chris@10 258 T1m = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
Chris@10 259 T18 = VSUB(T14, T15);
Chris@10 260 T16 = VADD(T14, T15);
Chris@10 261 T2Z = VFNMS(LDK(KP618033988), TT, T10);
Chris@10 262 T11 = VFMA(LDK(KP618033988), T10, TT);
Chris@10 263 T31 = VFNMS(LDK(KP559016994), TJ, TI);
Chris@10 264 TK = VFMA(LDK(KP559016994), TJ, TI);
Chris@10 265 T1q = VZMULJ(T1o, T1p);
Chris@10 266 T1j = VZMULJ(T1h, T1i);
Chris@10 267 T1n = VZMULJ(T1l, T1m);
Chris@10 268 }
Chris@10 269 T4b = VADD(T13, T16);
Chris@10 270 T17 = VFMS(LDK(KP250000000), T16, T13); /* NOTE: the only VFMS in this kernel; the corresponding Tm, T1Q, TI and T1t terms use VFNMS. Kept exactly as generated -- presumably tied to the reversed operand order in TT/T10 above; confirm against the VFMS/VFNMS macro definitions before changing it. */
Chris@10 271 }
Chris@10 272 }
Chris@10 273 }
Chris@10 274 {
Chris@10 275 V T33, T3i, T3C, T3L, T20, TO, T2y, T2G, T1k, T1w, T1r, T1x, T2Y, T19, T4k;
Chris@10 276 V T4c;
Chris@10 277 T33 = VFMA(LDK(KP893101515), T32, T31);
Chris@10 278 T3i = VFNMS(LDK(KP987388751), T31, T32);
Chris@10 279 T3C = VFNMS(LDK(KP522847744), T32, T31);
Chris@10 280 T3L = VFMA(LDK(KP578046249), T31, T32);
Chris@10 281 T20 = VFMA(LDK(KP269969613), TK, TN);
Chris@10 282 TO = VFNMS(LDK(KP244189809), TN, TK);
Chris@10 283 T2y = VFMA(LDK(KP667278218), TK, TN);
Chris@10 284 T2G = VFNMS(LDK(KP603558818), TN, TK);
Chris@10 285 T1k = VADD(T1g, T1j);
Chris@10 286 T1w = VSUB(T1g, T1j);
Chris@10 287 T1r = VADD(T1n, T1q);
Chris@10 288 T1x = VSUB(T1q, T1n);
Chris@10 289 T2Y = VFMA(LDK(KP559016994), T18, T17);
Chris@10 290 T19 = VFNMS(LDK(KP559016994), T18, T17);
Chris@10 291 T4k = VSUB(T4a, T4b);
Chris@10 292 T4c = VADD(T4a, T4b);
Chris@10 293 {
Chris@10 294 V T2X, To, T35, T1y, T2H, T2z, T1a, T21, T3t, T34, T3n, T3j, T3E, T3Y, T3M;
Chris@10 295 V T3R, T1v, T36, T4l, T4f, T1u, T1s;
Chris@10 296 T2X = VFNMS(LDK(KP559016994), Tn, Tm);
Chris@10 297 To = VFMA(LDK(KP559016994), Tn, Tm);
Chris@10 298 T1u = VSUB(T1r, T1k);
Chris@10 299 T1s = VADD(T1k, T1r);
Chris@10 300 T35 = VFMA(LDK(KP618033988), T1w, T1x);
Chris@10 301 T1y = VFNMS(LDK(KP618033988), T1x, T1w);
Chris@10 302 {
Chris@10 303 V T3K, T30, T3h, T3D, T4d, T1t;
Chris@10 304 T3K = VFMA(LDK(KP447533225), T2Z, T2Y);
Chris@10 305 T30 = VFMA(LDK(KP120146378), T2Z, T2Y);
Chris@10 306 T3h = VFNMS(LDK(KP132830569), T2Y, T2Z);
Chris@10 307 T3D = VFNMS(LDK(KP494780565), T2Y, T2Z);
Chris@10 308 T2H = VFNMS(LDK(KP786782374), T11, T19);
Chris@10 309 T2z = VFMA(LDK(KP869845200), T19, T11);
Chris@10 310 T1a = VFNMS(LDK(KP667278218), T19, T11);
Chris@10 311 T21 = VFMA(LDK(KP603558818), T11, T19);
Chris@10 312 T4d = VADD(T1d, T1s);
Chris@10 313 T1t = VFNMS(LDK(KP250000000), T1s, T1d);
Chris@10 314 T3t = VFNMS(LDK(KP734762448), T33, T30);
Chris@10 315 T34 = VFMA(LDK(KP734762448), T33, T30);
Chris@10 316 T3n = VFMA(LDK(KP734762448), T3i, T3h);
Chris@10 317 T3j = VFNMS(LDK(KP734762448), T3i, T3h);
Chris@10 318 T3E = VFNMS(LDK(KP982009705), T3D, T3C);
Chris@10 319 T3Y = VFMA(LDK(KP982009705), T3D, T3C);
Chris@10 320 T3M = VFNMS(LDK(KP921078979), T3L, T3K);
Chris@10 321 T3R = VFMA(LDK(KP921078979), T3L, T3K);
Chris@10 322 T1v = VFNMS(LDK(KP559016994), T1u, T1t);
Chris@10 323 T36 = VFMA(LDK(KP559016994), T1u, T1t);
Chris@10 324 T4l = VSUB(T4d, T4e);
Chris@10 325 T4f = VADD(T4d, T4e);
Chris@10 326 }
Chris@10 327 {
Chris@10 328 V T2L, T2R, T2j, T2q, T2J, T2B, T2e, T26, T2U, T1Y, T23, T2O;
Chris@10 329 {
Chris@10 330 V T2I, T24, T2w, T2E, T48, T42, T3y, T3s, T3V, T45, T2A, T1b, T2h, T2i, T1X;
Chris@10 331 T2L = VFNMS(LDK(KP912575812), T2H, T2G);
Chris@10 332 T2I = VFMA(LDK(KP912575812), T2H, T2G);
Chris@10 333 {
Chris@10 334 V T3A, T3e, T37, T3I, T1z;
Chris@10 335 T3A = VFNMS(LDK(KP667278218), T36, T35);
Chris@10 336 T3e = VFNMS(LDK(KP059835404), T35, T36);
Chris@10 337 T37 = VFMA(LDK(KP066152395), T36, T35);
Chris@10 338 T3I = VFMA(LDK(KP603558818), T35, T36);
Chris@10 339 T24 = VFMA(LDK(KP578046249), T1v, T1y);
Chris@10 340 T1z = VFNMS(LDK(KP522847744), T1y, T1v);
Chris@10 341 T2w = VFNMS(LDK(KP494780565), T1v, T1y);
Chris@10 342 T2E = VFMA(LDK(KP447533225), T1y, T1v);
Chris@10 343 {
Chris@10 344 V T4i, T4g, T4o, T4m;
Chris@10 345 T4i = VSUB(T4c, T4f);
Chris@10 346 T4g = VADD(T4c, T4f);
Chris@10 347 T4o = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T4k, T4l));
Chris@10 348 T4m = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T4l, T4k));
Chris@10 349 {
Chris@10 350 V T3Q, T3J, T3b, T3u;
Chris@10 351 T3Q = VFNMS(LDK(KP845997307), T3I, T3H);
Chris@10 352 T3J = VFMA(LDK(KP845997307), T3I, T3H);
Chris@10 353 T3b = VFNMS(LDK(KP772036680), T3a, T37);
Chris@10 354 T3u = VFMA(LDK(KP772036680), T3a, T37);
Chris@10 355 {
Chris@10 356 V T3o, T3g, T3B, T3X, T4h;
Chris@10 357 T3o = VFNMS(LDK(KP772036680), T3f, T3e);
Chris@10 358 T3g = VFMA(LDK(KP772036680), T3f, T3e);
Chris@10 359 T3B = VFNMS(LDK(KP845997307), T3A, T3z);
Chris@10 360 T3X = VFMA(LDK(KP845997307), T3A, T3z);
Chris@10 361 ST(&(x[0]), VADD(T4g, T49), ms, &(x[0])); /* first of the 25 in-place stores; the remaining 24 follow below */
Chris@10 362 T4h = VFNMS(LDK(KP250000000), T4g, T49);
Chris@10 363 {
Chris@10 364 V T40, T3N, T3c, T3v;
Chris@10 365 T40 = VFMA(LDK(KP906616052), T3M, T3J);
Chris@10 366 T3N = VFNMS(LDK(KP906616052), T3M, T3J);
Chris@10 367 T3c = VFMA(LDK(KP956723877), T3b, T34);
Chris@10 368 T3v = VFMA(LDK(KP522616830), T3j, T3u);
Chris@10 369 {
Chris@10 370 V T3p, T3k, T3S, T3F;
Chris@10 371 T3p = VFNMS(LDK(KP522616830), T34, T3o);
Chris@10 372 T3k = VFMA(LDK(KP945422727), T3j, T3g);
Chris@10 373 T3S = VFNMS(LDK(KP923225144), T3E, T3B);
Chris@10 374 T3F = VFMA(LDK(KP923225144), T3E, T3B);
Chris@10 375 {
Chris@10 376 V T46, T3Z, T4j, T4n;
Chris@10 377 T46 = VFNMS(LDK(KP669429328), T3X, T3Y);
Chris@10 378 T3Z = VFMA(LDK(KP570584518), T3Y, T3X);
Chris@10 379 T4j = VFMA(LDK(KP559016994), T4i, T4h);
Chris@10 380 T4n = VFNMS(LDK(KP559016994), T4i, T4h);
Chris@10 381 {
Chris@10 382 V T3W, T3O, T3d, T3w;
Chris@10 383 T3W = VFMA(LDK(KP262346850), T3N, T3l);
Chris@10 384 T3O = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T3l, T3N));
Chris@10 385 T3d = VFMA(LDK(KP992114701), T3c, T2X);
Chris@10 386 T3w = VFNMS(LDK(KP690983005), T3v, T3g);
Chris@10 387 {
Chris@10 388 V T3q, T3m, T3T, T43;
Chris@10 389 T3q = VFMA(LDK(KP763932022), T3p, T3b);
Chris@10 390 T3m = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T3l, T3k));
Chris@10 391 T3T = VFNMS(LDK(KP997675361), T3S, T3R);
Chris@10 392 T43 = VFNMS(LDK(KP904508497), T3S, T3Q);
Chris@10 393 {
Chris@10 394 V T3G, T3P, T47, T41;
Chris@10 395 T3G = VFMA(LDK(KP949179823), T3F, T2X);
Chris@10 396 T3P = VFNMS(LDK(KP237294955), T3F, T2X);
Chris@10 397 T47 = VFNMS(LDK(KP669429328), T40, T46);
Chris@10 398 T41 = VFMA(LDK(KP618033988), T40, T3Z);
Chris@10 399 ST(&(x[WS(rs, 20)]), VFMAI(T4m, T4j), ms, &(x[0]));
Chris@10 400 ST(&(x[WS(rs, 5)]), VFNMSI(T4m, T4j), ms, &(x[WS(rs, 1)]));
Chris@10 401 ST(&(x[WS(rs, 15)]), VFNMSI(T4o, T4n), ms, &(x[WS(rs, 1)]));
Chris@10 402 ST(&(x[WS(rs, 10)]), VFMAI(T4o, T4n), ms, &(x[0]));
Chris@10 403 {
Chris@10 404 V T3x, T3r, T3U, T44;
Chris@10 405 T3x = VFMA(LDK(KP855719849), T3w, T3t);
Chris@10 406 T3r = VFNMS(LDK(KP855719849), T3q, T3n);
Chris@10 407 ST(&(x[WS(rs, 22)]), VFMAI(T3m, T3d), ms, &(x[0]));
Chris@10 408 ST(&(x[WS(rs, 3)]), VFNMSI(T3m, T3d), ms, &(x[WS(rs, 1)]));
Chris@10 409 T3U = VFMA(LDK(KP560319534), T3T, T3Q);
Chris@10 410 T44 = VFNMS(LDK(KP681693190), T43, T3R);
Chris@10 411 ST(&(x[WS(rs, 23)]), VFMAI(T3O, T3G), ms, &(x[WS(rs, 1)]));
Chris@10 412 ST(&(x[WS(rs, 2)]), VFNMSI(T3O, T3G), ms, &(x[0]));
Chris@10 413 T48 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T47, T3W));
Chris@10 414 T42 = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T41, T3W));
Chris@10 415 T3y = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T3x, T3l));
Chris@10 416 T3s = VFMA(LDK(KP897376177), T3r, T2X);
Chris@10 417 T3V = VFNMS(LDK(KP949179823), T3U, T3P);
Chris@10 418 T45 = VFNMS(LDK(KP860541664), T44, T3P);
Chris@10 419 T2R = VFNMS(LDK(KP912575812), T2z, T2y);
Chris@10 420 T2A = VFMA(LDK(KP912575812), T2z, T2y);
Chris@10 421 T1b = VFMA(LDK(KP829049696), T1a, TO);
Chris@10 422 T2h = VFNMS(LDK(KP829049696), T1a, TO);
Chris@10 423 T2i = VFNMS(LDK(KP831864738), T1W, T1z);
Chris@10 424 T1X = VFMA(LDK(KP831864738), T1W, T1z);
Chris@10 425 }
Chris@10 426 }
Chris@10 427 }
Chris@10 428 }
Chris@10 429 }
Chris@10 430 }
Chris@10 431 }
Chris@10 432 }
Chris@10 433 }
Chris@10 434 }
Chris@10 435 }
Chris@10 436 {
Chris@10 437 V T2M, T2F, T2x, T2S, T2T, T2N;
Chris@10 438 T2M = VFNMS(LDK(KP958953096), T2E, T2D);
Chris@10 439 T2F = VFMA(LDK(KP958953096), T2E, T2D);
Chris@10 440 ST(&(x[WS(rs, 17)]), VFMAI(T3y, T3s), ms, &(x[WS(rs, 1)]));
Chris@10 441 ST(&(x[WS(rs, 8)]), VFNMSI(T3y, T3s), ms, &(x[0]));
Chris@10 442 ST(&(x[WS(rs, 12)]), VFMAI(T42, T3V), ms, &(x[0]));
Chris@10 443 ST(&(x[WS(rs, 13)]), VFNMSI(T42, T3V), ms, &(x[WS(rs, 1)]));
Chris@10 444 ST(&(x[WS(rs, 18)]), VFNMSI(T48, T45), ms, &(x[0]));
Chris@10 445 ST(&(x[WS(rs, 7)]), VFMAI(T48, T45), ms, &(x[WS(rs, 1)]));
Chris@10 446 T2j = VFMA(LDK(KP559154169), T2i, T2h);
Chris@10 447 T2q = VFNMS(LDK(KP683113946), T2h, T2i);
Chris@10 448 T2x = VFNMS(LDK(KP867381224), T2w, T2v);
Chris@10 449 T2S = VFMA(LDK(KP867381224), T2w, T2v);
Chris@10 450 T2J = VFMA(LDK(KP894834959), T2I, T2F);
Chris@10 451 T2T = VFMA(LDK(KP447417479), T2I, T2S);
Chris@10 452 T2B = VFNMS(LDK(KP809385824), T2A, T2x);
Chris@10 453 T2N = VFMA(LDK(KP447417479), T2A, T2M);
Chris@10 454 T2e = VFMA(LDK(KP831864738), T25, T24);
Chris@10 455 T26 = VFNMS(LDK(KP831864738), T25, T24);
Chris@10 456 T2U = VFNMS(LDK(KP763932022), T2T, T2F);
Chris@10 457 T1Y = VFMA(LDK(KP904730450), T1X, T1b);
Chris@10 458 T23 = VFNMS(LDK(KP904730450), T1X, T1b);
Chris@10 459 T2O = VFMA(LDK(KP690983005), T2N, T2x);
Chris@10 460 }
Chris@10 461 }
Chris@10 462 {
Chris@10 463 V T2C, T22, T2d, T2K;
Chris@10 464 T2C = VFNMS(LDK(KP992114701), T2B, To);
Chris@10 465 T22 = VFMA(LDK(KP916574801), T21, T20);
Chris@10 466 T2d = VFNMS(LDK(KP916574801), T21, T20);
Chris@10 467 T2K = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2J, T2c));
Chris@10 468 {
Chris@10 469 V T27, T2P, T2f, T2k, T2n, T2V;
Chris@10 470 T2V = VFNMS(LDK(KP999544308), T2U, T2R);
Chris@10 471 T27 = VFNMS(LDK(KP904730450), T26, T23);
Chris@10 472 T2t = VFMA(LDK(KP968583161), T1Y, To);
Chris@10 473 T1Z = VFNMS(LDK(KP242145790), T1Y, To);
Chris@10 474 T2P = VFNMS(LDK(KP999544308), T2O, T2L);
Chris@10 475 T2f = VFMA(LDK(KP904730450), T2e, T2d);
Chris@10 476 T2k = VFNMS(LDK(KP904730450), T2e, T2d);
Chris@10 477 T2n = VADD(T22, T23);
Chris@10 478 ST(&(x[WS(rs, 21)]), VFNMSI(T2K, T2C), ms, &(x[WS(rs, 1)]));
Chris@10 479 ST(&(x[WS(rs, 4)]), VFMAI(T2K, T2C), ms, &(x[0]));
Chris@10 480 T2W = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2V, T2c));
Chris@10 481 T28 = VFNMS(LDK(KP618033988), T27, T22);
Chris@10 482 T2Q = VFNMS(LDK(KP803003575), T2P, To);
Chris@10 483 T2r = VFMA(LDK(KP617882369), T2k, T2q);
Chris@10 484 T2g = VFNMS(LDK(KP242145790), T2f, T2c);
Chris@10 485 T2u = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T2f, T2c));
Chris@10 486 T2o = VFNMS(LDK(KP683113946), T2n, T26);
Chris@10 487 T2l = VFMA(LDK(KP559016994), T2k, T2j);
Chris@10 488 }
Chris@10 489 }
Chris@10 490 }
Chris@10 491 }
Chris@10 492 }
Chris@10 493 }
Chris@10 494 }
Chris@10 495 {
Chris@10 496 V T29, T2s, T2p, T2m;
Chris@10 497 T29 = VFNMS(LDK(KP876091699), T28, T1Z);
Chris@10 498 ST(&(x[WS(rs, 9)]), VFMAI(T2W, T2Q), ms, &(x[WS(rs, 1)]));
Chris@10 499 ST(&(x[WS(rs, 16)]), VFNMSI(T2W, T2Q), ms, &(x[0]));
Chris@10 500 T2s = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T2r, T2g));
Chris@10 501 ST(&(x[WS(rs, 24)]), VFMAI(T2u, T2t), ms, &(x[0]));
Chris@10 502 ST(&(x[WS(rs, 1)]), VFNMSI(T2u, T2t), ms, &(x[WS(rs, 1)]));
Chris@10 503 T2p = VFMA(LDK(KP792626838), T2o, T1Z);
Chris@10 504 T2m = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T2l, T2g));
Chris@10 505 ST(&(x[WS(rs, 11)]), VFNMSI(T2s, T2p), ms, &(x[WS(rs, 1)]));
Chris@10 506 ST(&(x[WS(rs, 14)]), VFMAI(T2s, T2p), ms, &(x[0]));
Chris@10 507 ST(&(x[WS(rs, 19)]), VFMAI(T2m, T29), ms, &(x[WS(rs, 1)]));
Chris@10 508 ST(&(x[WS(rs, 6)]), VFNMSI(T2m, T29), ms, &(x[0]));
Chris@10 509 }
Chris@10 510 }
Chris@10 511 }
Chris@10 512 VLEAVE();
Chris@10 513 }
Chris@10 514
Chris@10 515 static const tw_instr twinstr[] = { /* twiddle table for t3fv_25: the exponents 1, 3, 9 and 24 are the inputs the kernel multiplies directly by its four loaded twiddle vectors */
Chris@10 516 VTW(0, 1),
Chris@10 517 VTW(0, 3),
Chris@10 518 VTW(0, 9),
Chris@10 519 VTW(0, 24),
Chris@10 520 {TW_NEXT, VL, 0} /* closing record; presumably the advance/terminator entry -- confirm against the tw_instr definition */
Chris@10 521 };
Chris@10 522
Chris@10 523 static const ct_desc desc = { 25, XSIMD_STRING("t3fv_25"), twinstr, &GENUS, {87, 100, 181, 0}, 0, 0, 0 }; /* descriptor for the FMA kernel: size 25, name string, twiddle table, GENUS, and the 87 add / 100 mul / 181 FMA counts quoted in the generator comment; the meaning of the remaining zero fields is not visible in this file */
Chris@10 524
Chris@10 525 void XSIMD(codelet_t3fv_25) (planner *p) { /* registration entry point: hands the t3fv_25 kernel and its descriptor to planner p */
Chris@10 526 X(kdft_dit_register) (p, t3fv_25, &desc);
Chris@10 527 }
Chris@10 528 #else /* HAVE_FMA */
Chris@10 529
Chris@10 530 /* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 25 -name t3fv_25 -include t3f.h */
Chris@10 531
Chris@10 532 /*
Chris@10 533 * This function contains 268 FP additions, 228 FP multiplications,
Chris@10 534 * (or, 190 additions, 150 multiplications, 78 fused multiply/add),
Chris@10 535 * 123 stack variables, 40 constants, and 50 memory accesses
Chris@10 536 */
Chris@10 537 #include "t3f.h"
Chris@10 538
Chris@10 539 static void t3fv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@10 540 {
Chris@10 541 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@10 542 DVK(KP125581039, +0.125581039058626752152356449131262266244969664);
Chris@10 543 DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
Chris@10 544 DVK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@10 545 DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
Chris@10 546 DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
Chris@10 547 DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
Chris@10 548 DVK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@10 549 DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
Chris@10 550 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@10 551 DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
Chris@10 552 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@10 553 DVK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@10 554 DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
Chris@10 555 DVK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@10 556 DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
Chris@10 557 DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@10 558 DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
Chris@10 559 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@10 560 DVK(KP250666467, +0.250666467128608490746237519633017587885836494);
Chris@10 561 DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@10 562 DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
Chris@10 563 DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
Chris@10 564 DVK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@10 565 DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@10 566 DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
Chris@10 567 DVK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@10 568 DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
Chris@10 569 DVK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@10 570 DVK(KP851558583, +0.851558583130145297725004891488503407959946084);
Chris@10 571 DVK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@10 572 DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
Chris@10 573 DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@10 574 DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
Chris@10 575 DVK(KP293892626, +0.293892626146236564584352977319536384298826219);
Chris@10 576 DVK(KP475528258, +0.475528258147576786058219666689691071702849317);
Chris@10 577 DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@10 578 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@10 579 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@10 580 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@10 581 {
Chris@10 582 INT m;
Chris@10 583 R *x;
Chris@10 584 x = ri;
Chris@10 585 for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(25, rs)) {
Chris@10 586 V T1, T4, T2, T3, TA, Td, Tp, Tw, Tx, T1G, T1j, T5, T1c, T8, T9;
Chris@10 587 V Ts, T1J, Tg, T1C, T1m, TX, TB, T1f, TU;
Chris@10 588 T1 = LDW(&(W[0]));
Chris@10 589 T4 = LDW(&(W[TWVL * 4]));
Chris@10 590 T2 = LDW(&(W[TWVL * 2]));
Chris@10 591 T3 = VZMUL(T1, T2);
Chris@10 592 TA = VZMULJ(T1, T4);
Chris@10 593 Td = VZMUL(T1, T4);
Chris@10 594 Tp = VZMULJ(T2, T4);
Chris@10 595 Tw = VZMULJ(T1, T2);
Chris@10 596 Tx = VZMUL(Tw, T4);
Chris@10 597 T1G = VZMUL(T3, T4);
Chris@10 598 T1j = VZMUL(T2, T4);
Chris@10 599 T5 = VZMULJ(T3, T4);
Chris@10 600 T1c = VZMULJ(Tw, T4);
Chris@10 601 T8 = LDW(&(W[TWVL * 6]));
Chris@10 602 T9 = VZMULJ(T3, T8);
Chris@10 603 Ts = VZMULJ(T2, T8);
Chris@10 604 T1J = VZMULJ(Tp, T8);
Chris@10 605 Tg = VZMULJ(T4, T8);
Chris@10 606 T1C = VZMULJ(T1, T8);
Chris@10 607 T1m = VZMULJ(T1c, T8);
Chris@10 608 TX = VZMULJ(T5, T8);
Chris@10 609 TB = VZMULJ(TA, T8);
Chris@10 610 T1f = VZMULJ(Tw, T8);
Chris@10 611 TU = VZMULJ(Td, T8);
Chris@10 612 {
Chris@10 613 V Tl, Tk, Tm, Tn, T20, T2R, T22, T1V, T2K, T1S, T3A, T2L, TN, T2G, TK;
Chris@10 614 V T3w, T2H, T19, T2D, T16, T3x, T2E, T1y, T2N, T1v, T3z, T2O;
Chris@10 615 {
Chris@10 616 V Tf, Ti, Tj, T7, Tb, Tc, T21;
Chris@10 617 Tl = LD(&(x[0]), ms, &(x[0]));
Chris@10 618 {
Chris@10 619 V Te, Th, T6, Ta;
Chris@10 620 Te = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
Chris@10 621 Tf = VZMULJ(Td, Te);
Chris@10 622 Th = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
Chris@10 623 Ti = VZMULJ(Tg, Th);
Chris@10 624 Tj = VADD(Tf, Ti);
Chris@10 625 T6 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
Chris@10 626 T7 = VZMULJ(T5, T6);
Chris@10 627 Ta = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
Chris@10 628 Tb = VZMULJ(T9, Ta);
Chris@10 629 Tc = VADD(T7, Tb);
Chris@10 630 }
Chris@10 631 Tk = VMUL(LDK(KP559016994), VSUB(Tc, Tj));
Chris@10 632 Tm = VADD(Tc, Tj);
Chris@10 633 Tn = VFNMS(LDK(KP250000000), Tm, Tl);
Chris@10 634 T20 = VSUB(T7, Tb);
Chris@10 635 T21 = VSUB(Tf, Ti);
Chris@10 636 T2R = VMUL(LDK(KP951056516), T21);
Chris@10 637 T22 = VFMA(LDK(KP951056516), T20, VMUL(LDK(KP587785252), T21));
Chris@10 638 }
Chris@10 639 {
Chris@10 640 V T1P, T1I, T1L, T1M, T1B, T1E, T1F, T1O;
Chris@10 641 T1O = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Chris@10 642 T1P = VZMULJ(T2, T1O);
Chris@10 643 {
Chris@10 644 V T1H, T1K, T1A, T1D;
Chris@10 645 T1H = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
Chris@10 646 T1I = VZMULJ(T1G, T1H);
Chris@10 647 T1K = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
Chris@10 648 T1L = VZMULJ(T1J, T1K);
Chris@10 649 T1M = VADD(T1I, T1L);
Chris@10 650 T1A = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
Chris@10 651 T1B = VZMULJ(TA, T1A);
Chris@10 652 T1D = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
Chris@10 653 T1E = VZMULJ(T1C, T1D);
Chris@10 654 T1F = VADD(T1B, T1E);
Chris@10 655 }
Chris@10 656 {
Chris@10 657 V T1T, T1U, T1N, T1Q, T1R;
Chris@10 658 T1T = VSUB(T1B, T1E);
Chris@10 659 T1U = VSUB(T1I, T1L);
Chris@10 660 T1V = VFMA(LDK(KP475528258), T1T, VMUL(LDK(KP293892626), T1U));
Chris@10 661 T2K = VFNMS(LDK(KP293892626), T1T, VMUL(LDK(KP475528258), T1U));
Chris@10 662 T1N = VMUL(LDK(KP559016994), VSUB(T1F, T1M));
Chris@10 663 T1Q = VADD(T1F, T1M);
Chris@10 664 T1R = VFNMS(LDK(KP250000000), T1Q, T1P);
Chris@10 665 T1S = VADD(T1N, T1R);
Chris@10 666 T3A = VADD(T1P, T1Q);
Chris@10 667 T2L = VSUB(T1R, T1N);
Chris@10 668 }
Chris@10 669 }
Chris@10 670 {
Chris@10 671 V TH, Tz, TD, TE, Tr, Tu, Tv, TG;
Chris@10 672 TG = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
Chris@10 673 TH = VZMULJ(T1, TG);
Chris@10 674 {
Chris@10 675 V Ty, TC, Tq, Tt;
Chris@10 676 Ty = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
Chris@10 677 Tz = VZMULJ(Tx, Ty);
Chris@10 678 TC = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
Chris@10 679 TD = VZMULJ(TB, TC);
Chris@10 680 TE = VADD(Tz, TD);
Chris@10 681 Tq = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Chris@10 682 Tr = VZMULJ(Tp, Tq);
Chris@10 683 Tt = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
Chris@10 684 Tu = VZMULJ(Ts, Tt);
Chris@10 685 Tv = VADD(Tr, Tu);
Chris@10 686 }
Chris@10 687 {
Chris@10 688 V TL, TM, TF, TI, TJ;
Chris@10 689 TL = VSUB(Tr, Tu);
Chris@10 690 TM = VSUB(Tz, TD);
Chris@10 691 TN = VFMA(LDK(KP475528258), TL, VMUL(LDK(KP293892626), TM));
Chris@10 692 T2G = VFNMS(LDK(KP293892626), TL, VMUL(LDK(KP475528258), TM));
Chris@10 693 TF = VMUL(LDK(KP559016994), VSUB(Tv, TE));
Chris@10 694 TI = VADD(Tv, TE);
Chris@10 695 TJ = VFNMS(LDK(KP250000000), TI, TH);
Chris@10 696 TK = VADD(TF, TJ);
Chris@10 697 T3w = VADD(TH, TI);
Chris@10 698 T2H = VSUB(TJ, TF);
Chris@10 699 }
Chris@10 700 }
Chris@10 701 {
Chris@10 702 V T13, TW, TZ, T10, TQ, TS, TT, T12;
Chris@10 703 T12 = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Chris@10 704 T13 = VZMULJ(T3, T12);
Chris@10 705 {
Chris@10 706 V TV, TY, TP, TR;
Chris@10 707 TV = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
Chris@10 708 TW = VZMULJ(TU, TV);
Chris@10 709 TY = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
Chris@10 710 TZ = VZMULJ(TX, TY);
Chris@10 711 T10 = VADD(TW, TZ);
Chris@10 712 TP = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
Chris@10 713 TQ = VZMULJ(T4, TP);
Chris@10 714 TR = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
Chris@10 715 TS = VZMULJ(T8, TR);
Chris@10 716 TT = VADD(TQ, TS);
Chris@10 717 }
Chris@10 718 {
Chris@10 719 V T17, T18, T11, T14, T15;
Chris@10 720 T17 = VSUB(TQ, TS);
Chris@10 721 T18 = VSUB(TW, TZ);
Chris@10 722 T19 = VFMA(LDK(KP475528258), T17, VMUL(LDK(KP293892626), T18));
Chris@10 723 T2D = VFNMS(LDK(KP293892626), T17, VMUL(LDK(KP475528258), T18));
Chris@10 724 T11 = VMUL(LDK(KP559016994), VSUB(TT, T10));
Chris@10 725 T14 = VADD(TT, T10);
Chris@10 726 T15 = VFNMS(LDK(KP250000000), T14, T13);
Chris@10 727 T16 = VADD(T11, T15);
Chris@10 728 T3x = VADD(T13, T14);
Chris@10 729 T2E = VSUB(T15, T11);
Chris@10 730 }
Chris@10 731 }
Chris@10 732 {
Chris@10 733 V T1s, T1l, T1o, T1p, T1e, T1h, T1i, T1r;
Chris@10 734 T1r = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Chris@10 735 T1s = VZMULJ(Tw, T1r);
Chris@10 736 {
Chris@10 737 V T1k, T1n, T1d, T1g;
Chris@10 738 T1k = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
Chris@10 739 T1l = VZMULJ(T1j, T1k);
Chris@10 740 T1n = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
Chris@10 741 T1o = VZMULJ(T1m, T1n);
Chris@10 742 T1p = VADD(T1l, T1o);
Chris@10 743 T1d = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
Chris@10 744 T1e = VZMULJ(T1c, T1d);
Chris@10 745 T1g = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
Chris@10 746 T1h = VZMULJ(T1f, T1g);
Chris@10 747 T1i = VADD(T1e, T1h);
Chris@10 748 }
Chris@10 749 {
Chris@10 750 V T1w, T1x, T1q, T1t, T1u;
Chris@10 751 T1w = VSUB(T1e, T1h);
Chris@10 752 T1x = VSUB(T1l, T1o);
Chris@10 753 T1y = VFMA(LDK(KP475528258), T1w, VMUL(LDK(KP293892626), T1x));
Chris@10 754 T2N = VFNMS(LDK(KP293892626), T1w, VMUL(LDK(KP475528258), T1x));
Chris@10 755 T1q = VMUL(LDK(KP559016994), VSUB(T1i, T1p));
Chris@10 756 T1t = VADD(T1i, T1p);
Chris@10 757 T1u = VFNMS(LDK(KP250000000), T1t, T1s);
Chris@10 758 T1v = VADD(T1q, T1u);
Chris@10 759 T3z = VADD(T1s, T1t);
Chris@10 760 T2O = VSUB(T1u, T1q);
Chris@10 761 }
Chris@10 762 }
Chris@10 763 {
Chris@10 764 V T3J, T3K, T3D, T3E, T3C, T3F, T3L, T3G;
Chris@10 765 {
Chris@10 766 V T3H, T3I, T3y, T3B;
Chris@10 767 T3H = VSUB(T3w, T3x);
Chris@10 768 T3I = VSUB(T3z, T3A);
Chris@10 769 T3J = VBYI(VFMA(LDK(KP951056516), T3H, VMUL(LDK(KP587785252), T3I)));
Chris@10 770 T3K = VBYI(VFNMS(LDK(KP587785252), T3H, VMUL(LDK(KP951056516), T3I)));
Chris@10 771 T3D = VADD(Tl, Tm);
Chris@10 772 T3y = VADD(T3w, T3x);
Chris@10 773 T3B = VADD(T3z, T3A);
Chris@10 774 T3E = VADD(T3y, T3B);
Chris@10 775 T3C = VMUL(LDK(KP559016994), VSUB(T3y, T3B));
Chris@10 776 T3F = VFNMS(LDK(KP250000000), T3E, T3D);
Chris@10 777 }
Chris@10 778 ST(&(x[0]), VADD(T3D, T3E), ms, &(x[0]));
Chris@10 779 T3L = VSUB(T3F, T3C);
Chris@10 780 ST(&(x[WS(rs, 10)]), VADD(T3K, T3L), ms, &(x[0]));
Chris@10 781 ST(&(x[WS(rs, 15)]), VSUB(T3L, T3K), ms, &(x[WS(rs, 1)]));
Chris@10 782 T3G = VADD(T3C, T3F);
Chris@10 783 ST(&(x[WS(rs, 5)]), VSUB(T3G, T3J), ms, &(x[WS(rs, 1)]));
Chris@10 784 ST(&(x[WS(rs, 20)]), VADD(T3J, T3G), ms, &(x[0]));
Chris@10 785 }
Chris@10 786 {
Chris@10 787 V To, T2n, T2o, T2p, T2x, T2y, T2z, T2u, T2v, T2w, T2q, T2r, T2s, T29, T2i;
Chris@10 788 V T2e, T2g, T1Y, T2j, T2b, T2c, T2B, T2C;
Chris@10 789 To = VADD(Tk, Tn);
Chris@10 790 T2n = VFMA(LDK(KP1_688655851), TN, VMUL(LDK(KP535826794), TK));
Chris@10 791 T2o = VFMA(LDK(KP1_541026485), T19, VMUL(LDK(KP637423989), T16));
Chris@10 792 T2p = VSUB(T2n, T2o);
Chris@10 793 T2x = VFMA(LDK(KP851558583), T1y, VMUL(LDK(KP904827052), T1v));
Chris@10 794 T2y = VFMA(LDK(KP1_984229402), T1V, VMUL(LDK(KP125333233), T1S));
Chris@10 795 T2z = VADD(T2x, T2y);
Chris@10 796 T2u = VFNMS(LDK(KP844327925), TK, VMUL(LDK(KP1_071653589), TN));
Chris@10 797 T2v = VFNMS(LDK(KP1_274847979), T19, VMUL(LDK(KP770513242), T16));
Chris@10 798 T2w = VADD(T2u, T2v);
Chris@10 799 T2q = VFNMS(LDK(KP425779291), T1v, VMUL(LDK(KP1_809654104), T1y));
Chris@10 800 T2r = VFNMS(LDK(KP992114701), T1S, VMUL(LDK(KP250666467), T1V));
Chris@10 801 T2s = VADD(T2q, T2r);
Chris@10 802 {
Chris@10 803 V T23, T24, T25, T26, T27, T28;
Chris@10 804 T23 = VFMA(LDK(KP1_937166322), TN, VMUL(LDK(KP248689887), TK));
Chris@10 805 T24 = VFMA(LDK(KP1_071653589), T19, VMUL(LDK(KP844327925), T16));
Chris@10 806 T25 = VADD(T23, T24);
Chris@10 807 T26 = VFMA(LDK(KP1_752613360), T1y, VMUL(LDK(KP481753674), T1v));
Chris@10 808 T27 = VFMA(LDK(KP1_457937254), T1V, VMUL(LDK(KP684547105), T1S));
Chris@10 809 T28 = VADD(T26, T27);
Chris@10 810 T29 = VADD(T25, T28);
Chris@10 811 T2i = VSUB(T27, T26);
Chris@10 812 T2e = VMUL(LDK(KP559016994), VSUB(T28, T25));
Chris@10 813 T2g = VSUB(T24, T23);
Chris@10 814 }
Chris@10 815 {
Chris@10 816 V TO, T1a, T1b, T1z, T1W, T1X;
Chris@10 817 TO = VFNMS(LDK(KP497379774), TN, VMUL(LDK(KP968583161), TK));
Chris@10 818 T1a = VFNMS(LDK(KP1_688655851), T19, VMUL(LDK(KP535826794), T16));
Chris@10 819 T1b = VADD(TO, T1a);
Chris@10 820 T1z = VFNMS(LDK(KP963507348), T1y, VMUL(LDK(KP876306680), T1v));
Chris@10 821 T1W = VFNMS(LDK(KP1_369094211), T1V, VMUL(LDK(KP728968627), T1S));
Chris@10 822 T1X = VADD(T1z, T1W);
Chris@10 823 T1Y = VADD(T1b, T1X);
Chris@10 824 T2j = VMUL(LDK(KP559016994), VSUB(T1b, T1X));
Chris@10 825 T2b = VSUB(T1a, TO);
Chris@10 826 T2c = VSUB(T1z, T1W);
Chris@10 827 }
Chris@10 828 {
Chris@10 829 V T1Z, T2a, T2t, T2A;
Chris@10 830 T1Z = VADD(To, T1Y);
Chris@10 831 T2a = VBYI(VADD(T22, T29));
Chris@10 832 ST(&(x[WS(rs, 1)]), VSUB(T1Z, T2a), ms, &(x[WS(rs, 1)]));
Chris@10 833 ST(&(x[WS(rs, 24)]), VADD(T1Z, T2a), ms, &(x[0]));
Chris@10 834 T2t = VADD(To, VADD(T2p, T2s));
Chris@10 835 T2A = VBYI(VADD(T22, VSUB(T2w, T2z)));
Chris@10 836 ST(&(x[WS(rs, 21)]), VSUB(T2t, T2A), ms, &(x[WS(rs, 1)]));
Chris@10 837 ST(&(x[WS(rs, 4)]), VADD(T2t, T2A), ms, &(x[0]));
Chris@10 838 }
Chris@10 839 T2B = VBYI(VADD(T22, VFMA(LDK(KP309016994), T2w, VFMA(LDK(KP587785252), VSUB(T2r, T2q), VFNMS(LDK(KP951056516), VADD(T2n, T2o), VMUL(LDK(KP809016994), T2z))))));
Chris@10 840 T2C = VFMA(LDK(KP309016994), T2p, VFMA(LDK(KP951056516), VSUB(T2u, T2v), VFMA(LDK(KP587785252), VSUB(T2y, T2x), VFNMS(LDK(KP809016994), T2s, To))));
Chris@10 841 ST(&(x[WS(rs, 9)]), VADD(T2B, T2C), ms, &(x[WS(rs, 1)]));
Chris@10 842 ST(&(x[WS(rs, 16)]), VSUB(T2C, T2B), ms, &(x[0]));
Chris@10 843 {
Chris@10 844 V T2f, T2l, T2k, T2m, T2d, T2h;
Chris@10 845 T2d = VFMS(LDK(KP250000000), T29, T22);
Chris@10 846 T2f = VBYI(VADD(VFMA(LDK(KP587785252), T2b, VMUL(LDK(KP951056516), T2c)), VSUB(T2d, T2e)));
Chris@10 847 T2l = VBYI(VADD(VFNMS(LDK(KP587785252), T2c, VMUL(LDK(KP951056516), T2b)), VADD(T2d, T2e)));
Chris@10 848 T2h = VFNMS(LDK(KP250000000), T1Y, To);
Chris@10 849 T2k = VFMA(LDK(KP587785252), T2g, VFNMS(LDK(KP951056516), T2i, VSUB(T2h, T2j)));
Chris@10 850 T2m = VFMA(LDK(KP951056516), T2g, VADD(T2j, VFMA(LDK(KP587785252), T2i, T2h)));
Chris@10 851 ST(&(x[WS(rs, 11)]), VADD(T2f, T2k), ms, &(x[WS(rs, 1)]));
Chris@10 852 ST(&(x[WS(rs, 19)]), VSUB(T2m, T2l), ms, &(x[WS(rs, 1)]));
Chris@10 853 ST(&(x[WS(rs, 14)]), VSUB(T2k, T2f), ms, &(x[0]));
Chris@10 854 ST(&(x[WS(rs, 6)]), VADD(T2l, T2m), ms, &(x[0]));
Chris@10 855 }
Chris@10 856 }
Chris@10 857 {
Chris@10 858 V T2S, T2U, T2F, T2I, T2J, T2Y, T2Z, T30, T2M, T2P, T2Q, T2V, T2W, T2X, T3a;
Chris@10 859 V T3l, T3b, T3k, T3f, T3p, T3i, T3o, T32, T33;
Chris@10 860 T2S = VFNMS(LDK(KP587785252), T20, T2R);
Chris@10 861 T2U = VSUB(Tn, Tk);
Chris@10 862 T2F = VFNMS(LDK(KP125333233), T2E, VMUL(LDK(KP1_984229402), T2D));
Chris@10 863 T2I = VFMA(LDK(KP1_457937254), T2G, VMUL(LDK(KP684547105), T2H));
Chris@10 864 T2J = VSUB(T2F, T2I);
Chris@10 865 T2Y = VFNMS(LDK(KP1_996053456), T2N, VMUL(LDK(KP062790519), T2O));
Chris@10 866 T2Z = VFMA(LDK(KP1_541026485), T2K, VMUL(LDK(KP637423989), T2L));
Chris@10 867 T30 = VSUB(T2Y, T2Z);
Chris@10 868 T2M = VFNMS(LDK(KP770513242), T2L, VMUL(LDK(KP1_274847979), T2K));
Chris@10 869 T2P = VFMA(LDK(KP125581039), T2N, VMUL(LDK(KP998026728), T2O));
Chris@10 870 T2Q = VSUB(T2M, T2P);
Chris@10 871 T2V = VFNMS(LDK(KP1_369094211), T2G, VMUL(LDK(KP728968627), T2H));
Chris@10 872 T2W = VFMA(LDK(KP250666467), T2D, VMUL(LDK(KP992114701), T2E));
Chris@10 873 T2X = VSUB(T2V, T2W);
Chris@10 874 {
Chris@10 875 V T34, T35, T36, T37, T38, T39;
Chris@10 876 T34 = VFNMS(LDK(KP481753674), T2H, VMUL(LDK(KP1_752613360), T2G));
Chris@10 877 T35 = VFMA(LDK(KP851558583), T2D, VMUL(LDK(KP904827052), T2E));
Chris@10 878 T36 = VSUB(T34, T35);
Chris@10 879 T37 = VFNMS(LDK(KP844327925), T2O, VMUL(LDK(KP1_071653589), T2N));
Chris@10 880 T38 = VFNMS(LDK(KP998026728), T2L, VMUL(LDK(KP125581039), T2K));
Chris@10 881 T39 = VADD(T37, T38);
Chris@10 882 T3a = VMUL(LDK(KP559016994), VSUB(T36, T39));
Chris@10 883 T3l = VSUB(T37, T38);
Chris@10 884 T3b = VADD(T36, T39);
Chris@10 885 T3k = VADD(T34, T35);
Chris@10 886 }
Chris@10 887 {
Chris@10 888 V T3d, T3e, T3m, T3g, T3h, T3n;
Chris@10 889 T3d = VFNMS(LDK(KP425779291), T2E, VMUL(LDK(KP1_809654104), T2D));
Chris@10 890 T3e = VFMA(LDK(KP963507348), T2G, VMUL(LDK(KP876306680), T2H));
Chris@10 891 T3m = VADD(T3e, T3d);
Chris@10 892 T3g = VFMA(LDK(KP1_688655851), T2N, VMUL(LDK(KP535826794), T2O));
Chris@10 893 T3h = VFMA(LDK(KP1_996053456), T2K, VMUL(LDK(KP062790519), T2L));
Chris@10 894 T3n = VADD(T3g, T3h);
Chris@10 895 T3f = VSUB(T3d, T3e);
Chris@10 896 T3p = VADD(T3m, T3n);
Chris@10 897 T3i = VSUB(T3g, T3h);
Chris@10 898 T3o = VMUL(LDK(KP559016994), VSUB(T3m, T3n));
Chris@10 899 }
Chris@10 900 {
Chris@10 901 V T3u, T3v, T2T, T31;
Chris@10 902 T3u = VBYI(VADD(T2S, T3b));
Chris@10 903 T3v = VADD(T2U, T3p);
Chris@10 904 ST(&(x[WS(rs, 2)]), VADD(T3u, T3v), ms, &(x[0]));
Chris@10 905 ST(&(x[WS(rs, 23)]), VSUB(T3v, T3u), ms, &(x[WS(rs, 1)]));
Chris@10 906 T2T = VBYI(VSUB(VADD(T2J, T2Q), T2S));
Chris@10 907 T31 = VADD(T2U, VADD(T2X, T30));
Chris@10 908 ST(&(x[WS(rs, 3)]), VADD(T2T, T31), ms, &(x[WS(rs, 1)]));
Chris@10 909 ST(&(x[WS(rs, 22)]), VSUB(T31, T2T), ms, &(x[0]));
Chris@10 910 }
Chris@10 911 T32 = VFMA(LDK(KP309016994), T2X, VFNMS(LDK(KP809016994), T30, VFNMS(LDK(KP587785252), VADD(T2P, T2M), VFNMS(LDK(KP951056516), VADD(T2I, T2F), T2U))));
Chris@10 912 T33 = VBYI(VSUB(VFNMS(LDK(KP587785252), VADD(T2Y, T2Z), VFNMS(LDK(KP809016994), T2Q, VFNMS(LDK(KP951056516), VADD(T2V, T2W), VMUL(LDK(KP309016994), T2J)))), T2S));
Chris@10 913 ST(&(x[WS(rs, 17)]), VSUB(T32, T33), ms, &(x[WS(rs, 1)]));
Chris@10 914 ST(&(x[WS(rs, 8)]), VADD(T32, T33), ms, &(x[0]));
Chris@10 915 {
Chris@10 916 V T3j, T3s, T3r, T3t, T3c, T3q;
Chris@10 917 T3c = VFNMS(LDK(KP250000000), T3b, T2S);
Chris@10 918 T3j = VBYI(VADD(T3a, VADD(T3c, VFNMS(LDK(KP587785252), T3i, VMUL(LDK(KP951056516), T3f)))));
Chris@10 919 T3s = VBYI(VADD(T3c, VSUB(VFMA(LDK(KP587785252), T3f, VMUL(LDK(KP951056516), T3i)), T3a)));
Chris@10 920 T3q = VFNMS(LDK(KP250000000), T3p, T2U);
Chris@10 921 T3r = VFMA(LDK(KP951056516), T3k, VFMA(LDK(KP587785252), T3l, VADD(T3o, T3q)));
Chris@10 922 T3t = VFMA(LDK(KP587785252), T3k, VSUB(VFNMS(LDK(KP951056516), T3l, T3q), T3o));
Chris@10 923 ST(&(x[WS(rs, 7)]), VADD(T3j, T3r), ms, &(x[WS(rs, 1)]));
Chris@10 924 ST(&(x[WS(rs, 13)]), VSUB(T3t, T3s), ms, &(x[WS(rs, 1)]));
Chris@10 925 ST(&(x[WS(rs, 18)]), VSUB(T3r, T3j), ms, &(x[0]));
Chris@10 926 ST(&(x[WS(rs, 12)]), VADD(T3s, T3t), ms, &(x[0]));
Chris@10 927 }
Chris@10 928 }
Chris@10 929 }
Chris@10 930 }
Chris@10 931 }
Chris@10 932 VLEAVE();
Chris@10 933 }
Chris@10 934
/*
 * Twiddle-instruction table for t3fv_25; it is referenced from the codelet
 * descriptor `desc`.  It holds four VTW entries followed by a single
 * {TW_NEXT, VL, 0} entry.
 *
 * The loop in t3fv_25 loads exactly four twiddle vectors per iteration
 * (from W[0], W[TWVL * 2], W[TWVL * 4] and W[TWVL * 6]) and applies them
 * directly to inputs 1, 3, 9 and 24; every other twiddle used there is
 * formed from those four with VZMUL / VZMULJ.
 *
 * NOTE(review): presumably the second VTW argument is the twiddle exponent
 * and {TW_NEXT, VL, 0} terminates the list -- confirm against the VTW and
 * tw_instr definitions.
 */
Chris@10 935 static const tw_instr twinstr[] = {
Chris@10 936 VTW(0, 1),
Chris@10 937 VTW(0, 3),
Chris@10 938 VTW(0, 9),
Chris@10 939 VTW(0, 24),
Chris@10 940 {TW_NEXT, VL, 0}
Chris@10 941 };
Chris@10 942
/*
 * Codelet descriptor passed (by address) to the registration call in
 * codelet_t3fv_25.  Initializer fields, in order: the value 25; the name
 * string "t3fv_25" wrapped in XSIMD_STRING; the twinstr table; the address
 * of GENUS; the four-element group {190, 150, 78, 0}; and three trailing
 * zeros.
 *
 * NOTE(review): presumably 25 is the radix and {190, 150, 78, 0} are
 * operation counts (adds, multiplies, fused multiply-adds, other) --
 * confirm against the ct_desc declaration.
 */
Chris@10 943 static const ct_desc desc = { 25, XSIMD_STRING("t3fv_25"), twinstr, &GENUS, {190, 150, 78, 0}, 0, 0, 0 };
Chris@10 944
/*
 * Registration entry point for this codelet.  Hands the planner `p`, the
 * t3fv_25 function and the address of its descriptor `desc` to
 * X(kdft_dit_register).  Returns nothing.
 *
 * NOTE(review): the exported symbol name is produced by the XSIMD() macro,
 * presumably to give each SIMD flavour its own name -- confirm against the
 * macro definition.
 */
Chris@10 945 void XSIMD(codelet_t3fv_25) (planner *p) {
Chris@10 946 X(kdft_dit_register) (p, t3fv_25, &desc);
Chris@10 947 }
Chris@10 948 #endif /* HAVE_FMA */