annotate src/fftw-3.3.5/dft/simd/common/t3bv_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:45:04 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-dft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_twiddle_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 25 -name t3bv_25 -include t3b.h -sign 1 */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 268 FP additions, 281 FP multiplications,
Chris@42 32 * (or, 87 additions, 100 multiplications, 181 fused multiply/add),
Chris@42 33 * 223 stack variables, 67 constants, and 50 memory accesses
Chris@42 34 */
Chris@42 35 #include "t3b.h"
Chris@42 36
Chris@42 37 static void t3bv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DVK(KP792626838, +0.792626838241819413632131824093538848057784557);
Chris@42 40 DVK(KP876091699, +0.876091699473550838204498029706869638173524346);
Chris@42 41 DVK(KP617882369, +0.617882369114440893914546919006756321695042882);
Chris@42 42 DVK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@42 43 DVK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@42 44 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@42 45 DVK(KP999544308, +0.999544308746292983948881682379742149196758193);
Chris@42 46 DVK(KP916574801, +0.916574801383451584742370439148878693530976769);
Chris@42 47 DVK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@42 48 DVK(KP809385824, +0.809385824416008241660603814668679683846476688);
Chris@42 49 DVK(KP447417479, +0.447417479732227551498980015410057305749330693);
Chris@42 50 DVK(KP894834959, +0.894834959464455102997960030820114611498661386);
Chris@42 51 DVK(KP867381224, +0.867381224396525206773171885031575671309956167);
Chris@42 52 DVK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@42 53 DVK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@42 54 DVK(KP958953096, +0.958953096729998668045963838399037225970891871);
Chris@42 55 DVK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@42 56 DVK(KP829049696, +0.829049696159252993975487806364305442437946767);
Chris@42 57 DVK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@42 58 DVK(KP897376177, +0.897376177523557693138608077137219684419427330);
Chris@42 59 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@42 60 DVK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@42 61 DVK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@42 62 DVK(KP855719849, +0.855719849902058969314654733608091555096772472);
Chris@42 63 DVK(KP237294955, +0.237294955877110315393888866460840817927895961);
Chris@42 64 DVK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@42 65 DVK(KP904508497, +0.904508497187473712051146708591409529430077295);
Chris@42 66 DVK(KP997675361, +0.997675361079556513670859573984492383596555031);
Chris@42 67 DVK(KP763932022, +0.763932022500210303590826331268723764559381640);
Chris@42 68 DVK(KP690983005, +0.690983005625052575897706582817180941139845410);
Chris@42 69 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@42 70 DVK(KP952936919, +0.952936919628306576880750665357914584765951388);
Chris@42 71 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@42 72 DVK(KP262346850, +0.262346850930607871785420028382979691334784273);
Chris@42 73 DVK(KP570584518, +0.570584518783621657366766175430996792655723863);
Chris@42 74 DVK(KP669429328, +0.669429328479476605641803240971985825917022098);
Chris@42 75 DVK(KP923225144, +0.923225144846402650453449441572664695995209956);
Chris@42 76 DVK(KP945422727, +0.945422727388575946270360266328811958657216298);
Chris@42 77 DVK(KP522616830, +0.522616830205754336872861364785224694908468440);
Chris@42 78 DVK(KP956723877, +0.956723877038460305821989399535483155872969262);
Chris@42 79 DVK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@42 80 DVK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@42 81 DVK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@42 82 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 83 DVK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@42 84 DVK(KP921078979, +0.921078979742360627699756128143719920817673854);
Chris@42 85 DVK(KP982009705, +0.982009705009746369461829878184175962711969869);
Chris@42 86 DVK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@42 87 DVK(KP494780565, +0.494780565770515410344588413655324772219443730);
Chris@42 88 DVK(KP447533225, +0.447533225982656890041886979663652563063114397);
Chris@42 89 DVK(KP603558818, +0.603558818296015001454675132653458027918768137);
Chris@42 90 DVK(KP667278218, +0.667278218140296670899089292254759909713898805);
Chris@42 91 DVK(KP244189809, +0.244189809627953270309879511234821255780225091);
Chris@42 92 DVK(KP269969613, +0.269969613759572083574752974412347470060951301);
Chris@42 93 DVK(KP578046249, +0.578046249379945007321754579646815604023525655);
Chris@42 94 DVK(KP522847744, +0.522847744331509716623755382187077770911012542);
Chris@42 95 DVK(KP132830569, +0.132830569247582714407653942074819768844536507);
Chris@42 96 DVK(KP120146378, +0.120146378570687701782758537356596213647956445);
Chris@42 97 DVK(KP893101515, +0.893101515366181661711202267938416198338079437);
Chris@42 98 DVK(KP987388751, +0.987388751065621252324603216482382109400433949);
Chris@42 99 DVK(KP059835404, +0.059835404262124915169548397419498386427871950);
Chris@42 100 DVK(KP066152395, +0.066152395967733048213034281011006031460903353);
Chris@42 101 DVK(KP786782374, +0.786782374965295178365099601674911834788448471);
Chris@42 102 DVK(KP869845200, +0.869845200362138853122720822420327157933056305);
Chris@42 103 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 104 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 105 DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 106 {
Chris@42 107 INT m;
Chris@42 108 R *x;
Chris@42 109 x = ii;
Chris@42 110 for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(25, rs)) {
Chris@42 111 V T2t, T1Z, T2W, T28, T2Q, T2r, T2g, T2u, T2o, T2l;
Chris@42 112 {
Chris@42 113 V T2, T5, T3, T9;
Chris@42 114 T2 = LDW(&(W[0]));
Chris@42 115 T5 = LDW(&(W[TWVL * 4]));
Chris@42 116 T3 = LDW(&(W[TWVL * 2]));
Chris@42 117 T9 = LDW(&(W[TWVL * 6]));
Chris@42 118 {
Chris@42 119 V T2c, T3l, Tn, T49, Tm, T4e, TN, T32, T1d, T3a, T3f, T3z, T3H, T25, T1W;
Chris@42 120 V T2v, T2D, T4a, T1g, T18, T2Z, T11, T31, TK, T1q, T1j, T1n, T4b, T17;
Chris@42 121 {
Chris@42 122 V T1, T1l, Tr, T4, Ty, T1E, Tu, TX, TD, T1h, Tz, T1e, T1I, T1o, TU;
Chris@42 123 V Tk, T2b, T1B, T1D, T1N, T1F, Td, T2a, T1J;
Chris@42 124 {
Chris@42 125 V T7, Tb, TC, Tg, T1L, Ta, T6, Tj, T1A;
Chris@42 126 T1 = LD(&(x[0]), ms, &(x[0]));
Chris@42 127 {
Chris@42 128 V Tf, Ti, Te, Th;
Chris@42 129 Tf = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
Chris@42 130 Ti = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
Chris@42 131 T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
Chris@42 132 Tb = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
Chris@42 133 Te = VZMUL(T2, T5);
Chris@42 134 TC = VZMULJ(T2, T5);
Chris@42 135 T1l = VZMUL(T3, T5);
Chris@42 136 Tr = VZMULJ(T3, T5);
Chris@42 137 T4 = VZMUL(T2, T3);
Chris@42 138 Ty = VZMULJ(T2, T3);
Chris@42 139 T1E = VZMULJ(T2, T9);
Chris@42 140 Th = VZMULJ(T5, T9);
Chris@42 141 Tu = VZMULJ(T3, T9);
Chris@42 142 Tg = VZMUL(Te, Tf);
Chris@42 143 TX = VZMULJ(Te, T9);
Chris@42 144 TD = VZMULJ(TC, T9);
Chris@42 145 T1h = VZMULJ(Ty, T9);
Chris@42 146 Tz = VZMUL(Ty, T5);
Chris@42 147 T1e = VZMULJ(Ty, T5);
Chris@42 148 T1L = VZMULJ(Tr, T9);
Chris@42 149 Ta = VZMULJ(T4, T9);
Chris@42 150 T1I = VZMUL(T4, T5);
Chris@42 151 T6 = VZMULJ(T4, T5);
Chris@42 152 Tj = VZMUL(Th, Ti);
Chris@42 153 }
Chris@42 154 T1A = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Chris@42 155 T1o = VZMULJ(T1e, T9);
Chris@42 156 {
Chris@42 157 V Tc, T8, T1C, T1M;
Chris@42 158 T1C = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
Chris@42 159 T1M = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
Chris@42 160 Tc = VZMUL(Ta, Tb);
Chris@42 161 T8 = VZMUL(T6, T7);
Chris@42 162 TU = VZMULJ(T6, T9);
Chris@42 163 Tk = VADD(Tg, Tj);
Chris@42 164 T2b = VSUB(Tg, Tj);
Chris@42 165 T1B = VZMUL(T3, T1A);
Chris@42 166 T1D = VZMUL(TC, T1C);
Chris@42 167 T1N = VZMUL(T1L, T1M);
Chris@42 168 T1F = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
Chris@42 169 Td = VADD(T8, Tc);
Chris@42 170 T2a = VSUB(T8, Tc);
Chris@42 171 T1J = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
Chris@42 172 }
Chris@42 173 }
Chris@42 174 {
Chris@42 175 V Tq, Tt, TF, T1T, T1H, Tw, T1U, T1O, TA, Tp, Ts, TE;
Chris@42 176 Tp = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
Chris@42 177 Ts = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Chris@42 178 TE = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
Chris@42 179 {
Chris@42 180 V T1K, Tv, T1G, Tl;
Chris@42 181 Tv = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
Chris@42 182 T1G = VZMUL(T1E, T1F);
Chris@42 183 T2c = VFMA(LDK(KP618033988), T2b, T2a);
Chris@42 184 T3l = VFNMS(LDK(KP618033988), T2a, T2b);
Chris@42 185 Tn = VSUB(Td, Tk);
Chris@42 186 Tl = VADD(Td, Tk);
Chris@42 187 T1K = VZMUL(T1I, T1J);
Chris@42 188 Tq = VZMUL(T2, Tp);
Chris@42 189 Tt = VZMUL(Tr, Ts);
Chris@42 190 TF = VZMUL(TD, TE);
Chris@42 191 T1T = VSUB(T1D, T1G);
Chris@42 192 T1H = VADD(T1D, T1G);
Chris@42 193 T49 = VADD(T1, Tl);
Chris@42 194 Tm = VFNMS(LDK(KP250000000), Tl, T1);
Chris@42 195 Tw = VZMUL(Tu, Tv);
Chris@42 196 T1U = VSUB(T1K, T1N);
Chris@42 197 T1O = VADD(T1K, T1N);
Chris@42 198 TA = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
Chris@42 199 }
Chris@42 200 {
Chris@42 201 V Tx, TL, T1R, T38, T1V, T13, TQ, TZ, TS, T1Q, TV, TG, TM, T12, T1c;
Chris@42 202 V T16;
Chris@42 203 T12 = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Chris@42 204 {
Chris@42 205 V TP, TY, T1P, TB, TR;
Chris@42 206 TP = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
Chris@42 207 TY = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
Chris@42 208 TR = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
Chris@42 209 Tx = VADD(Tt, Tw);
Chris@42 210 TL = VSUB(Tt, Tw);
Chris@42 211 T1R = VSUB(T1O, T1H);
Chris@42 212 T1P = VADD(T1H, T1O);
Chris@42 213 T38 = VFNMS(LDK(KP618033988), T1T, T1U);
Chris@42 214 T1V = VFMA(LDK(KP618033988), T1U, T1T);
Chris@42 215 TB = VZMUL(Tz, TA);
Chris@42 216 T13 = VZMUL(T4, T12);
Chris@42 217 TQ = VZMUL(T9, TP);
Chris@42 218 TZ = VZMUL(TX, TY);
Chris@42 219 TS = VZMUL(T5, TR);
Chris@42 220 T4e = VADD(T1B, T1P);
Chris@42 221 T1Q = VFNMS(LDK(KP250000000), T1P, T1B);
Chris@42 222 TV = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
Chris@42 223 TG = VADD(TB, TF);
Chris@42 224 TM = VSUB(TF, TB);
Chris@42 225 }
Chris@42 226 T1c = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Chris@42 227 {
Chris@42 228 V T14, TT, TJ, T15, T10, TI, T1p, T1f, T1i, T1m;
Chris@42 229 T1f = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
Chris@42 230 T14 = VADD(TS, TQ);
Chris@42 231 TT = VSUB(TQ, TS);
Chris@42 232 {
Chris@42 233 V T39, T1S, TW, TH;
Chris@42 234 T39 = VFMA(LDK(KP559016994), T1R, T1Q);
Chris@42 235 T1S = VFNMS(LDK(KP559016994), T1R, T1Q);
Chris@42 236 TW = VZMUL(TU, TV);
Chris@42 237 TH = VADD(Tx, TG);
Chris@42 238 TJ = VSUB(Tx, TG);
Chris@42 239 TN = VFNMS(LDK(KP618033988), TM, TL);
Chris@42 240 T32 = VFMA(LDK(KP618033988), TL, TM);
Chris@42 241 T1d = VZMUL(Ty, T1c);
Chris@42 242 T3a = VFMA(LDK(KP869845200), T39, T38);
Chris@42 243 T3f = VFNMS(LDK(KP786782374), T38, T39);
Chris@42 244 T3z = VFMA(LDK(KP066152395), T39, T38);
Chris@42 245 T3H = VFNMS(LDK(KP059835404), T38, T39);
Chris@42 246 T25 = VFMA(LDK(KP987388751), T1S, T1V);
Chris@42 247 T1W = VFNMS(LDK(KP893101515), T1V, T1S);
Chris@42 248 T2v = VFNMS(LDK(KP120146378), T1V, T1S);
Chris@42 249 T2D = VFMA(LDK(KP132830569), T1S, T1V);
Chris@42 250 T15 = VADD(TZ, TW);
Chris@42 251 T10 = VSUB(TW, TZ);
Chris@42 252 TI = VFNMS(LDK(KP250000000), TH, Tq);
Chris@42 253 T4a = VADD(Tq, TH);
Chris@42 254 T1g = VZMUL(T1e, T1f);
Chris@42 255 }
Chris@42 256 T1p = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
Chris@42 257 T1i = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
Chris@42 258 T1m = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
Chris@42 259 T18 = VSUB(T14, T15);
Chris@42 260 T16 = VADD(T14, T15);
Chris@42 261 T2Z = VFNMS(LDK(KP618033988), TT, T10);
Chris@42 262 T11 = VFMA(LDK(KP618033988), T10, TT);
Chris@42 263 T31 = VFNMS(LDK(KP559016994), TJ, TI);
Chris@42 264 TK = VFMA(LDK(KP559016994), TJ, TI);
Chris@42 265 T1q = VZMUL(T1o, T1p);
Chris@42 266 T1j = VZMUL(T1h, T1i);
Chris@42 267 T1n = VZMUL(T1l, T1m);
Chris@42 268 }
Chris@42 269 T4b = VADD(T13, T16);
Chris@42 270 T17 = VFMS(LDK(KP250000000), T16, T13);
Chris@42 271 }
Chris@42 272 }
Chris@42 273 }
Chris@42 274 {
Chris@42 275 V T33, T3i, T3C, T3L, T20, TO, T2y, T2G, T1k, T1w, T1r, T1x, T2Y, T19, T4k;
Chris@42 276 V T4c;
Chris@42 277 T33 = VFMA(LDK(KP893101515), T32, T31);
Chris@42 278 T3i = VFNMS(LDK(KP987388751), T31, T32);
Chris@42 279 T3C = VFNMS(LDK(KP522847744), T32, T31);
Chris@42 280 T3L = VFMA(LDK(KP578046249), T31, T32);
Chris@42 281 T20 = VFMA(LDK(KP269969613), TK, TN);
Chris@42 282 TO = VFNMS(LDK(KP244189809), TN, TK);
Chris@42 283 T2y = VFMA(LDK(KP667278218), TK, TN);
Chris@42 284 T2G = VFNMS(LDK(KP603558818), TN, TK);
Chris@42 285 T1k = VADD(T1g, T1j);
Chris@42 286 T1w = VSUB(T1g, T1j);
Chris@42 287 T1r = VADD(T1n, T1q);
Chris@42 288 T1x = VSUB(T1q, T1n);
Chris@42 289 T2Y = VFMA(LDK(KP559016994), T18, T17);
Chris@42 290 T19 = VFNMS(LDK(KP559016994), T18, T17);
Chris@42 291 T4k = VSUB(T4a, T4b);
Chris@42 292 T4c = VADD(T4a, T4b);
Chris@42 293 {
Chris@42 294 V T2X, To, T35, T1y, T2H, T2z, T1a, T21, T3t, T34, T3n, T3j, T3E, T3Y, T3M;
Chris@42 295 V T3R, T1v, T36, T4l, T4f, T1u, T1s;
Chris@42 296 T2X = VFNMS(LDK(KP559016994), Tn, Tm);
Chris@42 297 To = VFMA(LDK(KP559016994), Tn, Tm);
Chris@42 298 T1u = VSUB(T1r, T1k);
Chris@42 299 T1s = VADD(T1k, T1r);
Chris@42 300 T35 = VFMA(LDK(KP618033988), T1w, T1x);
Chris@42 301 T1y = VFNMS(LDK(KP618033988), T1x, T1w);
Chris@42 302 {
Chris@42 303 V T3K, T30, T3h, T3D, T4d, T1t;
Chris@42 304 T3K = VFMA(LDK(KP447533225), T2Z, T2Y);
Chris@42 305 T30 = VFMA(LDK(KP120146378), T2Z, T2Y);
Chris@42 306 T3h = VFNMS(LDK(KP132830569), T2Y, T2Z);
Chris@42 307 T3D = VFNMS(LDK(KP494780565), T2Y, T2Z);
Chris@42 308 T2H = VFNMS(LDK(KP786782374), T11, T19);
Chris@42 309 T2z = VFMA(LDK(KP869845200), T19, T11);
Chris@42 310 T1a = VFNMS(LDK(KP667278218), T19, T11);
Chris@42 311 T21 = VFMA(LDK(KP603558818), T11, T19);
Chris@42 312 T4d = VADD(T1d, T1s);
Chris@42 313 T1t = VFNMS(LDK(KP250000000), T1s, T1d);
Chris@42 314 T3t = VFNMS(LDK(KP734762448), T33, T30);
Chris@42 315 T34 = VFMA(LDK(KP734762448), T33, T30);
Chris@42 316 T3n = VFMA(LDK(KP734762448), T3i, T3h);
Chris@42 317 T3j = VFNMS(LDK(KP734762448), T3i, T3h);
Chris@42 318 T3E = VFNMS(LDK(KP982009705), T3D, T3C);
Chris@42 319 T3Y = VFMA(LDK(KP982009705), T3D, T3C);
Chris@42 320 T3M = VFNMS(LDK(KP921078979), T3L, T3K);
Chris@42 321 T3R = VFMA(LDK(KP921078979), T3L, T3K);
Chris@42 322 T1v = VFNMS(LDK(KP559016994), T1u, T1t);
Chris@42 323 T36 = VFMA(LDK(KP559016994), T1u, T1t);
Chris@42 324 T4l = VSUB(T4d, T4e);
Chris@42 325 T4f = VADD(T4d, T4e);
Chris@42 326 }
Chris@42 327 {
Chris@42 328 V T2L, T2R, T2j, T2q, T2J, T2B, T2e, T26, T2U, T1Y, T23, T2O;
Chris@42 329 {
Chris@42 330 V T2I, T24, T2w, T2E, T48, T42, T3y, T3s, T3V, T45, T2A, T1b, T2h, T2i, T1X;
Chris@42 331 T2L = VFNMS(LDK(KP912575812), T2H, T2G);
Chris@42 332 T2I = VFMA(LDK(KP912575812), T2H, T2G);
Chris@42 333 {
Chris@42 334 V T3A, T3e, T37, T3I, T1z;
Chris@42 335 T3A = VFNMS(LDK(KP667278218), T36, T35);
Chris@42 336 T3e = VFNMS(LDK(KP059835404), T35, T36);
Chris@42 337 T37 = VFMA(LDK(KP066152395), T36, T35);
Chris@42 338 T3I = VFMA(LDK(KP603558818), T35, T36);
Chris@42 339 T24 = VFMA(LDK(KP578046249), T1v, T1y);
Chris@42 340 T1z = VFNMS(LDK(KP522847744), T1y, T1v);
Chris@42 341 T2w = VFNMS(LDK(KP494780565), T1v, T1y);
Chris@42 342 T2E = VFMA(LDK(KP447533225), T1y, T1v);
Chris@42 343 {
Chris@42 344 V T4i, T4g, T4o, T4m;
Chris@42 345 T4i = VSUB(T4c, T4f);
Chris@42 346 T4g = VADD(T4c, T4f);
Chris@42 347 T4o = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T4k, T4l));
Chris@42 348 T4m = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T4l, T4k));
Chris@42 349 {
Chris@42 350 V T3Q, T3J, T3b, T3u;
Chris@42 351 T3Q = VFNMS(LDK(KP845997307), T3I, T3H);
Chris@42 352 T3J = VFMA(LDK(KP845997307), T3I, T3H);
Chris@42 353 T3b = VFNMS(LDK(KP772036680), T3a, T37);
Chris@42 354 T3u = VFMA(LDK(KP772036680), T3a, T37);
Chris@42 355 {
Chris@42 356 V T3o, T3g, T3B, T3X, T4h;
Chris@42 357 T3o = VFNMS(LDK(KP772036680), T3f, T3e);
Chris@42 358 T3g = VFMA(LDK(KP772036680), T3f, T3e);
Chris@42 359 T3B = VFNMS(LDK(KP845997307), T3A, T3z);
Chris@42 360 T3X = VFMA(LDK(KP845997307), T3A, T3z);
Chris@42 361 ST(&(x[0]), VADD(T4g, T49), ms, &(x[0]));
Chris@42 362 T4h = VFNMS(LDK(KP250000000), T4g, T49);
Chris@42 363 {
Chris@42 364 V T40, T3N, T3c, T3v;
Chris@42 365 T40 = VFMA(LDK(KP906616052), T3M, T3J);
Chris@42 366 T3N = VFNMS(LDK(KP906616052), T3M, T3J);
Chris@42 367 T3c = VFMA(LDK(KP956723877), T3b, T34);
Chris@42 368 T3v = VFMA(LDK(KP522616830), T3j, T3u);
Chris@42 369 {
Chris@42 370 V T3p, T3k, T3S, T3F;
Chris@42 371 T3p = VFNMS(LDK(KP522616830), T34, T3o);
Chris@42 372 T3k = VFMA(LDK(KP945422727), T3j, T3g);
Chris@42 373 T3S = VFNMS(LDK(KP923225144), T3E, T3B);
Chris@42 374 T3F = VFMA(LDK(KP923225144), T3E, T3B);
Chris@42 375 {
Chris@42 376 V T46, T3Z, T4j, T4n;
Chris@42 377 T46 = VFNMS(LDK(KP669429328), T3X, T3Y);
Chris@42 378 T3Z = VFMA(LDK(KP570584518), T3Y, T3X);
Chris@42 379 T4j = VFMA(LDK(KP559016994), T4i, T4h);
Chris@42 380 T4n = VFNMS(LDK(KP559016994), T4i, T4h);
Chris@42 381 {
Chris@42 382 V T3W, T3O, T3d, T3w;
Chris@42 383 T3W = VFMA(LDK(KP262346850), T3N, T3l);
Chris@42 384 T3O = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T3l, T3N));
Chris@42 385 T3d = VFMA(LDK(KP992114701), T3c, T2X);
Chris@42 386 T3w = VFNMS(LDK(KP690983005), T3v, T3g);
Chris@42 387 {
Chris@42 388 V T3q, T3m, T3T, T43;
Chris@42 389 T3q = VFMA(LDK(KP763932022), T3p, T3b);
Chris@42 390 T3m = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T3l, T3k));
Chris@42 391 T3T = VFNMS(LDK(KP997675361), T3S, T3R);
Chris@42 392 T43 = VFNMS(LDK(KP904508497), T3S, T3Q);
Chris@42 393 {
Chris@42 394 V T3G, T3P, T47, T41;
Chris@42 395 T3G = VFMA(LDK(KP949179823), T3F, T2X);
Chris@42 396 T3P = VFNMS(LDK(KP237294955), T3F, T2X);
Chris@42 397 T47 = VFNMS(LDK(KP669429328), T40, T46);
Chris@42 398 T41 = VFMA(LDK(KP618033988), T40, T3Z);
Chris@42 399 ST(&(x[WS(rs, 20)]), VFNMSI(T4m, T4j), ms, &(x[0]));
Chris@42 400 ST(&(x[WS(rs, 5)]), VFMAI(T4m, T4j), ms, &(x[WS(rs, 1)]));
Chris@42 401 ST(&(x[WS(rs, 15)]), VFMAI(T4o, T4n), ms, &(x[WS(rs, 1)]));
Chris@42 402 ST(&(x[WS(rs, 10)]), VFNMSI(T4o, T4n), ms, &(x[0]));
Chris@42 403 {
Chris@42 404 V T3x, T3r, T3U, T44;
Chris@42 405 T3x = VFMA(LDK(KP855719849), T3w, T3t);
Chris@42 406 T3r = VFNMS(LDK(KP855719849), T3q, T3n);
Chris@42 407 ST(&(x[WS(rs, 3)]), VFMAI(T3m, T3d), ms, &(x[WS(rs, 1)]));
Chris@42 408 ST(&(x[WS(rs, 22)]), VFNMSI(T3m, T3d), ms, &(x[0]));
Chris@42 409 T3U = VFMA(LDK(KP560319534), T3T, T3Q);
Chris@42 410 T44 = VFNMS(LDK(KP681693190), T43, T3R);
Chris@42 411 ST(&(x[WS(rs, 2)]), VFMAI(T3O, T3G), ms, &(x[0]));
Chris@42 412 ST(&(x[WS(rs, 23)]), VFNMSI(T3O, T3G), ms, &(x[WS(rs, 1)]));
Chris@42 413 T48 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T47, T3W));
Chris@42 414 T42 = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T41, T3W));
Chris@42 415 T3y = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T3x, T3l));
Chris@42 416 T3s = VFMA(LDK(KP897376177), T3r, T2X);
Chris@42 417 T3V = VFNMS(LDK(KP949179823), T3U, T3P);
Chris@42 418 T45 = VFNMS(LDK(KP860541664), T44, T3P);
Chris@42 419 T2R = VFNMS(LDK(KP912575812), T2z, T2y);
Chris@42 420 T2A = VFMA(LDK(KP912575812), T2z, T2y);
Chris@42 421 T1b = VFMA(LDK(KP829049696), T1a, TO);
Chris@42 422 T2h = VFNMS(LDK(KP829049696), T1a, TO);
Chris@42 423 T2i = VFNMS(LDK(KP831864738), T1W, T1z);
Chris@42 424 T1X = VFMA(LDK(KP831864738), T1W, T1z);
Chris@42 425 }
Chris@42 426 }
Chris@42 427 }
Chris@42 428 }
Chris@42 429 }
Chris@42 430 }
Chris@42 431 }
Chris@42 432 }
Chris@42 433 }
Chris@42 434 }
Chris@42 435 }
Chris@42 436 {
Chris@42 437 V T2M, T2F, T2x, T2S, T2T, T2N;
Chris@42 438 T2M = VFNMS(LDK(KP958953096), T2E, T2D);
Chris@42 439 T2F = VFMA(LDK(KP958953096), T2E, T2D);
Chris@42 440 ST(&(x[WS(rs, 17)]), VFNMSI(T3y, T3s), ms, &(x[WS(rs, 1)]));
Chris@42 441 ST(&(x[WS(rs, 8)]), VFMAI(T3y, T3s), ms, &(x[0]));
Chris@42 442 ST(&(x[WS(rs, 13)]), VFMAI(T42, T3V), ms, &(x[WS(rs, 1)]));
Chris@42 443 ST(&(x[WS(rs, 12)]), VFNMSI(T42, T3V), ms, &(x[0]));
Chris@42 444 ST(&(x[WS(rs, 7)]), VFNMSI(T48, T45), ms, &(x[WS(rs, 1)]));
Chris@42 445 ST(&(x[WS(rs, 18)]), VFMAI(T48, T45), ms, &(x[0]));
Chris@42 446 T2j = VFMA(LDK(KP559154169), T2i, T2h);
Chris@42 447 T2q = VFNMS(LDK(KP683113946), T2h, T2i);
Chris@42 448 T2x = VFNMS(LDK(KP867381224), T2w, T2v);
Chris@42 449 T2S = VFMA(LDK(KP867381224), T2w, T2v);
Chris@42 450 T2J = VFMA(LDK(KP894834959), T2I, T2F);
Chris@42 451 T2T = VFMA(LDK(KP447417479), T2I, T2S);
Chris@42 452 T2B = VFNMS(LDK(KP809385824), T2A, T2x);
Chris@42 453 T2N = VFMA(LDK(KP447417479), T2A, T2M);
Chris@42 454 T2e = VFMA(LDK(KP831864738), T25, T24);
Chris@42 455 T26 = VFNMS(LDK(KP831864738), T25, T24);
Chris@42 456 T2U = VFNMS(LDK(KP763932022), T2T, T2F);
Chris@42 457 T1Y = VFMA(LDK(KP904730450), T1X, T1b);
Chris@42 458 T23 = VFNMS(LDK(KP904730450), T1X, T1b);
Chris@42 459 T2O = VFMA(LDK(KP690983005), T2N, T2x);
Chris@42 460 }
Chris@42 461 }
Chris@42 462 {
Chris@42 463 V T2C, T22, T2d, T2K;
Chris@42 464 T2C = VFNMS(LDK(KP992114701), T2B, To);
Chris@42 465 T22 = VFMA(LDK(KP916574801), T21, T20);
Chris@42 466 T2d = VFNMS(LDK(KP916574801), T21, T20);
Chris@42 467 T2K = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2J, T2c));
Chris@42 468 {
Chris@42 469 V T27, T2P, T2f, T2k, T2n, T2V;
Chris@42 470 T2V = VFNMS(LDK(KP999544308), T2U, T2R);
Chris@42 471 T27 = VFNMS(LDK(KP904730450), T26, T23);
Chris@42 472 T2t = VFMA(LDK(KP968583161), T1Y, To);
Chris@42 473 T1Z = VFNMS(LDK(KP242145790), T1Y, To);
Chris@42 474 T2P = VFNMS(LDK(KP999544308), T2O, T2L);
Chris@42 475 T2f = VFMA(LDK(KP904730450), T2e, T2d);
Chris@42 476 T2k = VFNMS(LDK(KP904730450), T2e, T2d);
Chris@42 477 T2n = VADD(T22, T23);
Chris@42 478 ST(&(x[WS(rs, 21)]), VFMAI(T2K, T2C), ms, &(x[WS(rs, 1)]));
Chris@42 479 ST(&(x[WS(rs, 4)]), VFNMSI(T2K, T2C), ms, &(x[0]));
Chris@42 480 T2W = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2V, T2c));
Chris@42 481 T28 = VFNMS(LDK(KP618033988), T27, T22);
Chris@42 482 T2Q = VFNMS(LDK(KP803003575), T2P, To);
Chris@42 483 T2r = VFMA(LDK(KP617882369), T2k, T2q);
Chris@42 484 T2g = VFNMS(LDK(KP242145790), T2f, T2c);
Chris@42 485 T2u = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T2f, T2c));
Chris@42 486 T2o = VFNMS(LDK(KP683113946), T2n, T26);
Chris@42 487 T2l = VFMA(LDK(KP559016994), T2k, T2j);
Chris@42 488 }
Chris@42 489 }
Chris@42 490 }
Chris@42 491 }
Chris@42 492 }
Chris@42 493 }
Chris@42 494 }
Chris@42 495 {
Chris@42 496 V T29, T2s, T2p, T2m;
Chris@42 497 T29 = VFNMS(LDK(KP876091699), T28, T1Z);
Chris@42 498 ST(&(x[WS(rs, 16)]), VFMAI(T2W, T2Q), ms, &(x[0]));
Chris@42 499 ST(&(x[WS(rs, 9)]), VFNMSI(T2W, T2Q), ms, &(x[WS(rs, 1)]));
Chris@42 500 T2s = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T2r, T2g));
Chris@42 501 ST(&(x[WS(rs, 24)]), VFNMSI(T2u, T2t), ms, &(x[0]));
Chris@42 502 ST(&(x[WS(rs, 1)]), VFMAI(T2u, T2t), ms, &(x[WS(rs, 1)]));
Chris@42 503 T2p = VFMA(LDK(KP792626838), T2o, T1Z);
Chris@42 504 T2m = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T2l, T2g));
Chris@42 505 ST(&(x[WS(rs, 11)]), VFMAI(T2s, T2p), ms, &(x[WS(rs, 1)]));
Chris@42 506 ST(&(x[WS(rs, 14)]), VFNMSI(T2s, T2p), ms, &(x[0]));
Chris@42 507 ST(&(x[WS(rs, 19)]), VFNMSI(T2m, T29), ms, &(x[WS(rs, 1)]));
Chris@42 508 ST(&(x[WS(rs, 6)]), VFMAI(T2m, T29), ms, &(x[0]));
Chris@42 509 }
Chris@42 510 }
Chris@42 511 }
Chris@42 512 VLEAVE();
Chris@42 513 }
Chris@42 514
Chris@42 515 static const tw_instr twinstr[] = {
Chris@42 516 VTW(0, 1),
Chris@42 517 VTW(0, 3),
Chris@42 518 VTW(0, 9),
Chris@42 519 VTW(0, 24),
Chris@42 520 {TW_NEXT, VL, 0}
Chris@42 521 };
Chris@42 522
Chris@42 523 static const ct_desc desc = { 25, XSIMD_STRING("t3bv_25"), twinstr, &GENUS, {87, 100, 181, 0}, 0, 0, 0 };
Chris@42 524
Chris@42 525 void XSIMD(codelet_t3bv_25) (planner *p) {
Chris@42 526 X(kdft_dit_register) (p, t3bv_25, &desc);
Chris@42 527 }
Chris@42 528 #else /* HAVE_FMA */
Chris@42 529
Chris@42 530 /* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 25 -name t3bv_25 -include t3b.h -sign 1 */
Chris@42 531
Chris@42 532 /*
Chris@42 533 * This function contains 268 FP additions, 228 FP multiplications,
Chris@42 534 * (or, 191 additions, 151 multiplications, 77 fused multiply/add),
Chris@42 535 * 124 stack variables, 40 constants, and 50 memory accesses
Chris@42 536 */
Chris@42 537 #include "t3b.h"
Chris@42 538
Chris@42 539 static void t3bv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@42 540 {
Chris@42 541 DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
Chris@42 542 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@42 543 DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@42 544 DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
Chris@42 545 DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
Chris@42 546 DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
Chris@42 547 DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
Chris@42 548 DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@42 549 DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@42 550 DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
Chris@42 551 DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
Chris@42 552 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@42 553 DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@42 554 DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
Chris@42 555 DVK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@42 556 DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
Chris@42 557 DVK(KP851558583, +0.851558583130145297725004891488503407959946084);
Chris@42 558 DVK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@42 559 DVK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@42 560 DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
Chris@42 561 DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
Chris@42 562 DVK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@42 563 DVK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@42 564 DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
Chris@42 565 DVK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@42 566 DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
Chris@42 567 DVK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@42 568 DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
Chris@42 569 DVK(KP125581039, +0.125581039058626752152356449131262266244969664);
Chris@42 570 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@42 571 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@42 572 DVK(KP250666467, +0.250666467128608490746237519633017587885836494);
Chris@42 573 DVK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@42 574 DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
Chris@42 575 DVK(KP293892626, +0.293892626146236564584352977319536384298826219);
Chris@42 576 DVK(KP475528258, +0.475528258147576786058219666689691071702849317);
Chris@42 577 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 578 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 579 DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 580 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 581 {
Chris@42 582 INT m;
Chris@42 583 R *x;
Chris@42 584 x = ii;
Chris@42 585 for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(25, rs)) {
Chris@42 586 V T1, Td, T8, T9, TF, Te, Tu, TB, TC, T1s, T15, Tf, TY, T4, Ta;
Chris@42 587 V Tx, T1T, Tg, T1N, T1v, T18, TG, T1o, T11;
Chris@42 588 T1 = LDW(&(W[TWVL * 4]));
Chris@42 589 Td = LDW(&(W[TWVL * 2]));
Chris@42 590 T8 = LDW(&(W[0]));
Chris@42 591 T9 = VZMUL(T8, T1);
Chris@42 592 TF = VZMULJ(T8, T1);
Chris@42 593 Te = VZMUL(T8, Td);
Chris@42 594 Tu = VZMULJ(Td, T1);
Chris@42 595 TB = VZMULJ(T8, Td);
Chris@42 596 TC = VZMUL(TB, T1);
Chris@42 597 T1s = VZMUL(Te, T1);
Chris@42 598 T15 = VZMUL(Td, T1);
Chris@42 599 Tf = VZMULJ(Te, T1);
Chris@42 600 TY = VZMULJ(TB, T1);
Chris@42 601 T4 = LDW(&(W[TWVL * 6]));
Chris@42 602 Ta = VZMULJ(T9, T4);
Chris@42 603 Tx = VZMULJ(Td, T4);
Chris@42 604 T1T = VZMULJ(T1, T4);
Chris@42 605 Tg = VZMULJ(Tf, T4);
Chris@42 606 T1N = VZMULJ(Te, T4);
Chris@42 607 T1v = VZMULJ(Tu, T4);
Chris@42 608 T18 = VZMULJ(TY, T4);
Chris@42 609 TG = VZMULJ(TF, T4);
Chris@42 610 T1o = VZMULJ(T8, T4);
Chris@42 611 T11 = VZMULJ(TB, T4);
Chris@42 612 {
Chris@42 613 V T1Y, T1X, T2f, T2g, T1Z, T20, T2e, T39, T1H, T2T, T1E, T3C, T2S, Tk, T2G;
Chris@42 614 V Ts, T3z, T2F, TK, T2I, TS, T3y, T2J, T1k, T2Q, T1h, T3B, T2P;
Chris@42 615 {
Chris@42 616 V T1S, T1V, T1W, T1M, T1P, T1Q, T2d;
Chris@42 617 T1Y = LD(&(x[0]), ms, &(x[0]));
Chris@42 618 {
Chris@42 619 V T1R, T1U, T1L, T1O;
Chris@42 620 T1R = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
Chris@42 621 T1S = VZMUL(T9, T1R);
Chris@42 622 T1U = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
Chris@42 623 T1V = VZMUL(T1T, T1U);
Chris@42 624 T1W = VADD(T1S, T1V);
Chris@42 625 T1L = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
Chris@42 626 T1M = VZMUL(Tf, T1L);
Chris@42 627 T1O = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
Chris@42 628 T1P = VZMUL(T1N, T1O);
Chris@42 629 T1Q = VADD(T1M, T1P);
Chris@42 630 }
Chris@42 631 T1X = VMUL(LDK(KP559016994), VSUB(T1Q, T1W));
Chris@42 632 T2f = VSUB(T1S, T1V);
Chris@42 633 T2g = VMUL(LDK(KP587785252), T2f);
Chris@42 634 T1Z = VADD(T1Q, T1W);
Chris@42 635 T20 = VFNMS(LDK(KP250000000), T1Z, T1Y);
Chris@42 636 T2d = VSUB(T1M, T1P);
Chris@42 637 T2e = VMUL(LDK(KP951056516), T2d);
Chris@42 638 T39 = VMUL(LDK(KP587785252), T2d);
Chris@42 639 }
Chris@42 640 {
Chris@42 641 V T1B, T1u, T1x, T1y, T1n, T1q, T1r, T1A;
Chris@42 642 T1A = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Chris@42 643 T1B = VZMUL(Td, T1A);
Chris@42 644 {
Chris@42 645 V T1t, T1w, T1m, T1p;
Chris@42 646 T1t = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
Chris@42 647 T1u = VZMUL(T1s, T1t);
Chris@42 648 T1w = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
Chris@42 649 T1x = VZMUL(T1v, T1w);
Chris@42 650 T1y = VADD(T1u, T1x);
Chris@42 651 T1m = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
Chris@42 652 T1n = VZMUL(TF, T1m);
Chris@42 653 T1p = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
Chris@42 654 T1q = VZMUL(T1o, T1p);
Chris@42 655 T1r = VADD(T1n, T1q);
Chris@42 656 }
Chris@42 657 {
Chris@42 658 V T1F, T1G, T1z, T1C, T1D;
Chris@42 659 T1F = VSUB(T1n, T1q);
Chris@42 660 T1G = VSUB(T1u, T1x);
Chris@42 661 T1H = VFMA(LDK(KP475528258), T1F, VMUL(LDK(KP293892626), T1G));
Chris@42 662 T2T = VFNMS(LDK(KP475528258), T1G, VMUL(LDK(KP293892626), T1F));
Chris@42 663 T1z = VMUL(LDK(KP559016994), VSUB(T1r, T1y));
Chris@42 664 T1C = VADD(T1r, T1y);
Chris@42 665 T1D = VFNMS(LDK(KP250000000), T1C, T1B);
Chris@42 666 T1E = VADD(T1z, T1D);
Chris@42 667 T3C = VADD(T1B, T1C);
Chris@42 668 T2S = VSUB(T1D, T1z);
Chris@42 669 }
Chris@42 670 }
Chris@42 671 {
Chris@42 672 V Tp, Tc, Ti, Tm, T3, T6, Tl, To;
Chris@42 673 To = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Chris@42 674 Tp = VZMUL(Te, To);
Chris@42 675 {
Chris@42 676 V Tb, Th, T2, T5;
Chris@42 677 Tb = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
Chris@42 678 Tc = VZMUL(Ta, Tb);
Chris@42 679 Th = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
Chris@42 680 Ti = VZMUL(Tg, Th);
Chris@42 681 Tm = VADD(Tc, Ti);
Chris@42 682 T2 = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
Chris@42 683 T3 = VZMUL(T1, T2);
Chris@42 684 T5 = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
Chris@42 685 T6 = VZMUL(T4, T5);
Chris@42 686 Tl = VADD(T3, T6);
Chris@42 687 }
Chris@42 688 {
Chris@42 689 V T7, Tj, Tn, Tq, Tr;
Chris@42 690 T7 = VSUB(T3, T6);
Chris@42 691 Tj = VSUB(Tc, Ti);
Chris@42 692 Tk = VFMA(LDK(KP475528258), T7, VMUL(LDK(KP293892626), Tj));
Chris@42 693 T2G = VFNMS(LDK(KP475528258), Tj, VMUL(LDK(KP293892626), T7));
Chris@42 694 Tn = VMUL(LDK(KP559016994), VSUB(Tl, Tm));
Chris@42 695 Tq = VADD(Tl, Tm);
Chris@42 696 Tr = VFNMS(LDK(KP250000000), Tq, Tp);
Chris@42 697 Ts = VADD(Tn, Tr);
Chris@42 698 T3z = VADD(Tp, Tq);
Chris@42 699 T2F = VSUB(Tr, Tn);
Chris@42 700 }
Chris@42 701 }
Chris@42 702 {
Chris@42 703 V TP, TE, TI, TM, Tw, Tz, TL, TO;
Chris@42 704 TO = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
Chris@42 705 TP = VZMUL(T8, TO);
Chris@42 706 {
Chris@42 707 V TD, TH, Tv, Ty;
Chris@42 708 TD = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
Chris@42 709 TE = VZMUL(TC, TD);
Chris@42 710 TH = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
Chris@42 711 TI = VZMUL(TG, TH);
Chris@42 712 TM = VADD(TE, TI);
Chris@42 713 Tv = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Chris@42 714 Tw = VZMUL(Tu, Tv);
Chris@42 715 Ty = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
Chris@42 716 Tz = VZMUL(Tx, Ty);
Chris@42 717 TL = VADD(Tw, Tz);
Chris@42 718 }
Chris@42 719 {
Chris@42 720 V TA, TJ, TN, TQ, TR;
Chris@42 721 TA = VSUB(Tw, Tz);
Chris@42 722 TJ = VSUB(TE, TI);
Chris@42 723 TK = VFMA(LDK(KP475528258), TA, VMUL(LDK(KP293892626), TJ));
Chris@42 724 T2I = VFNMS(LDK(KP475528258), TJ, VMUL(LDK(KP293892626), TA));
Chris@42 725 TN = VMUL(LDK(KP559016994), VSUB(TL, TM));
Chris@42 726 TQ = VADD(TL, TM);
Chris@42 727 TR = VFNMS(LDK(KP250000000), TQ, TP);
Chris@42 728 TS = VADD(TN, TR);
Chris@42 729 T3y = VADD(TP, TQ);
Chris@42 730 T2J = VSUB(TR, TN);
Chris@42 731 }
Chris@42 732 }
Chris@42 733 {
Chris@42 734 V T1e, T17, T1a, T1b, T10, T13, T14, T1d;
Chris@42 735 T1d = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Chris@42 736 T1e = VZMUL(TB, T1d);
Chris@42 737 {
Chris@42 738 V T16, T19, TZ, T12;
Chris@42 739 T16 = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
Chris@42 740 T17 = VZMUL(T15, T16);
Chris@42 741 T19 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
Chris@42 742 T1a = VZMUL(T18, T19);
Chris@42 743 T1b = VADD(T17, T1a);
Chris@42 744 TZ = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
Chris@42 745 T10 = VZMUL(TY, TZ);
Chris@42 746 T12 = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
Chris@42 747 T13 = VZMUL(T11, T12);
Chris@42 748 T14 = VADD(T10, T13);
Chris@42 749 }
Chris@42 750 {
Chris@42 751 V T1i, T1j, T1c, T1f, T1g;
Chris@42 752 T1i = VSUB(T10, T13);
Chris@42 753 T1j = VSUB(T17, T1a);
Chris@42 754 T1k = VFMA(LDK(KP475528258), T1i, VMUL(LDK(KP293892626), T1j));
Chris@42 755 T2Q = VFNMS(LDK(KP475528258), T1j, VMUL(LDK(KP293892626), T1i));
Chris@42 756 T1c = VMUL(LDK(KP559016994), VSUB(T14, T1b));
Chris@42 757 T1f = VADD(T14, T1b);
Chris@42 758 T1g = VFNMS(LDK(KP250000000), T1f, T1e);
Chris@42 759 T1h = VADD(T1c, T1g);
Chris@42 760 T3B = VADD(T1e, T1f);
Chris@42 761 T2P = VSUB(T1g, T1c);
Chris@42 762 }
Chris@42 763 }
Chris@42 764 {
Chris@42 765 V T3E, T3M, T3I, T3J, T3H, T3K, T3N, T3L;
Chris@42 766 {
Chris@42 767 V T3A, T3D, T3F, T3G;
Chris@42 768 T3A = VSUB(T3y, T3z);
Chris@42 769 T3D = VSUB(T3B, T3C);
Chris@42 770 T3E = VBYI(VFMA(LDK(KP951056516), T3A, VMUL(LDK(KP587785252), T3D)));
Chris@42 771 T3M = VBYI(VFNMS(LDK(KP951056516), T3D, VMUL(LDK(KP587785252), T3A)));
Chris@42 772 T3I = VADD(T1Y, T1Z);
Chris@42 773 T3F = VADD(T3y, T3z);
Chris@42 774 T3G = VADD(T3B, T3C);
Chris@42 775 T3J = VADD(T3F, T3G);
Chris@42 776 T3H = VMUL(LDK(KP559016994), VSUB(T3F, T3G));
Chris@42 777 T3K = VFNMS(LDK(KP250000000), T3J, T3I);
Chris@42 778 }
Chris@42 779 ST(&(x[0]), VADD(T3I, T3J), ms, &(x[0]));
Chris@42 780 T3N = VSUB(T3K, T3H);
Chris@42 781 ST(&(x[WS(rs, 10)]), VADD(T3M, T3N), ms, &(x[0]));
Chris@42 782 ST(&(x[WS(rs, 15)]), VSUB(T3N, T3M), ms, &(x[WS(rs, 1)]));
Chris@42 783 T3L = VADD(T3H, T3K);
Chris@42 784 ST(&(x[WS(rs, 5)]), VADD(T3E, T3L), ms, &(x[WS(rs, 1)]));
Chris@42 785 ST(&(x[WS(rs, 20)]), VSUB(T3L, T3E), ms, &(x[0]));
Chris@42 786 }
Chris@42 787 {
Chris@42 788 V T2X, T3a, T3i, T3j, T3k, T3s, T3t, T3u, T3l, T3m, T3n, T3p, T3q, T3r, T2L;
Chris@42 789 V T3b, T32, T38, T2W, T35, T2Y, T34, T3w, T3x;
Chris@42 790 T2X = VSUB(T20, T1X);
Chris@42 791 T3a = VFNMS(LDK(KP951056516), T2f, T39);
Chris@42 792 T3i = VFMA(LDK(KP1_369094211), T2I, VMUL(LDK(KP728968627), T2J));
Chris@42 793 T3j = VFNMS(LDK(KP992114701), T2F, VMUL(LDK(KP250666467), T2G));
Chris@42 794 T3k = VADD(T3i, T3j);
Chris@42 795 T3s = VFNMS(LDK(KP125581039), T2Q, VMUL(LDK(KP998026728), T2P));
Chris@42 796 T3t = VFMA(LDK(KP1_274847979), T2T, VMUL(LDK(KP770513242), T2S));
Chris@42 797 T3u = VADD(T3s, T3t);
Chris@42 798 T3l = VFMA(LDK(KP1_996053456), T2Q, VMUL(LDK(KP062790519), T2P));
Chris@42 799 T3m = VFNMS(LDK(KP637423989), T2S, VMUL(LDK(KP1_541026485), T2T));
Chris@42 800 T3n = VADD(T3l, T3m);
Chris@42 801 T3p = VFNMS(LDK(KP1_457937254), T2I, VMUL(LDK(KP684547105), T2J));
Chris@42 802 T3q = VFMA(LDK(KP1_984229402), T2G, VMUL(LDK(KP125333233), T2F));
Chris@42 803 T3r = VADD(T3p, T3q);
Chris@42 804 {
Chris@42 805 V T2H, T2K, T36, T30, T31, T37;
Chris@42 806 T2H = VFNMS(LDK(KP851558583), T2G, VMUL(LDK(KP904827052), T2F));
Chris@42 807 T2K = VFMA(LDK(KP1_752613360), T2I, VMUL(LDK(KP481753674), T2J));
Chris@42 808 T36 = VADD(T2K, T2H);
Chris@42 809 T30 = VFMA(LDK(KP1_071653589), T2Q, VMUL(LDK(KP844327925), T2P));
Chris@42 810 T31 = VFMA(LDK(KP125581039), T2T, VMUL(LDK(KP998026728), T2S));
Chris@42 811 T37 = VADD(T30, T31);
Chris@42 812 T2L = VSUB(T2H, T2K);
Chris@42 813 T3b = VADD(T36, T37);
Chris@42 814 T32 = VSUB(T30, T31);
Chris@42 815 T38 = VMUL(LDK(KP559016994), VSUB(T36, T37));
Chris@42 816 }
Chris@42 817 {
Chris@42 818 V T2M, T2N, T2O, T2R, T2U, T2V;
Chris@42 819 T2M = VFNMS(LDK(KP963507348), T2I, VMUL(LDK(KP876306680), T2J));
Chris@42 820 T2N = VFMA(LDK(KP1_809654104), T2G, VMUL(LDK(KP425779291), T2F));
Chris@42 821 T2O = VSUB(T2M, T2N);
Chris@42 822 T2R = VFNMS(LDK(KP1_688655851), T2Q, VMUL(LDK(KP535826794), T2P));
Chris@42 823 T2U = VFNMS(LDK(KP1_996053456), T2T, VMUL(LDK(KP062790519), T2S));
Chris@42 824 T2V = VADD(T2R, T2U);
Chris@42 825 T2W = VMUL(LDK(KP559016994), VSUB(T2O, T2V));
Chris@42 826 T35 = VSUB(T2R, T2U);
Chris@42 827 T2Y = VADD(T2O, T2V);
Chris@42 828 T34 = VADD(T2M, T2N);
Chris@42 829 }
Chris@42 830 {
Chris@42 831 V T3g, T3h, T3o, T3v;
Chris@42 832 T3g = VADD(T2X, T2Y);
Chris@42 833 T3h = VBYI(VADD(T3a, T3b));
Chris@42 834 ST(&(x[WS(rs, 23)]), VSUB(T3g, T3h), ms, &(x[WS(rs, 1)]));
Chris@42 835 ST(&(x[WS(rs, 2)]), VADD(T3g, T3h), ms, &(x[0]));
Chris@42 836 T3o = VADD(T2X, VADD(T3k, T3n));
Chris@42 837 T3v = VBYI(VSUB(VADD(T3r, T3u), T3a));
Chris@42 838 ST(&(x[WS(rs, 22)]), VSUB(T3o, T3v), ms, &(x[0]));
Chris@42 839 ST(&(x[WS(rs, 3)]), VADD(T3o, T3v), ms, &(x[WS(rs, 1)]));
Chris@42 840 }
Chris@42 841 T3w = VBYI(VSUB(VFMA(LDK(KP951056516), VSUB(T3i, T3j), VFMA(LDK(KP309016994), T3r, VFNMS(LDK(KP809016994), T3u, VMUL(LDK(KP587785252), VSUB(T3l, T3m))))), T3a));
Chris@42 842 T3x = VFMA(LDK(KP309016994), T3k, VFMA(LDK(KP951056516), VSUB(T3q, T3p), VFMA(LDK(KP587785252), VSUB(T3t, T3s), VFNMS(LDK(KP809016994), T3n, T2X))));
Chris@42 843 ST(&(x[WS(rs, 8)]), VADD(T3w, T3x), ms, &(x[0]));
Chris@42 844 ST(&(x[WS(rs, 17)]), VSUB(T3x, T3w), ms, &(x[WS(rs, 1)]));
Chris@42 845 {
Chris@42 846 V T33, T3e, T3d, T3f, T2Z, T3c;
Chris@42 847 T2Z = VFNMS(LDK(KP250000000), T2Y, T2X);
Chris@42 848 T33 = VFMA(LDK(KP951056516), T2L, VADD(T2W, VFNMS(LDK(KP587785252), T32, T2Z)));
Chris@42 849 T3e = VFMA(LDK(KP587785252), T2L, VFMA(LDK(KP951056516), T32, VSUB(T2Z, T2W)));
Chris@42 850 T3c = VFNMS(LDK(KP250000000), T3b, T3a);
Chris@42 851 T3d = VBYI(VADD(VFMA(LDK(KP951056516), T34, VMUL(LDK(KP587785252), T35)), VADD(T38, T3c)));
Chris@42 852 T3f = VBYI(VADD(VFNMS(LDK(KP951056516), T35, VMUL(LDK(KP587785252), T34)), VSUB(T3c, T38)));
Chris@42 853 ST(&(x[WS(rs, 18)]), VSUB(T33, T3d), ms, &(x[0]));
Chris@42 854 ST(&(x[WS(rs, 12)]), VADD(T3e, T3f), ms, &(x[0]));
Chris@42 855 ST(&(x[WS(rs, 7)]), VADD(T33, T3d), ms, &(x[WS(rs, 1)]));
Chris@42 856 ST(&(x[WS(rs, 13)]), VSUB(T3e, T3f), ms, &(x[WS(rs, 1)]));
Chris@42 857 }
Chris@42 858 }
Chris@42 859 {
Chris@42 860 V T21, T2h, T2p, T2q, T2r, T2z, T2A, T2B, T2s, T2t, T2u, T2w, T2x, T2y, TU;
Chris@42 861 V T2i, T26, T2c, T1K, T29, T22, T28, T2D, T2E;
Chris@42 862 T21 = VADD(T1X, T20);
Chris@42 863 T2h = VADD(T2e, T2g);
Chris@42 864 T2p = VFMA(LDK(KP1_688655851), TK, VMUL(LDK(KP535826794), TS));
Chris@42 865 T2q = VFMA(LDK(KP1_541026485), Tk, VMUL(LDK(KP637423989), Ts));
Chris@42 866 T2r = VSUB(T2p, T2q);
Chris@42 867 T2z = VFMA(LDK(KP851558583), T1k, VMUL(LDK(KP904827052), T1h));
Chris@42 868 T2A = VFMA(LDK(KP1_984229402), T1H, VMUL(LDK(KP125333233), T1E));
Chris@42 869 T2B = VADD(T2z, T2A);
Chris@42 870 T2s = VFNMS(LDK(KP425779291), T1h, VMUL(LDK(KP1_809654104), T1k));
Chris@42 871 T2t = VFNMS(LDK(KP992114701), T1E, VMUL(LDK(KP250666467), T1H));
Chris@42 872 T2u = VADD(T2s, T2t);
Chris@42 873 T2w = VFNMS(LDK(KP1_071653589), TK, VMUL(LDK(KP844327925), TS));
Chris@42 874 T2x = VFNMS(LDK(KP770513242), Ts, VMUL(LDK(KP1_274847979), Tk));
Chris@42 875 T2y = VADD(T2w, T2x);
Chris@42 876 {
Chris@42 877 V Tt, TT, T2a, T24, T25, T2b;
Chris@42 878 Tt = VFMA(LDK(KP1_071653589), Tk, VMUL(LDK(KP844327925), Ts));
Chris@42 879 TT = VFMA(LDK(KP1_937166322), TK, VMUL(LDK(KP248689887), TS));
Chris@42 880 T2a = VADD(TT, Tt);
Chris@42 881 T24 = VFMA(LDK(KP1_752613360), T1k, VMUL(LDK(KP481753674), T1h));
Chris@42 882 T25 = VFMA(LDK(KP1_457937254), T1H, VMUL(LDK(KP684547105), T1E));
Chris@42 883 T2b = VADD(T24, T25);
Chris@42 884 TU = VSUB(Tt, TT);
Chris@42 885 T2i = VADD(T2a, T2b);
Chris@42 886 T26 = VSUB(T24, T25);
Chris@42 887 T2c = VMUL(LDK(KP559016994), VSUB(T2a, T2b));
Chris@42 888 }
Chris@42 889 {
Chris@42 890 V TV, TW, TX, T1l, T1I, T1J;
Chris@42 891 TV = VFNMS(LDK(KP497379774), TK, VMUL(LDK(KP968583161), TS));
Chris@42 892 TW = VFNMS(LDK(KP1_688655851), Tk, VMUL(LDK(KP535826794), Ts));
Chris@42 893 TX = VADD(TV, TW);
Chris@42 894 T1l = VFNMS(LDK(KP963507348), T1k, VMUL(LDK(KP876306680), T1h));
Chris@42 895 T1I = VFNMS(LDK(KP1_369094211), T1H, VMUL(LDK(KP728968627), T1E));
Chris@42 896 T1J = VADD(T1l, T1I);
Chris@42 897 T1K = VMUL(LDK(KP559016994), VSUB(TX, T1J));
Chris@42 898 T29 = VSUB(T1l, T1I);
Chris@42 899 T22 = VADD(TX, T1J);
Chris@42 900 T28 = VSUB(TV, TW);
Chris@42 901 }
Chris@42 902 {
Chris@42 903 V T2n, T2o, T2v, T2C;
Chris@42 904 T2n = VADD(T21, T22);
Chris@42 905 T2o = VBYI(VADD(T2h, T2i));
Chris@42 906 ST(&(x[WS(rs, 24)]), VSUB(T2n, T2o), ms, &(x[0]));
Chris@42 907 ST(&(x[WS(rs, 1)]), VADD(T2n, T2o), ms, &(x[WS(rs, 1)]));
Chris@42 908 T2v = VADD(T21, VADD(T2r, T2u));
Chris@42 909 T2C = VBYI(VSUB(VADD(T2y, T2B), T2h));
Chris@42 910 ST(&(x[WS(rs, 21)]), VSUB(T2v, T2C), ms, &(x[WS(rs, 1)]));
Chris@42 911 ST(&(x[WS(rs, 4)]), VADD(T2v, T2C), ms, &(x[0]));
Chris@42 912 }
Chris@42 913 T2D = VBYI(VSUB(VFMA(LDK(KP309016994), T2y, VFMA(LDK(KP951056516), VADD(T2p, T2q), VFNMS(LDK(KP809016994), T2B, VMUL(LDK(KP587785252), VSUB(T2s, T2t))))), T2h));
Chris@42 914 T2E = VFMA(LDK(KP951056516), VSUB(T2x, T2w), VFMA(LDK(KP309016994), T2r, VFMA(LDK(KP587785252), VSUB(T2A, T2z), VFNMS(LDK(KP809016994), T2u, T21))));
Chris@42 915 ST(&(x[WS(rs, 9)]), VADD(T2D, T2E), ms, &(x[WS(rs, 1)]));
Chris@42 916 ST(&(x[WS(rs, 16)]), VSUB(T2E, T2D), ms, &(x[0]));
Chris@42 917 {
Chris@42 918 V T27, T2l, T2k, T2m, T23, T2j;
Chris@42 919 T23 = VFNMS(LDK(KP250000000), T22, T21);
Chris@42 920 T27 = VFMA(LDK(KP951056516), TU, VADD(T1K, VFNMS(LDK(KP587785252), T26, T23)));
Chris@42 921 T2l = VFMA(LDK(KP587785252), TU, VFMA(LDK(KP951056516), T26, VSUB(T23, T1K)));
Chris@42 922 T2j = VFNMS(LDK(KP250000000), T2i, T2h);
Chris@42 923 T2k = VBYI(VADD(VFMA(LDK(KP951056516), T28, VMUL(LDK(KP587785252), T29)), VADD(T2c, T2j)));
Chris@42 924 T2m = VBYI(VADD(VFNMS(LDK(KP951056516), T29, VMUL(LDK(KP587785252), T28)), VSUB(T2j, T2c)));
Chris@42 925 ST(&(x[WS(rs, 19)]), VSUB(T27, T2k), ms, &(x[WS(rs, 1)]));
Chris@42 926 ST(&(x[WS(rs, 11)]), VADD(T2l, T2m), ms, &(x[WS(rs, 1)]));
Chris@42 927 ST(&(x[WS(rs, 6)]), VADD(T27, T2k), ms, &(x[0]));
Chris@42 928 ST(&(x[WS(rs, 14)]), VSUB(T2l, T2m), ms, &(x[0]));
Chris@42 929 }
Chris@42 930 }
Chris@42 931 }
Chris@42 932 }
Chris@42 933 }
Chris@42 934 VLEAVE();
Chris@42 935 }
Chris@42 936
Chris@42 937 static const tw_instr twinstr[] = {
Chris@42 938 VTW(0, 1),
Chris@42 939 VTW(0, 3),
Chris@42 940 VTW(0, 9),
Chris@42 941 VTW(0, 24),
Chris@42 942 {TW_NEXT, VL, 0}
Chris@42 943 };
Chris@42 944
Chris@42 945 static const ct_desc desc = { 25, XSIMD_STRING("t3bv_25"), twinstr, &GENUS, {191, 151, 77, 0}, 0, 0, 0 };
Chris@42 946
Chris@42 947 void XSIMD(codelet_t3bv_25) (planner *p) {
Chris@42 948 X(kdft_dit_register) (p, t3bv_25, &desc);
Chris@42 949 }
Chris@42 950 #endif /* HAVE_FMA */