annotate src/fftw-3.3.8/dft/simd/common/t3bv_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:06:09 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_twiddle_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 25 -name t3bv_25 -include dft/simd/t3b.h -sign 1 */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 268 FP additions, 281 FP multiplications,
Chris@82 32 * (or, 87 additions, 100 multiplications, 181 fused multiply/add),
Chris@82 33 * 171 stack variables, 67 constants, and 50 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/simd/t3b.h"
Chris@82 36
Chris@82 37 static void t3bv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DVK(KP617882369, +0.617882369114440893914546919006756321695042882);
Chris@82 40 DVK(KP792626838, +0.792626838241819413632131824093538848057784557);
Chris@82 41 DVK(KP876091699, +0.876091699473550838204498029706869638173524346);
Chris@82 42 DVK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@82 43 DVK(KP999544308, +0.999544308746292983948881682379742149196758193);
Chris@82 44 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 45 DVK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@82 46 DVK(KP916574801, +0.916574801383451584742370439148878693530976769);
Chris@82 47 DVK(KP269969613, +0.269969613759572083574752974412347470060951301);
Chris@82 48 DVK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@82 49 DVK(KP809385824, +0.809385824416008241660603814668679683846476688);
Chris@82 50 DVK(KP894834959, +0.894834959464455102997960030820114611498661386);
Chris@82 51 DVK(KP447417479, +0.447417479732227551498980015410057305749330693);
Chris@82 52 DVK(KP867381224, +0.867381224396525206773171885031575671309956167);
Chris@82 53 DVK(KP958953096, +0.958953096729998668045963838399037225970891871);
Chris@82 54 DVK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@82 55 DVK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@82 56 DVK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@82 57 DVK(KP829049696, +0.829049696159252993975487806364305442437946767);
Chris@82 58 DVK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@82 59 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@82 60 DVK(KP262346850, +0.262346850930607871785420028382979691334784273);
Chris@82 61 DVK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@82 62 DVK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@82 63 DVK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@82 64 DVK(KP897376177, +0.897376177523557693138608077137219684419427330);
Chris@82 65 DVK(KP855719849, +0.855719849902058969314654733608091555096772472);
Chris@82 66 DVK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@82 67 DVK(KP952936919, +0.952936919628306576880750665357914584765951388);
Chris@82 68 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 69 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 70 DVK(KP997675361, +0.997675361079556513670859573984492383596555031);
Chris@82 71 DVK(KP237294955, +0.237294955877110315393888866460840817927895961);
Chris@82 72 DVK(KP904508497, +0.904508497187473712051146708591409529430077295);
Chris@82 73 DVK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@82 74 DVK(KP923225144, +0.923225144846402650453449441572664695995209956);
Chris@82 75 DVK(KP921078979, +0.921078979742360627699756128143719920817673854);
Chris@82 76 DVK(KP578046249, +0.578046249379945007321754579646815604023525655);
Chris@82 77 DVK(KP763932022, +0.763932022500210303590826331268723764559381640);
Chris@82 78 DVK(KP956723877, +0.956723877038460305821989399535483155872969262);
Chris@82 79 DVK(KP690983005, +0.690983005625052575897706582817180941139845410);
Chris@82 80 DVK(KP945422727, +0.945422727388575946270360266328811958657216298);
Chris@82 81 DVK(KP522616830, +0.522616830205754336872861364785224694908468440);
Chris@82 82 DVK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@82 83 DVK(KP669429328, +0.669429328479476605641803240971985825917022098);
Chris@82 84 DVK(KP570584518, +0.570584518783621657366766175430996792655723863);
Chris@82 85 DVK(KP982009705, +0.982009705009746369461829878184175962711969869);
Chris@82 86 DVK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@82 87 DVK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@82 88 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 89 DVK(KP447533225, +0.447533225982656890041886979663652563063114397);
Chris@82 90 DVK(KP059835404, +0.059835404262124915169548397419498386427871950);
Chris@82 91 DVK(KP494780565, +0.494780565770515410344588413655324772219443730);
Chris@82 92 DVK(KP603558818, +0.603558818296015001454675132653458027918768137);
Chris@82 93 DVK(KP987388751, +0.987388751065621252324603216482382109400433949);
Chris@82 94 DVK(KP522847744, +0.522847744331509716623755382187077770911012542);
Chris@82 95 DVK(KP667278218, +0.667278218140296670899089292254759909713898805);
Chris@82 96 DVK(KP244189809, +0.244189809627953270309879511234821255780225091);
Chris@82 97 DVK(KP132830569, +0.132830569247582714407653942074819768844536507);
Chris@82 98 DVK(KP869845200, +0.869845200362138853122720822420327157933056305);
Chris@82 99 DVK(KP786782374, +0.786782374965295178365099601674911834788448471);
Chris@82 100 DVK(KP066152395, +0.066152395967733048213034281011006031460903353);
Chris@82 101 DVK(KP120146378, +0.120146378570687701782758537356596213647956445);
Chris@82 102 DVK(KP893101515, +0.893101515366181661711202267938416198338079437);
Chris@82 103 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 104 DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 105 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 106 {
Chris@82 107 INT m;
Chris@82 108 R *x;
Chris@82 109 x = ii;
Chris@82 110 for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(25, rs)) {
Chris@82 111 V T2, T5, T3, T4, TC, Te, Tr, Ty, Tz, T1I, T1l, T6, T1e, T9, Ta;
Chris@82 112 V Tu, T1L, Th, T1E, T1o, TX, TD, T1h, TU;
Chris@82 113 T2 = LDW(&(W[0]));
Chris@82 114 T5 = LDW(&(W[TWVL * 4]));
Chris@82 115 T3 = LDW(&(W[TWVL * 2]));
Chris@82 116 T4 = VZMUL(T2, T3);
Chris@82 117 TC = VZMULJ(T2, T5);
Chris@82 118 Te = VZMUL(T2, T5);
Chris@82 119 Tr = VZMULJ(T3, T5);
Chris@82 120 Ty = VZMULJ(T2, T3);
Chris@82 121 Tz = VZMUL(Ty, T5);
Chris@82 122 T1I = VZMUL(T4, T5);
Chris@82 123 T1l = VZMUL(T3, T5);
Chris@82 124 T6 = VZMULJ(T4, T5);
Chris@82 125 T1e = VZMULJ(Ty, T5);
Chris@82 126 T9 = LDW(&(W[TWVL * 6]));
Chris@82 127 Ta = VZMULJ(T4, T9);
Chris@82 128 Tu = VZMULJ(T3, T9);
Chris@82 129 T1L = VZMULJ(Tr, T9);
Chris@82 130 Th = VZMULJ(T5, T9);
Chris@82 131 T1E = VZMULJ(T2, T9);
Chris@82 132 T1o = VZMULJ(T1e, T9);
Chris@82 133 TX = VZMULJ(Te, T9);
Chris@82 134 TD = VZMULJ(TC, T9);
Chris@82 135 T1h = VZMULJ(Ty, T9);
Chris@82 136 TU = VZMULJ(T6, T9);
Chris@82 137 {
Chris@82 138 V T1, Tn, Tl, Tm, T2c, T3l, T4e, T1V, T38, T1S, T39, T1W, T2v, T3z, T3f;
Chris@82 139 V T3a, T2D, T4a, TN, T32, TK, T31, TO, T2y, T3C, T3i, T33, T2G, T4b, T11;
Chris@82 140 V T2Z, T19, T2Y, T1a, T2z, T3D, T3h, T30, T2H, T4d, T1y, T35, T1v, T36, T1z;
Chris@82 141 V T2w, T3A, T3e, T37, T2E;
Chris@82 142 {
Chris@82 143 V Tg, Tj, Tk, T8, Tc, Td, T2a, T2b;
Chris@82 144 T1 = LD(&(x[0]), ms, &(x[0]));
Chris@82 145 {
Chris@82 146 V Tf, Ti, T7, Tb;
Chris@82 147 Tf = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
Chris@82 148 Tg = VZMUL(Te, Tf);
Chris@82 149 Ti = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
Chris@82 150 Tj = VZMUL(Th, Ti);
Chris@82 151 Tk = VADD(Tg, Tj);
Chris@82 152 T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
Chris@82 153 T8 = VZMUL(T6, T7);
Chris@82 154 Tb = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
Chris@82 155 Tc = VZMUL(Ta, Tb);
Chris@82 156 Td = VADD(T8, Tc);
Chris@82 157 }
Chris@82 158 Tn = VSUB(Td, Tk);
Chris@82 159 Tl = VADD(Td, Tk);
Chris@82 160 Tm = VFNMS(LDK(KP250000000), Tl, T1);
Chris@82 161 T2a = VSUB(T8, Tc);
Chris@82 162 T2b = VSUB(Tg, Tj);
Chris@82 163 T2c = VFMA(LDK(KP618033988), T2b, T2a);
Chris@82 164 T3l = VFNMS(LDK(KP618033988), T2a, T2b);
Chris@82 165 }
Chris@82 166 {
Chris@82 167 V T1B, T1T, T1U, T1H, T1O, T1P, T1A, T1Q, T1R;
Chris@82 168 T1A = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Chris@82 169 T1B = VZMUL(T3, T1A);
Chris@82 170 {
Chris@82 171 V T1D, T1N, T1G, T1K;
Chris@82 172 {
Chris@82 173 V T1C, T1M, T1F, T1J;
Chris@82 174 T1C = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
Chris@82 175 T1D = VZMUL(TC, T1C);
Chris@82 176 T1M = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
Chris@82 177 T1N = VZMUL(T1L, T1M);
Chris@82 178 T1F = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
Chris@82 179 T1G = VZMUL(T1E, T1F);
Chris@82 180 T1J = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
Chris@82 181 T1K = VZMUL(T1I, T1J);
Chris@82 182 }
Chris@82 183 T1T = VSUB(T1D, T1G);
Chris@82 184 T1U = VSUB(T1K, T1N);
Chris@82 185 T1H = VADD(T1D, T1G);
Chris@82 186 T1O = VADD(T1K, T1N);
Chris@82 187 T1P = VADD(T1H, T1O);
Chris@82 188 }
Chris@82 189 T4e = VADD(T1B, T1P);
Chris@82 190 T1V = VFMA(LDK(KP618033988), T1U, T1T);
Chris@82 191 T38 = VFNMS(LDK(KP618033988), T1T, T1U);
Chris@82 192 T1Q = VFNMS(LDK(KP250000000), T1P, T1B);
Chris@82 193 T1R = VSUB(T1O, T1H);
Chris@82 194 T1S = VFNMS(LDK(KP559016994), T1R, T1Q);
Chris@82 195 T39 = VFMA(LDK(KP559016994), T1R, T1Q);
Chris@82 196 T1W = VFNMS(LDK(KP893101515), T1V, T1S);
Chris@82 197 T2v = VFNMS(LDK(KP120146378), T1V, T1S);
Chris@82 198 T3z = VFMA(LDK(KP066152395), T39, T38);
Chris@82 199 T3f = VFNMS(LDK(KP786782374), T38, T39);
Chris@82 200 T3a = VFMA(LDK(KP869845200), T39, T38);
Chris@82 201 T2D = VFMA(LDK(KP132830569), T1S, T1V);
Chris@82 202 }
Chris@82 203 {
Chris@82 204 V Tq, TL, TM, Tx, TG, TH, Tp, TI, TJ;
Chris@82 205 Tp = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
Chris@82 206 Tq = VZMUL(T2, Tp);
Chris@82 207 {
Chris@82 208 V Tt, TF, Tw, TB;
Chris@82 209 {
Chris@82 210 V Ts, TE, Tv, TA;
Chris@82 211 Ts = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Chris@82 212 Tt = VZMUL(Tr, Ts);
Chris@82 213 TE = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
Chris@82 214 TF = VZMUL(TD, TE);
Chris@82 215 Tv = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
Chris@82 216 Tw = VZMUL(Tu, Tv);
Chris@82 217 TA = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
Chris@82 218 TB = VZMUL(Tz, TA);
Chris@82 219 }
Chris@82 220 TL = VSUB(Tt, Tw);
Chris@82 221 TM = VSUB(TF, TB);
Chris@82 222 Tx = VADD(Tt, Tw);
Chris@82 223 TG = VADD(TB, TF);
Chris@82 224 TH = VADD(Tx, TG);
Chris@82 225 }
Chris@82 226 T4a = VADD(Tq, TH);
Chris@82 227 TN = VFNMS(LDK(KP618033988), TM, TL);
Chris@82 228 T32 = VFMA(LDK(KP618033988), TL, TM);
Chris@82 229 TI = VFNMS(LDK(KP250000000), TH, Tq);
Chris@82 230 TJ = VSUB(Tx, TG);
Chris@82 231 TK = VFMA(LDK(KP559016994), TJ, TI);
Chris@82 232 T31 = VFNMS(LDK(KP559016994), TJ, TI);
Chris@82 233 TO = VFNMS(LDK(KP244189809), TN, TK);
Chris@82 234 T2y = VFMA(LDK(KP667278218), TK, TN);
Chris@82 235 T3C = VFNMS(LDK(KP522847744), T32, T31);
Chris@82 236 T3i = VFNMS(LDK(KP987388751), T31, T32);
Chris@82 237 T33 = VFMA(LDK(KP893101515), T32, T31);
Chris@82 238 T2G = VFNMS(LDK(KP603558818), TN, TK);
Chris@82 239 }
Chris@82 240 {
Chris@82 241 V T13, TT, T10, T14, T15, T16, T12, T17, T18;
Chris@82 242 T12 = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Chris@82 243 T13 = VZMUL(T4, T12);
Chris@82 244 {
Chris@82 245 V TQ, TZ, TS, TW;
Chris@82 246 {
Chris@82 247 V TP, TY, TR, TV;
Chris@82 248 TP = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
Chris@82 249 TQ = VZMUL(T9, TP);
Chris@82 250 TY = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
Chris@82 251 TZ = VZMUL(TX, TY);
Chris@82 252 TR = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
Chris@82 253 TS = VZMUL(T5, TR);
Chris@82 254 TV = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
Chris@82 255 TW = VZMUL(TU, TV);
Chris@82 256 }
Chris@82 257 TT = VSUB(TQ, TS);
Chris@82 258 T10 = VSUB(TW, TZ);
Chris@82 259 T14 = VADD(TS, TQ);
Chris@82 260 T15 = VADD(TZ, TW);
Chris@82 261 T16 = VADD(T14, T15);
Chris@82 262 }
Chris@82 263 T4b = VADD(T13, T16);
Chris@82 264 T11 = VFMA(LDK(KP618033988), T10, TT);
Chris@82 265 T2Z = VFNMS(LDK(KP618033988), TT, T10);
Chris@82 266 T17 = VFMS(LDK(KP250000000), T16, T13);
Chris@82 267 T18 = VSUB(T14, T15);
Chris@82 268 T19 = VFNMS(LDK(KP559016994), T18, T17);
Chris@82 269 T2Y = VFMA(LDK(KP559016994), T18, T17);
Chris@82 270 T1a = VFNMS(LDK(KP667278218), T19, T11);
Chris@82 271 T2z = VFMA(LDK(KP869845200), T19, T11);
Chris@82 272 T3D = VFNMS(LDK(KP494780565), T2Y, T2Z);
Chris@82 273 T3h = VFNMS(LDK(KP132830569), T2Y, T2Z);
Chris@82 274 T30 = VFMA(LDK(KP120146378), T2Z, T2Y);
Chris@82 275 T2H = VFNMS(LDK(KP786782374), T11, T19);
Chris@82 276 }
Chris@82 277 {
Chris@82 278 V T1d, T1w, T1x, T1k, T1r, T1s, T1c, T1t, T1u;
Chris@82 279 T1c = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Chris@82 280 T1d = VZMUL(Ty, T1c);
Chris@82 281 {
Chris@82 282 V T1g, T1q, T1j, T1n;
Chris@82 283 {
Chris@82 284 V T1f, T1p, T1i, T1m;
Chris@82 285 T1f = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
Chris@82 286 T1g = VZMUL(T1e, T1f);
Chris@82 287 T1p = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
Chris@82 288 T1q = VZMUL(T1o, T1p);
Chris@82 289 T1i = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
Chris@82 290 T1j = VZMUL(T1h, T1i);
Chris@82 291 T1m = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
Chris@82 292 T1n = VZMUL(T1l, T1m);
Chris@82 293 }
Chris@82 294 T1w = VSUB(T1g, T1j);
Chris@82 295 T1x = VSUB(T1q, T1n);
Chris@82 296 T1k = VADD(T1g, T1j);
Chris@82 297 T1r = VADD(T1n, T1q);
Chris@82 298 T1s = VADD(T1k, T1r);
Chris@82 299 }
Chris@82 300 T4d = VADD(T1d, T1s);
Chris@82 301 T1y = VFNMS(LDK(KP618033988), T1x, T1w);
Chris@82 302 T35 = VFMA(LDK(KP618033988), T1w, T1x);
Chris@82 303 T1t = VFNMS(LDK(KP250000000), T1s, T1d);
Chris@82 304 T1u = VSUB(T1r, T1k);
Chris@82 305 T1v = VFNMS(LDK(KP559016994), T1u, T1t);
Chris@82 306 T36 = VFMA(LDK(KP559016994), T1u, T1t);
Chris@82 307 T1z = VFNMS(LDK(KP522847744), T1y, T1v);
Chris@82 308 T2w = VFNMS(LDK(KP494780565), T1v, T1y);
Chris@82 309 T3A = VFNMS(LDK(KP667278218), T36, T35);
Chris@82 310 T3e = VFNMS(LDK(KP059835404), T35, T36);
Chris@82 311 T37 = VFMA(LDK(KP066152395), T36, T35);
Chris@82 312 T2E = VFMA(LDK(KP447533225), T1y, T1v);
Chris@82 313 }
Chris@82 314 {
Chris@82 315 V T4m, T4o, T49, T4g, T4h, T4i, T4n, T4j;
Chris@82 316 {
Chris@82 317 V T4k, T4l, T4c, T4f;
Chris@82 318 T4k = VSUB(T4a, T4b);
Chris@82 319 T4l = VSUB(T4d, T4e);
Chris@82 320 T4m = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T4l, T4k));
Chris@82 321 T4o = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T4k, T4l));
Chris@82 322 T49 = VADD(T1, Tl);
Chris@82 323 T4c = VADD(T4a, T4b);
Chris@82 324 T4f = VADD(T4d, T4e);
Chris@82 325 T4g = VADD(T4c, T4f);
Chris@82 326 T4h = VFNMS(LDK(KP250000000), T4g, T49);
Chris@82 327 T4i = VSUB(T4c, T4f);
Chris@82 328 }
Chris@82 329 ST(&(x[0]), VADD(T4g, T49), ms, &(x[0]));
Chris@82 330 T4n = VFNMS(LDK(KP559016994), T4i, T4h);
Chris@82 331 ST(&(x[WS(rs, 10)]), VFNMSI(T4o, T4n), ms, &(x[0]));
Chris@82 332 ST(&(x[WS(rs, 15)]), VFMAI(T4o, T4n), ms, &(x[WS(rs, 1)]));
Chris@82 333 T4j = VFMA(LDK(KP559016994), T4i, T4h);
Chris@82 334 ST(&(x[WS(rs, 5)]), VFMAI(T4m, T4j), ms, &(x[WS(rs, 1)]));
Chris@82 335 ST(&(x[WS(rs, 20)]), VFNMSI(T4m, T4j), ms, &(x[0]));
Chris@82 336 }
Chris@82 337 {
Chris@82 338 V T3n, T3t, T3Z, T46, T3k, T3w, T3c, T3q, T2X, T3R, T3F, T3Q, T3N, T43, T3P;
Chris@82 339 V T3T, T40, T3X, T3Y;
Chris@82 340 T3n = VFMA(LDK(KP734762448), T3i, T3h);
Chris@82 341 T3t = VFNMS(LDK(KP734762448), T33, T30);
Chris@82 342 T3X = VFMA(LDK(KP845997307), T3A, T3z);
Chris@82 343 T3Y = VFMA(LDK(KP982009705), T3D, T3C);
Chris@82 344 T3Z = VFMA(LDK(KP570584518), T3Y, T3X);
Chris@82 345 T46 = VFNMS(LDK(KP669429328), T3X, T3Y);
Chris@82 346 {
Chris@82 347 V T3g, T3j, T3v, T3u;
Chris@82 348 T3g = VFMA(LDK(KP772036680), T3f, T3e);
Chris@82 349 T3j = VFNMS(LDK(KP734762448), T3i, T3h);
Chris@82 350 T3u = VFMA(LDK(KP772036680), T3a, T37);
Chris@82 351 T3v = VFMA(LDK(KP522616830), T3j, T3u);
Chris@82 352 T3k = VFMA(LDK(KP945422727), T3j, T3g);
Chris@82 353 T3w = VFNMS(LDK(KP690983005), T3v, T3g);
Chris@82 354 }
Chris@82 355 {
Chris@82 356 V T3b, T34, T3p, T3o;
Chris@82 357 T3b = VFNMS(LDK(KP772036680), T3a, T37);
Chris@82 358 T34 = VFMA(LDK(KP734762448), T33, T30);
Chris@82 359 T3o = VFNMS(LDK(KP772036680), T3f, T3e);
Chris@82 360 T3p = VFNMS(LDK(KP522616830), T34, T3o);
Chris@82 361 T3c = VFMA(LDK(KP956723877), T3b, T34);
Chris@82 362 T3q = VFMA(LDK(KP763932022), T3p, T3b);
Chris@82 363 }
Chris@82 364 {
Chris@82 365 V T3M, T3S, T3J, T3K, T3L;
Chris@82 366 T2X = VFNMS(LDK(KP559016994), Tn, Tm);
Chris@82 367 T3K = VFMA(LDK(KP447533225), T2Z, T2Y);
Chris@82 368 T3L = VFMA(LDK(KP578046249), T31, T32);
Chris@82 369 T3M = VFNMS(LDK(KP921078979), T3L, T3K);
Chris@82 370 T3R = VFMA(LDK(KP921078979), T3L, T3K);
Chris@82 371 {
Chris@82 372 V T3B, T3E, T3H, T3I;
Chris@82 373 T3B = VFNMS(LDK(KP845997307), T3A, T3z);
Chris@82 374 T3E = VFNMS(LDK(KP982009705), T3D, T3C);
Chris@82 375 T3F = VFMA(LDK(KP923225144), T3E, T3B);
Chris@82 376 T3S = VFNMS(LDK(KP923225144), T3E, T3B);
Chris@82 377 T3H = VFNMS(LDK(KP059835404), T38, T39);
Chris@82 378 T3I = VFMA(LDK(KP603558818), T35, T36);
Chris@82 379 T3J = VFMA(LDK(KP845997307), T3I, T3H);
Chris@82 380 T3Q = VFNMS(LDK(KP845997307), T3I, T3H);
Chris@82 381 }
Chris@82 382 T3N = VFNMS(LDK(KP906616052), T3M, T3J);
Chris@82 383 T43 = VFNMS(LDK(KP904508497), T3S, T3Q);
Chris@82 384 T3P = VFNMS(LDK(KP237294955), T3F, T2X);
Chris@82 385 T3T = VFNMS(LDK(KP997675361), T3S, T3R);
Chris@82 386 T40 = VFMA(LDK(KP906616052), T3M, T3J);
Chris@82 387 }
Chris@82 388 {
Chris@82 389 V T3d, T3m, T3G, T3O;
Chris@82 390 T3d = VFMA(LDK(KP992114701), T3c, T2X);
Chris@82 391 T3m = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T3l, T3k));
Chris@82 392 ST(&(x[WS(rs, 22)]), VFNMSI(T3m, T3d), ms, &(x[0]));
Chris@82 393 ST(&(x[WS(rs, 3)]), VFMAI(T3m, T3d), ms, &(x[WS(rs, 1)]));
Chris@82 394 T3G = VFMA(LDK(KP949179823), T3F, T2X);
Chris@82 395 T3O = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T3l, T3N));
Chris@82 396 ST(&(x[WS(rs, 23)]), VFNMSI(T3O, T3G), ms, &(x[WS(rs, 1)]));
Chris@82 397 ST(&(x[WS(rs, 2)]), VFMAI(T3O, T3G), ms, &(x[0]));
Chris@82 398 }
Chris@82 399 {
Chris@82 400 V T3s, T3y, T3r, T3x;
Chris@82 401 T3r = VFNMS(LDK(KP855719849), T3q, T3n);
Chris@82 402 T3s = VFMA(LDK(KP897376177), T3r, T2X);
Chris@82 403 T3x = VFMA(LDK(KP855719849), T3w, T3t);
Chris@82 404 T3y = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T3x, T3l));
Chris@82 405 ST(&(x[WS(rs, 8)]), VFMAI(T3y, T3s), ms, &(x[0]));
Chris@82 406 ST(&(x[WS(rs, 17)]), VFNMSI(T3y, T3s), ms, &(x[WS(rs, 1)]));
Chris@82 407 }
Chris@82 408 {
Chris@82 409 V T3V, T45, T42, T48, T3U;
Chris@82 410 T3U = VFMA(LDK(KP560319534), T3T, T3Q);
Chris@82 411 T3V = VFNMS(LDK(KP949179823), T3U, T3P);
Chris@82 412 {
Chris@82 413 V T44, T3W, T47, T41;
Chris@82 414 T44 = VFNMS(LDK(KP681693190), T43, T3R);
Chris@82 415 T45 = VFNMS(LDK(KP860541664), T44, T3P);
Chris@82 416 T3W = VFMA(LDK(KP262346850), T3N, T3l);
Chris@82 417 T47 = VFNMS(LDK(KP669429328), T40, T46);
Chris@82 418 T41 = VFMA(LDK(KP618033988), T40, T3Z);
Chris@82 419 T42 = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T41, T3W));
Chris@82 420 T48 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T47, T3W));
Chris@82 421 }
Chris@82 422 ST(&(x[WS(rs, 12)]), VFNMSI(T42, T3V), ms, &(x[0]));
Chris@82 423 ST(&(x[WS(rs, 18)]), VFMAI(T48, T45), ms, &(x[0]));
Chris@82 424 ST(&(x[WS(rs, 13)]), VFMAI(T42, T3V), ms, &(x[WS(rs, 1)]));
Chris@82 425 ST(&(x[WS(rs, 7)]), VFNMSI(T48, T45), ms, &(x[WS(rs, 1)]));
Chris@82 426 }
Chris@82 427 }
Chris@82 428 {
Chris@82 429 V T2L, T2R, T2j, T2q, T2J, T2U, T2B, T2O, To, T26, T1Y, T22, T1Z, T2n, T27;
Chris@82 430 V T2f, T2k, T2h, T2i;
Chris@82 431 T2L = VFNMS(LDK(KP912575812), T2H, T2G);
Chris@82 432 T2R = VFNMS(LDK(KP912575812), T2z, T2y);
Chris@82 433 T2h = VFNMS(LDK(KP829049696), T1a, TO);
Chris@82 434 T2i = VFNMS(LDK(KP831864738), T1W, T1z);
Chris@82 435 T2j = VFMA(LDK(KP559154169), T2i, T2h);
Chris@82 436 T2q = VFNMS(LDK(KP683113946), T2h, T2i);
Chris@82 437 {
Chris@82 438 V T2F, T2I, T2T, T2S;
Chris@82 439 T2F = VFMA(LDK(KP958953096), T2E, T2D);
Chris@82 440 T2I = VFMA(LDK(KP912575812), T2H, T2G);
Chris@82 441 T2S = VFMA(LDK(KP867381224), T2w, T2v);
Chris@82 442 T2T = VFMA(LDK(KP447417479), T2I, T2S);
Chris@82 443 T2J = VFMA(LDK(KP894834959), T2I, T2F);
Chris@82 444 T2U = VFNMS(LDK(KP763932022), T2T, T2F);
Chris@82 445 }
Chris@82 446 {
Chris@82 447 V T2x, T2A, T2N, T2M;
Chris@82 448 T2x = VFNMS(LDK(KP867381224), T2w, T2v);
Chris@82 449 T2A = VFMA(LDK(KP912575812), T2z, T2y);
Chris@82 450 T2M = VFNMS(LDK(KP958953096), T2E, T2D);
Chris@82 451 T2N = VFMA(LDK(KP447417479), T2A, T2M);
Chris@82 452 T2B = VFNMS(LDK(KP809385824), T2A, T2x);
Chris@82 453 T2O = VFMA(LDK(KP690983005), T2N, T2x);
Chris@82 454 }
Chris@82 455 {
Chris@82 456 V T2e, T23, T2d, T24, T25;
Chris@82 457 To = VFMA(LDK(KP559016994), Tn, Tm);
Chris@82 458 T24 = VFMA(LDK(KP578046249), T1v, T1y);
Chris@82 459 T25 = VFMA(LDK(KP987388751), T1S, T1V);
Chris@82 460 T26 = VFNMS(LDK(KP831864738), T25, T24);
Chris@82 461 T2e = VFMA(LDK(KP831864738), T25, T24);
Chris@82 462 {
Chris@82 463 V T1b, T1X, T20, T21;
Chris@82 464 T1b = VFMA(LDK(KP829049696), T1a, TO);
Chris@82 465 T1X = VFMA(LDK(KP831864738), T1W, T1z);
Chris@82 466 T1Y = VFMA(LDK(KP904730450), T1X, T1b);
Chris@82 467 T23 = VFNMS(LDK(KP904730450), T1X, T1b);
Chris@82 468 T20 = VFMA(LDK(KP269969613), TK, TN);
Chris@82 469 T21 = VFMA(LDK(KP603558818), T11, T19);
Chris@82 470 T22 = VFMA(LDK(KP916574801), T21, T20);
Chris@82 471 T2d = VFNMS(LDK(KP916574801), T21, T20);
Chris@82 472 }
Chris@82 473 T1Z = VFNMS(LDK(KP242145790), T1Y, To);
Chris@82 474 T2n = VADD(T22, T23);
Chris@82 475 T27 = VFNMS(LDK(KP904730450), T26, T23);
Chris@82 476 T2f = VFMA(LDK(KP904730450), T2e, T2d);
Chris@82 477 T2k = VFNMS(LDK(KP904730450), T2e, T2d);
Chris@82 478 }
Chris@82 479 {
Chris@82 480 V T2t, T2u, T2C, T2K;
Chris@82 481 T2t = VFMA(LDK(KP968583161), T1Y, To);
Chris@82 482 T2u = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T2f, T2c));
Chris@82 483 ST(&(x[WS(rs, 1)]), VFMAI(T2u, T2t), ms, &(x[WS(rs, 1)]));
Chris@82 484 ST(&(x[WS(rs, 24)]), VFNMSI(T2u, T2t), ms, &(x[0]));
Chris@82 485 T2C = VFNMS(LDK(KP992114701), T2B, To);
Chris@82 486 T2K = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2J, T2c));
Chris@82 487 ST(&(x[WS(rs, 4)]), VFNMSI(T2K, T2C), ms, &(x[0]));
Chris@82 488 ST(&(x[WS(rs, 21)]), VFMAI(T2K, T2C), ms, &(x[WS(rs, 1)]));
Chris@82 489 }
Chris@82 490 {
Chris@82 491 V T2Q, T2W, T2P, T2V;
Chris@82 492 T2P = VFNMS(LDK(KP999544308), T2O, T2L);
Chris@82 493 T2Q = VFNMS(LDK(KP803003575), T2P, To);
Chris@82 494 T2V = VFNMS(LDK(KP999544308), T2U, T2R);
Chris@82 495 T2W = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2V, T2c));
Chris@82 496 ST(&(x[WS(rs, 9)]), VFNMSI(T2W, T2Q), ms, &(x[WS(rs, 1)]));
Chris@82 497 ST(&(x[WS(rs, 16)]), VFMAI(T2W, T2Q), ms, &(x[0]));
Chris@82 498 }
Chris@82 499 {
Chris@82 500 V T29, T2p, T2m, T2s, T28;
Chris@82 501 T28 = VFNMS(LDK(KP618033988), T27, T22);
Chris@82 502 T29 = VFNMS(LDK(KP876091699), T28, T1Z);
Chris@82 503 {
Chris@82 504 V T2o, T2g, T2r, T2l;
Chris@82 505 T2o = VFNMS(LDK(KP683113946), T2n, T26);
Chris@82 506 T2p = VFMA(LDK(KP792626838), T2o, T1Z);
Chris@82 507 T2g = VFNMS(LDK(KP242145790), T2f, T2c);
Chris@82 508 T2r = VFMA(LDK(KP617882369), T2k, T2q);
Chris@82 509 T2l = VFMA(LDK(KP559016994), T2k, T2j);
Chris@82 510 T2m = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T2l, T2g));
Chris@82 511 T2s = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T2r, T2g));
Chris@82 512 }
Chris@82 513 ST(&(x[WS(rs, 6)]), VFMAI(T2m, T29), ms, &(x[0]));
Chris@82 514 ST(&(x[WS(rs, 14)]), VFNMSI(T2s, T2p), ms, &(x[0]));
Chris@82 515 ST(&(x[WS(rs, 19)]), VFNMSI(T2m, T29), ms, &(x[WS(rs, 1)]));
Chris@82 516 ST(&(x[WS(rs, 11)]), VFMAI(T2s, T2p), ms, &(x[WS(rs, 1)]));
Chris@82 517 }
Chris@82 518 }
Chris@82 519 }
Chris@82 520 }
Chris@82 521 }
Chris@82 522 VLEAVE();
Chris@82 523 }
Chris@82 524
Chris@82 525 static const tw_instr twinstr[] = {
Chris@82 526 VTW(0, 1),
Chris@82 527 VTW(0, 3),
Chris@82 528 VTW(0, 9),
Chris@82 529 VTW(0, 24),
Chris@82 530 {TW_NEXT, VL, 0}
Chris@82 531 };
Chris@82 532
Chris@82 533 static const ct_desc desc = { 25, XSIMD_STRING("t3bv_25"), twinstr, &GENUS, {87, 100, 181, 0}, 0, 0, 0 };
Chris@82 534
Chris@82 535 void XSIMD(codelet_t3bv_25) (planner *p) {
Chris@82 536 X(kdft_dit_register) (p, t3bv_25, &desc);
Chris@82 537 }
Chris@82 538 #else
Chris@82 539
Chris@82 540 /* Generated by: ../../../genfft/gen_twiddle_c.native -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -no-generate-bytw -n 25 -name t3bv_25 -include dft/simd/t3b.h -sign 1 */
Chris@82 541
Chris@82 542 /*
Chris@82 543 * This function contains 268 FP additions, 228 FP multiplications,
Chris@82 544 * (or, 191 additions, 151 multiplications, 77 fused multiply/add),
Chris@82 545 * 124 stack variables, 40 constants, and 50 memory accesses
Chris@82 546 */
Chris@82 547 #include "dft/simd/t3b.h"
Chris@82 548
Chris@82 549 static void t3bv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
Chris@82 550 {
Chris@82 551 DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
Chris@82 552 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 553 DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@82 554 DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
Chris@82 555 DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
Chris@82 556 DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
Chris@82 557 DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
Chris@82 558 DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@82 559 DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@82 560 DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
Chris@82 561 DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
Chris@82 562 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@82 563 DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@82 564 DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
Chris@82 565 DVK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@82 566 DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
Chris@82 567 DVK(KP851558583, +0.851558583130145297725004891488503407959946084);
Chris@82 568 DVK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@82 569 DVK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@82 570 DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
Chris@82 571 DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
Chris@82 572 DVK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@82 573 DVK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@82 574 DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
Chris@82 575 DVK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@82 576 DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
Chris@82 577 DVK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@82 578 DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
Chris@82 579 DVK(KP125581039, +0.125581039058626752152356449131262266244969664);
Chris@82 580 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 581 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 582 DVK(KP250666467, +0.250666467128608490746237519633017587885836494);
Chris@82 583 DVK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@82 584 DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
Chris@82 585 DVK(KP293892626, +0.293892626146236564584352977319536384298826219);
Chris@82 586 DVK(KP475528258, +0.475528258147576786058219666689691071702849317);
Chris@82 587 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 588 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 589 DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 590 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 591 {
Chris@82 592 INT m;
Chris@82 593 R *x;
Chris@82 594 x = ii;
Chris@82 595 for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(25, rs)) {
Chris@82 596 V T1, Td, T8, T9, TF, Te, Tu, TB, TC, T1s, T15, Tf, TY, T4, Ta;
Chris@82 597 V Tx, T1T, Tg, T1N, T1v, T18, TG, T1o, T11;
Chris@82 598 T1 = LDW(&(W[TWVL * 4]));
Chris@82 599 Td = LDW(&(W[TWVL * 2]));
Chris@82 600 T8 = LDW(&(W[0]));
Chris@82 601 T9 = VZMUL(T8, T1);
Chris@82 602 TF = VZMULJ(T8, T1);
Chris@82 603 Te = VZMUL(T8, Td);
Chris@82 604 Tu = VZMULJ(Td, T1);
Chris@82 605 TB = VZMULJ(T8, Td);
Chris@82 606 TC = VZMUL(TB, T1);
Chris@82 607 T1s = VZMUL(Te, T1);
Chris@82 608 T15 = VZMUL(Td, T1);
Chris@82 609 Tf = VZMULJ(Te, T1);
Chris@82 610 TY = VZMULJ(TB, T1);
Chris@82 611 T4 = LDW(&(W[TWVL * 6]));
Chris@82 612 Ta = VZMULJ(T9, T4);
Chris@82 613 Tx = VZMULJ(Td, T4);
Chris@82 614 T1T = VZMULJ(T1, T4);
Chris@82 615 Tg = VZMULJ(Tf, T4);
Chris@82 616 T1N = VZMULJ(Te, T4);
Chris@82 617 T1v = VZMULJ(Tu, T4);
Chris@82 618 T18 = VZMULJ(TY, T4);
Chris@82 619 TG = VZMULJ(TF, T4);
Chris@82 620 T1o = VZMULJ(T8, T4);
Chris@82 621 T11 = VZMULJ(TB, T4);
Chris@82 622 {
Chris@82 623 V T1Y, T1X, T2f, T2g, T1Z, T20, T2e, T39, T1H, T2T, T1E, T3C, T2S, Tk, T2G;
Chris@82 624 V Ts, T3z, T2F, TK, T2I, TS, T3y, T2J, T1k, T2Q, T1h, T3B, T2P;
Chris@82 625 {
Chris@82 626 V T1S, T1V, T1W, T1M, T1P, T1Q, T2d;
Chris@82 627 T1Y = LD(&(x[0]), ms, &(x[0]));
Chris@82 628 {
Chris@82 629 V T1R, T1U, T1L, T1O;
Chris@82 630 T1R = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
Chris@82 631 T1S = VZMUL(T9, T1R);
Chris@82 632 T1U = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
Chris@82 633 T1V = VZMUL(T1T, T1U);
Chris@82 634 T1W = VADD(T1S, T1V);
Chris@82 635 T1L = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
Chris@82 636 T1M = VZMUL(Tf, T1L);
Chris@82 637 T1O = LD(&(x[WS(rs, 20)]), ms, &(x[0]));
Chris@82 638 T1P = VZMUL(T1N, T1O);
Chris@82 639 T1Q = VADD(T1M, T1P);
Chris@82 640 }
Chris@82 641 T1X = VMUL(LDK(KP559016994), VSUB(T1Q, T1W));
Chris@82 642 T2f = VSUB(T1S, T1V);
Chris@82 643 T2g = VMUL(LDK(KP587785252), T2f);
Chris@82 644 T1Z = VADD(T1Q, T1W);
Chris@82 645 T20 = VFNMS(LDK(KP250000000), T1Z, T1Y);
Chris@82 646 T2d = VSUB(T1M, T1P);
Chris@82 647 T2e = VMUL(LDK(KP951056516), T2d);
Chris@82 648 T39 = VMUL(LDK(KP587785252), T2d);
Chris@82 649 }
Chris@82 650 {
Chris@82 651 V T1B, T1u, T1x, T1y, T1n, T1q, T1r, T1A;
Chris@82 652 T1A = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Chris@82 653 T1B = VZMUL(Td, T1A);
Chris@82 654 {
Chris@82 655 V T1t, T1w, T1m, T1p;
Chris@82 656 T1t = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
Chris@82 657 T1u = VZMUL(T1s, T1t);
Chris@82 658 T1w = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
Chris@82 659 T1x = VZMUL(T1v, T1w);
Chris@82 660 T1y = VADD(T1u, T1x);
Chris@82 661 T1m = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
Chris@82 662 T1n = VZMUL(TF, T1m);
Chris@82 663 T1p = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));
Chris@82 664 T1q = VZMUL(T1o, T1p);
Chris@82 665 T1r = VADD(T1n, T1q);
Chris@82 666 }
Chris@82 667 {
Chris@82 668 V T1F, T1G, T1z, T1C, T1D;
Chris@82 669 T1F = VSUB(T1n, T1q);
Chris@82 670 T1G = VSUB(T1u, T1x);
Chris@82 671 T1H = VFMA(LDK(KP475528258), T1F, VMUL(LDK(KP293892626), T1G));
Chris@82 672 T2T = VFNMS(LDK(KP475528258), T1G, VMUL(LDK(KP293892626), T1F));
Chris@82 673 T1z = VMUL(LDK(KP559016994), VSUB(T1r, T1y));
Chris@82 674 T1C = VADD(T1r, T1y);
Chris@82 675 T1D = VFNMS(LDK(KP250000000), T1C, T1B);
Chris@82 676 T1E = VADD(T1z, T1D);
Chris@82 677 T3C = VADD(T1B, T1C);
Chris@82 678 T2S = VSUB(T1D, T1z);
Chris@82 679 }
Chris@82 680 }
Chris@82 681 {
Chris@82 682 V Tp, Tc, Ti, Tm, T3, T6, Tl, To;
Chris@82 683 To = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Chris@82 684 Tp = VZMUL(Te, To);
Chris@82 685 {
Chris@82 686 V Tb, Th, T2, T5;
Chris@82 687 Tb = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
Chris@82 688 Tc = VZMUL(Ta, Tb);
Chris@82 689 Th = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
Chris@82 690 Ti = VZMUL(Tg, Th);
Chris@82 691 Tm = VADD(Tc, Ti);
Chris@82 692 T2 = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
Chris@82 693 T3 = VZMUL(T1, T2);
Chris@82 694 T5 = LD(&(x[WS(rs, 24)]), ms, &(x[0]));
Chris@82 695 T6 = VZMUL(T4, T5);
Chris@82 696 Tl = VADD(T3, T6);
Chris@82 697 }
Chris@82 698 {
Chris@82 699 V T7, Tj, Tn, Tq, Tr;
Chris@82 700 T7 = VSUB(T3, T6);
Chris@82 701 Tj = VSUB(Tc, Ti);
Chris@82 702 Tk = VFMA(LDK(KP475528258), T7, VMUL(LDK(KP293892626), Tj));
Chris@82 703 T2G = VFNMS(LDK(KP475528258), Tj, VMUL(LDK(KP293892626), T7));
Chris@82 704 Tn = VMUL(LDK(KP559016994), VSUB(Tl, Tm));
Chris@82 705 Tq = VADD(Tl, Tm);
Chris@82 706 Tr = VFNMS(LDK(KP250000000), Tq, Tp);
Chris@82 707 Ts = VADD(Tn, Tr);
Chris@82 708 T3z = VADD(Tp, Tq);
Chris@82 709 T2F = VSUB(Tr, Tn);
Chris@82 710 }
Chris@82 711 }
Chris@82 712 {
Chris@82 713 V TP, TE, TI, TM, Tw, Tz, TL, TO;
Chris@82 714 TO = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
Chris@82 715 TP = VZMUL(T8, TO);
Chris@82 716 {
Chris@82 717 V TD, TH, Tv, Ty;
Chris@82 718 TD = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
Chris@82 719 TE = VZMUL(TC, TD);
Chris@82 720 TH = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
Chris@82 721 TI = VZMUL(TG, TH);
Chris@82 722 TM = VADD(TE, TI);
Chris@82 723 Tv = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Chris@82 724 Tw = VZMUL(Tu, Tv);
Chris@82 725 Ty = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));
Chris@82 726 Tz = VZMUL(Tx, Ty);
Chris@82 727 TL = VADD(Tw, Tz);
Chris@82 728 }
Chris@82 729 {
Chris@82 730 V TA, TJ, TN, TQ, TR;
Chris@82 731 TA = VSUB(Tw, Tz);
Chris@82 732 TJ = VSUB(TE, TI);
Chris@82 733 TK = VFMA(LDK(KP475528258), TA, VMUL(LDK(KP293892626), TJ));
Chris@82 734 T2I = VFNMS(LDK(KP475528258), TJ, VMUL(LDK(KP293892626), TA));
Chris@82 735 TN = VMUL(LDK(KP559016994), VSUB(TL, TM));
Chris@82 736 TQ = VADD(TL, TM);
Chris@82 737 TR = VFNMS(LDK(KP250000000), TQ, TP);
Chris@82 738 TS = VADD(TN, TR);
Chris@82 739 T3y = VADD(TP, TQ);
Chris@82 740 T2J = VSUB(TR, TN);
Chris@82 741 }
Chris@82 742 }
Chris@82 743 {
Chris@82 744 V T1e, T17, T1a, T1b, T10, T13, T14, T1d;
Chris@82 745 T1d = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Chris@82 746 T1e = VZMUL(TB, T1d);
Chris@82 747 {
Chris@82 748 V T16, T19, TZ, T12;
Chris@82 749 T16 = LD(&(x[WS(rs, 12)]), ms, &(x[0]));
Chris@82 750 T17 = VZMUL(T15, T16);
Chris@82 751 T19 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
Chris@82 752 T1a = VZMUL(T18, T19);
Chris@82 753 T1b = VADD(T17, T1a);
Chris@82 754 TZ = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
Chris@82 755 T10 = VZMUL(TY, TZ);
Chris@82 756 T12 = LD(&(x[WS(rs, 22)]), ms, &(x[0]));
Chris@82 757 T13 = VZMUL(T11, T12);
Chris@82 758 T14 = VADD(T10, T13);
Chris@82 759 }
Chris@82 760 {
Chris@82 761 V T1i, T1j, T1c, T1f, T1g;
Chris@82 762 T1i = VSUB(T10, T13);
Chris@82 763 T1j = VSUB(T17, T1a);
Chris@82 764 T1k = VFMA(LDK(KP475528258), T1i, VMUL(LDK(KP293892626), T1j));
Chris@82 765 T2Q = VFNMS(LDK(KP475528258), T1j, VMUL(LDK(KP293892626), T1i));
Chris@82 766 T1c = VMUL(LDK(KP559016994), VSUB(T14, T1b));
Chris@82 767 T1f = VADD(T14, T1b);
Chris@82 768 T1g = VFNMS(LDK(KP250000000), T1f, T1e);
Chris@82 769 T1h = VADD(T1c, T1g);
Chris@82 770 T3B = VADD(T1e, T1f);
Chris@82 771 T2P = VSUB(T1g, T1c);
Chris@82 772 }
Chris@82 773 }
Chris@82 774 {
Chris@82 775 V T3E, T3M, T3I, T3J, T3H, T3K, T3N, T3L;
Chris@82 776 {
Chris@82 777 V T3A, T3D, T3F, T3G;
Chris@82 778 T3A = VSUB(T3y, T3z);
Chris@82 779 T3D = VSUB(T3B, T3C);
Chris@82 780 T3E = VBYI(VFMA(LDK(KP951056516), T3A, VMUL(LDK(KP587785252), T3D)));
Chris@82 781 T3M = VBYI(VFNMS(LDK(KP951056516), T3D, VMUL(LDK(KP587785252), T3A)));
Chris@82 782 T3I = VADD(T1Y, T1Z);
Chris@82 783 T3F = VADD(T3y, T3z);
Chris@82 784 T3G = VADD(T3B, T3C);
Chris@82 785 T3J = VADD(T3F, T3G);
Chris@82 786 T3H = VMUL(LDK(KP559016994), VSUB(T3F, T3G));
Chris@82 787 T3K = VFNMS(LDK(KP250000000), T3J, T3I);
Chris@82 788 }
Chris@82 789 ST(&(x[0]), VADD(T3I, T3J), ms, &(x[0]));
Chris@82 790 T3N = VSUB(T3K, T3H);
Chris@82 791 ST(&(x[WS(rs, 10)]), VADD(T3M, T3N), ms, &(x[0]));
Chris@82 792 ST(&(x[WS(rs, 15)]), VSUB(T3N, T3M), ms, &(x[WS(rs, 1)]));
Chris@82 793 T3L = VADD(T3H, T3K);
Chris@82 794 ST(&(x[WS(rs, 5)]), VADD(T3E, T3L), ms, &(x[WS(rs, 1)]));
Chris@82 795 ST(&(x[WS(rs, 20)]), VSUB(T3L, T3E), ms, &(x[0]));
Chris@82 796 }
Chris@82 797 {
Chris@82 798 V T2X, T3a, T3i, T3j, T3k, T3s, T3t, T3u, T3l, T3m, T3n, T3p, T3q, T3r, T2L;
Chris@82 799 V T3b, T32, T38, T2W, T35, T2Y, T34, T3w, T3x;
Chris@82 800 T2X = VSUB(T20, T1X);
Chris@82 801 T3a = VFNMS(LDK(KP951056516), T2f, T39);
Chris@82 802 T3i = VFMA(LDK(KP1_369094211), T2I, VMUL(LDK(KP728968627), T2J));
Chris@82 803 T3j = VFNMS(LDK(KP992114701), T2F, VMUL(LDK(KP250666467), T2G));
Chris@82 804 T3k = VADD(T3i, T3j);
Chris@82 805 T3s = VFNMS(LDK(KP125581039), T2Q, VMUL(LDK(KP998026728), T2P));
Chris@82 806 T3t = VFMA(LDK(KP1_274847979), T2T, VMUL(LDK(KP770513242), T2S));
Chris@82 807 T3u = VADD(T3s, T3t);
Chris@82 808 T3l = VFMA(LDK(KP1_996053456), T2Q, VMUL(LDK(KP062790519), T2P));
Chris@82 809 T3m = VFNMS(LDK(KP637423989), T2S, VMUL(LDK(KP1_541026485), T2T));
Chris@82 810 T3n = VADD(T3l, T3m);
Chris@82 811 T3p = VFNMS(LDK(KP1_457937254), T2I, VMUL(LDK(KP684547105), T2J));
Chris@82 812 T3q = VFMA(LDK(KP1_984229402), T2G, VMUL(LDK(KP125333233), T2F));
Chris@82 813 T3r = VADD(T3p, T3q);
Chris@82 814 {
Chris@82 815 V T2H, T2K, T36, T30, T31, T37;
Chris@82 816 T2H = VFNMS(LDK(KP851558583), T2G, VMUL(LDK(KP904827052), T2F));
Chris@82 817 T2K = VFMA(LDK(KP1_752613360), T2I, VMUL(LDK(KP481753674), T2J));
Chris@82 818 T36 = VADD(T2K, T2H);
Chris@82 819 T30 = VFMA(LDK(KP1_071653589), T2Q, VMUL(LDK(KP844327925), T2P));
Chris@82 820 T31 = VFMA(LDK(KP125581039), T2T, VMUL(LDK(KP998026728), T2S));
Chris@82 821 T37 = VADD(T30, T31);
Chris@82 822 T2L = VSUB(T2H, T2K);
Chris@82 823 T3b = VADD(T36, T37);
Chris@82 824 T32 = VSUB(T30, T31);
Chris@82 825 T38 = VMUL(LDK(KP559016994), VSUB(T36, T37));
Chris@82 826 }
Chris@82 827 {
Chris@82 828 V T2M, T2N, T2O, T2R, T2U, T2V;
Chris@82 829 T2M = VFNMS(LDK(KP963507348), T2I, VMUL(LDK(KP876306680), T2J));
Chris@82 830 T2N = VFMA(LDK(KP1_809654104), T2G, VMUL(LDK(KP425779291), T2F));
Chris@82 831 T2O = VSUB(T2M, T2N);
Chris@82 832 T2R = VFNMS(LDK(KP1_688655851), T2Q, VMUL(LDK(KP535826794), T2P));
Chris@82 833 T2U = VFNMS(LDK(KP1_996053456), T2T, VMUL(LDK(KP062790519), T2S));
Chris@82 834 T2V = VADD(T2R, T2U);
Chris@82 835 T2W = VMUL(LDK(KP559016994), VSUB(T2O, T2V));
Chris@82 836 T35 = VSUB(T2R, T2U);
Chris@82 837 T2Y = VADD(T2O, T2V);
Chris@82 838 T34 = VADD(T2M, T2N);
Chris@82 839 }
Chris@82 840 {
Chris@82 841 V T3g, T3h, T3o, T3v;
Chris@82 842 T3g = VADD(T2X, T2Y);
Chris@82 843 T3h = VBYI(VADD(T3a, T3b));
Chris@82 844 ST(&(x[WS(rs, 23)]), VSUB(T3g, T3h), ms, &(x[WS(rs, 1)]));
Chris@82 845 ST(&(x[WS(rs, 2)]), VADD(T3g, T3h), ms, &(x[0]));
Chris@82 846 T3o = VADD(T2X, VADD(T3k, T3n));
Chris@82 847 T3v = VBYI(VSUB(VADD(T3r, T3u), T3a));
Chris@82 848 ST(&(x[WS(rs, 22)]), VSUB(T3o, T3v), ms, &(x[0]));
Chris@82 849 ST(&(x[WS(rs, 3)]), VADD(T3o, T3v), ms, &(x[WS(rs, 1)]));
Chris@82 850 }
Chris@82 851 T3w = VBYI(VSUB(VFMA(LDK(KP951056516), VSUB(T3i, T3j), VFMA(LDK(KP309016994), T3r, VFNMS(LDK(KP809016994), T3u, VMUL(LDK(KP587785252), VSUB(T3l, T3m))))), T3a));
Chris@82 852 T3x = VFMA(LDK(KP309016994), T3k, VFMA(LDK(KP951056516), VSUB(T3q, T3p), VFMA(LDK(KP587785252), VSUB(T3t, T3s), VFNMS(LDK(KP809016994), T3n, T2X))));
Chris@82 853 ST(&(x[WS(rs, 8)]), VADD(T3w, T3x), ms, &(x[0]));
Chris@82 854 ST(&(x[WS(rs, 17)]), VSUB(T3x, T3w), ms, &(x[WS(rs, 1)]));
Chris@82 855 {
Chris@82 856 V T33, T3e, T3d, T3f, T2Z, T3c;
Chris@82 857 T2Z = VFNMS(LDK(KP250000000), T2Y, T2X);
Chris@82 858 T33 = VFMA(LDK(KP951056516), T2L, VADD(T2W, VFNMS(LDK(KP587785252), T32, T2Z)));
Chris@82 859 T3e = VFMA(LDK(KP587785252), T2L, VFMA(LDK(KP951056516), T32, VSUB(T2Z, T2W)));
Chris@82 860 T3c = VFNMS(LDK(KP250000000), T3b, T3a);
Chris@82 861 T3d = VBYI(VADD(VFMA(LDK(KP951056516), T34, VMUL(LDK(KP587785252), T35)), VADD(T38, T3c)));
Chris@82 862 T3f = VBYI(VADD(VFNMS(LDK(KP951056516), T35, VMUL(LDK(KP587785252), T34)), VSUB(T3c, T38)));
Chris@82 863 ST(&(x[WS(rs, 18)]), VSUB(T33, T3d), ms, &(x[0]));
Chris@82 864 ST(&(x[WS(rs, 12)]), VADD(T3e, T3f), ms, &(x[0]));
Chris@82 865 ST(&(x[WS(rs, 7)]), VADD(T33, T3d), ms, &(x[WS(rs, 1)]));
Chris@82 866 ST(&(x[WS(rs, 13)]), VSUB(T3e, T3f), ms, &(x[WS(rs, 1)]));
Chris@82 867 }
Chris@82 868 }
Chris@82 869 {
Chris@82 870 V T21, T2h, T2p, T2q, T2r, T2z, T2A, T2B, T2s, T2t, T2u, T2w, T2x, T2y, TU;
Chris@82 871 V T2i, T26, T2c, T1K, T29, T22, T28, T2D, T2E;
Chris@82 872 T21 = VADD(T1X, T20);
Chris@82 873 T2h = VADD(T2e, T2g);
Chris@82 874 T2p = VFMA(LDK(KP1_688655851), TK, VMUL(LDK(KP535826794), TS));
Chris@82 875 T2q = VFMA(LDK(KP1_541026485), Tk, VMUL(LDK(KP637423989), Ts));
Chris@82 876 T2r = VSUB(T2p, T2q);
Chris@82 877 T2z = VFMA(LDK(KP851558583), T1k, VMUL(LDK(KP904827052), T1h));
Chris@82 878 T2A = VFMA(LDK(KP1_984229402), T1H, VMUL(LDK(KP125333233), T1E));
Chris@82 879 T2B = VADD(T2z, T2A);
Chris@82 880 T2s = VFNMS(LDK(KP425779291), T1h, VMUL(LDK(KP1_809654104), T1k));
Chris@82 881 T2t = VFNMS(LDK(KP992114701), T1E, VMUL(LDK(KP250666467), T1H));
Chris@82 882 T2u = VADD(T2s, T2t);
Chris@82 883 T2w = VFNMS(LDK(KP1_071653589), TK, VMUL(LDK(KP844327925), TS));
Chris@82 884 T2x = VFNMS(LDK(KP770513242), Ts, VMUL(LDK(KP1_274847979), Tk));
Chris@82 885 T2y = VADD(T2w, T2x);
Chris@82 886 {
Chris@82 887 V Tt, TT, T2a, T24, T25, T2b;
Chris@82 888 Tt = VFMA(LDK(KP1_071653589), Tk, VMUL(LDK(KP844327925), Ts));
Chris@82 889 TT = VFMA(LDK(KP1_937166322), TK, VMUL(LDK(KP248689887), TS));
Chris@82 890 T2a = VADD(TT, Tt);
Chris@82 891 T24 = VFMA(LDK(KP1_752613360), T1k, VMUL(LDK(KP481753674), T1h));
Chris@82 892 T25 = VFMA(LDK(KP1_457937254), T1H, VMUL(LDK(KP684547105), T1E));
Chris@82 893 T2b = VADD(T24, T25);
Chris@82 894 TU = VSUB(Tt, TT);
Chris@82 895 T2i = VADD(T2a, T2b);
Chris@82 896 T26 = VSUB(T24, T25);
Chris@82 897 T2c = VMUL(LDK(KP559016994), VSUB(T2a, T2b));
Chris@82 898 }
Chris@82 899 {
Chris@82 900 V TV, TW, TX, T1l, T1I, T1J;
Chris@82 901 TV = VFNMS(LDK(KP497379774), TK, VMUL(LDK(KP968583161), TS));
Chris@82 902 TW = VFNMS(LDK(KP1_688655851), Tk, VMUL(LDK(KP535826794), Ts));
Chris@82 903 TX = VADD(TV, TW);
Chris@82 904 T1l = VFNMS(LDK(KP963507348), T1k, VMUL(LDK(KP876306680), T1h));
Chris@82 905 T1I = VFNMS(LDK(KP1_369094211), T1H, VMUL(LDK(KP728968627), T1E));
Chris@82 906 T1J = VADD(T1l, T1I);
Chris@82 907 T1K = VMUL(LDK(KP559016994), VSUB(TX, T1J));
Chris@82 908 T29 = VSUB(T1l, T1I);
Chris@82 909 T22 = VADD(TX, T1J);
Chris@82 910 T28 = VSUB(TV, TW);
Chris@82 911 }
Chris@82 912 {
Chris@82 913 V T2n, T2o, T2v, T2C;
Chris@82 914 T2n = VADD(T21, T22);
Chris@82 915 T2o = VBYI(VADD(T2h, T2i));
Chris@82 916 ST(&(x[WS(rs, 24)]), VSUB(T2n, T2o), ms, &(x[0]));
Chris@82 917 ST(&(x[WS(rs, 1)]), VADD(T2n, T2o), ms, &(x[WS(rs, 1)]));
Chris@82 918 T2v = VADD(T21, VADD(T2r, T2u));
Chris@82 919 T2C = VBYI(VSUB(VADD(T2y, T2B), T2h));
Chris@82 920 ST(&(x[WS(rs, 21)]), VSUB(T2v, T2C), ms, &(x[WS(rs, 1)]));
Chris@82 921 ST(&(x[WS(rs, 4)]), VADD(T2v, T2C), ms, &(x[0]));
Chris@82 922 }
Chris@82 923 T2D = VBYI(VSUB(VFMA(LDK(KP309016994), T2y, VFMA(LDK(KP951056516), VADD(T2p, T2q), VFNMS(LDK(KP809016994), T2B, VMUL(LDK(KP587785252), VSUB(T2s, T2t))))), T2h));
Chris@82 924 T2E = VFMA(LDK(KP951056516), VSUB(T2x, T2w), VFMA(LDK(KP309016994), T2r, VFMA(LDK(KP587785252), VSUB(T2A, T2z), VFNMS(LDK(KP809016994), T2u, T21))));
Chris@82 925 ST(&(x[WS(rs, 9)]), VADD(T2D, T2E), ms, &(x[WS(rs, 1)]));
Chris@82 926 ST(&(x[WS(rs, 16)]), VSUB(T2E, T2D), ms, &(x[0]));
Chris@82 927 {
Chris@82 928 V T27, T2l, T2k, T2m, T23, T2j;
Chris@82 929 T23 = VFNMS(LDK(KP250000000), T22, T21);
Chris@82 930 T27 = VFMA(LDK(KP951056516), TU, VADD(T1K, VFNMS(LDK(KP587785252), T26, T23)));
Chris@82 931 T2l = VFMA(LDK(KP587785252), TU, VFMA(LDK(KP951056516), T26, VSUB(T23, T1K)));
Chris@82 932 T2j = VFNMS(LDK(KP250000000), T2i, T2h);
Chris@82 933 T2k = VBYI(VADD(VFMA(LDK(KP951056516), T28, VMUL(LDK(KP587785252), T29)), VADD(T2c, T2j)));
Chris@82 934 T2m = VBYI(VADD(VFNMS(LDK(KP951056516), T29, VMUL(LDK(KP587785252), T28)), VSUB(T2j, T2c)));
Chris@82 935 ST(&(x[WS(rs, 19)]), VSUB(T27, T2k), ms, &(x[WS(rs, 1)]));
Chris@82 936 ST(&(x[WS(rs, 11)]), VADD(T2l, T2m), ms, &(x[WS(rs, 1)]));
Chris@82 937 ST(&(x[WS(rs, 6)]), VADD(T27, T2k), ms, &(x[0]));
Chris@82 938 ST(&(x[WS(rs, 14)]), VSUB(T2l, T2m), ms, &(x[0]));
Chris@82 939 }
Chris@82 940 }
Chris@82 941 }
Chris@82 942 }
Chris@82 943 }
Chris@82 944 VLEAVE();
Chris@82 945 }
Chris@82 946
Chris@82 947 static const tw_instr twinstr[] = {
Chris@82 948 VTW(0, 1),
Chris@82 949 VTW(0, 3),
Chris@82 950 VTW(0, 9),
Chris@82 951 VTW(0, 24),
Chris@82 952 {TW_NEXT, VL, 0}
Chris@82 953 };
Chris@82 954
Chris@82 955 static const ct_desc desc = { 25, XSIMD_STRING("t3bv_25"), twinstr, &GENUS, {191, 151, 77, 0}, 0, 0, 0 };
Chris@82 956
Chris@82 957 void XSIMD(codelet_t3bv_25) (planner *p) {
Chris@82 958 X(kdft_dit_register) (p, t3bv_25, &desc);
Chris@82 959 }
Chris@82 960 #endif