annotate src/fftw-3.3.8/dft/simd/common/n1bv_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:05:03 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 25 -name n1bv_25 -include dft/simd/n1b.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 224 FP additions, 193 FP multiplications,
Chris@82 32 * (or, 43 additions, 12 multiplications, 181 fused multiply/add),
Chris@82 33 * 140 stack variables, 67 constants, and 50 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/simd/n1b.h"
Chris@82 36
Chris@82 37 static void n1bv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
Chris@82 39 DVK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@82 40 DVK(KP999544308, +0.999544308746292983948881682379742149196758193);
Chris@82 41 DVK(KP792626838, +0.792626838241819413632131824093538848057784557);
Chris@82 42 DVK(KP876091699, +0.876091699473550838204498029706869638173524346);
Chris@82 43 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 44 DVK(KP617882369, +0.617882369114440893914546919006756321695042882);
Chris@82 45 DVK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@82 46 DVK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@82 47 DVK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@82 48 DVK(KP829049696, +0.829049696159252993975487806364305442437946767);
Chris@82 49 DVK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@82 50 DVK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@82 51 DVK(KP916574801, +0.916574801383451584742370439148878693530976769);
Chris@82 52 DVK(KP894834959, +0.894834959464455102997960030820114611498661386);
Chris@82 53 DVK(KP809385824, +0.809385824416008241660603814668679683846476688);
Chris@82 54 DVK(KP447417479, +0.447417479732227551498980015410057305749330693);
Chris@82 55 DVK(KP958953096, +0.958953096729998668045963838399037225970891871);
Chris@82 56 DVK(KP867381224, +0.867381224396525206773171885031575671309956167);
Chris@82 57 DVK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@82 58 DVK(KP897376177, +0.897376177523557693138608077137219684419427330);
Chris@82 59 DVK(KP855719849, +0.855719849902058969314654733608091555096772472);
Chris@82 60 DVK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@82 61 DVK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@82 62 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@82 63 DVK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@82 64 DVK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@82 65 DVK(KP952936919, +0.952936919628306576880750665357914584765951388);
Chris@82 66 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 67 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 68 DVK(KP997675361, +0.997675361079556513670859573984492383596555031);
Chris@82 69 DVK(KP904508497, +0.904508497187473712051146708591409529430077295);
Chris@82 70 DVK(KP237294955, +0.237294955877110315393888866460840817927895961);
Chris@82 71 DVK(KP923225144, +0.923225144846402650453449441572664695995209956);
Chris@82 72 DVK(KP262346850, +0.262346850930607871785420028382979691334784273);
Chris@82 73 DVK(KP669429328, +0.669429328479476605641803240971985825917022098);
Chris@82 74 DVK(KP570584518, +0.570584518783621657366766175430996792655723863);
Chris@82 75 DVK(KP982009705, +0.982009705009746369461829878184175962711969869);
Chris@82 76 DVK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@82 77 DVK(KP921078979, +0.921078979742360627699756128143719920817673854);
Chris@82 78 DVK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@82 79 DVK(KP690983005, +0.690983005625052575897706582817180941139845410);
Chris@82 80 DVK(KP945422727, +0.945422727388575946270360266328811958657216298);
Chris@82 81 DVK(KP763932022, +0.763932022500210303590826331268723764559381640);
Chris@82 82 DVK(KP956723877, +0.956723877038460305821989399535483155872969262);
Chris@82 83 DVK(KP522616830, +0.522616830205754336872861364785224694908468440);
Chris@82 84 DVK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@82 85 DVK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@82 86 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 87 DVK(KP269969613, +0.269969613759572083574752974412347470060951301);
Chris@82 88 DVK(KP244189809, +0.244189809627953270309879511234821255780225091);
Chris@82 89 DVK(KP447533225, +0.447533225982656890041886979663652563063114397);
Chris@82 90 DVK(KP494780565, +0.494780565770515410344588413655324772219443730);
Chris@82 91 DVK(KP578046249, +0.578046249379945007321754579646815604023525655);
Chris@82 92 DVK(KP603558818, +0.603558818296015001454675132653458027918768137);
Chris@82 93 DVK(KP667278218, +0.667278218140296670899089292254759909713898805);
Chris@82 94 DVK(KP522847744, +0.522847744331509716623755382187077770911012542);
Chris@82 95 DVK(KP132830569, +0.132830569247582714407653942074819768844536507);
Chris@82 96 DVK(KP786782374, +0.786782374965295178365099601674911834788448471);
Chris@82 97 DVK(KP869845200, +0.869845200362138853122720822420327157933056305);
Chris@82 98 DVK(KP120146378, +0.120146378570687701782758537356596213647956445);
Chris@82 99 DVK(KP987388751, +0.987388751065621252324603216482382109400433949);
Chris@82 100 DVK(KP059835404, +0.059835404262124915169548397419498386427871950);
Chris@82 101 DVK(KP066152395, +0.066152395967733048213034281011006031460903353);
Chris@82 102 DVK(KP893101515, +0.893101515366181661711202267938416198338079437);
Chris@82 103 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 104 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 105 DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 106 {
Chris@82 107 INT i;
Chris@82 108 const R *xi;
Chris@82 109 R *xo;
Chris@82 110 xi = ii;
Chris@82 111 xo = io;
Chris@82 112 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(50, is), MAKE_VOLATILE_STRIDE(50, os)) {
Chris@82 113 V Ta, T2z, T1q, T9, T3n, T3r, T3s, T3t, T1a, T2N, T2V, T1j, T1J, T2o, T2t;
Chris@82 114 V T1R, TV, T2O, T2W, T1i, T1K, T2l, T2s, T1S, T3o, T3p, T3q, TF, T2R, T2Y;
Chris@82 115 V T1f, T1N, T2e, T2v, T1V, Tq, T2Q, T2Z, T1e, T1M, T2h, T2w, T1U;
Chris@82 116 {
Chris@82 117 V T1, T7, T1p, T4, T1o, T8;
Chris@82 118 T1 = LD(&(xi[0]), ivs, &(xi[0]));
Chris@82 119 {
Chris@82 120 V T5, T6, T2, T3;
Chris@82 121 T5 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@82 122 T6 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)]));
Chris@82 123 T7 = VADD(T5, T6);
Chris@82 124 T1p = VSUB(T5, T6);
Chris@82 125 T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@82 126 T3 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0]));
Chris@82 127 T4 = VADD(T2, T3);
Chris@82 128 T1o = VSUB(T2, T3);
Chris@82 129 }
Chris@82 130 Ta = VSUB(T4, T7);
Chris@82 131 T2z = VFNMS(LDK(KP618033988), T1o, T1p);
Chris@82 132 T1q = VFMA(LDK(KP618033988), T1p, T1o);
Chris@82 133 T8 = VADD(T4, T7);
Chris@82 134 T9 = VFNMS(LDK(KP250000000), T8, T1);
Chris@82 135 T3n = VADD(T1, T8);
Chris@82 136 }
Chris@82 137 {
Chris@82 138 V TH, TW, TO, TT, TQ, TS, T13, T18, T15, T17;
Chris@82 139 TH = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@82 140 TW = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@82 141 {
Chris@82 142 V TI, TJ, TK, TL, TM, TN;
Chris@82 143 TI = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@82 144 TJ = LD(&(xi[WS(is, 22)]), ivs, &(xi[0]));
Chris@82 145 TK = VADD(TI, TJ);
Chris@82 146 TL = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@82 147 TM = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)]));
Chris@82 148 TN = VADD(TL, TM);
Chris@82 149 TO = VADD(TK, TN);
Chris@82 150 TT = VSUB(TM, TL);
Chris@82 151 TQ = VSUB(TN, TK);
Chris@82 152 TS = VSUB(TI, TJ);
Chris@82 153 }
Chris@82 154 {
Chris@82 155 V TX, TY, TZ, T10, T11, T12;
Chris@82 156 TX = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@82 157 TY = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)]));
Chris@82 158 TZ = VADD(TX, TY);
Chris@82 159 T10 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@82 160 T11 = LD(&(xi[WS(is, 18)]), ivs, &(xi[0]));
Chris@82 161 T12 = VADD(T10, T11);
Chris@82 162 T13 = VADD(TZ, T12);
Chris@82 163 T18 = VSUB(T10, T11);
Chris@82 164 T15 = VSUB(T12, TZ);
Chris@82 165 T17 = VSUB(TX, TY);
Chris@82 166 }
Chris@82 167 T3r = VADD(TH, TO);
Chris@82 168 T3s = VADD(TW, T13);
Chris@82 169 T3t = VADD(T3r, T3s);
Chris@82 170 {
Chris@82 171 V T19, T2m, T16, T2n, T14;
Chris@82 172 T19 = VFMA(LDK(KP618033988), T18, T17);
Chris@82 173 T2m = VFNMS(LDK(KP618033988), T17, T18);
Chris@82 174 T14 = VFNMS(LDK(KP250000000), T13, TW);
Chris@82 175 T16 = VFNMS(LDK(KP559016994), T15, T14);
Chris@82 176 T2n = VFMA(LDK(KP559016994), T15, T14);
Chris@82 177 T1a = VFNMS(LDK(KP893101515), T19, T16);
Chris@82 178 T2N = VFMA(LDK(KP066152395), T2n, T2m);
Chris@82 179 T2V = VFNMS(LDK(KP059835404), T2m, T2n);
Chris@82 180 T1j = VFMA(LDK(KP987388751), T16, T19);
Chris@82 181 T1J = VFNMS(LDK(KP120146378), T19, T16);
Chris@82 182 T2o = VFMA(LDK(KP869845200), T2n, T2m);
Chris@82 183 T2t = VFNMS(LDK(KP786782374), T2m, T2n);
Chris@82 184 T1R = VFMA(LDK(KP132830569), T16, T19);
Chris@82 185 }
Chris@82 186 {
Chris@82 187 V TU, T2j, TR, T2k, TP;
Chris@82 188 TU = VFNMS(LDK(KP618033988), TT, TS);
Chris@82 189 T2j = VFMA(LDK(KP618033988), TS, TT);
Chris@82 190 TP = VFNMS(LDK(KP250000000), TO, TH);
Chris@82 191 TR = VFNMS(LDK(KP559016994), TQ, TP);
Chris@82 192 T2k = VFMA(LDK(KP559016994), TQ, TP);
Chris@82 193 TV = VFNMS(LDK(KP522847744), TU, TR);
Chris@82 194 T2O = VFNMS(LDK(KP667278218), T2k, T2j);
Chris@82 195 T2W = VFMA(LDK(KP603558818), T2j, T2k);
Chris@82 196 T1i = VFMA(LDK(KP578046249), TR, TU);
Chris@82 197 T1K = VFNMS(LDK(KP494780565), TR, TU);
Chris@82 198 T2l = VFMA(LDK(KP066152395), T2k, T2j);
Chris@82 199 T2s = VFNMS(LDK(KP059835404), T2j, T2k);
Chris@82 200 T1S = VFMA(LDK(KP447533225), TU, TR);
Chris@82 201 }
Chris@82 202 }
Chris@82 203 {
Chris@82 204 V Tc, Ty, Tj, To, Tl, Tn, Tt, TD, Tw, TB;
Chris@82 205 Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@82 206 Ty = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@82 207 {
Chris@82 208 V Td, Te, Tf, Tg, Th, Ti;
Chris@82 209 Td = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@82 210 Te = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)]));
Chris@82 211 Tf = VADD(Td, Te);
Chris@82 212 Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@82 213 Th = LD(&(xi[WS(is, 16)]), ivs, &(xi[0]));
Chris@82 214 Ti = VADD(Tg, Th);
Chris@82 215 Tj = VADD(Tf, Ti);
Chris@82 216 To = VSUB(Th, Tg);
Chris@82 217 Tl = VSUB(Tf, Ti);
Chris@82 218 Tn = VSUB(Td, Te);
Chris@82 219 }
Chris@82 220 {
Chris@82 221 V Tr, Ts, Tz, Tu, Tv, TA;
Chris@82 222 Tr = LD(&(xi[WS(is, 24)]), ivs, &(xi[0]));
Chris@82 223 Ts = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@82 224 Tz = VADD(Ts, Tr);
Chris@82 225 Tu = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)]));
Chris@82 226 Tv = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
Chris@82 227 TA = VADD(Tv, Tu);
Chris@82 228 Tt = VSUB(Tr, Ts);
Chris@82 229 TD = VSUB(Tz, TA);
Chris@82 230 Tw = VSUB(Tu, Tv);
Chris@82 231 TB = VADD(Tz, TA);
Chris@82 232 }
Chris@82 233 T3o = VADD(Tc, Tj);
Chris@82 234 T3p = VADD(Ty, TB);
Chris@82 235 T3q = VADD(T3o, T3p);
Chris@82 236 {
Chris@82 237 V Tx, T2d, TE, T2c, TC;
Chris@82 238 Tx = VFMA(LDK(KP618033988), Tw, Tt);
Chris@82 239 T2d = VFNMS(LDK(KP618033988), Tt, Tw);
Chris@82 240 TC = VFMS(LDK(KP250000000), TB, Ty);
Chris@82 241 TE = VFNMS(LDK(KP559016994), TD, TC);
Chris@82 242 T2c = VFMA(LDK(KP559016994), TD, TC);
Chris@82 243 TF = VFNMS(LDK(KP667278218), TE, Tx);
Chris@82 244 T2R = VFNMS(LDK(KP494780565), T2c, T2d);
Chris@82 245 T2Y = VFMA(LDK(KP447533225), T2d, T2c);
Chris@82 246 T1f = VFMA(LDK(KP603558818), Tx, TE);
Chris@82 247 T1N = VFMA(LDK(KP869845200), TE, Tx);
Chris@82 248 T2e = VFMA(LDK(KP120146378), T2d, T2c);
Chris@82 249 T2v = VFNMS(LDK(KP132830569), T2c, T2d);
Chris@82 250 T1V = VFNMS(LDK(KP786782374), Tx, TE);
Chris@82 251 }
Chris@82 252 {
Chris@82 253 V Tp, T2g, Tm, T2f, Tk;
Chris@82 254 Tp = VFNMS(LDK(KP618033988), To, Tn);
Chris@82 255 T2g = VFMA(LDK(KP618033988), Tn, To);
Chris@82 256 Tk = VFNMS(LDK(KP250000000), Tj, Tc);
Chris@82 257 Tm = VFMA(LDK(KP559016994), Tl, Tk);
Chris@82 258 T2f = VFNMS(LDK(KP559016994), Tl, Tk);
Chris@82 259 Tq = VFNMS(LDK(KP244189809), Tp, Tm);
Chris@82 260 T2Q = VFNMS(LDK(KP522847744), T2g, T2f);
Chris@82 261 T2Z = VFMA(LDK(KP578046249), T2f, T2g);
Chris@82 262 T1e = VFMA(LDK(KP269969613), Tm, Tp);
Chris@82 263 T1M = VFMA(LDK(KP667278218), Tm, Tp);
Chris@82 264 T2h = VFMA(LDK(KP893101515), T2g, T2f);
Chris@82 265 T2w = VFNMS(LDK(KP987388751), T2f, T2g);
Chris@82 266 T1U = VFNMS(LDK(KP603558818), Tp, Tm);
Chris@82 267 }
Chris@82 268 }
Chris@82 269 {
Chris@82 270 V T3w, T3u, T3v, T3A, T3C, T3y, T3z, T3B, T3x;
Chris@82 271 T3w = VSUB(T3q, T3t);
Chris@82 272 T3u = VADD(T3q, T3t);
Chris@82 273 T3v = VFNMS(LDK(KP250000000), T3u, T3n);
Chris@82 274 T3y = VSUB(T3o, T3p);
Chris@82 275 T3z = VSUB(T3r, T3s);
Chris@82 276 T3A = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3z, T3y));
Chris@82 277 T3C = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3y, T3z));
Chris@82 278 ST(&(xo[0]), VADD(T3u, T3n), ovs, &(xo[0]));
Chris@82 279 T3B = VFNMS(LDK(KP559016994), T3w, T3v);
Chris@82 280 ST(&(xo[WS(os, 10)]), VFNMSI(T3C, T3B), ovs, &(xo[0]));
Chris@82 281 ST(&(xo[WS(os, 15)]), VFMAI(T3C, T3B), ovs, &(xo[WS(os, 1)]));
Chris@82 282 T3x = VFMA(LDK(KP559016994), T3w, T3v);
Chris@82 283 ST(&(xo[WS(os, 5)]), VFMAI(T3A, T3x), ovs, &(xo[WS(os, 1)]));
Chris@82 284 ST(&(xo[WS(os, 20)]), VFNMSI(T3A, T3x), ovs, &(xo[0]));
Chris@82 285 }
Chris@82 286 {
Chris@82 287 V T2B, T2H, T2q, T2E, T2y, T2K, T31, T3a, T3l, T3f, T2b, T35, T34, T2T, T33;
Chris@82 288 V T3h, T37;
Chris@82 289 T2B = VFMA(LDK(KP734762448), T2w, T2v);
Chris@82 290 T2H = VFNMS(LDK(KP734762448), T2h, T2e);
Chris@82 291 {
Chris@82 292 V T2p, T2i, T2D, T2C;
Chris@82 293 T2p = VFNMS(LDK(KP772036680), T2o, T2l);
Chris@82 294 T2i = VFMA(LDK(KP734762448), T2h, T2e);
Chris@82 295 T2C = VFNMS(LDK(KP772036680), T2t, T2s);
Chris@82 296 T2D = VFNMS(LDK(KP522616830), T2i, T2C);
Chris@82 297 T2q = VFMA(LDK(KP956723877), T2p, T2i);
Chris@82 298 T2E = VFMA(LDK(KP763932022), T2D, T2p);
Chris@82 299 }
Chris@82 300 {
Chris@82 301 V T2u, T2x, T2J, T2I;
Chris@82 302 T2u = VFMA(LDK(KP772036680), T2t, T2s);
Chris@82 303 T2x = VFNMS(LDK(KP734762448), T2w, T2v);
Chris@82 304 T2I = VFMA(LDK(KP772036680), T2o, T2l);
Chris@82 305 T2J = VFMA(LDK(KP522616830), T2x, T2I);
Chris@82 306 T2y = VFMA(LDK(KP945422727), T2x, T2u);
Chris@82 307 T2K = VFNMS(LDK(KP690983005), T2J, T2u);
Chris@82 308 }
Chris@82 309 {
Chris@82 310 V T3e, T3d, T3k, T36, T2P, T2S;
Chris@82 311 {
Chris@82 312 V T2X, T30, T3b, T3c;
Chris@82 313 T2X = VFMA(LDK(KP845997307), T2W, T2V);
Chris@82 314 T30 = VFNMS(LDK(KP921078979), T2Z, T2Y);
Chris@82 315 T31 = VFNMS(LDK(KP906616052), T30, T2X);
Chris@82 316 T3e = VFMA(LDK(KP906616052), T30, T2X);
Chris@82 317 T3b = VFMA(LDK(KP845997307), T2O, T2N);
Chris@82 318 T3c = VFMA(LDK(KP982009705), T2R, T2Q);
Chris@82 319 T3d = VFMA(LDK(KP570584518), T3c, T3b);
Chris@82 320 T3k = VFNMS(LDK(KP669429328), T3b, T3c);
Chris@82 321 }
Chris@82 322 T3a = VFMA(LDK(KP262346850), T31, T2z);
Chris@82 323 T3l = VFNMS(LDK(KP669429328), T3e, T3k);
Chris@82 324 T3f = VFMA(LDK(KP618033988), T3e, T3d);
Chris@82 325 T2b = VFNMS(LDK(KP559016994), Ta, T9);
Chris@82 326 T35 = VFMA(LDK(KP921078979), T2Z, T2Y);
Chris@82 327 T34 = VFNMS(LDK(KP845997307), T2W, T2V);
Chris@82 328 T2P = VFNMS(LDK(KP845997307), T2O, T2N);
Chris@82 329 T2S = VFNMS(LDK(KP982009705), T2R, T2Q);
Chris@82 330 T2T = VFMA(LDK(KP923225144), T2S, T2P);
Chris@82 331 T36 = VFNMS(LDK(KP923225144), T2S, T2P);
Chris@82 332 T33 = VFNMS(LDK(KP237294955), T2T, T2b);
Chris@82 333 T3h = VFNMS(LDK(KP904508497), T36, T34);
Chris@82 334 T37 = VFNMS(LDK(KP997675361), T36, T35);
Chris@82 335 }
Chris@82 336 {
Chris@82 337 V T2r, T2A, T2U, T32;
Chris@82 338 T2r = VFMA(LDK(KP992114701), T2q, T2b);
Chris@82 339 T2A = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2z, T2y));
Chris@82 340 ST(&(xo[WS(os, 22)]), VFNMSI(T2A, T2r), ovs, &(xo[0]));
Chris@82 341 ST(&(xo[WS(os, 3)]), VFMAI(T2A, T2r), ovs, &(xo[WS(os, 1)]));
Chris@82 342 T2U = VFMA(LDK(KP949179823), T2T, T2b);
Chris@82 343 T32 = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2z, T31));
Chris@82 344 ST(&(xo[WS(os, 23)]), VFNMSI(T32, T2U), ovs, &(xo[WS(os, 1)]));
Chris@82 345 ST(&(xo[WS(os, 2)]), VFMAI(T32, T2U), ovs, &(xo[0]));
Chris@82 346 }
Chris@82 347 {
Chris@82 348 V T3g, T39, T38, T3m, T3j, T3i;
Chris@82 349 T3g = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3f, T3a));
Chris@82 350 T38 = VFMA(LDK(KP560319534), T37, T34);
Chris@82 351 T39 = VFNMS(LDK(KP949179823), T38, T33);
Chris@82 352 ST(&(xo[WS(os, 12)]), VFNMSI(T3g, T39), ovs, &(xo[0]));
Chris@82 353 ST(&(xo[WS(os, 13)]), VFMAI(T3g, T39), ovs, &(xo[WS(os, 1)]));
Chris@82 354 T3m = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3l, T3a));
Chris@82 355 T3i = VFNMS(LDK(KP681693190), T3h, T35);
Chris@82 356 T3j = VFNMS(LDK(KP860541664), T3i, T33);
Chris@82 357 ST(&(xo[WS(os, 7)]), VFNMSI(T3m, T3j), ovs, &(xo[WS(os, 1)]));
Chris@82 358 ST(&(xo[WS(os, 18)]), VFMAI(T3m, T3j), ovs, &(xo[0]));
Chris@82 359 {
Chris@82 360 V T2G, T2M, T2F, T2L;
Chris@82 361 T2F = VFNMS(LDK(KP855719849), T2E, T2B);
Chris@82 362 T2G = VFMA(LDK(KP897376177), T2F, T2b);
Chris@82 363 T2L = VFMA(LDK(KP855719849), T2K, T2H);
Chris@82 364 T2M = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2L, T2z));
Chris@82 365 ST(&(xo[WS(os, 8)]), VFMAI(T2M, T2G), ovs, &(xo[0]));
Chris@82 366 ST(&(xo[WS(os, 17)]), VFNMSI(T2M, T2G), ovs, &(xo[WS(os, 1)]));
Chris@82 367 }
Chris@82 368 }
Chris@82 369 }
Chris@82 370 {
Chris@82 371 V T1Z, T25, T1P, T22, T1X, T28, T1t, T1u, T1F, T1z, Tb, T1k, T1g, T1c, T1d;
Chris@82 372 V T1B, T1l;
Chris@82 373 T1Z = VFNMS(LDK(KP912575812), T1V, T1U);
Chris@82 374 T25 = VFNMS(LDK(KP912575812), T1N, T1M);
Chris@82 375 {
Chris@82 376 V T1L, T1O, T21, T20;
Chris@82 377 T1L = VFNMS(LDK(KP867381224), T1K, T1J);
Chris@82 378 T1O = VFMA(LDK(KP912575812), T1N, T1M);
Chris@82 379 T20 = VFNMS(LDK(KP958953096), T1S, T1R);
Chris@82 380 T21 = VFMA(LDK(KP447417479), T1O, T20);
Chris@82 381 T1P = VFNMS(LDK(KP809385824), T1O, T1L);
Chris@82 382 T22 = VFMA(LDK(KP690983005), T21, T1L);
Chris@82 383 }
Chris@82 384 {
Chris@82 385 V T1T, T1W, T27, T26;
Chris@82 386 T1T = VFMA(LDK(KP958953096), T1S, T1R);
Chris@82 387 T1W = VFMA(LDK(KP912575812), T1V, T1U);
Chris@82 388 T26 = VFMA(LDK(KP867381224), T1K, T1J);
Chris@82 389 T27 = VFMA(LDK(KP447417479), T1W, T26);
Chris@82 390 T1X = VFMA(LDK(KP894834959), T1W, T1T);
Chris@82 391 T28 = VFNMS(LDK(KP763932022), T27, T1T);
Chris@82 392 }
Chris@82 393 {
Chris@82 394 V T1y, T1x, T1E, T1h, TG, T1b;
Chris@82 395 {
Chris@82 396 V T1r, T1s, T1v, T1w;
Chris@82 397 T1r = VFNMS(LDK(KP916574801), T1f, T1e);
Chris@82 398 T1s = VFMA(LDK(KP831864738), T1j, T1i);
Chris@82 399 T1t = VFMA(LDK(KP904730450), T1s, T1r);
Chris@82 400 T1y = VFNMS(LDK(KP904730450), T1s, T1r);
Chris@82 401 T1v = VFNMS(LDK(KP829049696), TF, Tq);
Chris@82 402 T1w = VFNMS(LDK(KP831864738), T1a, TV);
Chris@82 403 T1x = VFMA(LDK(KP559154169), T1w, T1v);
Chris@82 404 T1E = VFNMS(LDK(KP683113946), T1v, T1w);
Chris@82 405 }
Chris@82 406 T1u = VFNMS(LDK(KP242145790), T1t, T1q);
Chris@82 407 T1F = VFMA(LDK(KP617882369), T1y, T1E);
Chris@82 408 T1z = VFMA(LDK(KP559016994), T1y, T1x);
Chris@82 409 Tb = VFMA(LDK(KP559016994), Ta, T9);
Chris@82 410 T1k = VFNMS(LDK(KP831864738), T1j, T1i);
Chris@82 411 T1g = VFMA(LDK(KP916574801), T1f, T1e);
Chris@82 412 TG = VFMA(LDK(KP829049696), TF, Tq);
Chris@82 413 T1b = VFMA(LDK(KP831864738), T1a, TV);
Chris@82 414 T1c = VFMA(LDK(KP904730450), T1b, TG);
Chris@82 415 T1h = VFNMS(LDK(KP904730450), T1b, TG);
Chris@82 416 T1d = VFNMS(LDK(KP242145790), T1c, Tb);
Chris@82 417 T1B = VADD(T1g, T1h);
Chris@82 418 T1l = VFNMS(LDK(KP904730450), T1k, T1h);
Chris@82 419 }
Chris@82 420 {
Chris@82 421 V T1H, T1I, T1Q, T1Y;
Chris@82 422 T1H = VFMA(LDK(KP968583161), T1c, Tb);
Chris@82 423 T1I = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1t, T1q));
Chris@82 424 ST(&(xo[WS(os, 1)]), VFMAI(T1I, T1H), ovs, &(xo[WS(os, 1)]));
Chris@82 425 ST(&(xo[WS(os, 24)]), VFNMSI(T1I, T1H), ovs, &(xo[0]));
Chris@82 426 T1Q = VFNMS(LDK(KP992114701), T1P, Tb);
Chris@82 427 T1Y = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T1X, T1q));
Chris@82 428 ST(&(xo[WS(os, 4)]), VFNMSI(T1Y, T1Q), ovs, &(xo[0]));
Chris@82 429 ST(&(xo[WS(os, 21)]), VFMAI(T1Y, T1Q), ovs, &(xo[WS(os, 1)]));
Chris@82 430 }
Chris@82 431 {
Chris@82 432 V T1A, T1n, T1m, T1G, T1D, T1C;
Chris@82 433 T1A = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1z, T1u));
Chris@82 434 T1m = VFNMS(LDK(KP618033988), T1l, T1g);
Chris@82 435 T1n = VFNMS(LDK(KP876091699), T1m, T1d);
Chris@82 436 ST(&(xo[WS(os, 6)]), VFMAI(T1A, T1n), ovs, &(xo[0]));
Chris@82 437 ST(&(xo[WS(os, 19)]), VFNMSI(T1A, T1n), ovs, &(xo[WS(os, 1)]));
Chris@82 438 T1G = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T1F, T1u));
Chris@82 439 T1C = VFNMS(LDK(KP683113946), T1B, T1k);
Chris@82 440 T1D = VFMA(LDK(KP792626838), T1C, T1d);
Chris@82 441 ST(&(xo[WS(os, 11)]), VFMAI(T1G, T1D), ovs, &(xo[WS(os, 1)]));
Chris@82 442 ST(&(xo[WS(os, 14)]), VFNMSI(T1G, T1D), ovs, &(xo[0]));
Chris@82 443 {
Chris@82 444 V T24, T2a, T23, T29;
Chris@82 445 T23 = VFNMS(LDK(KP999544308), T22, T1Z);
Chris@82 446 T24 = VFNMS(LDK(KP803003575), T23, Tb);
Chris@82 447 T29 = VFNMS(LDK(KP999544308), T28, T25);
Chris@82 448 T2a = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T29, T1q));
Chris@82 449 ST(&(xo[WS(os, 9)]), VFNMSI(T2a, T24), ovs, &(xo[WS(os, 1)]));
Chris@82 450 ST(&(xo[WS(os, 16)]), VFMAI(T2a, T24), ovs, &(xo[0]));
Chris@82 451 }
Chris@82 452 }
Chris@82 453 }
Chris@82 454 }
Chris@82 455 }
Chris@82 456 VLEAVE();
Chris@82 457 }
Chris@82 458
Chris@82 459 static const kdft_desc desc = { 25, XSIMD_STRING("n1bv_25"), {43, 12, 181, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 460
Chris@82 461 void XSIMD(codelet_n1bv_25) (planner *p) {
Chris@82 462 X(kdft_register) (p, n1bv_25, &desc);
Chris@82 463 }
Chris@82 464
Chris@82 465 #else
Chris@82 466
Chris@82 467 /* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 25 -name n1bv_25 -include dft/simd/n1b.h */
Chris@82 468
Chris@82 469 /*
Chris@82 470 * This function contains 224 FP additions, 140 FP multiplications,
Chris@82 471 * (or, 147 additions, 63 multiplications, 77 fused multiply/add),
Chris@82 472 * 115 stack variables, 40 constants, and 50 memory accesses
Chris@82 473 */
Chris@82 474 #include "dft/simd/n1b.h"
Chris@82 475
Chris@82 476 static void n1bv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 477 {
Chris@82 478 DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
Chris@82 479 DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
Chris@82 480 DVK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@82 481 DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
Chris@82 482 DVK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@82 483 DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
Chris@82 484 DVK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@82 485 DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
Chris@82 486 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 487 DVK(KP250666467, +0.250666467128608490746237519633017587885836494);
Chris@82 488 DVK(KP851558583, +0.851558583130145297725004891488503407959946084);
Chris@82 489 DVK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@82 490 DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@82 491 DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
Chris@82 492 DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
Chris@82 493 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 494 DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@82 495 DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
Chris@82 496 DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
Chris@82 497 DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@82 498 DVK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@82 499 DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
Chris@82 500 DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@82 501 DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
Chris@82 502 DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
Chris@82 503 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@82 504 DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
Chris@82 505 DVK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@82 506 DVK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@82 507 DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
Chris@82 508 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 509 DVK(KP125581039, +0.125581039058626752152356449131262266244969664);
Chris@82 510 DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
Chris@82 511 DVK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@82 512 DVK(KP293892626, +0.293892626146236564584352977319536384298826219);
Chris@82 513 DVK(KP475528258, +0.475528258147576786058219666689691071702849317);
Chris@82 514 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 515 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 516 DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 517 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 518 {
Chris@82 519 INT i;
Chris@82 520 const R *xi;
Chris@82 521 R *xo;
Chris@82 522 xi = ii;
Chris@82 523 xo = io;
Chris@82 524 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(50, is), MAKE_VOLATILE_STRIDE(50, os)) {
Chris@82 525 V T1b, T2o, T1v, T1e, T2W, T2P, T2Q, T2U, T11, T27, TY, T26, T12, T2f, T1j;
Chris@82 526 V T28, TM, T24, TJ, T23, TN, T2e, T1i, T25, T2M, T2N, T2T, Tm, T1W, Tt;
Chris@82 527 V T1X, Tu, T20, Tw, T1Y, T7, T1U, Te, T1T, Tf, T21, Tx, T1V;
Chris@82 528 {
Chris@82 529 V T1c, T1a, T1t, T17, T1r;
Chris@82 530 T1c = LD(&(xi[0]), ivs, &(xi[0]));
Chris@82 531 {
Chris@82 532 V T18, T19, T15, T16;
Chris@82 533 T18 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@82 534 T19 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)]));
Chris@82 535 T1a = VADD(T18, T19);
Chris@82 536 T1t = VSUB(T18, T19);
Chris@82 537 T15 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@82 538 T16 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0]));
Chris@82 539 T17 = VADD(T15, T16);
Chris@82 540 T1r = VSUB(T15, T16);
Chris@82 541 }
Chris@82 542 {
Chris@82 543 V T2n, T1s, T1u, T1d;
Chris@82 544 T1b = VMUL(LDK(KP559016994), VSUB(T17, T1a));
Chris@82 545 T2n = VMUL(LDK(KP587785252), T1r);
Chris@82 546 T2o = VFNMS(LDK(KP951056516), T1t, T2n);
Chris@82 547 T1s = VMUL(LDK(KP951056516), T1r);
Chris@82 548 T1u = VMUL(LDK(KP587785252), T1t);
Chris@82 549 T1v = VADD(T1s, T1u);
Chris@82 550 T1d = VADD(T17, T1a);
Chris@82 551 T1e = VFNMS(LDK(KP250000000), T1d, T1c);
Chris@82 552 T2W = VADD(T1c, T1d);
Chris@82 553 }
Chris@82 554 }
Chris@82 555 {
Chris@82 556 V TG, TV, TF, TL, TH, TK, TU, T10, TW, TZ, TX, TI;
Chris@82 557 TG = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@82 558 TV = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@82 559 {
Chris@82 560 V Tz, TA, TB, TC, TD, TE;
Chris@82 561 Tz = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@82 562 TA = LD(&(xi[WS(is, 22)]), ivs, &(xi[0]));
Chris@82 563 TB = VADD(Tz, TA);
Chris@82 564 TC = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@82 565 TD = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)]));
Chris@82 566 TE = VADD(TC, TD);
Chris@82 567 TF = VMUL(LDK(KP559016994), VSUB(TB, TE));
Chris@82 568 TL = VSUB(TC, TD);
Chris@82 569 TH = VADD(TB, TE);
Chris@82 570 TK = VSUB(Tz, TA);
Chris@82 571 }
Chris@82 572 {
Chris@82 573 V TO, TP, TQ, TR, TS, TT;
Chris@82 574 TO = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@82 575 TP = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)]));
Chris@82 576 TQ = VADD(TO, TP);
Chris@82 577 TR = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@82 578 TS = LD(&(xi[WS(is, 18)]), ivs, &(xi[0]));
Chris@82 579 TT = VADD(TR, TS);
Chris@82 580 TU = VMUL(LDK(KP559016994), VSUB(TQ, TT));
Chris@82 581 T10 = VSUB(TR, TS);
Chris@82 582 TW = VADD(TQ, TT);
Chris@82 583 TZ = VSUB(TO, TP);
Chris@82 584 }
Chris@82 585 T2P = VADD(TG, TH);
Chris@82 586 T2Q = VADD(TV, TW);
Chris@82 587 T2U = VADD(T2P, T2Q);
Chris@82 588 T11 = VFMA(LDK(KP475528258), TZ, VMUL(LDK(KP293892626), T10));
Chris@82 589 T27 = VFNMS(LDK(KP475528258), T10, VMUL(LDK(KP293892626), TZ));
Chris@82 590 TX = VFNMS(LDK(KP250000000), TW, TV);
Chris@82 591 TY = VADD(TU, TX);
Chris@82 592 T26 = VSUB(TX, TU);
Chris@82 593 T12 = VFNMS(LDK(KP1_369094211), T11, VMUL(LDK(KP728968627), TY));
Chris@82 594 T2f = VFMA(LDK(KP125581039), T27, VMUL(LDK(KP998026728), T26));
Chris@82 595 T1j = VFMA(LDK(KP1_457937254), T11, VMUL(LDK(KP684547105), TY));
Chris@82 596 T28 = VFNMS(LDK(KP1_996053456), T27, VMUL(LDK(KP062790519), T26));
Chris@82 597 TM = VFMA(LDK(KP475528258), TK, VMUL(LDK(KP293892626), TL));
Chris@82 598 T24 = VFNMS(LDK(KP475528258), TL, VMUL(LDK(KP293892626), TK));
Chris@82 599 TI = VFNMS(LDK(KP250000000), TH, TG);
Chris@82 600 TJ = VADD(TF, TI);
Chris@82 601 T23 = VSUB(TI, TF);
Chris@82 602 TN = VFNMS(LDK(KP963507348), TM, VMUL(LDK(KP876306680), TJ));
Chris@82 603 T2e = VFMA(LDK(KP1_071653589), T24, VMUL(LDK(KP844327925), T23));
Chris@82 604 T1i = VFMA(LDK(KP1_752613360), TM, VMUL(LDK(KP481753674), TJ));
Chris@82 605 T25 = VFNMS(LDK(KP1_688655851), T24, VMUL(LDK(KP535826794), T23));
Chris@82 606 }
Chris@82 607 {
Chris@82 608 V Tb, Tq, T3, Tc, T6, Ta, Ti, Tr, Tl, Tp, Ts, Td;
Chris@82 609 Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@82 610 Tq = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@82 611 {
Chris@82 612 V T1, T2, T8, T4, T5, T9;
Chris@82 613 T1 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@82 614 T2 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0]));
Chris@82 615 T8 = VADD(T1, T2);
Chris@82 616 T4 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
Chris@82 617 T5 = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)]));
Chris@82 618 T9 = VADD(T4, T5);
Chris@82 619 T3 = VSUB(T1, T2);
Chris@82 620 Tc = VADD(T8, T9);
Chris@82 621 T6 = VSUB(T4, T5);
Chris@82 622 Ta = VMUL(LDK(KP559016994), VSUB(T8, T9));
Chris@82 623 }
Chris@82 624 {
Chris@82 625 V Tg, Th, Tn, Tj, Tk, To;
Chris@82 626 Tg = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@82 627 Th = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)]));
Chris@82 628 Tn = VADD(Tg, Th);
Chris@82 629 Tj = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@82 630 Tk = LD(&(xi[WS(is, 16)]), ivs, &(xi[0]));
Chris@82 631 To = VADD(Tj, Tk);
Chris@82 632 Ti = VSUB(Tg, Th);
Chris@82 633 Tr = VADD(Tn, To);
Chris@82 634 Tl = VSUB(Tj, Tk);
Chris@82 635 Tp = VMUL(LDK(KP559016994), VSUB(Tn, To));
Chris@82 636 }
Chris@82 637 T2M = VADD(Tq, Tr);
Chris@82 638 T2N = VADD(Tb, Tc);
Chris@82 639 T2T = VADD(T2M, T2N);
Chris@82 640 Tm = VFMA(LDK(KP475528258), Ti, VMUL(LDK(KP293892626), Tl));
Chris@82 641 T1W = VFNMS(LDK(KP475528258), Tl, VMUL(LDK(KP293892626), Ti));
Chris@82 642 Ts = VFNMS(LDK(KP250000000), Tr, Tq);
Chris@82 643 Tt = VADD(Tp, Ts);
Chris@82 644 T1X = VSUB(Ts, Tp);
Chris@82 645 Tu = VFMA(LDK(KP1_937166322), Tm, VMUL(LDK(KP248689887), Tt));
Chris@82 646 T20 = VFNMS(LDK(KP963507348), T1W, VMUL(LDK(KP876306680), T1X));
Chris@82 647 Tw = VFNMS(LDK(KP497379774), Tm, VMUL(LDK(KP968583161), Tt));
Chris@82 648 T1Y = VFMA(LDK(KP1_752613360), T1W, VMUL(LDK(KP481753674), T1X));
Chris@82 649 T7 = VFMA(LDK(KP475528258), T3, VMUL(LDK(KP293892626), T6));
Chris@82 650 T1U = VFNMS(LDK(KP475528258), T6, VMUL(LDK(KP293892626), T3));
Chris@82 651 Td = VFNMS(LDK(KP250000000), Tc, Tb);
Chris@82 652 Te = VADD(Ta, Td);
Chris@82 653 T1T = VSUB(Td, Ta);
Chris@82 654 Tf = VFMA(LDK(KP1_071653589), T7, VMUL(LDK(KP844327925), Te));
Chris@82 655 T21 = VFMA(LDK(KP1_809654104), T1U, VMUL(LDK(KP425779291), T1T));
Chris@82 656 Tx = VFNMS(LDK(KP1_688655851), T7, VMUL(LDK(KP535826794), Te));
Chris@82 657 T1V = VFNMS(LDK(KP851558583), T1U, VMUL(LDK(KP904827052), T1T));
Chris@82 658 }
Chris@82 659 {
Chris@82 660 V T2V, T2X, T2Y, T2S, T30, T2O, T2R, T31, T2Z;
Chris@82 661 T2V = VMUL(LDK(KP559016994), VSUB(T2T, T2U));
Chris@82 662 T2X = VADD(T2T, T2U);
Chris@82 663 T2Y = VFNMS(LDK(KP250000000), T2X, T2W);
Chris@82 664 T2O = VSUB(T2M, T2N);
Chris@82 665 T2R = VSUB(T2P, T2Q);
Chris@82 666 T2S = VBYI(VFMA(LDK(KP951056516), T2O, VMUL(LDK(KP587785252), T2R)));
Chris@82 667 T30 = VBYI(VFNMS(LDK(KP951056516), T2R, VMUL(LDK(KP587785252), T2O)));
Chris@82 668 ST(&(xo[0]), VADD(T2W, T2X), ovs, &(xo[0]));
Chris@82 669 T31 = VSUB(T2Y, T2V);
Chris@82 670 ST(&(xo[WS(os, 10)]), VADD(T30, T31), ovs, &(xo[0]));
Chris@82 671 ST(&(xo[WS(os, 15)]), VSUB(T31, T30), ovs, &(xo[WS(os, 1)]));
Chris@82 672 T2Z = VADD(T2V, T2Y);
Chris@82 673 ST(&(xo[WS(os, 5)]), VADD(T2S, T2Z), ovs, &(xo[WS(os, 1)]));
Chris@82 674 ST(&(xo[WS(os, 20)]), VSUB(T2Z, T2S), ovs, &(xo[0]));
Chris@82 675 }
Chris@82 676 {
Chris@82 677 V T1Z, T2i, T2j, T2g, T2w, T2x, T2y, T2G, T2H, T2I, T2D, T2E, T2F, T2z, T2A;
Chris@82 678 V T2B, T2p, T2m, T2q, T2b, T2c, T2a, T2d, T2h, T2r;
Chris@82 679 T1Z = VSUB(T1V, T1Y);
Chris@82 680 T2i = VADD(T20, T21);
Chris@82 681 T2j = VSUB(T25, T28);
Chris@82 682 T2g = VSUB(T2e, T2f);
Chris@82 683 T2w = VFMA(LDK(KP1_369094211), T1W, VMUL(LDK(KP728968627), T1X));
Chris@82 684 T2x = VFNMS(LDK(KP992114701), T1T, VMUL(LDK(KP250666467), T1U));
Chris@82 685 T2y = VADD(T2w, T2x);
Chris@82 686 T2G = VFNMS(LDK(KP125581039), T24, VMUL(LDK(KP998026728), T23));
Chris@82 687 T2H = VFMA(LDK(KP1_274847979), T27, VMUL(LDK(KP770513242), T26));
Chris@82 688 T2I = VADD(T2G, T2H);
Chris@82 689 T2D = VFNMS(LDK(KP1_457937254), T1W, VMUL(LDK(KP684547105), T1X));
Chris@82 690 T2E = VFMA(LDK(KP1_984229402), T1U, VMUL(LDK(KP125333233), T1T));
Chris@82 691 T2F = VADD(T2D, T2E);
Chris@82 692 T2z = VFMA(LDK(KP1_996053456), T24, VMUL(LDK(KP062790519), T23));
Chris@82 693 T2A = VFNMS(LDK(KP637423989), T26, VMUL(LDK(KP1_541026485), T27));
Chris@82 694 T2B = VADD(T2z, T2A);
Chris@82 695 {
Chris@82 696 V T2k, T2l, T22, T29;
Chris@82 697 T2k = VADD(T1Y, T1V);
Chris@82 698 T2l = VADD(T2e, T2f);
Chris@82 699 T2p = VADD(T2k, T2l);
Chris@82 700 T2m = VMUL(LDK(KP559016994), VSUB(T2k, T2l));
Chris@82 701 T2q = VFNMS(LDK(KP250000000), T2p, T2o);
Chris@82 702 T2b = VSUB(T1e, T1b);
Chris@82 703 T22 = VSUB(T20, T21);
Chris@82 704 T29 = VADD(T25, T28);
Chris@82 705 T2c = VADD(T22, T29);
Chris@82 706 T2a = VMUL(LDK(KP559016994), VSUB(T22, T29));
Chris@82 707 T2d = VFNMS(LDK(KP250000000), T2c, T2b);
Chris@82 708 }
Chris@82 709 {
Chris@82 710 V T2u, T2v, T2C, T2J;
Chris@82 711 T2u = VADD(T2b, T2c);
Chris@82 712 T2v = VBYI(VADD(T2o, T2p));
Chris@82 713 ST(&(xo[WS(os, 23)]), VSUB(T2u, T2v), ovs, &(xo[WS(os, 1)]));
Chris@82 714 ST(&(xo[WS(os, 2)]), VADD(T2u, T2v), ovs, &(xo[0]));
Chris@82 715 T2C = VADD(T2b, VADD(T2y, T2B));
Chris@82 716 T2J = VBYI(VSUB(VADD(T2F, T2I), T2o));
Chris@82 717 ST(&(xo[WS(os, 22)]), VSUB(T2C, T2J), ovs, &(xo[0]));
Chris@82 718 ST(&(xo[WS(os, 3)]), VADD(T2C, T2J), ovs, &(xo[WS(os, 1)]));
Chris@82 719 }
Chris@82 720 T2h = VFMA(LDK(KP951056516), T1Z, VADD(T2a, VFNMS(LDK(KP587785252), T2g, T2d)));
Chris@82 721 T2r = VBYI(VADD(VFMA(LDK(KP951056516), T2i, VMUL(LDK(KP587785252), T2j)), VADD(T2m, T2q)));
Chris@82 722 ST(&(xo[WS(os, 18)]), VSUB(T2h, T2r), ovs, &(xo[0]));
Chris@82 723 ST(&(xo[WS(os, 7)]), VADD(T2h, T2r), ovs, &(xo[WS(os, 1)]));
Chris@82 724 {
Chris@82 725 V T2s, T2t, T2K, T2L;
Chris@82 726 T2s = VFMA(LDK(KP587785252), T1Z, VFMA(LDK(KP951056516), T2g, VSUB(T2d, T2a)));
Chris@82 727 T2t = VBYI(VADD(VFNMS(LDK(KP951056516), T2j, VMUL(LDK(KP587785252), T2i)), VSUB(T2q, T2m)));
Chris@82 728 ST(&(xo[WS(os, 13)]), VSUB(T2s, T2t), ovs, &(xo[WS(os, 1)]));
Chris@82 729 ST(&(xo[WS(os, 12)]), VADD(T2s, T2t), ovs, &(xo[0]));
Chris@82 730 T2K = VBYI(VSUB(VFMA(LDK(KP951056516), VSUB(T2w, T2x), VFMA(LDK(KP309016994), T2F, VFNMS(LDK(KP809016994), T2I, VMUL(LDK(KP587785252), VSUB(T2z, T2A))))), T2o));
Chris@82 731 T2L = VFMA(LDK(KP309016994), T2y, VFMA(LDK(KP951056516), VSUB(T2E, T2D), VFMA(LDK(KP587785252), VSUB(T2H, T2G), VFNMS(LDK(KP809016994), T2B, T2b))));
Chris@82 732 ST(&(xo[WS(os, 8)]), VADD(T2K, T2L), ovs, &(xo[0]));
Chris@82 733 ST(&(xo[WS(os, 17)]), VSUB(T2L, T2K), ovs, &(xo[WS(os, 1)]));
Chris@82 734 }
Chris@82 735 }
Chris@82 736 {
Chris@82 737 V Tv, T1m, T1n, T1k, T1D, T1E, T1F, T1N, T1O, T1P, T1K, T1L, T1M, T1G, T1H;
Chris@82 738 V T1I, T1w, T1q, T1x, T1f, T1g, T14, T1h, T1l, T1y;
Chris@82 739 Tv = VSUB(Tf, Tu);
Chris@82 740 T1m = VSUB(Tw, Tx);
Chris@82 741 T1n = VSUB(TN, T12);
Chris@82 742 T1k = VSUB(T1i, T1j);
Chris@82 743 T1D = VFMA(LDK(KP1_688655851), Tm, VMUL(LDK(KP535826794), Tt));
Chris@82 744 T1E = VFMA(LDK(KP1_541026485), T7, VMUL(LDK(KP637423989), Te));
Chris@82 745 T1F = VSUB(T1D, T1E);
Chris@82 746 T1N = VFMA(LDK(KP851558583), TM, VMUL(LDK(KP904827052), TJ));
Chris@82 747 T1O = VFMA(LDK(KP1_984229402), T11, VMUL(LDK(KP125333233), TY));
Chris@82 748 T1P = VADD(T1N, T1O);
Chris@82 749 T1K = VFNMS(LDK(KP1_071653589), Tm, VMUL(LDK(KP844327925), Tt));
Chris@82 750 T1L = VFNMS(LDK(KP770513242), Te, VMUL(LDK(KP1_274847979), T7));
Chris@82 751 T1M = VADD(T1K, T1L);
Chris@82 752 T1G = VFNMS(LDK(KP425779291), TJ, VMUL(LDK(KP1_809654104), TM));
Chris@82 753 T1H = VFNMS(LDK(KP992114701), TY, VMUL(LDK(KP250666467), T11));
Chris@82 754 T1I = VADD(T1G, T1H);
Chris@82 755 {
Chris@82 756 V T1o, T1p, Ty, T13;
Chris@82 757 T1o = VADD(Tu, Tf);
Chris@82 758 T1p = VADD(T1i, T1j);
Chris@82 759 T1w = VADD(T1o, T1p);
Chris@82 760 T1q = VMUL(LDK(KP559016994), VSUB(T1o, T1p));
Chris@82 761 T1x = VFNMS(LDK(KP250000000), T1w, T1v);
Chris@82 762 T1f = VADD(T1b, T1e);
Chris@82 763 Ty = VADD(Tw, Tx);
Chris@82 764 T13 = VADD(TN, T12);
Chris@82 765 T1g = VADD(Ty, T13);
Chris@82 766 T14 = VMUL(LDK(KP559016994), VSUB(Ty, T13));
Chris@82 767 T1h = VFNMS(LDK(KP250000000), T1g, T1f);
Chris@82 768 }
Chris@82 769 {
Chris@82 770 V T1B, T1C, T1J, T1Q;
Chris@82 771 T1B = VADD(T1f, T1g);
Chris@82 772 T1C = VBYI(VADD(T1v, T1w));
Chris@82 773 ST(&(xo[WS(os, 24)]), VSUB(T1B, T1C), ovs, &(xo[0]));
Chris@82 774 ST(&(xo[WS(os, 1)]), VADD(T1B, T1C), ovs, &(xo[WS(os, 1)]));
Chris@82 775 T1J = VADD(T1f, VADD(T1F, T1I));
Chris@82 776 T1Q = VBYI(VSUB(VADD(T1M, T1P), T1v));
Chris@82 777 ST(&(xo[WS(os, 21)]), VSUB(T1J, T1Q), ovs, &(xo[WS(os, 1)]));
Chris@82 778 ST(&(xo[WS(os, 4)]), VADD(T1J, T1Q), ovs, &(xo[0]));
Chris@82 779 }
Chris@82 780 T1l = VFMA(LDK(KP951056516), Tv, VADD(T14, VFNMS(LDK(KP587785252), T1k, T1h)));
Chris@82 781 T1y = VBYI(VADD(VFMA(LDK(KP951056516), T1m, VMUL(LDK(KP587785252), T1n)), VADD(T1q, T1x)));
Chris@82 782 ST(&(xo[WS(os, 19)]), VSUB(T1l, T1y), ovs, &(xo[WS(os, 1)]));
Chris@82 783 ST(&(xo[WS(os, 6)]), VADD(T1l, T1y), ovs, &(xo[0]));
Chris@82 784 {
Chris@82 785 V T1z, T1A, T1R, T1S;
Chris@82 786 T1z = VFMA(LDK(KP587785252), Tv, VFMA(LDK(KP951056516), T1k, VSUB(T1h, T14)));
Chris@82 787 T1A = VBYI(VADD(VFNMS(LDK(KP951056516), T1n, VMUL(LDK(KP587785252), T1m)), VSUB(T1x, T1q)));
Chris@82 788 ST(&(xo[WS(os, 14)]), VSUB(T1z, T1A), ovs, &(xo[0]));
Chris@82 789 ST(&(xo[WS(os, 11)]), VADD(T1z, T1A), ovs, &(xo[WS(os, 1)]));
Chris@82 790 T1R = VBYI(VSUB(VFMA(LDK(KP309016994), T1M, VFMA(LDK(KP951056516), VADD(T1D, T1E), VFNMS(LDK(KP809016994), T1P, VMUL(LDK(KP587785252), VSUB(T1G, T1H))))), T1v));
Chris@82 791 T1S = VFMA(LDK(KP951056516), VSUB(T1L, T1K), VFMA(LDK(KP309016994), T1F, VFMA(LDK(KP587785252), VSUB(T1O, T1N), VFNMS(LDK(KP809016994), T1I, T1f))));
Chris@82 792 ST(&(xo[WS(os, 9)]), VADD(T1R, T1S), ovs, &(xo[WS(os, 1)]));
Chris@82 793 ST(&(xo[WS(os, 16)]), VSUB(T1S, T1R), ovs, &(xo[0]));
Chris@82 794 }
Chris@82 795 }
Chris@82 796 }
Chris@82 797 }
Chris@82 798 VLEAVE();
Chris@82 799 }
Chris@82 800
/*
 * Descriptor for the n1bv_25 codelet; its address is passed to
 * X(kdft_register) by the registration function in this file.
 *
 * - 25 matches the "-n 25" argument on the genfft command line recorded
 *   at the top of this file.
 * - XSIMD_STRING("n1bv_25") carries the codelet name ("-name n1bv_25").
 * - {147, 63, 77, 0}: presumably the operation counts (additions,
 *   multiplications, fused multiply/adds, other) for this non-FMA
 *   variant -- TODO confirm against the kdft_desc declaration.
 * - &GENUS and the four trailing zeros are defined/interpreted outside
 *   this file (GENUS presumably comes from the included n1b.h header --
 *   verify).
 */
Chris@82 801 static const kdft_desc desc = { 25, XSIMD_STRING("n1bv_25"), {147, 63, 77, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 802
/*
 * Registration entry point for this codelet.
 *
 * Calls X(kdft_register) with the planner `p`, the n1bv_25 kernel
 * function and the address of its descriptor `desc`.
 *
 * p: planner handle, forwarded unchanged to X(kdft_register).
 *
 * XSIMD(...) and X(...) are name-wrapping macros defined elsewhere;
 * presumably they add the SIMD/precision-specific symbol prefix --
 * confirm in the project headers.
 */
Chris@82 803 void XSIMD(codelet_n1bv_25) (planner *p) {
Chris@82 804 X(kdft_register) (p, n1bv_25, &desc);
Chris@82 805 }
Chris@82 806
Chris@82 807 #endif