annotate src/fftw-3.3.8/dft/simd/common/n1fv_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:53 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name n1fv_25 -include dft/simd/n1f.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 224 FP additions, 193 FP multiplications,
Chris@82 32 * (or, 43 additions, 12 multiplications, 181 fused multiply/add),
Chris@82 33 * 140 stack variables, 67 constants, and 50 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/simd/n1f.h"
Chris@82 36
Chris@82 37 static void n1fv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
Chris@82 39 DVK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@82 40 DVK(KP999544308, +0.999544308746292983948881682379742149196758193);
Chris@82 41 DVK(KP792626838, +0.792626838241819413632131824093538848057784557);
Chris@82 42 DVK(KP876091699, +0.876091699473550838204498029706869638173524346);
Chris@82 43 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 44 DVK(KP617882369, +0.617882369114440893914546919006756321695042882);
Chris@82 45 DVK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@82 46 DVK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@82 47 DVK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@82 48 DVK(KP829049696, +0.829049696159252993975487806364305442437946767);
Chris@82 49 DVK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@82 50 DVK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@82 51 DVK(KP916574801, +0.916574801383451584742370439148878693530976769);
Chris@82 52 DVK(KP894834959, +0.894834959464455102997960030820114611498661386);
Chris@82 53 DVK(KP809385824, +0.809385824416008241660603814668679683846476688);
Chris@82 54 DVK(KP447417479, +0.447417479732227551498980015410057305749330693);
Chris@82 55 DVK(KP958953096, +0.958953096729998668045963838399037225970891871);
Chris@82 56 DVK(KP867381224, +0.867381224396525206773171885031575671309956167);
Chris@82 57 DVK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@82 58 DVK(KP897376177, +0.897376177523557693138608077137219684419427330);
Chris@82 59 DVK(KP855719849, +0.855719849902058969314654733608091555096772472);
Chris@82 60 DVK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@82 61 DVK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@82 62 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@82 63 DVK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@82 64 DVK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@82 65 DVK(KP952936919, +0.952936919628306576880750665357914584765951388);
Chris@82 66 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 67 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 68 DVK(KP997675361, +0.997675361079556513670859573984492383596555031);
Chris@82 69 DVK(KP904508497, +0.904508497187473712051146708591409529430077295);
Chris@82 70 DVK(KP237294955, +0.237294955877110315393888866460840817927895961);
Chris@82 71 DVK(KP923225144, +0.923225144846402650453449441572664695995209956);
Chris@82 72 DVK(KP262346850, +0.262346850930607871785420028382979691334784273);
Chris@82 73 DVK(KP669429328, +0.669429328479476605641803240971985825917022098);
Chris@82 74 DVK(KP570584518, +0.570584518783621657366766175430996792655723863);
Chris@82 75 DVK(KP982009705, +0.982009705009746369461829878184175962711969869);
Chris@82 76 DVK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@82 77 DVK(KP921078979, +0.921078979742360627699756128143719920817673854);
Chris@82 78 DVK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@82 79 DVK(KP690983005, +0.690983005625052575897706582817180941139845410);
Chris@82 80 DVK(KP945422727, +0.945422727388575946270360266328811958657216298);
Chris@82 81 DVK(KP763932022, +0.763932022500210303590826331268723764559381640);
Chris@82 82 DVK(KP956723877, +0.956723877038460305821989399535483155872969262);
Chris@82 83 DVK(KP522616830, +0.522616830205754336872861364785224694908468440);
Chris@82 84 DVK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@82 85 DVK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@82 86 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 87 DVK(KP269969613, +0.269969613759572083574752974412347470060951301);
Chris@82 88 DVK(KP244189809, +0.244189809627953270309879511234821255780225091);
Chris@82 89 DVK(KP447533225, +0.447533225982656890041886979663652563063114397);
Chris@82 90 DVK(KP494780565, +0.494780565770515410344588413655324772219443730);
Chris@82 91 DVK(KP578046249, +0.578046249379945007321754579646815604023525655);
Chris@82 92 DVK(KP603558818, +0.603558818296015001454675132653458027918768137);
Chris@82 93 DVK(KP667278218, +0.667278218140296670899089292254759909713898805);
Chris@82 94 DVK(KP522847744, +0.522847744331509716623755382187077770911012542);
Chris@82 95 DVK(KP132830569, +0.132830569247582714407653942074819768844536507);
Chris@82 96 DVK(KP786782374, +0.786782374965295178365099601674911834788448471);
Chris@82 97 DVK(KP869845200, +0.869845200362138853122720822420327157933056305);
Chris@82 98 DVK(KP120146378, +0.120146378570687701782758537356596213647956445);
Chris@82 99 DVK(KP987388751, +0.987388751065621252324603216482382109400433949);
Chris@82 100 DVK(KP059835404, +0.059835404262124915169548397419498386427871950);
Chris@82 101 DVK(KP066152395, +0.066152395967733048213034281011006031460903353);
Chris@82 102 DVK(KP893101515, +0.893101515366181661711202267938416198338079437);
Chris@82 103 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 104 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 105 DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@82 106 {
Chris@82 107 INT i;
Chris@82 108 const R *xi;
Chris@82 109 R *xo;
Chris@82 110 xi = ri;
Chris@82 111 xo = ro;
Chris@82 112 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(50, is), MAKE_VOLATILE_STRIDE(50, os)) {
Chris@82 113 V Ta, T2z, T1q, T9, T3n, T3r, T3s, T3t, T1a, T2N, T2V, T1j, T1J, T2o, T2t;
Chris@82 114 V T1R, TV, T2O, T2W, T1i, T1K, T2l, T2s, T1S, T3o, T3p, T3q, TF, T2R, T2Y;
Chris@82 115 V T1f, T1N, T2e, T2v, T1V, Tq, T2Q, T2Z, T1e, T1M, T2h, T2w, T1U;
Chris@82 116 {
Chris@82 117 V T1, T7, T1p, T4, T1o, T8;
Chris@82 118 T1 = LD(&(xi[0]), ivs, &(xi[0]));
Chris@82 119 {
Chris@82 120 V T5, T6, T2, T3;
Chris@82 121 T5 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@82 122 T6 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)]));
Chris@82 123 T7 = VADD(T5, T6);
Chris@82 124 T1p = VSUB(T5, T6);
Chris@82 125 T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@82 126 T3 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0]));
Chris@82 127 T4 = VADD(T2, T3);
Chris@82 128 T1o = VSUB(T2, T3);
Chris@82 129 }
Chris@82 130 Ta = VSUB(T4, T7);
Chris@82 131 T2z = VFNMS(LDK(KP618033988), T1o, T1p);
Chris@82 132 T1q = VFMA(LDK(KP618033988), T1p, T1o);
Chris@82 133 T8 = VADD(T4, T7);
Chris@82 134 T9 = VFNMS(LDK(KP250000000), T8, T1);
Chris@82 135 T3n = VADD(T1, T8);
Chris@82 136 }
Chris@82 137 {
Chris@82 138 V TH, TW, TO, TT, TQ, TS, T13, T18, T15, T17;
Chris@82 139 TH = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@82 140 TW = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@82 141 {
Chris@82 142 V TI, TJ, TK, TL, TM, TN;
Chris@82 143 TI = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@82 144 TJ = LD(&(xi[WS(is, 22)]), ivs, &(xi[0]));
Chris@82 145 TK = VADD(TI, TJ);
Chris@82 146 TL = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@82 147 TM = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)]));
Chris@82 148 TN = VADD(TL, TM);
Chris@82 149 TO = VADD(TK, TN);
Chris@82 150 TT = VSUB(TM, TL);
Chris@82 151 TQ = VSUB(TN, TK);
Chris@82 152 TS = VSUB(TI, TJ);
Chris@82 153 }
Chris@82 154 {
Chris@82 155 V TX, TY, TZ, T10, T11, T12;
Chris@82 156 TX = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@82 157 TY = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)]));
Chris@82 158 TZ = VADD(TX, TY);
Chris@82 159 T10 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@82 160 T11 = LD(&(xi[WS(is, 18)]), ivs, &(xi[0]));
Chris@82 161 T12 = VADD(T10, T11);
Chris@82 162 T13 = VADD(TZ, T12);
Chris@82 163 T18 = VSUB(T10, T11);
Chris@82 164 T15 = VSUB(T12, TZ);
Chris@82 165 T17 = VSUB(TX, TY);
Chris@82 166 }
Chris@82 167 T3r = VADD(TH, TO);
Chris@82 168 T3s = VADD(TW, T13);
Chris@82 169 T3t = VADD(T3r, T3s);
Chris@82 170 {
Chris@82 171 V T19, T2m, T16, T2n, T14;
Chris@82 172 T19 = VFMA(LDK(KP618033988), T18, T17);
Chris@82 173 T2m = VFNMS(LDK(KP618033988), T17, T18);
Chris@82 174 T14 = VFNMS(LDK(KP250000000), T13, TW);
Chris@82 175 T16 = VFNMS(LDK(KP559016994), T15, T14);
Chris@82 176 T2n = VFMA(LDK(KP559016994), T15, T14);
Chris@82 177 T1a = VFNMS(LDK(KP893101515), T19, T16);
Chris@82 178 T2N = VFMA(LDK(KP066152395), T2n, T2m);
Chris@82 179 T2V = VFNMS(LDK(KP059835404), T2m, T2n);
Chris@82 180 T1j = VFMA(LDK(KP987388751), T16, T19);
Chris@82 181 T1J = VFNMS(LDK(KP120146378), T19, T16);
Chris@82 182 T2o = VFMA(LDK(KP869845200), T2n, T2m);
Chris@82 183 T2t = VFNMS(LDK(KP786782374), T2m, T2n);
Chris@82 184 T1R = VFMA(LDK(KP132830569), T16, T19);
Chris@82 185 }
Chris@82 186 {
Chris@82 187 V TU, T2j, TR, T2k, TP;
Chris@82 188 TU = VFNMS(LDK(KP618033988), TT, TS);
Chris@82 189 T2j = VFMA(LDK(KP618033988), TS, TT);
Chris@82 190 TP = VFNMS(LDK(KP250000000), TO, TH);
Chris@82 191 TR = VFNMS(LDK(KP559016994), TQ, TP);
Chris@82 192 T2k = VFMA(LDK(KP559016994), TQ, TP);
Chris@82 193 TV = VFNMS(LDK(KP522847744), TU, TR);
Chris@82 194 T2O = VFNMS(LDK(KP667278218), T2k, T2j);
Chris@82 195 T2W = VFMA(LDK(KP603558818), T2j, T2k);
Chris@82 196 T1i = VFMA(LDK(KP578046249), TR, TU);
Chris@82 197 T1K = VFNMS(LDK(KP494780565), TR, TU);
Chris@82 198 T2l = VFMA(LDK(KP066152395), T2k, T2j);
Chris@82 199 T2s = VFNMS(LDK(KP059835404), T2j, T2k);
Chris@82 200 T1S = VFMA(LDK(KP447533225), TU, TR);
Chris@82 201 }
Chris@82 202 }
Chris@82 203 {
Chris@82 204 V Tc, Ty, Tj, To, Tl, Tn, Tt, TD, Tw, TB;
Chris@82 205 Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@82 206 Ty = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@82 207 {
Chris@82 208 V Td, Te, Tf, Tg, Th, Ti;
Chris@82 209 Td = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@82 210 Te = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)]));
Chris@82 211 Tf = VADD(Td, Te);
Chris@82 212 Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@82 213 Th = LD(&(xi[WS(is, 16)]), ivs, &(xi[0]));
Chris@82 214 Ti = VADD(Tg, Th);
Chris@82 215 Tj = VADD(Tf, Ti);
Chris@82 216 To = VSUB(Th, Tg);
Chris@82 217 Tl = VSUB(Tf, Ti);
Chris@82 218 Tn = VSUB(Td, Te);
Chris@82 219 }
Chris@82 220 {
Chris@82 221 V Tr, Ts, Tz, Tu, Tv, TA;
Chris@82 222 Tr = LD(&(xi[WS(is, 24)]), ivs, &(xi[0]));
Chris@82 223 Ts = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@82 224 Tz = VADD(Ts, Tr);
Chris@82 225 Tu = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)]));
Chris@82 226 Tv = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
Chris@82 227 TA = VADD(Tv, Tu);
Chris@82 228 Tt = VSUB(Tr, Ts);
Chris@82 229 TD = VSUB(Tz, TA);
Chris@82 230 Tw = VSUB(Tu, Tv);
Chris@82 231 TB = VADD(Tz, TA);
Chris@82 232 }
Chris@82 233 T3o = VADD(Tc, Tj);
Chris@82 234 T3p = VADD(Ty, TB);
Chris@82 235 T3q = VADD(T3o, T3p);
Chris@82 236 {
Chris@82 237 V Tx, T2d, TE, T2c, TC;
Chris@82 238 Tx = VFMA(LDK(KP618033988), Tw, Tt);
Chris@82 239 T2d = VFNMS(LDK(KP618033988), Tt, Tw);
Chris@82 240 TC = VFMS(LDK(KP250000000), TB, Ty);
Chris@82 241 TE = VFNMS(LDK(KP559016994), TD, TC);
Chris@82 242 T2c = VFMA(LDK(KP559016994), TD, TC);
Chris@82 243 TF = VFNMS(LDK(KP667278218), TE, Tx);
Chris@82 244 T2R = VFNMS(LDK(KP494780565), T2c, T2d);
Chris@82 245 T2Y = VFMA(LDK(KP447533225), T2d, T2c);
Chris@82 246 T1f = VFMA(LDK(KP603558818), Tx, TE);
Chris@82 247 T1N = VFMA(LDK(KP869845200), TE, Tx);
Chris@82 248 T2e = VFMA(LDK(KP120146378), T2d, T2c);
Chris@82 249 T2v = VFNMS(LDK(KP132830569), T2c, T2d);
Chris@82 250 T1V = VFNMS(LDK(KP786782374), Tx, TE);
Chris@82 251 }
Chris@82 252 {
Chris@82 253 V Tp, T2g, Tm, T2f, Tk;
Chris@82 254 Tp = VFNMS(LDK(KP618033988), To, Tn);
Chris@82 255 T2g = VFMA(LDK(KP618033988), Tn, To);
Chris@82 256 Tk = VFNMS(LDK(KP250000000), Tj, Tc);
Chris@82 257 Tm = VFMA(LDK(KP559016994), Tl, Tk);
Chris@82 258 T2f = VFNMS(LDK(KP559016994), Tl, Tk);
Chris@82 259 Tq = VFNMS(LDK(KP244189809), Tp, Tm);
Chris@82 260 T2Q = VFNMS(LDK(KP522847744), T2g, T2f);
Chris@82 261 T2Z = VFMA(LDK(KP578046249), T2f, T2g);
Chris@82 262 T1e = VFMA(LDK(KP269969613), Tm, Tp);
Chris@82 263 T1M = VFMA(LDK(KP667278218), Tm, Tp);
Chris@82 264 T2h = VFMA(LDK(KP893101515), T2g, T2f);
Chris@82 265 T2w = VFNMS(LDK(KP987388751), T2f, T2g);
Chris@82 266 T1U = VFNMS(LDK(KP603558818), Tp, Tm);
Chris@82 267 }
Chris@82 268 }
Chris@82 269 {
Chris@82 270 V T3w, T3u, T3v, T3A, T3C, T3y, T3z, T3B, T3x;
Chris@82 271 T3w = VSUB(T3q, T3t);
Chris@82 272 T3u = VADD(T3q, T3t);
Chris@82 273 T3v = VFNMS(LDK(KP250000000), T3u, T3n);
Chris@82 274 T3y = VSUB(T3o, T3p);
Chris@82 275 T3z = VSUB(T3r, T3s);
Chris@82 276 T3A = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3z, T3y));
Chris@82 277 T3C = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3y, T3z));
Chris@82 278 ST(&(xo[0]), VADD(T3u, T3n), ovs, &(xo[0]));
Chris@82 279 T3B = VFNMS(LDK(KP559016994), T3w, T3v);
Chris@82 280 ST(&(xo[WS(os, 10)]), VFMAI(T3C, T3B), ovs, &(xo[0]));
Chris@82 281 ST(&(xo[WS(os, 15)]), VFNMSI(T3C, T3B), ovs, &(xo[WS(os, 1)]));
Chris@82 282 T3x = VFMA(LDK(KP559016994), T3w, T3v);
Chris@82 283 ST(&(xo[WS(os, 5)]), VFNMSI(T3A, T3x), ovs, &(xo[WS(os, 1)]));
Chris@82 284 ST(&(xo[WS(os, 20)]), VFMAI(T3A, T3x), ovs, &(xo[0]));
Chris@82 285 }
Chris@82 286 {
Chris@82 287 V T2B, T2H, T2q, T2E, T2y, T2K, T31, T3a, T3l, T3f, T2b, T35, T34, T2T, T33;
Chris@82 288 V T3h, T37;
Chris@82 289 T2B = VFMA(LDK(KP734762448), T2w, T2v);
Chris@82 290 T2H = VFNMS(LDK(KP734762448), T2h, T2e);
Chris@82 291 {
Chris@82 292 V T2p, T2i, T2D, T2C;
Chris@82 293 T2p = VFNMS(LDK(KP772036680), T2o, T2l);
Chris@82 294 T2i = VFMA(LDK(KP734762448), T2h, T2e);
Chris@82 295 T2C = VFNMS(LDK(KP772036680), T2t, T2s);
Chris@82 296 T2D = VFNMS(LDK(KP522616830), T2i, T2C);
Chris@82 297 T2q = VFMA(LDK(KP956723877), T2p, T2i);
Chris@82 298 T2E = VFMA(LDK(KP763932022), T2D, T2p);
Chris@82 299 }
Chris@82 300 {
Chris@82 301 V T2u, T2x, T2J, T2I;
Chris@82 302 T2u = VFMA(LDK(KP772036680), T2t, T2s);
Chris@82 303 T2x = VFNMS(LDK(KP734762448), T2w, T2v);
Chris@82 304 T2I = VFMA(LDK(KP772036680), T2o, T2l);
Chris@82 305 T2J = VFMA(LDK(KP522616830), T2x, T2I);
Chris@82 306 T2y = VFMA(LDK(KP945422727), T2x, T2u);
Chris@82 307 T2K = VFNMS(LDK(KP690983005), T2J, T2u);
Chris@82 308 }
Chris@82 309 {
Chris@82 310 V T3e, T3d, T3k, T36, T2P, T2S;
Chris@82 311 {
Chris@82 312 V T2X, T30, T3b, T3c;
Chris@82 313 T2X = VFMA(LDK(KP845997307), T2W, T2V);
Chris@82 314 T30 = VFNMS(LDK(KP921078979), T2Z, T2Y);
Chris@82 315 T31 = VFNMS(LDK(KP906616052), T30, T2X);
Chris@82 316 T3e = VFMA(LDK(KP906616052), T30, T2X);
Chris@82 317 T3b = VFMA(LDK(KP845997307), T2O, T2N);
Chris@82 318 T3c = VFMA(LDK(KP982009705), T2R, T2Q);
Chris@82 319 T3d = VFMA(LDK(KP570584518), T3c, T3b);
Chris@82 320 T3k = VFNMS(LDK(KP669429328), T3b, T3c);
Chris@82 321 }
Chris@82 322 T3a = VFMA(LDK(KP262346850), T31, T2z);
Chris@82 323 T3l = VFNMS(LDK(KP669429328), T3e, T3k);
Chris@82 324 T3f = VFMA(LDK(KP618033988), T3e, T3d);
Chris@82 325 T2b = VFNMS(LDK(KP559016994), Ta, T9);
Chris@82 326 T35 = VFMA(LDK(KP921078979), T2Z, T2Y);
Chris@82 327 T34 = VFNMS(LDK(KP845997307), T2W, T2V);
Chris@82 328 T2P = VFNMS(LDK(KP845997307), T2O, T2N);
Chris@82 329 T2S = VFNMS(LDK(KP982009705), T2R, T2Q);
Chris@82 330 T2T = VFMA(LDK(KP923225144), T2S, T2P);
Chris@82 331 T36 = VFNMS(LDK(KP923225144), T2S, T2P);
Chris@82 332 T33 = VFNMS(LDK(KP237294955), T2T, T2b);
Chris@82 333 T3h = VFNMS(LDK(KP904508497), T36, T34);
Chris@82 334 T37 = VFNMS(LDK(KP997675361), T36, T35);
Chris@82 335 }
Chris@82 336 {
Chris@82 337 V T2r, T2A, T2U, T32;
Chris@82 338 T2r = VFMA(LDK(KP992114701), T2q, T2b);
Chris@82 339 T2A = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2z, T2y));
Chris@82 340 ST(&(xo[WS(os, 3)]), VFNMSI(T2A, T2r), ovs, &(xo[WS(os, 1)]));
Chris@82 341 ST(&(xo[WS(os, 22)]), VFMAI(T2A, T2r), ovs, &(xo[0]));
Chris@82 342 T2U = VFMA(LDK(KP949179823), T2T, T2b);
Chris@82 343 T32 = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2z, T31));
Chris@82 344 ST(&(xo[WS(os, 2)]), VFNMSI(T32, T2U), ovs, &(xo[0]));
Chris@82 345 ST(&(xo[WS(os, 23)]), VFMAI(T32, T2U), ovs, &(xo[WS(os, 1)]));
Chris@82 346 }
Chris@82 347 {
Chris@82 348 V T3g, T39, T38, T3m, T3j, T3i;
Chris@82 349 T3g = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3f, T3a));
Chris@82 350 T38 = VFMA(LDK(KP560319534), T37, T34);
Chris@82 351 T39 = VFNMS(LDK(KP949179823), T38, T33);
Chris@82 352 ST(&(xo[WS(os, 13)]), VFNMSI(T3g, T39), ovs, &(xo[WS(os, 1)]));
Chris@82 353 ST(&(xo[WS(os, 12)]), VFMAI(T3g, T39), ovs, &(xo[0]));
Chris@82 354 T3m = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3l, T3a));
Chris@82 355 T3i = VFNMS(LDK(KP681693190), T3h, T35);
Chris@82 356 T3j = VFNMS(LDK(KP860541664), T3i, T33);
Chris@82 357 ST(&(xo[WS(os, 18)]), VFNMSI(T3m, T3j), ovs, &(xo[0]));
Chris@82 358 ST(&(xo[WS(os, 7)]), VFMAI(T3m, T3j), ovs, &(xo[WS(os, 1)]));
Chris@82 359 {
Chris@82 360 V T2G, T2M, T2F, T2L;
Chris@82 361 T2F = VFNMS(LDK(KP855719849), T2E, T2B);
Chris@82 362 T2G = VFMA(LDK(KP897376177), T2F, T2b);
Chris@82 363 T2L = VFMA(LDK(KP855719849), T2K, T2H);
Chris@82 364 T2M = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2L, T2z));
Chris@82 365 ST(&(xo[WS(os, 8)]), VFNMSI(T2M, T2G), ovs, &(xo[0]));
Chris@82 366 ST(&(xo[WS(os, 17)]), VFMAI(T2M, T2G), ovs, &(xo[WS(os, 1)]));
Chris@82 367 }
Chris@82 368 }
Chris@82 369 }
Chris@82 370 {
Chris@82 371 V T1Z, T25, T1P, T22, T1X, T28, T1t, T1u, T1F, T1z, Tb, T1k, T1g, T1c, T1d;
Chris@82 372 V T1B, T1l;
Chris@82 373 T1Z = VFNMS(LDK(KP912575812), T1V, T1U);
Chris@82 374 T25 = VFNMS(LDK(KP912575812), T1N, T1M);
Chris@82 375 {
Chris@82 376 V T1L, T1O, T21, T20;
Chris@82 377 T1L = VFNMS(LDK(KP867381224), T1K, T1J);
Chris@82 378 T1O = VFMA(LDK(KP912575812), T1N, T1M);
Chris@82 379 T20 = VFNMS(LDK(KP958953096), T1S, T1R);
Chris@82 380 T21 = VFMA(LDK(KP447417479), T1O, T20);
Chris@82 381 T1P = VFNMS(LDK(KP809385824), T1O, T1L);
Chris@82 382 T22 = VFMA(LDK(KP690983005), T21, T1L);
Chris@82 383 }
Chris@82 384 {
Chris@82 385 V T1T, T1W, T27, T26;
Chris@82 386 T1T = VFMA(LDK(KP958953096), T1S, T1R);
Chris@82 387 T1W = VFMA(LDK(KP912575812), T1V, T1U);
Chris@82 388 T26 = VFMA(LDK(KP867381224), T1K, T1J);
Chris@82 389 T27 = VFMA(LDK(KP447417479), T1W, T26);
Chris@82 390 T1X = VFMA(LDK(KP894834959), T1W, T1T);
Chris@82 391 T28 = VFNMS(LDK(KP763932022), T27, T1T);
Chris@82 392 }
Chris@82 393 {
Chris@82 394 V T1y, T1x, T1E, T1h, TG, T1b;
Chris@82 395 {
Chris@82 396 V T1r, T1s, T1v, T1w;
Chris@82 397 T1r = VFNMS(LDK(KP916574801), T1f, T1e);
Chris@82 398 T1s = VFMA(LDK(KP831864738), T1j, T1i);
Chris@82 399 T1t = VFMA(LDK(KP904730450), T1s, T1r);
Chris@82 400 T1y = VFNMS(LDK(KP904730450), T1s, T1r);
Chris@82 401 T1v = VFNMS(LDK(KP829049696), TF, Tq);
Chris@82 402 T1w = VFNMS(LDK(KP831864738), T1a, TV);
Chris@82 403 T1x = VFMA(LDK(KP559154169), T1w, T1v);
Chris@82 404 T1E = VFNMS(LDK(KP683113946), T1v, T1w);
Chris@82 405 }
Chris@82 406 T1u = VFNMS(LDK(KP242145790), T1t, T1q);
Chris@82 407 T1F = VFMA(LDK(KP617882369), T1y, T1E);
Chris@82 408 T1z = VFMA(LDK(KP559016994), T1y, T1x);
Chris@82 409 Tb = VFMA(LDK(KP559016994), Ta, T9);
Chris@82 410 T1k = VFNMS(LDK(KP831864738), T1j, T1i);
Chris@82 411 T1g = VFMA(LDK(KP916574801), T1f, T1e);
Chris@82 412 TG = VFMA(LDK(KP829049696), TF, Tq);
Chris@82 413 T1b = VFMA(LDK(KP831864738), T1a, TV);
Chris@82 414 T1c = VFMA(LDK(KP904730450), T1b, TG);
Chris@82 415 T1h = VFNMS(LDK(KP904730450), T1b, TG);
Chris@82 416 T1d = VFNMS(LDK(KP242145790), T1c, Tb);
Chris@82 417 T1B = VADD(T1g, T1h);
Chris@82 418 T1l = VFNMS(LDK(KP904730450), T1k, T1h);
Chris@82 419 }
Chris@82 420 {
Chris@82 421 V T1H, T1I, T1Q, T1Y;
Chris@82 422 T1H = VFMA(LDK(KP968583161), T1c, Tb);
Chris@82 423 T1I = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1t, T1q));
Chris@82 424 ST(&(xo[WS(os, 1)]), VFNMSI(T1I, T1H), ovs, &(xo[WS(os, 1)]));
Chris@82 425 ST(&(xo[WS(os, 24)]), VFMAI(T1I, T1H), ovs, &(xo[0]));
Chris@82 426 T1Q = VFNMS(LDK(KP992114701), T1P, Tb);
Chris@82 427 T1Y = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T1X, T1q));
Chris@82 428 ST(&(xo[WS(os, 4)]), VFMAI(T1Y, T1Q), ovs, &(xo[0]));
Chris@82 429 ST(&(xo[WS(os, 21)]), VFNMSI(T1Y, T1Q), ovs, &(xo[WS(os, 1)]));
Chris@82 430 }
Chris@82 431 {
Chris@82 432 V T1A, T1n, T1m, T1G, T1D, T1C;
Chris@82 433 T1A = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1z, T1u));
Chris@82 434 T1m = VFNMS(LDK(KP618033988), T1l, T1g);
Chris@82 435 T1n = VFNMS(LDK(KP876091699), T1m, T1d);
Chris@82 436 ST(&(xo[WS(os, 6)]), VFNMSI(T1A, T1n), ovs, &(xo[0]));
Chris@82 437 ST(&(xo[WS(os, 19)]), VFMAI(T1A, T1n), ovs, &(xo[WS(os, 1)]));
Chris@82 438 T1G = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T1F, T1u));
Chris@82 439 T1C = VFNMS(LDK(KP683113946), T1B, T1k);
Chris@82 440 T1D = VFMA(LDK(KP792626838), T1C, T1d);
Chris@82 441 ST(&(xo[WS(os, 11)]), VFNMSI(T1G, T1D), ovs, &(xo[WS(os, 1)]));
Chris@82 442 ST(&(xo[WS(os, 14)]), VFMAI(T1G, T1D), ovs, &(xo[0]));
Chris@82 443 {
Chris@82 444 V T24, T2a, T23, T29;
Chris@82 445 T23 = VFNMS(LDK(KP999544308), T22, T1Z);
Chris@82 446 T24 = VFNMS(LDK(KP803003575), T23, Tb);
Chris@82 447 T29 = VFNMS(LDK(KP999544308), T28, T25);
Chris@82 448 T2a = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T29, T1q));
Chris@82 449 ST(&(xo[WS(os, 16)]), VFNMSI(T2a, T24), ovs, &(xo[0]));
Chris@82 450 ST(&(xo[WS(os, 9)]), VFMAI(T2a, T24), ovs, &(xo[WS(os, 1)]));
Chris@82 451 }
Chris@82 452 }
Chris@82 453 }
Chris@82 454 }
Chris@82 455 }
Chris@82 456 VLEAVE();
Chris@82 457 }
Chris@82 458
Chris@82 459 static const kdft_desc desc = { 25, XSIMD_STRING("n1fv_25"), {43, 12, 181, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 460
Chris@82 461 void XSIMD(codelet_n1fv_25) (planner *p) {
Chris@82 462 X(kdft_register) (p, n1fv_25, &desc);
Chris@82 463 }
Chris@82 464
Chris@82 465 #else
Chris@82 466
Chris@82 467 /* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name n1fv_25 -include dft/simd/n1f.h */
Chris@82 468
Chris@82 469 /*
Chris@82 470 * This function contains 224 FP additions, 140 FP multiplications,
Chris@82 471 * (or, 146 additions, 62 multiplications, 78 fused multiply/add),
Chris@82 472 * 115 stack variables, 40 constants, and 50 memory accesses
Chris@82 473 */
Chris@82 474 #include "dft/simd/n1f.h"
Chris@82 475
Chris@82 476 static void n1fv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 477 {
Chris@82 478 DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
Chris@82 479 DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
Chris@82 480 DVK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@82 481 DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
Chris@82 482 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@82 483 DVK(KP250666467, +0.250666467128608490746237519633017587885836494);
Chris@82 484 DVK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@82 485 DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
Chris@82 486 DVK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@82 487 DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
Chris@82 488 DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@82 489 DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
Chris@82 490 DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
Chris@82 491 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@82 492 DVK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@82 493 DVK(KP851558583, +0.851558583130145297725004891488503407959946084);
Chris@82 494 DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@82 495 DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
Chris@82 496 DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@82 497 DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
Chris@82 498 DVK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@82 499 DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
Chris@82 500 DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@82 501 DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
Chris@82 502 DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
Chris@82 503 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@82 504 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@82 505 DVK(KP125581039, +0.125581039058626752152356449131262266244969664);
Chris@82 506 DVK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@82 507 DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
Chris@82 508 DVK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@82 509 DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
Chris@82 510 DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
Chris@82 511 DVK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@82 512 DVK(KP293892626, +0.293892626146236564584352977319536384298826219);
Chris@82 513 DVK(KP475528258, +0.475528258147576786058219666689691071702849317);
Chris@82 514 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 515 DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@82 516 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@82 517 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@82 518 {
Chris@82 519 INT i;
Chris@82 520 const R *xi;
Chris@82 521 R *xo;
Chris@82 522 xi = ri;
Chris@82 523 xo = ro;
Chris@82 524 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(50, is), MAKE_VOLATILE_STRIDE(50, os)) {
Chris@82 525 V T7, T1g, T26, Ta, T2R, T2N, T2O, T2P, T19, T1Y, T16, T1Z, T1a, T2v, T1l;
Chris@82 526 V T2m, TU, T21, TR, T22, TV, T2u, T1k, T2l, T2K, T2L, T2M, TE, T1R, TB;
Chris@82 527 V T1S, TF, T2r, T1i, T2j, Tp, T1U, Tm, T1V, Tq, T2s, T1h, T2i;
Chris@82 528 {
Chris@82 529 V T8, T6, T1f, T3, T1e, T25, T9;
Chris@82 530 T8 = LD(&(xi[0]), ivs, &(xi[0]));
Chris@82 531 {
Chris@82 532 V T4, T5, T1, T2;
Chris@82 533 T4 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@82 534 T5 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)]));
Chris@82 535 T6 = VADD(T4, T5);
Chris@82 536 T1f = VSUB(T4, T5);
Chris@82 537 T1 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@82 538 T2 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0]));
Chris@82 539 T3 = VADD(T1, T2);
Chris@82 540 T1e = VSUB(T1, T2);
Chris@82 541 }
Chris@82 542 T7 = VMUL(LDK(KP559016994), VSUB(T3, T6));
Chris@82 543 T1g = VFMA(LDK(KP951056516), T1e, VMUL(LDK(KP587785252), T1f));
Chris@82 544 T25 = VMUL(LDK(KP951056516), T1f);
Chris@82 545 T26 = VFNMS(LDK(KP587785252), T1e, T25);
Chris@82 546 T9 = VADD(T3, T6);
Chris@82 547 Ta = VFNMS(LDK(KP250000000), T9, T8);
Chris@82 548 T2R = VADD(T8, T9);
Chris@82 549 }
Chris@82 550 {
Chris@82 551 V TO, T13, TN, TT, TP, TS, T12, T18, T14, T17, T15, TQ;
Chris@82 552 TO = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@82 553 T13 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@82 554 {
Chris@82 555 V TH, TI, TJ, TK, TL, TM;
Chris@82 556 TH = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@82 557 TI = LD(&(xi[WS(is, 22)]), ivs, &(xi[0]));
Chris@82 558 TJ = VADD(TH, TI);
Chris@82 559 TK = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@82 560 TL = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)]));
Chris@82 561 TM = VADD(TK, TL);
Chris@82 562 TN = VMUL(LDK(KP559016994), VSUB(TJ, TM));
Chris@82 563 TT = VSUB(TK, TL);
Chris@82 564 TP = VADD(TJ, TM);
Chris@82 565 TS = VSUB(TH, TI);
Chris@82 566 }
Chris@82 567 {
Chris@82 568 V TW, TX, TY, TZ, T10, T11;
Chris@82 569 TW = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@82 570 TX = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)]));
Chris@82 571 TY = VADD(TW, TX);
Chris@82 572 TZ = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@82 573 T10 = LD(&(xi[WS(is, 18)]), ivs, &(xi[0]));
Chris@82 574 T11 = VADD(TZ, T10);
Chris@82 575 T12 = VMUL(LDK(KP559016994), VSUB(TY, T11));
Chris@82 576 T18 = VSUB(TZ, T10);
Chris@82 577 T14 = VADD(TY, T11);
Chris@82 578 T17 = VSUB(TW, TX);
Chris@82 579 }
Chris@82 580 T2N = VADD(TO, TP);
Chris@82 581 T2O = VADD(T13, T14);
Chris@82 582 T2P = VADD(T2N, T2O);
Chris@82 583 T19 = VFMA(LDK(KP475528258), T17, VMUL(LDK(KP293892626), T18));
Chris@82 584 T1Y = VFNMS(LDK(KP293892626), T17, VMUL(LDK(KP475528258), T18));
Chris@82 585 T15 = VFNMS(LDK(KP250000000), T14, T13);
Chris@82 586 T16 = VADD(T12, T15);
Chris@82 587 T1Z = VSUB(T15, T12);
Chris@82 588 T1a = VFNMS(LDK(KP1_369094211), T19, VMUL(LDK(KP728968627), T16));
Chris@82 589 T2v = VFMA(LDK(KP1_996053456), T1Y, VMUL(LDK(KP062790519), T1Z));
Chris@82 590 T1l = VFMA(LDK(KP1_457937254), T19, VMUL(LDK(KP684547105), T16));
Chris@82 591 T2m = VFNMS(LDK(KP998026728), T1Z, VMUL(LDK(KP125581039), T1Y));
Chris@82 592 TU = VFMA(LDK(KP475528258), TS, VMUL(LDK(KP293892626), TT));
Chris@82 593 T21 = VFNMS(LDK(KP293892626), TS, VMUL(LDK(KP475528258), TT));
Chris@82 594 TQ = VFNMS(LDK(KP250000000), TP, TO);
Chris@82 595 TR = VADD(TN, TQ);
Chris@82 596 T22 = VSUB(TQ, TN);
Chris@82 597 TV = VFNMS(LDK(KP963507348), TU, VMUL(LDK(KP876306680), TR));
Chris@82 598 T2u = VFMA(LDK(KP1_688655851), T21, VMUL(LDK(KP535826794), T22));
Chris@82 599 T1k = VFMA(LDK(KP1_752613360), TU, VMUL(LDK(KP481753674), TR));
Chris@82 600 T2l = VFNMS(LDK(KP844327925), T22, VMUL(LDK(KP1_071653589), T21));
Chris@82 601 }
Chris@82 602 {
Chris@82 603 V Tj, Ty, Ti, To, Tk, Tn, Tx, TD, Tz, TC, TA, Tl;
Chris@82 604 Tj = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@82 605 Ty = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@82 606 {
Chris@82 607 V Tc, Td, Te, Tf, Tg, Th;
Chris@82 608 Tc = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@82 609 Td = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)]));
Chris@82 610 Te = VADD(Tc, Td);
Chris@82 611 Tf = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@82 612 Tg = LD(&(xi[WS(is, 16)]), ivs, &(xi[0]));
Chris@82 613 Th = VADD(Tf, Tg);
Chris@82 614 Ti = VMUL(LDK(KP559016994), VSUB(Te, Th));
Chris@82 615 To = VSUB(Tf, Tg);
Chris@82 616 Tk = VADD(Te, Th);
Chris@82 617 Tn = VSUB(Tc, Td);
Chris@82 618 }
Chris@82 619 {
Chris@82 620 V Tr, Ts, Tt, Tu, Tv, Tw;
Chris@82 621 Tr = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@82 622 Ts = LD(&(xi[WS(is, 24)]), ivs, &(xi[0]));
Chris@82 623 Tt = VADD(Tr, Ts);
Chris@82 624 Tu = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
Chris@82 625 Tv = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)]));
Chris@82 626 Tw = VADD(Tu, Tv);
Chris@82 627 Tx = VMUL(LDK(KP559016994), VSUB(Tt, Tw));
Chris@82 628 TD = VSUB(Tu, Tv);
Chris@82 629 Tz = VADD(Tt, Tw);
Chris@82 630 TC = VSUB(Tr, Ts);
Chris@82 631 }
Chris@82 632 T2K = VADD(Tj, Tk);
Chris@82 633 T2L = VADD(Ty, Tz);
Chris@82 634 T2M = VADD(T2K, T2L);
Chris@82 635 TE = VFMA(LDK(KP475528258), TC, VMUL(LDK(KP293892626), TD));
Chris@82 636 T1R = VFNMS(LDK(KP293892626), TC, VMUL(LDK(KP475528258), TD));
Chris@82 637 TA = VFNMS(LDK(KP250000000), Tz, Ty);
Chris@82 638 TB = VADD(Tx, TA);
Chris@82 639 T1S = VSUB(TA, Tx);
Chris@82 640 TF = VFNMS(LDK(KP1_688655851), TE, VMUL(LDK(KP535826794), TB));
Chris@82 641 T2r = VFNMS(LDK(KP425779291), T1S, VMUL(LDK(KP1_809654104), T1R));
Chris@82 642 T1i = VFMA(LDK(KP1_071653589), TE, VMUL(LDK(KP844327925), TB));
Chris@82 643 T2j = VFMA(LDK(KP851558583), T1R, VMUL(LDK(KP904827052), T1S));
Chris@82 644 Tp = VFMA(LDK(KP475528258), Tn, VMUL(LDK(KP293892626), To));
Chris@82 645 T1U = VFNMS(LDK(KP293892626), Tn, VMUL(LDK(KP475528258), To));
Chris@82 646 Tl = VFNMS(LDK(KP250000000), Tk, Tj);
Chris@82 647 Tm = VADD(Ti, Tl);
Chris@82 648 T1V = VSUB(Tl, Ti);
Chris@82 649 Tq = VFNMS(LDK(KP497379774), Tp, VMUL(LDK(KP968583161), Tm));
Chris@82 650 T2s = VFMA(LDK(KP963507348), T1U, VMUL(LDK(KP876306680), T1V));
Chris@82 651 T1h = VFMA(LDK(KP1_937166322), Tp, VMUL(LDK(KP248689887), Tm));
Chris@82 652 T2i = VFNMS(LDK(KP481753674), T1V, VMUL(LDK(KP1_752613360), T1U));
Chris@82 653 }
Chris@82 654 {
Chris@82 655 V T2Q, T2S, T2T, T2X, T2Y, T2V, T2W, T2Z, T2U;
Chris@82 656 T2Q = VMUL(LDK(KP559016994), VSUB(T2M, T2P));
Chris@82 657 T2S = VADD(T2M, T2P);
Chris@82 658 T2T = VFNMS(LDK(KP250000000), T2S, T2R);
Chris@82 659 T2V = VSUB(T2K, T2L);
Chris@82 660 T2W = VSUB(T2N, T2O);
Chris@82 661 T2X = VBYI(VFMA(LDK(KP951056516), T2V, VMUL(LDK(KP587785252), T2W)));
Chris@82 662 T2Y = VBYI(VFNMS(LDK(KP587785252), T2V, VMUL(LDK(KP951056516), T2W)));
Chris@82 663 ST(&(xo[0]), VADD(T2R, T2S), ovs, &(xo[0]));
Chris@82 664 T2Z = VSUB(T2T, T2Q);
Chris@82 665 ST(&(xo[WS(os, 10)]), VADD(T2Y, T2Z), ovs, &(xo[0]));
Chris@82 666 ST(&(xo[WS(os, 15)]), VSUB(T2Z, T2Y), ovs, &(xo[WS(os, 1)]));
Chris@82 667 T2U = VADD(T2Q, T2T);
Chris@82 668 ST(&(xo[WS(os, 5)]), VSUB(T2U, T2X), ovs, &(xo[WS(os, 1)]));
Chris@82 669 ST(&(xo[WS(os, 20)]), VADD(T2X, T2U), ovs, &(xo[0]));
Chris@82 670 }
Chris@82 671 {
Chris@82 672 V T2t, T2y, T2z, T2w, T1T, T1W, T1X, T2c, T2d, T2e, T29, T2a, T2b, T20, T23;
Chris@82 673 V T24, T2p, T2o, T2q, T28, T2D, T2C, T2E, T2x, T2F;
Chris@82 674 T2t = VSUB(T2r, T2s);
Chris@82 675 T2y = VADD(T2i, T2j);
Chris@82 676 T2z = VSUB(T2l, T2m);
Chris@82 677 T2w = VSUB(T2u, T2v);
Chris@82 678 T1T = VFNMS(LDK(KP125333233), T1S, VMUL(LDK(KP1_984229402), T1R));
Chris@82 679 T1W = VFMA(LDK(KP1_457937254), T1U, VMUL(LDK(KP684547105), T1V));
Chris@82 680 T1X = VSUB(T1T, T1W);
Chris@82 681 T2c = VFNMS(LDK(KP1_996053456), T21, VMUL(LDK(KP062790519), T22));
Chris@82 682 T2d = VFMA(LDK(KP1_541026485), T1Y, VMUL(LDK(KP637423989), T1Z));
Chris@82 683 T2e = VSUB(T2c, T2d);
Chris@82 684 T29 = VFNMS(LDK(KP1_369094211), T1U, VMUL(LDK(KP728968627), T1V));
Chris@82 685 T2a = VFMA(LDK(KP250666467), T1R, VMUL(LDK(KP992114701), T1S));
Chris@82 686 T2b = VSUB(T29, T2a);
Chris@82 687 T20 = VFNMS(LDK(KP770513242), T1Z, VMUL(LDK(KP1_274847979), T1Y));
Chris@82 688 T23 = VFMA(LDK(KP125581039), T21, VMUL(LDK(KP998026728), T22));
Chris@82 689 T24 = VSUB(T20, T23);
Chris@82 690 {
Chris@82 691 V T2k, T2n, T2A, T2B;
Chris@82 692 T2k = VSUB(T2i, T2j);
Chris@82 693 T2n = VADD(T2l, T2m);
Chris@82 694 T2p = VADD(T2k, T2n);
Chris@82 695 T2o = VMUL(LDK(KP559016994), VSUB(T2k, T2n));
Chris@82 696 T2q = VFNMS(LDK(KP250000000), T2p, T26);
Chris@82 697 T28 = VSUB(Ta, T7);
Chris@82 698 T2A = VADD(T2s, T2r);
Chris@82 699 T2B = VADD(T2u, T2v);
Chris@82 700 T2D = VADD(T2A, T2B);
Chris@82 701 T2C = VMUL(LDK(KP559016994), VSUB(T2A, T2B));
Chris@82 702 T2E = VFNMS(LDK(KP250000000), T2D, T28);
Chris@82 703 }
Chris@82 704 {
Chris@82 705 V T2I, T2J, T27, T2f;
Chris@82 706 T2I = VBYI(VADD(T26, T2p));
Chris@82 707 T2J = VADD(T28, T2D);
Chris@82 708 ST(&(xo[WS(os, 2)]), VADD(T2I, T2J), ovs, &(xo[0]));
Chris@82 709 ST(&(xo[WS(os, 23)]), VSUB(T2J, T2I), ovs, &(xo[WS(os, 1)]));
Chris@82 710 T27 = VBYI(VSUB(VADD(T1X, T24), T26));
Chris@82 711 T2f = VADD(T28, VADD(T2b, T2e));
Chris@82 712 ST(&(xo[WS(os, 3)]), VADD(T27, T2f), ovs, &(xo[WS(os, 1)]));
Chris@82 713 ST(&(xo[WS(os, 22)]), VSUB(T2f, T27), ovs, &(xo[0]));
Chris@82 714 }
Chris@82 715 T2x = VBYI(VADD(T2o, VADD(T2q, VFNMS(LDK(KP587785252), T2w, VMUL(LDK(KP951056516), T2t)))));
Chris@82 716 T2F = VFMA(LDK(KP951056516), T2y, VFMA(LDK(KP587785252), T2z, VADD(T2C, T2E)));
Chris@82 717 ST(&(xo[WS(os, 7)]), VADD(T2x, T2F), ovs, &(xo[WS(os, 1)]));
Chris@82 718 ST(&(xo[WS(os, 18)]), VSUB(T2F, T2x), ovs, &(xo[0]));
Chris@82 719 {
Chris@82 720 V T2G, T2H, T2g, T2h;
Chris@82 721 T2G = VBYI(VADD(T2q, VSUB(VFMA(LDK(KP587785252), T2t, VMUL(LDK(KP951056516), T2w)), T2o)));
Chris@82 722 T2H = VFMA(LDK(KP587785252), T2y, VSUB(VFNMS(LDK(KP951056516), T2z, T2E), T2C));
Chris@82 723 ST(&(xo[WS(os, 12)]), VADD(T2G, T2H), ovs, &(xo[0]));
Chris@82 724 ST(&(xo[WS(os, 13)]), VSUB(T2H, T2G), ovs, &(xo[WS(os, 1)]));
Chris@82 725 T2g = VFMA(LDK(KP309016994), T2b, VFNMS(LDK(KP809016994), T2e, VFNMS(LDK(KP587785252), VADD(T23, T20), VFNMS(LDK(KP951056516), VADD(T1W, T1T), T28))));
Chris@82 726 T2h = VBYI(VSUB(VFNMS(LDK(KP587785252), VADD(T2c, T2d), VFNMS(LDK(KP809016994), T24, VFNMS(LDK(KP951056516), VADD(T29, T2a), VMUL(LDK(KP309016994), T1X)))), T26));
Chris@82 727 ST(&(xo[WS(os, 17)]), VSUB(T2g, T2h), ovs, &(xo[WS(os, 1)]));
Chris@82 728 ST(&(xo[WS(os, 8)]), VADD(T2g, T2h), ovs, &(xo[0]));
Chris@82 729 }
Chris@82 730 }
Chris@82 731 {
Chris@82 732 V T1p, T1u, T1w, T1q, T1B, T1C, T1D, T1L, T1M, T1N, T1I, T1J, T1K, T1E, T1F;
Chris@82 733 V T1G, T1n, T1r, T1s, Tb, T1c, T1v, T1x, T1t, T1y;
Chris@82 734 T1p = VSUB(TF, Tq);
Chris@82 735 T1u = VSUB(T1i, T1h);
Chris@82 736 T1w = VSUB(T1l, T1k);
Chris@82 737 T1q = VSUB(TV, T1a);
Chris@82 738 T1B = VFMA(LDK(KP1_688655851), Tp, VMUL(LDK(KP535826794), Tm));
Chris@82 739 T1C = VFMA(LDK(KP1_541026485), TE, VMUL(LDK(KP637423989), TB));
Chris@82 740 T1D = VSUB(T1B, T1C);
Chris@82 741 T1L = VFMA(LDK(KP851558583), TU, VMUL(LDK(KP904827052), TR));
Chris@82 742 T1M = VFMA(LDK(KP1_984229402), T19, VMUL(LDK(KP125333233), T16));
Chris@82 743 T1N = VADD(T1L, T1M);
Chris@82 744 T1I = VFNMS(LDK(KP844327925), Tm, VMUL(LDK(KP1_071653589), Tp));
Chris@82 745 T1J = VFNMS(LDK(KP1_274847979), TE, VMUL(LDK(KP770513242), TB));
Chris@82 746 T1K = VADD(T1I, T1J);
Chris@82 747 T1E = VFNMS(LDK(KP425779291), TR, VMUL(LDK(KP1_809654104), TU));
Chris@82 748 T1F = VFNMS(LDK(KP992114701), T16, VMUL(LDK(KP250666467), T19));
Chris@82 749 T1G = VADD(T1E, T1F);
Chris@82 750 {
Chris@82 751 V T1j, T1m, TG, T1b;
Chris@82 752 T1j = VADD(T1h, T1i);
Chris@82 753 T1m = VADD(T1k, T1l);
Chris@82 754 T1n = VADD(T1j, T1m);
Chris@82 755 T1r = VFMS(LDK(KP250000000), T1n, T1g);
Chris@82 756 T1s = VMUL(LDK(KP559016994), VSUB(T1m, T1j));
Chris@82 757 Tb = VADD(T7, Ta);
Chris@82 758 TG = VADD(Tq, TF);
Chris@82 759 T1b = VADD(TV, T1a);
Chris@82 760 T1c = VADD(TG, T1b);
Chris@82 761 T1v = VFNMS(LDK(KP250000000), T1c, Tb);
Chris@82 762 T1x = VMUL(LDK(KP559016994), VSUB(TG, T1b));
Chris@82 763 }
Chris@82 764 {
Chris@82 765 V T1d, T1o, T1H, T1O;
Chris@82 766 T1d = VADD(Tb, T1c);
Chris@82 767 T1o = VBYI(VADD(T1g, T1n));
Chris@82 768 ST(&(xo[WS(os, 1)]), VSUB(T1d, T1o), ovs, &(xo[WS(os, 1)]));
Chris@82 769 ST(&(xo[WS(os, 24)]), VADD(T1d, T1o), ovs, &(xo[0]));
Chris@82 770 T1H = VADD(Tb, VADD(T1D, T1G));
Chris@82 771 T1O = VBYI(VADD(T1g, VSUB(T1K, T1N)));
Chris@82 772 ST(&(xo[WS(os, 21)]), VSUB(T1H, T1O), ovs, &(xo[WS(os, 1)]));
Chris@82 773 ST(&(xo[WS(os, 4)]), VADD(T1H, T1O), ovs, &(xo[0]));
Chris@82 774 }
Chris@82 775 T1t = VBYI(VADD(VFMA(LDK(KP587785252), T1p, VMUL(LDK(KP951056516), T1q)), VSUB(T1r, T1s)));
Chris@82 776 T1y = VFMA(LDK(KP587785252), T1u, VFNMS(LDK(KP951056516), T1w, VSUB(T1v, T1x)));
Chris@82 777 ST(&(xo[WS(os, 11)]), VADD(T1t, T1y), ovs, &(xo[WS(os, 1)]));
Chris@82 778 ST(&(xo[WS(os, 14)]), VSUB(T1y, T1t), ovs, &(xo[0]));
Chris@82 779 {
Chris@82 780 V T1z, T1A, T1P, T1Q;
Chris@82 781 T1z = VBYI(VADD(VFNMS(LDK(KP587785252), T1q, VMUL(LDK(KP951056516), T1p)), VADD(T1r, T1s)));
Chris@82 782 T1A = VFMA(LDK(KP951056516), T1u, VADD(T1x, VFMA(LDK(KP587785252), T1w, T1v)));
Chris@82 783 ST(&(xo[WS(os, 6)]), VADD(T1z, T1A), ovs, &(xo[0]));
Chris@82 784 ST(&(xo[WS(os, 19)]), VSUB(T1A, T1z), ovs, &(xo[WS(os, 1)]));
Chris@82 785 T1P = VBYI(VADD(T1g, VFMA(LDK(KP309016994), T1K, VFMA(LDK(KP587785252), VSUB(T1F, T1E), VFNMS(LDK(KP951056516), VADD(T1B, T1C), VMUL(LDK(KP809016994), T1N))))));
Chris@82 786 T1Q = VFMA(LDK(KP309016994), T1D, VFMA(LDK(KP951056516), VSUB(T1I, T1J), VFMA(LDK(KP587785252), VSUB(T1M, T1L), VFNMS(LDK(KP809016994), T1G, Tb))));
Chris@82 787 ST(&(xo[WS(os, 9)]), VADD(T1P, T1Q), ovs, &(xo[WS(os, 1)]));
Chris@82 788 ST(&(xo[WS(os, 16)]), VSUB(T1Q, T1P), ovs, &(xo[0]));
Chris@82 789 }
Chris@82 790 }
Chris@82 791 }
Chris@82 792 }
Chris@82 793 VLEAVE();
Chris@82 794 }
Chris@82 795
Chris@82 796 static const kdft_desc desc = { 25, XSIMD_STRING("n1fv_25"), {146, 62, 78, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 797
Chris@82 798 void XSIMD(codelet_n1fv_25) (planner *p) {
Chris@82 799 X(kdft_register) (p, n1fv_25, &desc);
Chris@82 800 }
Chris@82 801
Chris@82 802 #endif