annotate src/fftw-3.3.3/dft/simd/common/n1bv_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@10 22 /* Generated on Sun Nov 25 07:37:14 EST 2012 */
Chris@10 23
Chris@10 24 #include "codelet-dft.h"
Chris@10 25
Chris@10 26 #ifdef HAVE_FMA
Chris@10 27
Chris@10 28 /* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 25 -name n1bv_25 -include n1b.h */
Chris@10 29
Chris@10 30 /*
Chris@10 31 * This function contains 224 FP additions, 193 FP multiplications,
Chris@10 32 * (or, 43 additions, 12 multiplications, 181 fused multiply/add),
Chris@10 33 * 215 stack variables, 67 constants, and 50 memory accesses
Chris@10 34 */
Chris@10 35 #include "n1b.h"
Chris@10 36
Chris@10 37 static void n1bv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@10 38 {
Chris@10 39 DVK(KP792626838, +0.792626838241819413632131824093538848057784557);
Chris@10 40 DVK(KP876091699, +0.876091699473550838204498029706869638173524346);
Chris@10 41 DVK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@10 42 DVK(KP617882369, +0.617882369114440893914546919006756321695042882);
Chris@10 43 DVK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@10 44 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@10 45 DVK(KP999544308, +0.999544308746292983948881682379742149196758193);
Chris@10 46 DVK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@10 47 DVK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@10 48 DVK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@10 49 DVK(KP829049696, +0.829049696159252993975487806364305442437946767);
Chris@10 50 DVK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@10 51 DVK(KP916574801, +0.916574801383451584742370439148878693530976769);
Chris@10 52 DVK(KP894834959, +0.894834959464455102997960030820114611498661386);
Chris@10 53 DVK(KP809385824, +0.809385824416008241660603814668679683846476688);
Chris@10 54 DVK(KP447417479, +0.447417479732227551498980015410057305749330693);
Chris@10 55 DVK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@10 56 DVK(KP897376177, +0.897376177523557693138608077137219684419427330);
Chris@10 57 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@10 58 DVK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@10 59 DVK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@10 60 DVK(KP855719849, +0.855719849902058969314654733608091555096772472);
Chris@10 61 DVK(KP237294955, +0.237294955877110315393888866460840817927895961);
Chris@10 62 DVK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@10 63 DVK(KP904508497, +0.904508497187473712051146708591409529430077295);
Chris@10 64 DVK(KP997675361, +0.997675361079556513670859573984492383596555031);
Chris@10 65 DVK(KP262346850, +0.262346850930607871785420028382979691334784273);
Chris@10 66 DVK(KP763932022, +0.763932022500210303590826331268723764559381640);
Chris@10 67 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@10 68 DVK(KP690983005, +0.690983005625052575897706582817180941139845410);
Chris@10 69 DVK(KP952936919, +0.952936919628306576880750665357914584765951388);
Chris@10 70 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@10 71 DVK(KP570584518, +0.570584518783621657366766175430996792655723863);
Chris@10 72 DVK(KP669429328, +0.669429328479476605641803240971985825917022098);
Chris@10 73 DVK(KP923225144, +0.923225144846402650453449441572664695995209956);
Chris@10 74 DVK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@10 75 DVK(KP956723877, +0.956723877038460305821989399535483155872969262);
Chris@10 76 DVK(KP522616830, +0.522616830205754336872861364785224694908468440);
Chris@10 77 DVK(KP945422727, +0.945422727388575946270360266328811958657216298);
Chris@10 78 DVK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@10 79 DVK(KP982009705, +0.982009705009746369461829878184175962711969869);
Chris@10 80 DVK(KP921078979, +0.921078979742360627699756128143719920817673854);
Chris@10 81 DVK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@10 82 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@10 83 DVK(KP958953096, +0.958953096729998668045963838399037225970891871);
Chris@10 84 DVK(KP867381224, +0.867381224396525206773171885031575671309956167);
Chris@10 85 DVK(KP269969613, +0.269969613759572083574752974412347470060951301);
Chris@10 86 DVK(KP244189809, +0.244189809627953270309879511234821255780225091);
Chris@10 87 DVK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@10 88 DVK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@10 89 DVK(KP132830569, +0.132830569247582714407653942074819768844536507);
Chris@10 90 DVK(KP120146378, +0.120146378570687701782758537356596213647956445);
Chris@10 91 DVK(KP987388751, +0.987388751065621252324603216482382109400433949);
Chris@10 92 DVK(KP893101515, +0.893101515366181661711202267938416198338079437);
Chris@10 93 DVK(KP786782374, +0.786782374965295178365099601674911834788448471);
Chris@10 94 DVK(KP869845200, +0.869845200362138853122720822420327157933056305);
Chris@10 95 DVK(KP447533225, +0.447533225982656890041886979663652563063114397);
Chris@10 96 DVK(KP494780565, +0.494780565770515410344588413655324772219443730);
Chris@10 97 DVK(KP578046249, +0.578046249379945007321754579646815604023525655);
Chris@10 98 DVK(KP522847744, +0.522847744331509716623755382187077770911012542);
Chris@10 99 DVK(KP059835404, +0.059835404262124915169548397419498386427871950);
Chris@10 100 DVK(KP066152395, +0.066152395967733048213034281011006031460903353);
Chris@10 101 DVK(KP603558818, +0.603558818296015001454675132653458027918768137);
Chris@10 102 DVK(KP667278218, +0.667278218140296670899089292254759909713898805);
Chris@10 103 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@10 104 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@10 105 DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@10 106 {
Chris@10 107 INT i;
Chris@10 108 const R *xi;
Chris@10 109 R *xo;
Chris@10 110 xi = ii;
Chris@10 111 xo = io;
Chris@10 112 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(50, is), MAKE_VOLATILE_STRIDE(50, os)) {
Chris@10 113 V T1g, T1k, T1I, T24, T2a, T1G, T1A, T1l, T1B, T1H, T1d;
Chris@10 114 {
Chris@10 115 V T2z, T1q, Ta, T9, T3n, Ty, Tl, T2O, T2W, T2l, T2s, TV, T1i, T1K, T1S;
Chris@10 116 V T3z, T3t, Tk, T3o, Tp, T2g, T2N, T2V, T2o, T2t, T1a, T1j, T1J, T1R, Tz;
Chris@10 117 V Tt, TA, Tw;
Chris@10 118 {
Chris@10 119 V T1, T5, T6, T2, T3;
Chris@10 120 T1 = LD(&(xi[0]), ivs, &(xi[0]));
Chris@10 121 T5 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@10 122 T6 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)]));
Chris@10 123 T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@10 124 T3 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0]));
Chris@10 125 {
Chris@10 126 V TH, TW, TK, TS, T10, T8, TN, TT, T17, TZ, T11;
Chris@10 127 TH = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@10 128 TW = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@10 129 {
Chris@10 130 V TI, TJ, TL, T7, T1p, T4, T1o, TM, TX, TY;
Chris@10 131 TI = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@10 132 TJ = LD(&(xi[WS(is, 22)]), ivs, &(xi[0]));
Chris@10 133 TL = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@10 134 T7 = VADD(T5, T6);
Chris@10 135 T1p = VSUB(T5, T6);
Chris@10 136 T4 = VADD(T2, T3);
Chris@10 137 T1o = VSUB(T2, T3);
Chris@10 138 TM = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)]));
Chris@10 139 TX = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@10 140 TK = VADD(TI, TJ);
Chris@10 141 TS = VSUB(TI, TJ);
Chris@10 142 TY = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)]));
Chris@10 143 T10 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@10 144 T2z = VFNMS(LDK(KP618033988), T1o, T1p);
Chris@10 145 T1q = VFMA(LDK(KP618033988), T1p, T1o);
Chris@10 146 Ta = VSUB(T4, T7);
Chris@10 147 T8 = VADD(T4, T7);
Chris@10 148 TN = VADD(TL, TM);
Chris@10 149 TT = VSUB(TM, TL);
Chris@10 150 T17 = VSUB(TX, TY);
Chris@10 151 TZ = VADD(TX, TY);
Chris@10 152 T11 = LD(&(xi[WS(is, 18)]), ivs, &(xi[0]));
Chris@10 153 }
Chris@10 154 {
Chris@10 155 V Tc, T2m, T19, Tn, To, Tr, Tj, T16, T2n, Ts, Tu, Tv;
Chris@10 156 {
Chris@10 157 V TU, T2j, TO, TQ, T12, T18;
Chris@10 158 Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@10 159 T9 = VFNMS(LDK(KP250000000), T8, T1);
Chris@10 160 T3n = VADD(T1, T8);
Chris@10 161 TU = VFNMS(LDK(KP618033988), TT, TS);
Chris@10 162 T2j = VFMA(LDK(KP618033988), TS, TT);
Chris@10 163 TO = VADD(TK, TN);
Chris@10 164 TQ = VSUB(TN, TK);
Chris@10 165 T12 = VADD(T10, T11);
Chris@10 166 T18 = VSUB(T10, T11);
Chris@10 167 Ty = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@10 168 {
Chris@10 169 V T3r, T15, T13, Tf, Ti, T2k, TR, TP, T3s, T14;
Chris@10 170 {
Chris@10 171 V Td, Te, Tg, Th;
Chris@10 172 Td = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@10 173 Te = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)]));
Chris@10 174 Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@10 175 Th = LD(&(xi[WS(is, 16)]), ivs, &(xi[0]));
Chris@10 176 TP = VFNMS(LDK(KP250000000), TO, TH);
Chris@10 177 T3r = VADD(TH, TO);
Chris@10 178 T2m = VFNMS(LDK(KP618033988), T17, T18);
Chris@10 179 T19 = VFMA(LDK(KP618033988), T18, T17);
Chris@10 180 T15 = VSUB(T12, TZ);
Chris@10 181 T13 = VADD(TZ, T12);
Chris@10 182 Tf = VADD(Td, Te);
Chris@10 183 Tn = VSUB(Td, Te);
Chris@10 184 To = VSUB(Th, Tg);
Chris@10 185 Ti = VADD(Tg, Th);
Chris@10 186 }
Chris@10 187 T2k = VFMA(LDK(KP559016994), TQ, TP);
Chris@10 188 TR = VFNMS(LDK(KP559016994), TQ, TP);
Chris@10 189 Tr = LD(&(xi[WS(is, 24)]), ivs, &(xi[0]));
Chris@10 190 T3s = VADD(TW, T13);
Chris@10 191 T14 = VFNMS(LDK(KP250000000), T13, TW);
Chris@10 192 Tj = VADD(Tf, Ti);
Chris@10 193 Tl = VSUB(Tf, Ti);
Chris@10 194 T2O = VFNMS(LDK(KP667278218), T2k, T2j);
Chris@10 195 T2W = VFMA(LDK(KP603558818), T2j, T2k);
Chris@10 196 T2l = VFMA(LDK(KP066152395), T2k, T2j);
Chris@10 197 T2s = VFNMS(LDK(KP059835404), T2j, T2k);
Chris@10 198 TV = VFNMS(LDK(KP522847744), TU, TR);
Chris@10 199 T1i = VFMA(LDK(KP578046249), TR, TU);
Chris@10 200 T1K = VFNMS(LDK(KP494780565), TR, TU);
Chris@10 201 T1S = VFMA(LDK(KP447533225), TU, TR);
Chris@10 202 T16 = VFNMS(LDK(KP559016994), T15, T14);
Chris@10 203 T2n = VFMA(LDK(KP559016994), T15, T14);
Chris@10 204 T3z = VSUB(T3r, T3s);
Chris@10 205 T3t = VADD(T3r, T3s);
Chris@10 206 Ts = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@10 207 Tu = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)]));
Chris@10 208 Tv = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
Chris@10 209 }
Chris@10 210 }
Chris@10 211 Tk = VFNMS(LDK(KP250000000), Tj, Tc);
Chris@10 212 T3o = VADD(Tc, Tj);
Chris@10 213 Tp = VFNMS(LDK(KP618033988), To, Tn);
Chris@10 214 T2g = VFMA(LDK(KP618033988), Tn, To);
Chris@10 215 T2N = VFMA(LDK(KP066152395), T2n, T2m);
Chris@10 216 T2V = VFNMS(LDK(KP059835404), T2m, T2n);
Chris@10 217 T2o = VFMA(LDK(KP869845200), T2n, T2m);
Chris@10 218 T2t = VFNMS(LDK(KP786782374), T2m, T2n);
Chris@10 219 T1a = VFNMS(LDK(KP893101515), T19, T16);
Chris@10 220 T1j = VFMA(LDK(KP987388751), T16, T19);
Chris@10 221 T1J = VFNMS(LDK(KP120146378), T19, T16);
Chris@10 222 T1R = VFMA(LDK(KP132830569), T16, T19);
Chris@10 223 Tz = VADD(Ts, Tr);
Chris@10 224 Tt = VSUB(Tr, Ts);
Chris@10 225 TA = VADD(Tv, Tu);
Chris@10 226 Tw = VSUB(Tu, Tv);
Chris@10 227 }
Chris@10 228 }
Chris@10 229 }
Chris@10 230 {
Chris@10 231 V T2p, T2I, T2u, T2C, Tx, T2d, T2X, T34, T2P, T3b, T2b, Tb, T2Q, T2Z, T2h;
Chris@10 232 V T2w, Tq, T1e, T1M, T1U, TE, T2c, T3q, T3y;
Chris@10 233 T2p = VFNMS(LDK(KP772036680), T2o, T2l);
Chris@10 234 T2I = VFMA(LDK(KP772036680), T2o, T2l);
Chris@10 235 T2u = VFMA(LDK(KP772036680), T2t, T2s);
Chris@10 236 T2C = VFNMS(LDK(KP772036680), T2t, T2s);
Chris@10 237 {
Chris@10 238 V TD, TB, Tm, T2f, T3p, TC;
Chris@10 239 Tx = VFMA(LDK(KP618033988), Tw, Tt);
Chris@10 240 T2d = VFNMS(LDK(KP618033988), Tt, Tw);
Chris@10 241 TD = VSUB(Tz, TA);
Chris@10 242 TB = VADD(Tz, TA);
Chris@10 243 Tm = VFMA(LDK(KP559016994), Tl, Tk);
Chris@10 244 T2f = VFNMS(LDK(KP559016994), Tl, Tk);
Chris@10 245 T2X = VFMA(LDK(KP845997307), T2W, T2V);
Chris@10 246 T34 = VFNMS(LDK(KP845997307), T2W, T2V);
Chris@10 247 T2P = VFNMS(LDK(KP845997307), T2O, T2N);
Chris@10 248 T3b = VFMA(LDK(KP845997307), T2O, T2N);
Chris@10 249 T2b = VFNMS(LDK(KP559016994), Ta, T9);
Chris@10 250 Tb = VFMA(LDK(KP559016994), Ta, T9);
Chris@10 251 T3p = VADD(Ty, TB);
Chris@10 252 TC = VFMS(LDK(KP250000000), TB, Ty);
Chris@10 253 T2Q = VFNMS(LDK(KP522847744), T2g, T2f);
Chris@10 254 T2Z = VFMA(LDK(KP578046249), T2f, T2g);
Chris@10 255 T2h = VFMA(LDK(KP893101515), T2g, T2f);
Chris@10 256 T2w = VFNMS(LDK(KP987388751), T2f, T2g);
Chris@10 257 Tq = VFNMS(LDK(KP244189809), Tp, Tm);
Chris@10 258 T1e = VFMA(LDK(KP269969613), Tm, Tp);
Chris@10 259 T1M = VFMA(LDK(KP667278218), Tm, Tp);
Chris@10 260 T1U = VFNMS(LDK(KP603558818), Tp, Tm);
Chris@10 261 TE = VFNMS(LDK(KP559016994), TD, TC);
Chris@10 262 T2c = VFMA(LDK(KP559016994), TD, TC);
Chris@10 263 T3q = VADD(T3o, T3p);
Chris@10 264 T3y = VSUB(T3o, T3p);
Chris@10 265 }
Chris@10 266 {
Chris@10 267 V T1Z, T25, T1P, T22, T1X, TG, T1b, T28, T1t, T1y, T1x, T1E, T1Q, T1Y;
Chris@10 268 {
Chris@10 269 V T26, T1L, T1T, TF, T1f, T1W, T3m, T3g, T2M, T2G, T39, T3j, T21, T1O, T20;
Chris@10 270 V T27;
Chris@10 271 T26 = VFMA(LDK(KP867381224), T1K, T1J);
Chris@10 272 T1L = VFNMS(LDK(KP867381224), T1K, T1J);
Chris@10 273 T20 = VFNMS(LDK(KP958953096), T1S, T1R);
Chris@10 274 T1T = VFMA(LDK(KP958953096), T1S, T1R);
Chris@10 275 {
Chris@10 276 V T2R, T2Y, T2e, T2v, T1N, T1V;
Chris@10 277 T2R = VFNMS(LDK(KP494780565), T2c, T2d);
Chris@10 278 T2Y = VFMA(LDK(KP447533225), T2d, T2c);
Chris@10 279 T2e = VFMA(LDK(KP120146378), T2d, T2c);
Chris@10 280 T2v = VFNMS(LDK(KP132830569), T2c, T2d);
Chris@10 281 TF = VFNMS(LDK(KP667278218), TE, Tx);
Chris@10 282 T1f = VFMA(LDK(KP603558818), Tx, TE);
Chris@10 283 T1N = VFMA(LDK(KP869845200), TE, Tx);
Chris@10 284 T1V = VFNMS(LDK(KP786782374), Tx, TE);
Chris@10 285 {
Chris@10 286 V T3A, T3C, T3w, T3u;
Chris@10 287 T3A = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3z, T3y));
Chris@10 288 T3C = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3y, T3z));
Chris@10 289 T3w = VSUB(T3q, T3t);
Chris@10 290 T3u = VADD(T3q, T3t);
Chris@10 291 {
Chris@10 292 V T2B, T2x, T2H, T2i;
Chris@10 293 T2B = VFMA(LDK(KP734762448), T2w, T2v);
Chris@10 294 T2x = VFNMS(LDK(KP734762448), T2w, T2v);
Chris@10 295 T2H = VFNMS(LDK(KP734762448), T2h, T2e);
Chris@10 296 T2i = VFMA(LDK(KP734762448), T2h, T2e);
Chris@10 297 {
Chris@10 298 V T30, T35, T3c, T2S, T3v;
Chris@10 299 T30 = VFNMS(LDK(KP921078979), T2Z, T2Y);
Chris@10 300 T35 = VFMA(LDK(KP921078979), T2Z, T2Y);
Chris@10 301 T3c = VFMA(LDK(KP982009705), T2R, T2Q);
Chris@10 302 T2S = VFNMS(LDK(KP982009705), T2R, T2Q);
Chris@10 303 T1W = VFMA(LDK(KP912575812), T1V, T1U);
Chris@10 304 T1Z = VFNMS(LDK(KP912575812), T1V, T1U);
Chris@10 305 T1O = VFMA(LDK(KP912575812), T1N, T1M);
Chris@10 306 T25 = VFNMS(LDK(KP912575812), T1N, T1M);
Chris@10 307 ST(&(xo[0]), VADD(T3u, T3n), ovs, &(xo[0]));
Chris@10 308 T3v = VFNMS(LDK(KP250000000), T3u, T3n);
Chris@10 309 {
Chris@10 310 V T2y, T2J, T2q, T2D;
Chris@10 311 T2y = VFMA(LDK(KP945422727), T2x, T2u);
Chris@10 312 T2J = VFMA(LDK(KP522616830), T2x, T2I);
Chris@10 313 T2q = VFMA(LDK(KP956723877), T2p, T2i);
Chris@10 314 T2D = VFNMS(LDK(KP522616830), T2i, T2C);
Chris@10 315 {
Chris@10 316 V T3e, T31, T36, T2T;
Chris@10 317 T3e = VFMA(LDK(KP906616052), T30, T2X);
Chris@10 318 T31 = VFNMS(LDK(KP906616052), T30, T2X);
Chris@10 319 T36 = VFNMS(LDK(KP923225144), T2S, T2P);
Chris@10 320 T2T = VFMA(LDK(KP923225144), T2S, T2P);
Chris@10 321 {
Chris@10 322 V T3k, T3d, T3x, T3B;
Chris@10 323 T3k = VFNMS(LDK(KP669429328), T3b, T3c);
Chris@10 324 T3d = VFMA(LDK(KP570584518), T3c, T3b);
Chris@10 325 T3x = VFMA(LDK(KP559016994), T3w, T3v);
Chris@10 326 T3B = VFNMS(LDK(KP559016994), T3w, T3v);
Chris@10 327 {
Chris@10 328 V T2A, T2K, T2r, T2E;
Chris@10 329 T2A = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2z, T2y));
Chris@10 330 T2K = VFNMS(LDK(KP690983005), T2J, T2u);
Chris@10 331 T2r = VFMA(LDK(KP992114701), T2q, T2b);
Chris@10 332 T2E = VFMA(LDK(KP763932022), T2D, T2p);
Chris@10 333 {
Chris@10 334 V T32, T3a, T37, T3h;
Chris@10 335 T32 = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2z, T31));
Chris@10 336 T3a = VFMA(LDK(KP262346850), T31, T2z);
Chris@10 337 T37 = VFNMS(LDK(KP997675361), T36, T35);
Chris@10 338 T3h = VFNMS(LDK(KP904508497), T36, T34);
Chris@10 339 {
Chris@10 340 V T2U, T33, T3l, T3f;
Chris@10 341 T2U = VFMA(LDK(KP949179823), T2T, T2b);
Chris@10 342 T33 = VFNMS(LDK(KP237294955), T2T, T2b);
Chris@10 343 T3l = VFNMS(LDK(KP669429328), T3e, T3k);
Chris@10 344 T3f = VFMA(LDK(KP618033988), T3e, T3d);
Chris@10 345 ST(&(xo[WS(os, 20)]), VFNMSI(T3A, T3x), ovs, &(xo[0]));
Chris@10 346 ST(&(xo[WS(os, 5)]), VFMAI(T3A, T3x), ovs, &(xo[WS(os, 1)]));
Chris@10 347 ST(&(xo[WS(os, 15)]), VFMAI(T3C, T3B), ovs, &(xo[WS(os, 1)]));
Chris@10 348 ST(&(xo[WS(os, 10)]), VFNMSI(T3C, T3B), ovs, &(xo[0]));
Chris@10 349 {
Chris@10 350 V T2L, T2F, T38, T3i;
Chris@10 351 T2L = VFMA(LDK(KP855719849), T2K, T2H);
Chris@10 352 ST(&(xo[WS(os, 3)]), VFMAI(T2A, T2r), ovs, &(xo[WS(os, 1)]));
Chris@10 353 ST(&(xo[WS(os, 22)]), VFNMSI(T2A, T2r), ovs, &(xo[0]));
Chris@10 354 T2F = VFNMS(LDK(KP855719849), T2E, T2B);
Chris@10 355 T38 = VFMA(LDK(KP560319534), T37, T34);
Chris@10 356 T3i = VFNMS(LDK(KP681693190), T3h, T35);
Chris@10 357 ST(&(xo[WS(os, 2)]), VFMAI(T32, T2U), ovs, &(xo[0]));
Chris@10 358 ST(&(xo[WS(os, 23)]), VFNMSI(T32, T2U), ovs, &(xo[WS(os, 1)]));
Chris@10 359 T3m = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3l, T3a));
Chris@10 360 T3g = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3f, T3a));
Chris@10 361 T2M = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2L, T2z));
Chris@10 362 T2G = VFMA(LDK(KP897376177), T2F, T2b);
Chris@10 363 T39 = VFNMS(LDK(KP949179823), T38, T33);
Chris@10 364 T3j = VFNMS(LDK(KP860541664), T3i, T33);
Chris@10 365 T21 = VFMA(LDK(KP447417479), T1O, T20);
Chris@10 366 }
Chris@10 367 }
Chris@10 368 }
Chris@10 369 }
Chris@10 370 }
Chris@10 371 }
Chris@10 372 }
Chris@10 373 }
Chris@10 374 }
Chris@10 375 }
Chris@10 376 }
Chris@10 377 T1P = VFNMS(LDK(KP809385824), T1O, T1L);
Chris@10 378 ST(&(xo[WS(os, 17)]), VFNMSI(T2M, T2G), ovs, &(xo[WS(os, 1)]));
Chris@10 379 ST(&(xo[WS(os, 8)]), VFMAI(T2M, T2G), ovs, &(xo[0]));
Chris@10 380 ST(&(xo[WS(os, 13)]), VFMAI(T3g, T39), ovs, &(xo[WS(os, 1)]));
Chris@10 381 ST(&(xo[WS(os, 12)]), VFNMSI(T3g, T39), ovs, &(xo[0]));
Chris@10 382 ST(&(xo[WS(os, 18)]), VFMAI(T3m, T3j), ovs, &(xo[0]));
Chris@10 383 ST(&(xo[WS(os, 7)]), VFNMSI(T3m, T3j), ovs, &(xo[WS(os, 1)]));
Chris@10 384 T22 = VFMA(LDK(KP690983005), T21, T1L);
Chris@10 385 T27 = VFMA(LDK(KP447417479), T1W, T26);
Chris@10 386 T1X = VFMA(LDK(KP894834959), T1W, T1T);
Chris@10 387 {
Chris@10 388 V T1r, T1s, T1v, T1w;
Chris@10 389 T1r = VFNMS(LDK(KP916574801), T1f, T1e);
Chris@10 390 T1g = VFMA(LDK(KP916574801), T1f, T1e);
Chris@10 391 T1k = VFNMS(LDK(KP831864738), T1j, T1i);
Chris@10 392 T1s = VFMA(LDK(KP831864738), T1j, T1i);
Chris@10 393 T1v = VFNMS(LDK(KP829049696), TF, Tq);
Chris@10 394 TG = VFMA(LDK(KP829049696), TF, Tq);
Chris@10 395 T1b = VFMA(LDK(KP831864738), T1a, TV);
Chris@10 396 T1w = VFNMS(LDK(KP831864738), T1a, TV);
Chris@10 397 T28 = VFNMS(LDK(KP763932022), T27, T1T);
Chris@10 398 T1t = VFMA(LDK(KP904730450), T1s, T1r);
Chris@10 399 T1y = VFNMS(LDK(KP904730450), T1s, T1r);
Chris@10 400 T1x = VFMA(LDK(KP559154169), T1w, T1v);
Chris@10 401 T1E = VFNMS(LDK(KP683113946), T1v, T1w);
Chris@10 402 }
Chris@10 403 }
Chris@10 404 T1Q = VFNMS(LDK(KP992114701), T1P, Tb);
Chris@10 405 T1Y = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T1X, T1q));
Chris@10 406 {
Chris@10 407 V T1u, T1F, T1z, T1h, T1c, T23, T29;
Chris@10 408 T23 = VFNMS(LDK(KP999544308), T22, T1Z);
Chris@10 409 T29 = VFNMS(LDK(KP999544308), T28, T25);
Chris@10 410 T1I = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1t, T1q));
Chris@10 411 T1u = VFNMS(LDK(KP242145790), T1t, T1q);
Chris@10 412 T1F = VFMA(LDK(KP617882369), T1y, T1E);
Chris@10 413 T1z = VFMA(LDK(KP559016994), T1y, T1x);
Chris@10 414 T1h = VFNMS(LDK(KP904730450), T1b, TG);
Chris@10 415 T1c = VFMA(LDK(KP904730450), T1b, TG);
Chris@10 416 ST(&(xo[WS(os, 21)]), VFMAI(T1Y, T1Q), ovs, &(xo[WS(os, 1)]));
Chris@10 417 ST(&(xo[WS(os, 4)]), VFNMSI(T1Y, T1Q), ovs, &(xo[0]));
Chris@10 418 T24 = VFNMS(LDK(KP803003575), T23, Tb);
Chris@10 419 T2a = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T29, T1q));
Chris@10 420 T1G = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T1F, T1u));
Chris@10 421 T1A = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1z, T1u));
Chris@10 422 T1l = VFNMS(LDK(KP904730450), T1k, T1h);
Chris@10 423 T1B = VADD(T1g, T1h);
Chris@10 424 T1H = VFMA(LDK(KP968583161), T1c, Tb);
Chris@10 425 T1d = VFNMS(LDK(KP242145790), T1c, Tb);
Chris@10 426 }
Chris@10 427 }
Chris@10 428 }
Chris@10 429 }
Chris@10 430 ST(&(xo[WS(os, 16)]), VFMAI(T2a, T24), ovs, &(xo[0]));
Chris@10 431 ST(&(xo[WS(os, 9)]), VFNMSI(T2a, T24), ovs, &(xo[WS(os, 1)]));
Chris@10 432 {
Chris@10 433 V T1m, T1C, T1n, T1D;
Chris@10 434 T1m = VFNMS(LDK(KP618033988), T1l, T1g);
Chris@10 435 T1C = VFNMS(LDK(KP683113946), T1B, T1k);
Chris@10 436 ST(&(xo[WS(os, 24)]), VFNMSI(T1I, T1H), ovs, &(xo[0]));
Chris@10 437 ST(&(xo[WS(os, 1)]), VFMAI(T1I, T1H), ovs, &(xo[WS(os, 1)]));
Chris@10 438 T1n = VFNMS(LDK(KP876091699), T1m, T1d);
Chris@10 439 T1D = VFMA(LDK(KP792626838), T1C, T1d);
Chris@10 440 ST(&(xo[WS(os, 19)]), VFNMSI(T1A, T1n), ovs, &(xo[WS(os, 1)]));
Chris@10 441 ST(&(xo[WS(os, 6)]), VFMAI(T1A, T1n), ovs, &(xo[0]));
Chris@10 442 ST(&(xo[WS(os, 14)]), VFNMSI(T1G, T1D), ovs, &(xo[0]));
Chris@10 443 ST(&(xo[WS(os, 11)]), VFMAI(T1G, T1D), ovs, &(xo[WS(os, 1)]));
Chris@10 444 }
Chris@10 445 }
Chris@10 446 }
Chris@10 447 VLEAVE();
Chris@10 448 }
Chris@10 449
Chris@10 450 static const kdft_desc desc = { 25, XSIMD_STRING("n1bv_25"), {43, 12, 181, 0}, &GENUS, 0, 0, 0, 0 };
Chris@10 451
Chris@10 452 void XSIMD(codelet_n1bv_25) (planner *p) {
Chris@10 453 X(kdft_register) (p, n1bv_25, &desc);
Chris@10 454 }
Chris@10 455
Chris@10 456 #else /* HAVE_FMA */
Chris@10 457
Chris@10 458 /* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 25 -name n1bv_25 -include n1b.h */
Chris@10 459
Chris@10 460 /*
Chris@10 461 * This function contains 224 FP additions, 140 FP multiplications,
Chris@10 462 * (or, 147 additions, 63 multiplications, 77 fused multiply/add),
Chris@10 463 * 115 stack variables, 40 constants, and 50 memory accesses
Chris@10 464 */
Chris@10 465 #include "n1b.h"
Chris@10 466
Chris@10 467 static void n1bv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@10 468 {
Chris@10 469 DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
Chris@10 470 DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
Chris@10 471 DVK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@10 472 DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
Chris@10 473 DVK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@10 474 DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
Chris@10 475 DVK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@10 476 DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
Chris@10 477 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@10 478 DVK(KP250666467, +0.250666467128608490746237519633017587885836494);
Chris@10 479 DVK(KP851558583, +0.851558583130145297725004891488503407959946084);
Chris@10 480 DVK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@10 481 DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@10 482 DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
Chris@10 483 DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
Chris@10 484 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@10 485 DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@10 486 DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
Chris@10 487 DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
Chris@10 488 DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@10 489 DVK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@10 490 DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
Chris@10 491 DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@10 492 DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
Chris@10 493 DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
Chris@10 494 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@10 495 DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
Chris@10 496 DVK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@10 497 DVK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@10 498 DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
Chris@10 499 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@10 500 DVK(KP125581039, +0.125581039058626752152356449131262266244969664);
Chris@10 501 DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
Chris@10 502 DVK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@10 503 DVK(KP293892626, +0.293892626146236564584352977319536384298826219);
Chris@10 504 DVK(KP475528258, +0.475528258147576786058219666689691071702849317);
Chris@10 505 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@10 506 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@10 507 DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@10 508 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@10 509 {
Chris@10 510 INT i;
Chris@10 511 const R *xi;
Chris@10 512 R *xo;
Chris@10 513 xi = ii;
Chris@10 514 xo = io;
Chris@10 515 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(50, is), MAKE_VOLATILE_STRIDE(50, os)) {
Chris@10 516 V T1b, T2o, T1v, T1e, T2W, T2P, T2Q, T2U, T11, T27, TY, T26, T12, T2f, T1j;
Chris@10 517 V T28, TM, T24, TJ, T23, TN, T2e, T1i, T25, T2M, T2N, T2T, Tm, T1W, Tt;
Chris@10 518 V T1X, Tu, T20, Tw, T1Y, T7, T1U, Te, T1T, Tf, T21, Tx, T1V;
Chris@10 519 {
Chris@10 520 V T1c, T1a, T1t, T17, T1r;
Chris@10 521 T1c = LD(&(xi[0]), ivs, &(xi[0]));
Chris@10 522 {
Chris@10 523 V T18, T19, T15, T16;
Chris@10 524 T18 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@10 525 T19 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)]));
Chris@10 526 T1a = VADD(T18, T19);
Chris@10 527 T1t = VSUB(T18, T19);
Chris@10 528 T15 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@10 529 T16 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0]));
Chris@10 530 T17 = VADD(T15, T16);
Chris@10 531 T1r = VSUB(T15, T16);
Chris@10 532 }
Chris@10 533 {
Chris@10 534 V T2n, T1s, T1u, T1d;
Chris@10 535 T1b = VMUL(LDK(KP559016994), VSUB(T17, T1a));
Chris@10 536 T2n = VMUL(LDK(KP587785252), T1r);
Chris@10 537 T2o = VFNMS(LDK(KP951056516), T1t, T2n);
Chris@10 538 T1s = VMUL(LDK(KP951056516), T1r);
Chris@10 539 T1u = VMUL(LDK(KP587785252), T1t);
Chris@10 540 T1v = VADD(T1s, T1u);
Chris@10 541 T1d = VADD(T17, T1a);
Chris@10 542 T1e = VFNMS(LDK(KP250000000), T1d, T1c);
Chris@10 543 T2W = VADD(T1c, T1d);
Chris@10 544 }
Chris@10 545 }
Chris@10 546 {
Chris@10 547 V TG, TV, TF, TL, TH, TK, TU, T10, TW, TZ, TX, TI;
Chris@10 548 TG = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@10 549 TV = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@10 550 {
Chris@10 551 V Tz, TA, TB, TC, TD, TE;
Chris@10 552 Tz = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@10 553 TA = LD(&(xi[WS(is, 22)]), ivs, &(xi[0]));
Chris@10 554 TB = VADD(Tz, TA);
Chris@10 555 TC = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@10 556 TD = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)]));
Chris@10 557 TE = VADD(TC, TD);
Chris@10 558 TF = VMUL(LDK(KP559016994), VSUB(TB, TE));
Chris@10 559 TL = VSUB(TC, TD);
Chris@10 560 TH = VADD(TB, TE);
Chris@10 561 TK = VSUB(Tz, TA);
Chris@10 562 }
Chris@10 563 {
Chris@10 564 V TO, TP, TQ, TR, TS, TT;
Chris@10 565 TO = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@10 566 TP = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)]));
Chris@10 567 TQ = VADD(TO, TP);
Chris@10 568 TR = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@10 569 TS = LD(&(xi[WS(is, 18)]), ivs, &(xi[0]));
Chris@10 570 TT = VADD(TR, TS);
Chris@10 571 TU = VMUL(LDK(KP559016994), VSUB(TQ, TT));
Chris@10 572 T10 = VSUB(TR, TS);
Chris@10 573 TW = VADD(TQ, TT);
Chris@10 574 TZ = VSUB(TO, TP);
Chris@10 575 }
Chris@10 576 T2P = VADD(TG, TH);
Chris@10 577 T2Q = VADD(TV, TW);
Chris@10 578 T2U = VADD(T2P, T2Q);
Chris@10 579 T11 = VFMA(LDK(KP475528258), TZ, VMUL(LDK(KP293892626), T10));
Chris@10 580 T27 = VFNMS(LDK(KP475528258), T10, VMUL(LDK(KP293892626), TZ));
Chris@10 581 TX = VFNMS(LDK(KP250000000), TW, TV);
Chris@10 582 TY = VADD(TU, TX);
Chris@10 583 T26 = VSUB(TX, TU);
Chris@10 584 T12 = VFNMS(LDK(KP1_369094211), T11, VMUL(LDK(KP728968627), TY));
Chris@10 585 T2f = VFMA(LDK(KP125581039), T27, VMUL(LDK(KP998026728), T26));
Chris@10 586 T1j = VFMA(LDK(KP1_457937254), T11, VMUL(LDK(KP684547105), TY));
Chris@10 587 T28 = VFNMS(LDK(KP1_996053456), T27, VMUL(LDK(KP062790519), T26));
Chris@10 588 TM = VFMA(LDK(KP475528258), TK, VMUL(LDK(KP293892626), TL));
Chris@10 589 T24 = VFNMS(LDK(KP475528258), TL, VMUL(LDK(KP293892626), TK));
Chris@10 590 TI = VFNMS(LDK(KP250000000), TH, TG);
Chris@10 591 TJ = VADD(TF, TI);
Chris@10 592 T23 = VSUB(TI, TF);
Chris@10 593 TN = VFNMS(LDK(KP963507348), TM, VMUL(LDK(KP876306680), TJ));
Chris@10 594 T2e = VFMA(LDK(KP1_071653589), T24, VMUL(LDK(KP844327925), T23));
Chris@10 595 T1i = VFMA(LDK(KP1_752613360), TM, VMUL(LDK(KP481753674), TJ));
Chris@10 596 T25 = VFNMS(LDK(KP1_688655851), T24, VMUL(LDK(KP535826794), T23));
Chris@10 597 }
Chris@10 598 {
Chris@10 599 V Tb, Tq, T3, Tc, T6, Ta, Ti, Tr, Tl, Tp, Ts, Td;
Chris@10 600 Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@10 601 Tq = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@10 602 {
Chris@10 603 V T1, T2, T8, T4, T5, T9;
Chris@10 604 T1 = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@10 605 T2 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0]));
Chris@10 606 T8 = VADD(T1, T2);
Chris@10 607 T4 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
Chris@10 608 T5 = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)]));
Chris@10 609 T9 = VADD(T4, T5);
Chris@10 610 T3 = VSUB(T1, T2);
Chris@10 611 Tc = VADD(T8, T9);
Chris@10 612 T6 = VSUB(T4, T5);
Chris@10 613 Ta = VMUL(LDK(KP559016994), VSUB(T8, T9));
Chris@10 614 }
Chris@10 615 {
Chris@10 616 V Tg, Th, Tn, Tj, Tk, To;
Chris@10 617 Tg = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@10 618 Th = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)]));
Chris@10 619 Tn = VADD(Tg, Th);
Chris@10 620 Tj = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@10 621 Tk = LD(&(xi[WS(is, 16)]), ivs, &(xi[0]));
Chris@10 622 To = VADD(Tj, Tk);
Chris@10 623 Ti = VSUB(Tg, Th);
Chris@10 624 Tr = VADD(Tn, To);
Chris@10 625 Tl = VSUB(Tj, Tk);
Chris@10 626 Tp = VMUL(LDK(KP559016994), VSUB(Tn, To));
Chris@10 627 }
Chris@10 628 T2M = VADD(Tq, Tr);
Chris@10 629 T2N = VADD(Tb, Tc);
Chris@10 630 T2T = VADD(T2M, T2N);
Chris@10 631 Tm = VFMA(LDK(KP475528258), Ti, VMUL(LDK(KP293892626), Tl));
Chris@10 632 T1W = VFNMS(LDK(KP475528258), Tl, VMUL(LDK(KP293892626), Ti));
Chris@10 633 Ts = VFNMS(LDK(KP250000000), Tr, Tq);
Chris@10 634 Tt = VADD(Tp, Ts);
Chris@10 635 T1X = VSUB(Ts, Tp);
Chris@10 636 Tu = VFMA(LDK(KP1_937166322), Tm, VMUL(LDK(KP248689887), Tt));
Chris@10 637 T20 = VFNMS(LDK(KP963507348), T1W, VMUL(LDK(KP876306680), T1X));
Chris@10 638 Tw = VFNMS(LDK(KP497379774), Tm, VMUL(LDK(KP968583161), Tt));
Chris@10 639 T1Y = VFMA(LDK(KP1_752613360), T1W, VMUL(LDK(KP481753674), T1X));
Chris@10 640 T7 = VFMA(LDK(KP475528258), T3, VMUL(LDK(KP293892626), T6));
Chris@10 641 T1U = VFNMS(LDK(KP475528258), T6, VMUL(LDK(KP293892626), T3));
Chris@10 642 Td = VFNMS(LDK(KP250000000), Tc, Tb);
Chris@10 643 Te = VADD(Ta, Td);
Chris@10 644 T1T = VSUB(Td, Ta);
Chris@10 645 Tf = VFMA(LDK(KP1_071653589), T7, VMUL(LDK(KP844327925), Te));
Chris@10 646 T21 = VFMA(LDK(KP1_809654104), T1U, VMUL(LDK(KP425779291), T1T));
Chris@10 647 Tx = VFNMS(LDK(KP1_688655851), T7, VMUL(LDK(KP535826794), Te));
Chris@10 648 T1V = VFNMS(LDK(KP851558583), T1U, VMUL(LDK(KP904827052), T1T));
Chris@10 649 }
Chris@10 650 {
Chris@10 651 V T2V, T2X, T2Y, T2S, T30, T2O, T2R, T31, T2Z;
Chris@10 652 T2V = VMUL(LDK(KP559016994), VSUB(T2T, T2U));
Chris@10 653 T2X = VADD(T2T, T2U);
Chris@10 654 T2Y = VFNMS(LDK(KP250000000), T2X, T2W);
Chris@10 655 T2O = VSUB(T2M, T2N);
Chris@10 656 T2R = VSUB(T2P, T2Q);
Chris@10 657 T2S = VBYI(VFMA(LDK(KP951056516), T2O, VMUL(LDK(KP587785252), T2R)));
Chris@10 658 T30 = VBYI(VFNMS(LDK(KP951056516), T2R, VMUL(LDK(KP587785252), T2O)));
Chris@10 659 ST(&(xo[0]), VADD(T2W, T2X), ovs, &(xo[0]));
Chris@10 660 T31 = VSUB(T2Y, T2V);
Chris@10 661 ST(&(xo[WS(os, 10)]), VADD(T30, T31), ovs, &(xo[0]));
Chris@10 662 ST(&(xo[WS(os, 15)]), VSUB(T31, T30), ovs, &(xo[WS(os, 1)]));
Chris@10 663 T2Z = VADD(T2V, T2Y);
Chris@10 664 ST(&(xo[WS(os, 5)]), VADD(T2S, T2Z), ovs, &(xo[WS(os, 1)]));
Chris@10 665 ST(&(xo[WS(os, 20)]), VSUB(T2Z, T2S), ovs, &(xo[0]));
Chris@10 666 }
Chris@10 667 {
Chris@10 668 V T1Z, T2i, T2j, T2g, T2w, T2x, T2y, T2G, T2H, T2I, T2D, T2E, T2F, T2z, T2A;
Chris@10 669 V T2B, T2p, T2m, T2q, T2b, T2c, T2a, T2d, T2h, T2r;
Chris@10 670 T1Z = VSUB(T1V, T1Y);
Chris@10 671 T2i = VADD(T20, T21);
Chris@10 672 T2j = VSUB(T25, T28);
Chris@10 673 T2g = VSUB(T2e, T2f);
Chris@10 674 T2w = VFMA(LDK(KP1_369094211), T1W, VMUL(LDK(KP728968627), T1X));
Chris@10 675 T2x = VFNMS(LDK(KP992114701), T1T, VMUL(LDK(KP250666467), T1U));
Chris@10 676 T2y = VADD(T2w, T2x);
Chris@10 677 T2G = VFNMS(LDK(KP125581039), T24, VMUL(LDK(KP998026728), T23));
Chris@10 678 T2H = VFMA(LDK(KP1_274847979), T27, VMUL(LDK(KP770513242), T26));
Chris@10 679 T2I = VADD(T2G, T2H);
Chris@10 680 T2D = VFNMS(LDK(KP1_457937254), T1W, VMUL(LDK(KP684547105), T1X));
Chris@10 681 T2E = VFMA(LDK(KP1_984229402), T1U, VMUL(LDK(KP125333233), T1T));
Chris@10 682 T2F = VADD(T2D, T2E);
Chris@10 683 T2z = VFMA(LDK(KP1_996053456), T24, VMUL(LDK(KP062790519), T23));
Chris@10 684 T2A = VFNMS(LDK(KP637423989), T26, VMUL(LDK(KP1_541026485), T27));
Chris@10 685 T2B = VADD(T2z, T2A);
Chris@10 686 {
Chris@10 687 V T2k, T2l, T22, T29;
Chris@10 688 T2k = VADD(T1Y, T1V);
Chris@10 689 T2l = VADD(T2e, T2f);
Chris@10 690 T2p = VADD(T2k, T2l);
Chris@10 691 T2m = VMUL(LDK(KP559016994), VSUB(T2k, T2l));
Chris@10 692 T2q = VFNMS(LDK(KP250000000), T2p, T2o);
Chris@10 693 T2b = VSUB(T1e, T1b);
Chris@10 694 T22 = VSUB(T20, T21);
Chris@10 695 T29 = VADD(T25, T28);
Chris@10 696 T2c = VADD(T22, T29);
Chris@10 697 T2a = VMUL(LDK(KP559016994), VSUB(T22, T29));
Chris@10 698 T2d = VFNMS(LDK(KP250000000), T2c, T2b);
Chris@10 699 }
Chris@10 700 {
Chris@10 701 V T2u, T2v, T2C, T2J;
Chris@10 702 T2u = VADD(T2b, T2c);
Chris@10 703 T2v = VBYI(VADD(T2o, T2p));
Chris@10 704 ST(&(xo[WS(os, 23)]), VSUB(T2u, T2v), ovs, &(xo[WS(os, 1)]));
Chris@10 705 ST(&(xo[WS(os, 2)]), VADD(T2u, T2v), ovs, &(xo[0]));
Chris@10 706 T2C = VADD(T2b, VADD(T2y, T2B));
Chris@10 707 T2J = VBYI(VSUB(VADD(T2F, T2I), T2o));
Chris@10 708 ST(&(xo[WS(os, 22)]), VSUB(T2C, T2J), ovs, &(xo[0]));
Chris@10 709 ST(&(xo[WS(os, 3)]), VADD(T2C, T2J), ovs, &(xo[WS(os, 1)]));
Chris@10 710 }
Chris@10 711 T2h = VFMA(LDK(KP951056516), T1Z, VADD(T2a, VFNMS(LDK(KP587785252), T2g, T2d)));
Chris@10 712 T2r = VBYI(VADD(VFMA(LDK(KP951056516), T2i, VMUL(LDK(KP587785252), T2j)), VADD(T2m, T2q)));
Chris@10 713 ST(&(xo[WS(os, 18)]), VSUB(T2h, T2r), ovs, &(xo[0]));
Chris@10 714 ST(&(xo[WS(os, 7)]), VADD(T2h, T2r), ovs, &(xo[WS(os, 1)]));
Chris@10 715 {
Chris@10 716 V T2s, T2t, T2K, T2L;
Chris@10 717 T2s = VFMA(LDK(KP587785252), T1Z, VFMA(LDK(KP951056516), T2g, VSUB(T2d, T2a)));
Chris@10 718 T2t = VBYI(VADD(VFNMS(LDK(KP951056516), T2j, VMUL(LDK(KP587785252), T2i)), VSUB(T2q, T2m)));
Chris@10 719 ST(&(xo[WS(os, 13)]), VSUB(T2s, T2t), ovs, &(xo[WS(os, 1)]));
Chris@10 720 ST(&(xo[WS(os, 12)]), VADD(T2s, T2t), ovs, &(xo[0]));
Chris@10 721 T2K = VBYI(VSUB(VFMA(LDK(KP951056516), VSUB(T2w, T2x), VFMA(LDK(KP309016994), T2F, VFNMS(LDK(KP809016994), T2I, VMUL(LDK(KP587785252), VSUB(T2z, T2A))))), T2o));
Chris@10 722 T2L = VFMA(LDK(KP309016994), T2y, VFMA(LDK(KP951056516), VSUB(T2E, T2D), VFMA(LDK(KP587785252), VSUB(T2H, T2G), VFNMS(LDK(KP809016994), T2B, T2b))));
Chris@10 723 ST(&(xo[WS(os, 8)]), VADD(T2K, T2L), ovs, &(xo[0]));
Chris@10 724 ST(&(xo[WS(os, 17)]), VSUB(T2L, T2K), ovs, &(xo[WS(os, 1)]));
Chris@10 725 }
Chris@10 726 }
Chris@10 727 {
Chris@10 728 V Tv, T1m, T1n, T1k, T1D, T1E, T1F, T1N, T1O, T1P, T1K, T1L, T1M, T1G, T1H;
Chris@10 729 V T1I, T1w, T1q, T1x, T1f, T1g, T14, T1h, T1l, T1y;
Chris@10 730 Tv = VSUB(Tf, Tu);
Chris@10 731 T1m = VSUB(Tw, Tx);
Chris@10 732 T1n = VSUB(TN, T12);
Chris@10 733 T1k = VSUB(T1i, T1j);
Chris@10 734 T1D = VFMA(LDK(KP1_688655851), Tm, VMUL(LDK(KP535826794), Tt));
Chris@10 735 T1E = VFMA(LDK(KP1_541026485), T7, VMUL(LDK(KP637423989), Te));
Chris@10 736 T1F = VSUB(T1D, T1E);
Chris@10 737 T1N = VFMA(LDK(KP851558583), TM, VMUL(LDK(KP904827052), TJ));
Chris@10 738 T1O = VFMA(LDK(KP1_984229402), T11, VMUL(LDK(KP125333233), TY));
Chris@10 739 T1P = VADD(T1N, T1O);
Chris@10 740 T1K = VFNMS(LDK(KP1_071653589), Tm, VMUL(LDK(KP844327925), Tt));
Chris@10 741 T1L = VFNMS(LDK(KP770513242), Te, VMUL(LDK(KP1_274847979), T7));
Chris@10 742 T1M = VADD(T1K, T1L);
Chris@10 743 T1G = VFNMS(LDK(KP425779291), TJ, VMUL(LDK(KP1_809654104), TM));
Chris@10 744 T1H = VFNMS(LDK(KP992114701), TY, VMUL(LDK(KP250666467), T11));
Chris@10 745 T1I = VADD(T1G, T1H);
Chris@10 746 {
Chris@10 747 V T1o, T1p, Ty, T13;
Chris@10 748 T1o = VADD(Tu, Tf);
Chris@10 749 T1p = VADD(T1i, T1j);
Chris@10 750 T1w = VADD(T1o, T1p);
Chris@10 751 T1q = VMUL(LDK(KP559016994), VSUB(T1o, T1p));
Chris@10 752 T1x = VFNMS(LDK(KP250000000), T1w, T1v);
Chris@10 753 T1f = VADD(T1b, T1e);
Chris@10 754 Ty = VADD(Tw, Tx);
Chris@10 755 T13 = VADD(TN, T12);
Chris@10 756 T1g = VADD(Ty, T13);
Chris@10 757 T14 = VMUL(LDK(KP559016994), VSUB(Ty, T13));
Chris@10 758 T1h = VFNMS(LDK(KP250000000), T1g, T1f);
Chris@10 759 }
Chris@10 760 {
Chris@10 761 V T1B, T1C, T1J, T1Q;
Chris@10 762 T1B = VADD(T1f, T1g);
Chris@10 763 T1C = VBYI(VADD(T1v, T1w));
Chris@10 764 ST(&(xo[WS(os, 24)]), VSUB(T1B, T1C), ovs, &(xo[0]));
Chris@10 765 ST(&(xo[WS(os, 1)]), VADD(T1B, T1C), ovs, &(xo[WS(os, 1)]));
Chris@10 766 T1J = VADD(T1f, VADD(T1F, T1I));
Chris@10 767 T1Q = VBYI(VSUB(VADD(T1M, T1P), T1v));
Chris@10 768 ST(&(xo[WS(os, 21)]), VSUB(T1J, T1Q), ovs, &(xo[WS(os, 1)]));
Chris@10 769 ST(&(xo[WS(os, 4)]), VADD(T1J, T1Q), ovs, &(xo[0]));
Chris@10 770 }
Chris@10 771 T1l = VFMA(LDK(KP951056516), Tv, VADD(T14, VFNMS(LDK(KP587785252), T1k, T1h)));
Chris@10 772 T1y = VBYI(VADD(VFMA(LDK(KP951056516), T1m, VMUL(LDK(KP587785252), T1n)), VADD(T1q, T1x)));
Chris@10 773 ST(&(xo[WS(os, 19)]), VSUB(T1l, T1y), ovs, &(xo[WS(os, 1)]));
Chris@10 774 ST(&(xo[WS(os, 6)]), VADD(T1l, T1y), ovs, &(xo[0]));
Chris@10 775 {
Chris@10 776 V T1z, T1A, T1R, T1S;
Chris@10 777 T1z = VFMA(LDK(KP587785252), Tv, VFMA(LDK(KP951056516), T1k, VSUB(T1h, T14)));
Chris@10 778 T1A = VBYI(VADD(VFNMS(LDK(KP951056516), T1n, VMUL(LDK(KP587785252), T1m)), VSUB(T1x, T1q)));
Chris@10 779 ST(&(xo[WS(os, 14)]), VSUB(T1z, T1A), ovs, &(xo[0]));
Chris@10 780 ST(&(xo[WS(os, 11)]), VADD(T1z, T1A), ovs, &(xo[WS(os, 1)]));
Chris@10 781 T1R = VBYI(VSUB(VFMA(LDK(KP309016994), T1M, VFMA(LDK(KP951056516), VADD(T1D, T1E), VFNMS(LDK(KP809016994), T1P, VMUL(LDK(KP587785252), VSUB(T1G, T1H))))), T1v));
Chris@10 782 T1S = VFMA(LDK(KP951056516), VSUB(T1L, T1K), VFMA(LDK(KP309016994), T1F, VFMA(LDK(KP587785252), VSUB(T1O, T1N), VFNMS(LDK(KP809016994), T1I, T1f))));
Chris@10 783 ST(&(xo[WS(os, 9)]), VADD(T1R, T1S), ovs, &(xo[WS(os, 1)]));
Chris@10 784 ST(&(xo[WS(os, 16)]), VSUB(T1S, T1R), ovs, &(xo[0]));
Chris@10 785 }
Chris@10 786 }
Chris@10 787 }
Chris@10 788 }
Chris@10 789 VLEAVE();
Chris@10 790 }
Chris@10 791
Chris@10 792 static const kdft_desc desc = { 25, XSIMD_STRING("n1bv_25"), {147, 63, 77, 0}, &GENUS, 0, 0, 0, 0 };
Chris@10 793
Chris@10 794 void XSIMD(codelet_n1bv_25) (planner *p) {
Chris@10 795 X(kdft_register) (p, n1bv_25, &desc);
Chris@10 796 }
Chris@10 797
Chris@10 798 #endif /* HAVE_FMA */