annotate src/fftw-3.3.5/dft/simd/common/n1fv_25.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:38:51 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-dft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_notw_c.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name n1fv_25 -include n1f.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 224 FP additions, 193 FP multiplications,
Chris@42 32 * (or, 43 additions, 12 multiplications, 181 fused multiply/add),
Chris@42 33 * 215 stack variables, 67 constants, and 50 memory accesses
Chris@42 34 */
Chris@42 35 #include "n1f.h"
Chris@42 36
/*
 * n1fv_25, HAVE_FMA variant: generated SIMD kernel for a 25-point transform
 * (the generator was invoked with "-n 25 -name n1fv_25").
 *
 * Each loop iteration loads the 25 input vectors xi[WS(is, 0)] .. xi[WS(is, 24)]
 * and stores the 25 output vectors xo[WS(os, 0)] .. xo[WS(os, 24)].
 *
 * ri, ro   - input / output base pointers; copied into xi / xo below.
 * ii, io   - not referenced anywhere in this body.
 * is, os   - input / output strides, applied through WS().
 * v        - iteration count; the loop counter starts at v and drops by VL.
 * ivs, ovs - per-iteration pointer advance (multiplied by VL) and the
 *            second argument handed to LD() / ST().
 */
Chris@42 37 static void n1fv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Numeric constants declared through DVK and read back with LDK(); the digits
 * in each KPnnnnnnnnn name are the leading digits of the value it holds. */
Chris@42 39 DVK(KP792626838, +0.792626838241819413632131824093538848057784557);
Chris@42 40 DVK(KP876091699, +0.876091699473550838204498029706869638173524346);
Chris@42 41 DVK(KP803003575, +0.803003575438660414833440593570376004635464850);
Chris@42 42 DVK(KP617882369, +0.617882369114440893914546919006756321695042882);
Chris@42 43 DVK(KP242145790, +0.242145790282157779872542093866183953459003101);
Chris@42 44 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@42 45 DVK(KP999544308, +0.999544308746292983948881682379742149196758193);
Chris@42 46 DVK(KP683113946, +0.683113946453479238701949862233725244439656928);
Chris@42 47 DVK(KP559154169, +0.559154169276087864842202529084232643714075927);
Chris@42 48 DVK(KP904730450, +0.904730450839922351881287709692877908104763647);
Chris@42 49 DVK(KP829049696, +0.829049696159252993975487806364305442437946767);
Chris@42 50 DVK(KP831864738, +0.831864738706457140726048799369896829771167132);
Chris@42 51 DVK(KP916574801, +0.916574801383451584742370439148878693530976769);
Chris@42 52 DVK(KP894834959, +0.894834959464455102997960030820114611498661386);
Chris@42 53 DVK(KP809385824, +0.809385824416008241660603814668679683846476688);
Chris@42 54 DVK(KP447417479, +0.447417479732227551498980015410057305749330693);
Chris@42 55 DVK(KP860541664, +0.860541664367944677098261680920518816412804187);
Chris@42 56 DVK(KP897376177, +0.897376177523557693138608077137219684419427330);
Chris@42 57 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@42 58 DVK(KP681693190, +0.681693190061530575150324149145440022633095390);
Chris@42 59 DVK(KP560319534, +0.560319534973832390111614715371676131169633784);
Chris@42 60 DVK(KP855719849, +0.855719849902058969314654733608091555096772472);
Chris@42 61 DVK(KP237294955, +0.237294955877110315393888866460840817927895961);
Chris@42 62 DVK(KP949179823, +0.949179823508441261575555465843363271711583843);
Chris@42 63 DVK(KP904508497, +0.904508497187473712051146708591409529430077295);
Chris@42 64 DVK(KP997675361, +0.997675361079556513670859573984492383596555031);
Chris@42 65 DVK(KP262346850, +0.262346850930607871785420028382979691334784273);
Chris@42 66 DVK(KP763932022, +0.763932022500210303590826331268723764559381640);
Chris@42 67 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@42 68 DVK(KP690983005, +0.690983005625052575897706582817180941139845410);
Chris@42 69 DVK(KP952936919, +0.952936919628306576880750665357914584765951388);
Chris@42 70 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@42 71 DVK(KP570584518, +0.570584518783621657366766175430996792655723863);
Chris@42 72 DVK(KP669429328, +0.669429328479476605641803240971985825917022098);
Chris@42 73 DVK(KP923225144, +0.923225144846402650453449441572664695995209956);
Chris@42 74 DVK(KP906616052, +0.906616052148196230441134447086066874408359177);
Chris@42 75 DVK(KP956723877, +0.956723877038460305821989399535483155872969262);
Chris@42 76 DVK(KP522616830, +0.522616830205754336872861364785224694908468440);
Chris@42 77 DVK(KP945422727, +0.945422727388575946270360266328811958657216298);
Chris@42 78 DVK(KP912575812, +0.912575812670962425556968549836277086778922727);
Chris@42 79 DVK(KP982009705, +0.982009705009746369461829878184175962711969869);
Chris@42 80 DVK(KP921078979, +0.921078979742360627699756128143719920817673854);
Chris@42 81 DVK(KP734762448, +0.734762448793050413546343770063151342619912334);
Chris@42 82 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 83 DVK(KP958953096, +0.958953096729998668045963838399037225970891871);
Chris@42 84 DVK(KP867381224, +0.867381224396525206773171885031575671309956167);
Chris@42 85 DVK(KP269969613, +0.269969613759572083574752974412347470060951301);
Chris@42 86 DVK(KP244189809, +0.244189809627953270309879511234821255780225091);
Chris@42 87 DVK(KP845997307, +0.845997307939530944175097360758058292389769300);
Chris@42 88 DVK(KP772036680, +0.772036680810363904029489473607579825330539880);
Chris@42 89 DVK(KP132830569, +0.132830569247582714407653942074819768844536507);
Chris@42 90 DVK(KP120146378, +0.120146378570687701782758537356596213647956445);
Chris@42 91 DVK(KP987388751, +0.987388751065621252324603216482382109400433949);
Chris@42 92 DVK(KP893101515, +0.893101515366181661711202267938416198338079437);
Chris@42 93 DVK(KP786782374, +0.786782374965295178365099601674911834788448471);
Chris@42 94 DVK(KP869845200, +0.869845200362138853122720822420327157933056305);
Chris@42 95 DVK(KP447533225, +0.447533225982656890041886979663652563063114397);
Chris@42 96 DVK(KP494780565, +0.494780565770515410344588413655324772219443730);
Chris@42 97 DVK(KP578046249, +0.578046249379945007321754579646815604023525655);
Chris@42 98 DVK(KP522847744, +0.522847744331509716623755382187077770911012542);
Chris@42 99 DVK(KP059835404, +0.059835404262124915169548397419498386427871950);
Chris@42 100 DVK(KP066152395, +0.066152395967733048213034281011006031460903353);
Chris@42 101 DVK(KP603558818, +0.603558818296015001454675132653458027918768137);
Chris@42 102 DVK(KP667278218, +0.667278218140296670899089292254759909713898805);
Chris@42 103 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 104 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 105 DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
Chris@42 106 {
Chris@42 107 INT i;
Chris@42 108 const R *xi;
Chris@42 109 R *xo;
Chris@42 110 xi = ri;
Chris@42 111 xo = ro;
/* One pass per group of VL transforms: i counts down from v by VL while xi
 * and xo advance by VL * ivs and VL * ovs respectively. */
Chris@42 112 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(50, is), MAKE_VOLATILE_STRIDE(50, os)) {
Chris@42 113 V T1g, T1k, T1I, T24, T2a, T1G, T1A, T1l, T1B, T1H, T1d;
Chris@42 114 {
Chris@42 115 V T2z, T1q, Ta, T9, T3n, Ty, Tl, T2O, T2W, T2l, T2s, TV, T1i, T1K, T1S;
Chris@42 116 V T3z, T3t, Tk, T3o, Tp, T2g, T2N, T2V, T2o, T2t, T1a, T1j, T1J, T1R, Tz;
Chris@42 117 V Tt, TA, Tw;
Chris@42 118 {
Chris@42 119 V T1, T5, T6, T2, T3;
/* From here on the 25 input loads (LD) are interleaved with arithmetic. */
Chris@42 120 T1 = LD(&(xi[0]), ivs, &(xi[0]));
Chris@42 121 T5 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@42 122 T6 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)]));
Chris@42 123 T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@42 124 T3 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0]));
Chris@42 125 {
Chris@42 126 V TH, TW, TK, TS, T10, T8, TN, TT, T17, TZ, T11;
Chris@42 127 TH = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@42 128 TW = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@42 129 {
Chris@42 130 V TI, TJ, TL, T7, T1p, T4, T1o, TM, TX, TY;
Chris@42 131 TI = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@42 132 TJ = LD(&(xi[WS(is, 22)]), ivs, &(xi[0]));
Chris@42 133 TL = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@42 134 T7 = VADD(T5, T6);
Chris@42 135 T1p = VSUB(T5, T6);
Chris@42 136 T4 = VADD(T2, T3);
Chris@42 137 T1o = VSUB(T2, T3);
Chris@42 138 TM = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)]));
Chris@42 139 TX = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@42 140 TK = VADD(TI, TJ);
Chris@42 141 TS = VSUB(TI, TJ);
Chris@42 142 TY = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)]));
Chris@42 143 T10 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@42 144 T2z = VFNMS(LDK(KP618033988), T1o, T1p);
Chris@42 145 T1q = VFMA(LDK(KP618033988), T1p, T1o);
Chris@42 146 Ta = VSUB(T4, T7);
Chris@42 147 T8 = VADD(T4, T7);
Chris@42 148 TN = VADD(TL, TM);
Chris@42 149 TT = VSUB(TM, TL);
Chris@42 150 T17 = VSUB(TX, TY);
Chris@42 151 TZ = VADD(TX, TY);
Chris@42 152 T11 = LD(&(xi[WS(is, 18)]), ivs, &(xi[0]));
Chris@42 153 }
Chris@42 154 {
Chris@42 155 V Tc, T2m, T19, Tn, To, Tr, Tj, T16, T2n, Ts, Tu, Tv;
Chris@42 156 {
Chris@42 157 V TU, T2j, TO, TQ, T12, T18;
Chris@42 158 Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@42 159 T9 = VFNMS(LDK(KP250000000), T8, T1);
Chris@42 160 T3n = VADD(T1, T8);
Chris@42 161 TU = VFNMS(LDK(KP618033988), TT, TS);
Chris@42 162 T2j = VFMA(LDK(KP618033988), TS, TT);
Chris@42 163 TO = VADD(TK, TN);
Chris@42 164 TQ = VSUB(TN, TK);
Chris@42 165 T12 = VADD(T10, T11);
Chris@42 166 T18 = VSUB(T10, T11);
Chris@42 167 Ty = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@42 168 {
Chris@42 169 V T3r, T15, T13, Tf, Ti, T2k, TR, TP, T3s, T14;
Chris@42 170 {
Chris@42 171 V Td, Te, Tg, Th;
Chris@42 172 Td = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@42 173 Te = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)]));
Chris@42 174 Tg = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@42 175 Th = LD(&(xi[WS(is, 16)]), ivs, &(xi[0]));
Chris@42 176 TP = VFNMS(LDK(KP250000000), TO, TH);
Chris@42 177 T3r = VADD(TH, TO);
Chris@42 178 T2m = VFNMS(LDK(KP618033988), T17, T18);
Chris@42 179 T19 = VFMA(LDK(KP618033988), T18, T17);
Chris@42 180 T15 = VSUB(T12, TZ);
Chris@42 181 T13 = VADD(TZ, T12);
Chris@42 182 Tf = VADD(Td, Te);
Chris@42 183 Tn = VSUB(Td, Te);
Chris@42 184 To = VSUB(Th, Tg);
Chris@42 185 Ti = VADD(Tg, Th);
Chris@42 186 }
Chris@42 187 T2k = VFMA(LDK(KP559016994), TQ, TP);
Chris@42 188 TR = VFNMS(LDK(KP559016994), TQ, TP);
Chris@42 189 Tr = LD(&(xi[WS(is, 24)]), ivs, &(xi[0]));
Chris@42 190 T3s = VADD(TW, T13);
Chris@42 191 T14 = VFNMS(LDK(KP250000000), T13, TW);
Chris@42 192 Tj = VADD(Tf, Ti);
Chris@42 193 Tl = VSUB(Tf, Ti);
Chris@42 194 T2O = VFNMS(LDK(KP667278218), T2k, T2j);
Chris@42 195 T2W = VFMA(LDK(KP603558818), T2j, T2k);
Chris@42 196 T2l = VFMA(LDK(KP066152395), T2k, T2j);
Chris@42 197 T2s = VFNMS(LDK(KP059835404), T2j, T2k);
Chris@42 198 TV = VFNMS(LDK(KP522847744), TU, TR);
Chris@42 199 T1i = VFMA(LDK(KP578046249), TR, TU);
Chris@42 200 T1K = VFNMS(LDK(KP494780565), TR, TU);
Chris@42 201 T1S = VFMA(LDK(KP447533225), TU, TR);
Chris@42 202 T16 = VFNMS(LDK(KP559016994), T15, T14);
Chris@42 203 T2n = VFMA(LDK(KP559016994), T15, T14);
Chris@42 204 T3z = VSUB(T3r, T3s);
Chris@42 205 T3t = VADD(T3r, T3s);
Chris@42 206 Ts = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@42 207 Tu = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)]));
Chris@42 208 Tv = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
Chris@42 209 }
Chris@42 210 }
Chris@42 211 Tk = VFNMS(LDK(KP250000000), Tj, Tc);
Chris@42 212 T3o = VADD(Tc, Tj);
Chris@42 213 Tp = VFNMS(LDK(KP618033988), To, Tn);
Chris@42 214 T2g = VFMA(LDK(KP618033988), Tn, To);
Chris@42 215 T2N = VFMA(LDK(KP066152395), T2n, T2m);
Chris@42 216 T2V = VFNMS(LDK(KP059835404), T2m, T2n);
Chris@42 217 T2o = VFMA(LDK(KP869845200), T2n, T2m);
Chris@42 218 T2t = VFNMS(LDK(KP786782374), T2m, T2n);
Chris@42 219 T1a = VFNMS(LDK(KP893101515), T19, T16);
Chris@42 220 T1j = VFMA(LDK(KP987388751), T16, T19);
Chris@42 221 T1J = VFNMS(LDK(KP120146378), T19, T16);
Chris@42 222 T1R = VFMA(LDK(KP132830569), T16, T19);
Chris@42 223 Tz = VADD(Ts, Tr);
Chris@42 224 Tt = VSUB(Tr, Ts);
Chris@42 225 TA = VADD(Tv, Tu);
Chris@42 226 Tw = VSUB(Tu, Tv);
Chris@42 227 }
Chris@42 228 }
Chris@42 229 }
Chris@42 230 {
Chris@42 231 V T2p, T2I, T2u, T2C, Tx, T2d, T2X, T34, T2P, T3b, T2b, Tb, T2Q, T2Z, T2h;
Chris@42 232 V T2w, Tq, T1e, T1M, T1U, TE, T2c, T3q, T3y;
Chris@42 233 T2p = VFNMS(LDK(KP772036680), T2o, T2l);
Chris@42 234 T2I = VFMA(LDK(KP772036680), T2o, T2l);
Chris@42 235 T2u = VFMA(LDK(KP772036680), T2t, T2s);
Chris@42 236 T2C = VFNMS(LDK(KP772036680), T2t, T2s);
Chris@42 237 {
Chris@42 238 V TD, TB, Tm, T2f, T3p, TC;
Chris@42 239 Tx = VFMA(LDK(KP618033988), Tw, Tt);
Chris@42 240 T2d = VFNMS(LDK(KP618033988), Tt, Tw);
Chris@42 241 TD = VSUB(Tz, TA);
Chris@42 242 TB = VADD(Tz, TA);
Chris@42 243 Tm = VFMA(LDK(KP559016994), Tl, Tk);
Chris@42 244 T2f = VFNMS(LDK(KP559016994), Tl, Tk);
Chris@42 245 T2X = VFMA(LDK(KP845997307), T2W, T2V);
Chris@42 246 T34 = VFNMS(LDK(KP845997307), T2W, T2V);
Chris@42 247 T2P = VFNMS(LDK(KP845997307), T2O, T2N);
Chris@42 248 T3b = VFMA(LDK(KP845997307), T2O, T2N);
Chris@42 249 T2b = VFNMS(LDK(KP559016994), Ta, T9);
Chris@42 250 Tb = VFMA(LDK(KP559016994), Ta, T9);
Chris@42 251 T3p = VADD(Ty, TB);
/* NOTE(review): this is the only VFMS in the function; the parallel steps for
 * the other input groups (T9, TP, T14, Tk) use VFNMS. It appears deliberate
 * (the generator seems to carry this term with the opposite sign) -- do not
 * "fix" it without checking against the generator output. */
Chris@42 252 TC = VFMS(LDK(KP250000000), TB, Ty);
Chris@42 253 T2Q = VFNMS(LDK(KP522847744), T2g, T2f);
Chris@42 254 T2Z = VFMA(LDK(KP578046249), T2f, T2g);
Chris@42 255 T2h = VFMA(LDK(KP893101515), T2g, T2f);
Chris@42 256 T2w = VFNMS(LDK(KP987388751), T2f, T2g);
Chris@42 257 Tq = VFNMS(LDK(KP244189809), Tp, Tm);
Chris@42 258 T1e = VFMA(LDK(KP269969613), Tm, Tp);
Chris@42 259 T1M = VFMA(LDK(KP667278218), Tm, Tp);
Chris@42 260 T1U = VFNMS(LDK(KP603558818), Tp, Tm);
Chris@42 261 TE = VFNMS(LDK(KP559016994), TD, TC);
Chris@42 262 T2c = VFMA(LDK(KP559016994), TD, TC);
Chris@42 263 T3q = VADD(T3o, T3p);
Chris@42 264 T3y = VSUB(T3o, T3p);
Chris@42 265 }
Chris@42 266 {
Chris@42 267 V T1Z, T25, T1P, T22, T1X, TG, T1b, T28, T1t, T1y, T1x, T1E, T1Q, T1Y;
Chris@42 268 {
Chris@42 269 V T26, T1L, T1T, TF, T1f, T1W, T3m, T3g, T2M, T2G, T39, T3j, T21, T1O, T20;
Chris@42 270 V T27;
Chris@42 271 T26 = VFMA(LDK(KP867381224), T1K, T1J);
Chris@42 272 T1L = VFNMS(LDK(KP867381224), T1K, T1J);
Chris@42 273 T20 = VFNMS(LDK(KP958953096), T1S, T1R);
Chris@42 274 T1T = VFMA(LDK(KP958953096), T1S, T1R);
Chris@42 275 {
Chris@42 276 V T2R, T2Y, T2e, T2v, T1N, T1V;
Chris@42 277 T2R = VFNMS(LDK(KP494780565), T2c, T2d);
Chris@42 278 T2Y = VFMA(LDK(KP447533225), T2d, T2c);
Chris@42 279 T2e = VFMA(LDK(KP120146378), T2d, T2c);
Chris@42 280 T2v = VFNMS(LDK(KP132830569), T2c, T2d);
Chris@42 281 TF = VFNMS(LDK(KP667278218), TE, Tx);
Chris@42 282 T1f = VFMA(LDK(KP603558818), Tx, TE);
Chris@42 283 T1N = VFMA(LDK(KP869845200), TE, Tx);
Chris@42 284 T1V = VFNMS(LDK(KP786782374), Tx, TE);
Chris@42 285 {
Chris@42 286 V T3A, T3C, T3w, T3u;
Chris@42 287 T3A = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3z, T3y));
Chris@42 288 T3C = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3y, T3z));
Chris@42 289 T3w = VSUB(T3q, T3t);
Chris@42 290 T3u = VADD(T3q, T3t);
Chris@42 291 {
Chris@42 292 V T2B, T2x, T2H, T2i;
Chris@42 293 T2B = VFMA(LDK(KP734762448), T2w, T2v);
Chris@42 294 T2x = VFNMS(LDK(KP734762448), T2w, T2v);
Chris@42 295 T2H = VFNMS(LDK(KP734762448), T2h, T2e);
Chris@42 296 T2i = VFMA(LDK(KP734762448), T2h, T2e);
Chris@42 297 {
Chris@42 298 V T30, T35, T3c, T2S, T3v;
Chris@42 299 T30 = VFNMS(LDK(KP921078979), T2Z, T2Y);
Chris@42 300 T35 = VFMA(LDK(KP921078979), T2Z, T2Y);
Chris@42 301 T3c = VFMA(LDK(KP982009705), T2R, T2Q);
Chris@42 302 T2S = VFNMS(LDK(KP982009705), T2R, T2Q);
Chris@42 303 T1W = VFMA(LDK(KP912575812), T1V, T1U);
Chris@42 304 T1Z = VFNMS(LDK(KP912575812), T1V, T1U);
Chris@42 305 T1O = VFMA(LDK(KP912575812), T1N, T1M);
Chris@42 306 T25 = VFNMS(LDK(KP912575812), T1N, T1M);
/* Output 0: T3u + T3n works out to the plain sum of all 25 loaded inputs. */
Chris@42 307 ST(&(xo[0]), VADD(T3u, T3n), ovs, &(xo[0]));
Chris@42 308 T3v = VFNMS(LDK(KP250000000), T3u, T3n);
Chris@42 309 {
Chris@42 310 V T2y, T2J, T2q, T2D;
Chris@42 311 T2y = VFMA(LDK(KP945422727), T2x, T2u);
Chris@42 312 T2J = VFMA(LDK(KP522616830), T2x, T2I);
Chris@42 313 T2q = VFMA(LDK(KP956723877), T2p, T2i);
Chris@42 314 T2D = VFNMS(LDK(KP522616830), T2i, T2C);
Chris@42 315 {
Chris@42 316 V T3e, T31, T36, T2T;
Chris@42 317 T3e = VFMA(LDK(KP906616052), T30, T2X);
Chris@42 318 T31 = VFNMS(LDK(KP906616052), T30, T2X);
Chris@42 319 T36 = VFNMS(LDK(KP923225144), T2S, T2P);
Chris@42 320 T2T = VFMA(LDK(KP923225144), T2S, T2P);
Chris@42 321 {
Chris@42 322 V T3k, T3d, T3x, T3B;
Chris@42 323 T3k = VFNMS(LDK(KP669429328), T3b, T3c);
Chris@42 324 T3d = VFMA(LDK(KP570584518), T3c, T3b);
Chris@42 325 T3x = VFMA(LDK(KP559016994), T3w, T3v);
Chris@42 326 T3B = VFNMS(LDK(KP559016994), T3w, T3v);
Chris@42 327 {
Chris@42 328 V T2A, T2K, T2r, T2E;
Chris@42 329 T2A = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2z, T2y));
Chris@42 330 T2K = VFNMS(LDK(KP690983005), T2J, T2u);
Chris@42 331 T2r = VFMA(LDK(KP992114701), T2q, T2b);
Chris@42 332 T2E = VFMA(LDK(KP763932022), T2D, T2p);
Chris@42 333 {
Chris@42 334 V T32, T3a, T37, T3h;
Chris@42 335 T32 = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2z, T31));
Chris@42 336 T3a = VFMA(LDK(KP262346850), T31, T2z);
Chris@42 337 T37 = VFNMS(LDK(KP997675361), T36, T35);
Chris@42 338 T3h = VFNMS(LDK(KP904508497), T36, T34);
Chris@42 339 {
Chris@42 340 V T2U, T33, T3l, T3f;
Chris@42 341 T2U = VFMA(LDK(KP949179823), T2T, T2b);
Chris@42 342 T33 = VFNMS(LDK(KP237294955), T2T, T2b);
Chris@42 343 T3l = VFNMS(LDK(KP669429328), T3e, T3k);
Chris@42 344 T3f = VFMA(LDK(KP618033988), T3e, T3d);
/* The remaining 24 outputs are stored in pairs k / 25-k: each pair shares the
 * same two operands, one store using VFMAI and the other VFNMSI. */
Chris@42 345 ST(&(xo[WS(os, 20)]), VFMAI(T3A, T3x), ovs, &(xo[0]));
Chris@42 346 ST(&(xo[WS(os, 5)]), VFNMSI(T3A, T3x), ovs, &(xo[WS(os, 1)]));
Chris@42 347 ST(&(xo[WS(os, 15)]), VFNMSI(T3C, T3B), ovs, &(xo[WS(os, 1)]));
Chris@42 348 ST(&(xo[WS(os, 10)]), VFMAI(T3C, T3B), ovs, &(xo[0]));
Chris@42 349 {
Chris@42 350 V T2L, T2F, T38, T3i;
Chris@42 351 T2L = VFMA(LDK(KP855719849), T2K, T2H);
Chris@42 352 ST(&(xo[WS(os, 22)]), VFMAI(T2A, T2r), ovs, &(xo[0]));
Chris@42 353 ST(&(xo[WS(os, 3)]), VFNMSI(T2A, T2r), ovs, &(xo[WS(os, 1)]));
Chris@42 354 T2F = VFNMS(LDK(KP855719849), T2E, T2B);
Chris@42 355 T38 = VFMA(LDK(KP560319534), T37, T34);
Chris@42 356 T3i = VFNMS(LDK(KP681693190), T3h, T35);
Chris@42 357 ST(&(xo[WS(os, 23)]), VFMAI(T32, T2U), ovs, &(xo[WS(os, 1)]));
Chris@42 358 ST(&(xo[WS(os, 2)]), VFNMSI(T32, T2U), ovs, &(xo[0]));
Chris@42 359 T3m = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3l, T3a));
Chris@42 360 T3g = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3f, T3a));
Chris@42 361 T2M = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2L, T2z));
Chris@42 362 T2G = VFMA(LDK(KP897376177), T2F, T2b);
Chris@42 363 T39 = VFNMS(LDK(KP949179823), T38, T33);
Chris@42 364 T3j = VFNMS(LDK(KP860541664), T3i, T33);
Chris@42 365 T21 = VFMA(LDK(KP447417479), T1O, T20);
Chris@42 366 }
Chris@42 367 }
Chris@42 368 }
Chris@42 369 }
Chris@42 370 }
Chris@42 371 }
Chris@42 372 }
Chris@42 373 }
Chris@42 374 }
Chris@42 375 }
Chris@42 376 }
Chris@42 377 T1P = VFNMS(LDK(KP809385824), T1O, T1L);
Chris@42 378 ST(&(xo[WS(os, 17)]), VFMAI(T2M, T2G), ovs, &(xo[WS(os, 1)]));
Chris@42 379 ST(&(xo[WS(os, 8)]), VFNMSI(T2M, T2G), ovs, &(xo[0]));
Chris@42 380 ST(&(xo[WS(os, 12)]), VFMAI(T3g, T39), ovs, &(xo[0]));
Chris@42 381 ST(&(xo[WS(os, 13)]), VFNMSI(T3g, T39), ovs, &(xo[WS(os, 1)]));
Chris@42 382 ST(&(xo[WS(os, 7)]), VFMAI(T3m, T3j), ovs, &(xo[WS(os, 1)]));
Chris@42 383 ST(&(xo[WS(os, 18)]), VFNMSI(T3m, T3j), ovs, &(xo[0]));
Chris@42 384 T22 = VFMA(LDK(KP690983005), T21, T1L);
Chris@42 385 T27 = VFMA(LDK(KP447417479), T1W, T26);
Chris@42 386 T1X = VFMA(LDK(KP894834959), T1W, T1T);
Chris@42 387 {
Chris@42 388 V T1r, T1s, T1v, T1w;
Chris@42 389 T1r = VFNMS(LDK(KP916574801), T1f, T1e);
Chris@42 390 T1g = VFMA(LDK(KP916574801), T1f, T1e);
Chris@42 391 T1k = VFNMS(LDK(KP831864738), T1j, T1i);
Chris@42 392 T1s = VFMA(LDK(KP831864738), T1j, T1i);
Chris@42 393 T1v = VFNMS(LDK(KP829049696), TF, Tq);
Chris@42 394 TG = VFMA(LDK(KP829049696), TF, Tq);
Chris@42 395 T1b = VFMA(LDK(KP831864738), T1a, TV);
Chris@42 396 T1w = VFNMS(LDK(KP831864738), T1a, TV);
Chris@42 397 T28 = VFNMS(LDK(KP763932022), T27, T1T);
Chris@42 398 T1t = VFMA(LDK(KP904730450), T1s, T1r);
Chris@42 399 T1y = VFNMS(LDK(KP904730450), T1s, T1r);
Chris@42 400 T1x = VFMA(LDK(KP559154169), T1w, T1v);
Chris@42 401 T1E = VFNMS(LDK(KP683113946), T1v, T1w);
Chris@42 402 }
Chris@42 403 }
Chris@42 404 T1Q = VFNMS(LDK(KP992114701), T1P, Tb);
Chris@42 405 T1Y = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T1X, T1q));
Chris@42 406 {
Chris@42 407 V T1u, T1F, T1z, T1h, T1c, T23, T29;
Chris@42 408 T23 = VFNMS(LDK(KP999544308), T22, T1Z);
Chris@42 409 T29 = VFNMS(LDK(KP999544308), T28, T25);
Chris@42 410 T1I = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1t, T1q));
Chris@42 411 T1u = VFNMS(LDK(KP242145790), T1t, T1q);
Chris@42 412 T1F = VFMA(LDK(KP617882369), T1y, T1E);
Chris@42 413 T1z = VFMA(LDK(KP559016994), T1y, T1x);
Chris@42 414 T1h = VFNMS(LDK(KP904730450), T1b, TG);
Chris@42 415 T1c = VFMA(LDK(KP904730450), T1b, TG);
Chris@42 416 ST(&(xo[WS(os, 21)]), VFNMSI(T1Y, T1Q), ovs, &(xo[WS(os, 1)]));
Chris@42 417 ST(&(xo[WS(os, 4)]), VFMAI(T1Y, T1Q), ovs, &(xo[0]));
Chris@42 418 T24 = VFNMS(LDK(KP803003575), T23, Tb);
Chris@42 419 T2a = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T29, T1q));
Chris@42 420 T1G = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T1F, T1u));
Chris@42 421 T1A = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1z, T1u));
Chris@42 422 T1l = VFNMS(LDK(KP904730450), T1k, T1h);
Chris@42 423 T1B = VADD(T1g, T1h);
Chris@42 424 T1H = VFMA(LDK(KP968583161), T1c, Tb);
Chris@42 425 T1d = VFNMS(LDK(KP242145790), T1c, Tb);
Chris@42 426 }
Chris@42 427 }
Chris@42 428 }
Chris@42 429 }
Chris@42 430 ST(&(xo[WS(os, 9)]), VFMAI(T2a, T24), ovs, &(xo[WS(os, 1)]));
Chris@42 431 ST(&(xo[WS(os, 16)]), VFNMSI(T2a, T24), ovs, &(xo[0]));
Chris@42 432 {
Chris@42 433 V T1m, T1C, T1n, T1D;
Chris@42 434 T1m = VFNMS(LDK(KP618033988), T1l, T1g);
Chris@42 435 T1C = VFNMS(LDK(KP683113946), T1B, T1k);
Chris@42 436 ST(&(xo[WS(os, 24)]), VFMAI(T1I, T1H), ovs, &(xo[0]));
Chris@42 437 ST(&(xo[WS(os, 1)]), VFNMSI(T1I, T1H), ovs, &(xo[WS(os, 1)]));
Chris@42 438 T1n = VFNMS(LDK(KP876091699), T1m, T1d);
Chris@42 439 T1D = VFMA(LDK(KP792626838), T1C, T1d);
Chris@42 440 ST(&(xo[WS(os, 19)]), VFMAI(T1A, T1n), ovs, &(xo[WS(os, 1)]));
Chris@42 441 ST(&(xo[WS(os, 6)]), VFNMSI(T1A, T1n), ovs, &(xo[0]));
Chris@42 442 ST(&(xo[WS(os, 14)]), VFMAI(T1G, T1D), ovs, &(xo[0]));
Chris@42 443 ST(&(xo[WS(os, 11)]), VFNMSI(T1G, T1D), ovs, &(xo[WS(os, 1)]));
Chris@42 444 }
Chris@42 445 }
Chris@42 446 }
/* VLEAVE is a macro defined outside this file; presumably SIMD state cleanup
 * on exit -- TODO confirm against its definition. */
Chris@42 447 VLEAVE();
Chris@42 448 }
Chris@42 449
/* Descriptor handed to kdft_register together with n1fv_25: size 25, the
 * codelet name string, and the counts {43, 12, 181, 0}, whose first three
 * entries equal the additions / multiplications / fused multiply-adds quoted
 * in the generator's header comment. The meaning of the remaining fields is
 * not visible here -- see the kdft_desc declaration. */
Chris@42 450 static const kdft_desc desc = { 25, XSIMD_STRING("n1fv_25"), {43, 12, 181, 0}, &GENUS, 0, 0, 0, 0 };
Chris@42 451
/* Registration entry point: passes the n1fv_25 kernel and its descriptor to
 * kdft_register for the given planner p. */
Chris@42 452 void XSIMD(codelet_n1fv_25) (planner *p) {
Chris@42 453 X(kdft_register) (p, n1fv_25, &desc);
Chris@42 454 }
Chris@42 455
Chris@42 456 #else /* HAVE_FMA */
Chris@42 457
Chris@42 458 /* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name n1fv_25 -include n1f.h */
Chris@42 459
Chris@42 460 /*
Chris@42 461 * This function contains 224 FP additions, 140 FP multiplications,
Chris@42 462 * (or, 146 additions, 62 multiplications, 78 fused multiply/add),
Chris@42 463 * 115 stack variables, 40 constants, and 50 memory accesses
Chris@42 464 */
Chris@42 465 #include "n1f.h"
Chris@42 466
Chris@42 467 static void n1fv_25(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@42 468 {
Chris@42 469 DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
Chris@42 470 DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
Chris@42 471 DVK(KP770513242, +0.770513242775789230803009636396177847271667672);
Chris@42 472 DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596);
Chris@42 473 DVK(KP992114701, +0.992114701314477831049793042785778521453036709);
Chris@42 474 DVK(KP250666467, +0.250666467128608490746237519633017587885836494);
Chris@42 475 DVK(KP637423989, +0.637423989748689710176712811676016195434917298);
Chris@42 476 DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344);
Chris@42 477 DVK(KP125333233, +0.125333233564304245373118759816508793942918247);
Chris@42 478 DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418);
Chris@42 479 DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
Chris@42 480 DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
Chris@42 481 DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
Chris@42 482 DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
Chris@42 483 DVK(KP904827052, +0.904827052466019527713668647932697593970413911);
Chris@42 484 DVK(KP851558583, +0.851558583130145297725004891488503407959946084);
Chris@42 485 DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
Chris@42 486 DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
Chris@42 487 DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
Chris@42 488 DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
Chris@42 489 DVK(KP481753674, +0.481753674101715274987191502872129653528542010);
Chris@42 490 DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477);
Chris@42 491 DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
Chris@42 492 DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
Chris@42 493 DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
Chris@42 494 DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
Chris@42 495 DVK(KP998026728, +0.998026728428271561952336806863450553336905220);
Chris@42 496 DVK(KP125581039, +0.125581039058626752152356449131262266244969664);
Chris@42 497 DVK(KP684547105, +0.684547105928688673732283357621209269889519233);
Chris@42 498 DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328);
Chris@42 499 DVK(KP062790519, +0.062790519529313376076178224565631133122484832);
Chris@42 500 DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439);
Chris@42 501 DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465);
Chris@42 502 DVK(KP728968627, +0.728968627421411523146730319055259111372571664);
Chris@42 503 DVK(KP293892626, +0.293892626146236564584352977319536384298826219);
Chris@42 504 DVK(KP475528258, +0.475528258147576786058219666689691071702849317);
Chris@42 505 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@42 506 DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
Chris@42 507 DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
Chris@42 508 DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
Chris@42 509 {
Chris@42 510 INT i;
Chris@42 511 const R *xi;
Chris@42 512 R *xo;
Chris@42 513 xi = ri;
Chris@42 514 xo = ro;
Chris@42 515 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(50, is), MAKE_VOLATILE_STRIDE(50, os)) {
Chris@42 516 V T7, T1g, T26, Ta, T2R, T2N, T2O, T2P, T19, T1Y, T16, T1Z, T1a, T2v, T1l;
Chris@42 517 V T2m, TU, T21, TR, T22, TV, T2u, T1k, T2l, T2K, T2L, T2M, TE, T1R, TB;
Chris@42 518 V T1S, TF, T2r, T1i, T2j, Tp, T1U, Tm, T1V, Tq, T2s, T1h, T2i;
Chris@42 519 {
Chris@42 520 V T8, T6, T1f, T3, T1e, T25, T9;
Chris@42 521 T8 = LD(&(xi[0]), ivs, &(xi[0]));
Chris@42 522 {
Chris@42 523 V T4, T5, T1, T2;
Chris@42 524 T4 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@42 525 T5 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)]));
Chris@42 526 T6 = VADD(T4, T5);
Chris@42 527 T1f = VSUB(T4, T5);
Chris@42 528 T1 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@42 529 T2 = LD(&(xi[WS(is, 20)]), ivs, &(xi[0]));
Chris@42 530 T3 = VADD(T1, T2);
Chris@42 531 T1e = VSUB(T1, T2);
Chris@42 532 }
Chris@42 533 T7 = VMUL(LDK(KP559016994), VSUB(T3, T6));
Chris@42 534 T1g = VFMA(LDK(KP951056516), T1e, VMUL(LDK(KP587785252), T1f));
Chris@42 535 T25 = VMUL(LDK(KP951056516), T1f);
Chris@42 536 T26 = VFNMS(LDK(KP587785252), T1e, T25);
Chris@42 537 T9 = VADD(T3, T6);
Chris@42 538 Ta = VFNMS(LDK(KP250000000), T9, T8);
Chris@42 539 T2R = VADD(T8, T9);
Chris@42 540 }
Chris@42 541 {
Chris@42 542 V TO, T13, TN, TT, TP, TS, T12, T18, T14, T17, T15, TQ;
Chris@42 543 TO = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@42 544 T13 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@42 545 {
Chris@42 546 V TH, TI, TJ, TK, TL, TM;
Chris@42 547 TH = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@42 548 TI = LD(&(xi[WS(is, 22)]), ivs, &(xi[0]));
Chris@42 549 TJ = VADD(TH, TI);
Chris@42 550 TK = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@42 551 TL = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)]));
Chris@42 552 TM = VADD(TK, TL);
Chris@42 553 TN = VMUL(LDK(KP559016994), VSUB(TJ, TM));
Chris@42 554 TT = VSUB(TK, TL);
Chris@42 555 TP = VADD(TJ, TM);
Chris@42 556 TS = VSUB(TH, TI);
Chris@42 557 }
Chris@42 558 {
Chris@42 559 V TW, TX, TY, TZ, T10, T11;
Chris@42 560 TW = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@42 561 TX = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)]));
Chris@42 562 TY = VADD(TW, TX);
Chris@42 563 TZ = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@42 564 T10 = LD(&(xi[WS(is, 18)]), ivs, &(xi[0]));
Chris@42 565 T11 = VADD(TZ, T10);
Chris@42 566 T12 = VMUL(LDK(KP559016994), VSUB(TY, T11));
Chris@42 567 T18 = VSUB(TZ, T10);
Chris@42 568 T14 = VADD(TY, T11);
Chris@42 569 T17 = VSUB(TW, TX);
Chris@42 570 }
Chris@42 571 T2N = VADD(TO, TP);
Chris@42 572 T2O = VADD(T13, T14);
Chris@42 573 T2P = VADD(T2N, T2O);
Chris@42 574 T19 = VFMA(LDK(KP475528258), T17, VMUL(LDK(KP293892626), T18));
Chris@42 575 T1Y = VFNMS(LDK(KP293892626), T17, VMUL(LDK(KP475528258), T18));
Chris@42 576 T15 = VFNMS(LDK(KP250000000), T14, T13);
Chris@42 577 T16 = VADD(T12, T15);
Chris@42 578 T1Z = VSUB(T15, T12);
Chris@42 579 T1a = VFNMS(LDK(KP1_369094211), T19, VMUL(LDK(KP728968627), T16));
Chris@42 580 T2v = VFMA(LDK(KP1_996053456), T1Y, VMUL(LDK(KP062790519), T1Z));
Chris@42 581 T1l = VFMA(LDK(KP1_457937254), T19, VMUL(LDK(KP684547105), T16));
Chris@42 582 T2m = VFNMS(LDK(KP998026728), T1Z, VMUL(LDK(KP125581039), T1Y));
Chris@42 583 TU = VFMA(LDK(KP475528258), TS, VMUL(LDK(KP293892626), TT));
Chris@42 584 T21 = VFNMS(LDK(KP293892626), TS, VMUL(LDK(KP475528258), TT));
Chris@42 585 TQ = VFNMS(LDK(KP250000000), TP, TO);
Chris@42 586 TR = VADD(TN, TQ);
Chris@42 587 T22 = VSUB(TQ, TN);
Chris@42 588 TV = VFNMS(LDK(KP963507348), TU, VMUL(LDK(KP876306680), TR));
Chris@42 589 T2u = VFMA(LDK(KP1_688655851), T21, VMUL(LDK(KP535826794), T22));
Chris@42 590 T1k = VFMA(LDK(KP1_752613360), TU, VMUL(LDK(KP481753674), TR));
Chris@42 591 T2l = VFNMS(LDK(KP844327925), T22, VMUL(LDK(KP1_071653589), T21));
Chris@42 592 }
Chris@42 593 {
Chris@42 594 V Tj, Ty, Ti, To, Tk, Tn, Tx, TD, Tz, TC, TA, Tl;
Chris@42 595 Tj = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@42 596 Ty = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@42 597 {
Chris@42 598 V Tc, Td, Te, Tf, Tg, Th;
Chris@42 599 Tc = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@42 600 Td = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)]));
Chris@42 601 Te = VADD(Tc, Td);
Chris@42 602 Tf = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@42 603 Tg = LD(&(xi[WS(is, 16)]), ivs, &(xi[0]));
Chris@42 604 Th = VADD(Tf, Tg);
Chris@42 605 Ti = VMUL(LDK(KP559016994), VSUB(Te, Th));
Chris@42 606 To = VSUB(Tf, Tg);
Chris@42 607 Tk = VADD(Te, Th);
Chris@42 608 Tn = VSUB(Tc, Td);
Chris@42 609 }
Chris@42 610 {
Chris@42 611 V Tr, Ts, Tt, Tu, Tv, Tw;
Chris@42 612 Tr = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@42 613 Ts = LD(&(xi[WS(is, 24)]), ivs, &(xi[0]));
Chris@42 614 Tt = VADD(Tr, Ts);
Chris@42 615 Tu = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
Chris@42 616 Tv = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)]));
Chris@42 617 Tw = VADD(Tu, Tv);
Chris@42 618 Tx = VMUL(LDK(KP559016994), VSUB(Tt, Tw));
Chris@42 619 TD = VSUB(Tu, Tv);
Chris@42 620 Tz = VADD(Tt, Tw);
Chris@42 621 TC = VSUB(Tr, Ts);
Chris@42 622 }
Chris@42 623 T2K = VADD(Tj, Tk);
Chris@42 624 T2L = VADD(Ty, Tz);
Chris@42 625 T2M = VADD(T2K, T2L);
Chris@42 626 TE = VFMA(LDK(KP475528258), TC, VMUL(LDK(KP293892626), TD));
Chris@42 627 T1R = VFNMS(LDK(KP293892626), TC, VMUL(LDK(KP475528258), TD));
Chris@42 628 TA = VFNMS(LDK(KP250000000), Tz, Ty);
Chris@42 629 TB = VADD(Tx, TA);
Chris@42 630 T1S = VSUB(TA, Tx);
Chris@42 631 TF = VFNMS(LDK(KP1_688655851), TE, VMUL(LDK(KP535826794), TB));
Chris@42 632 T2r = VFNMS(LDK(KP425779291), T1S, VMUL(LDK(KP1_809654104), T1R));
Chris@42 633 T1i = VFMA(LDK(KP1_071653589), TE, VMUL(LDK(KP844327925), TB));
Chris@42 634 T2j = VFMA(LDK(KP851558583), T1R, VMUL(LDK(KP904827052), T1S));
Chris@42 635 Tp = VFMA(LDK(KP475528258), Tn, VMUL(LDK(KP293892626), To));
Chris@42 636 T1U = VFNMS(LDK(KP293892626), Tn, VMUL(LDK(KP475528258), To));
Chris@42 637 Tl = VFNMS(LDK(KP250000000), Tk, Tj);
Chris@42 638 Tm = VADD(Ti, Tl);
Chris@42 639 T1V = VSUB(Tl, Ti);
Chris@42 640 Tq = VFNMS(LDK(KP497379774), Tp, VMUL(LDK(KP968583161), Tm));
Chris@42 641 T2s = VFMA(LDK(KP963507348), T1U, VMUL(LDK(KP876306680), T1V));
Chris@42 642 T1h = VFMA(LDK(KP1_937166322), Tp, VMUL(LDK(KP248689887), Tm));
Chris@42 643 T2i = VFNMS(LDK(KP481753674), T1V, VMUL(LDK(KP1_752613360), T1U));
Chris@42 644 }
Chris@42 645 {
Chris@42 646 V T2Q, T2S, T2T, T2X, T2Y, T2V, T2W, T2Z, T2U;
Chris@42 647 T2Q = VMUL(LDK(KP559016994), VSUB(T2M, T2P));
Chris@42 648 T2S = VADD(T2M, T2P);
Chris@42 649 T2T = VFNMS(LDK(KP250000000), T2S, T2R);
Chris@42 650 T2V = VSUB(T2K, T2L);
Chris@42 651 T2W = VSUB(T2N, T2O);
Chris@42 652 T2X = VBYI(VFMA(LDK(KP951056516), T2V, VMUL(LDK(KP587785252), T2W)));
Chris@42 653 T2Y = VBYI(VFNMS(LDK(KP587785252), T2V, VMUL(LDK(KP951056516), T2W)));
Chris@42 654 ST(&(xo[0]), VADD(T2R, T2S), ovs, &(xo[0]));
Chris@42 655 T2Z = VSUB(T2T, T2Q);
Chris@42 656 ST(&(xo[WS(os, 10)]), VADD(T2Y, T2Z), ovs, &(xo[0]));
Chris@42 657 ST(&(xo[WS(os, 15)]), VSUB(T2Z, T2Y), ovs, &(xo[WS(os, 1)]));
Chris@42 658 T2U = VADD(T2Q, T2T);
Chris@42 659 ST(&(xo[WS(os, 5)]), VSUB(T2U, T2X), ovs, &(xo[WS(os, 1)]));
Chris@42 660 ST(&(xo[WS(os, 20)]), VADD(T2X, T2U), ovs, &(xo[0]));
Chris@42 661 }
Chris@42 662 {
Chris@42 663 V T2t, T2y, T2z, T2w, T1T, T1W, T1X, T2c, T2d, T2e, T29, T2a, T2b, T20, T23;
Chris@42 664 V T24, T2p, T2o, T2q, T28, T2D, T2C, T2E, T2x, T2F;
Chris@42 665 T2t = VSUB(T2r, T2s);
Chris@42 666 T2y = VADD(T2i, T2j);
Chris@42 667 T2z = VSUB(T2l, T2m);
Chris@42 668 T2w = VSUB(T2u, T2v);
Chris@42 669 T1T = VFNMS(LDK(KP125333233), T1S, VMUL(LDK(KP1_984229402), T1R));
Chris@42 670 T1W = VFMA(LDK(KP1_457937254), T1U, VMUL(LDK(KP684547105), T1V));
Chris@42 671 T1X = VSUB(T1T, T1W);
Chris@42 672 T2c = VFNMS(LDK(KP1_996053456), T21, VMUL(LDK(KP062790519), T22));
Chris@42 673 T2d = VFMA(LDK(KP1_541026485), T1Y, VMUL(LDK(KP637423989), T1Z));
Chris@42 674 T2e = VSUB(T2c, T2d);
Chris@42 675 T29 = VFNMS(LDK(KP1_369094211), T1U, VMUL(LDK(KP728968627), T1V));
Chris@42 676 T2a = VFMA(LDK(KP250666467), T1R, VMUL(LDK(KP992114701), T1S));
Chris@42 677 T2b = VSUB(T29, T2a);
Chris@42 678 T20 = VFNMS(LDK(KP770513242), T1Z, VMUL(LDK(KP1_274847979), T1Y));
Chris@42 679 T23 = VFMA(LDK(KP125581039), T21, VMUL(LDK(KP998026728), T22));
Chris@42 680 T24 = VSUB(T20, T23);
Chris@42 681 {
Chris@42 682 V T2k, T2n, T2A, T2B;
Chris@42 683 T2k = VSUB(T2i, T2j);
Chris@42 684 T2n = VADD(T2l, T2m);
Chris@42 685 T2p = VADD(T2k, T2n);
Chris@42 686 T2o = VMUL(LDK(KP559016994), VSUB(T2k, T2n));
Chris@42 687 T2q = VFNMS(LDK(KP250000000), T2p, T26);
Chris@42 688 T28 = VSUB(Ta, T7);
Chris@42 689 T2A = VADD(T2s, T2r);
Chris@42 690 T2B = VADD(T2u, T2v);
Chris@42 691 T2D = VADD(T2A, T2B);
Chris@42 692 T2C = VMUL(LDK(KP559016994), VSUB(T2A, T2B));
Chris@42 693 T2E = VFNMS(LDK(KP250000000), T2D, T28);
Chris@42 694 }
Chris@42 695 {
Chris@42 696 V T2I, T2J, T27, T2f;
Chris@42 697 T2I = VBYI(VADD(T26, T2p));
Chris@42 698 T2J = VADD(T28, T2D);
Chris@42 699 ST(&(xo[WS(os, 2)]), VADD(T2I, T2J), ovs, &(xo[0]));
Chris@42 700 ST(&(xo[WS(os, 23)]), VSUB(T2J, T2I), ovs, &(xo[WS(os, 1)]));
Chris@42 701 T27 = VBYI(VSUB(VADD(T1X, T24), T26));
Chris@42 702 T2f = VADD(T28, VADD(T2b, T2e));
Chris@42 703 ST(&(xo[WS(os, 3)]), VADD(T27, T2f), ovs, &(xo[WS(os, 1)]));
Chris@42 704 ST(&(xo[WS(os, 22)]), VSUB(T2f, T27), ovs, &(xo[0]));
Chris@42 705 }
Chris@42 706 T2x = VBYI(VADD(T2o, VADD(T2q, VFNMS(LDK(KP587785252), T2w, VMUL(LDK(KP951056516), T2t)))));
Chris@42 707 T2F = VFMA(LDK(KP951056516), T2y, VFMA(LDK(KP587785252), T2z, VADD(T2C, T2E)));
Chris@42 708 ST(&(xo[WS(os, 7)]), VADD(T2x, T2F), ovs, &(xo[WS(os, 1)]));
Chris@42 709 ST(&(xo[WS(os, 18)]), VSUB(T2F, T2x), ovs, &(xo[0]));
Chris@42 710 {
Chris@42 711 V T2G, T2H, T2g, T2h;
Chris@42 712 T2G = VBYI(VADD(T2q, VSUB(VFMA(LDK(KP587785252), T2t, VMUL(LDK(KP951056516), T2w)), T2o)));
Chris@42 713 T2H = VFMA(LDK(KP587785252), T2y, VSUB(VFNMS(LDK(KP951056516), T2z, T2E), T2C));
Chris@42 714 ST(&(xo[WS(os, 12)]), VADD(T2G, T2H), ovs, &(xo[0]));
Chris@42 715 ST(&(xo[WS(os, 13)]), VSUB(T2H, T2G), ovs, &(xo[WS(os, 1)]));
Chris@42 716 T2g = VFMA(LDK(KP309016994), T2b, VFNMS(LDK(KP809016994), T2e, VFNMS(LDK(KP587785252), VADD(T23, T20), VFNMS(LDK(KP951056516), VADD(T1W, T1T), T28))));
Chris@42 717 T2h = VBYI(VSUB(VFNMS(LDK(KP587785252), VADD(T2c, T2d), VFNMS(LDK(KP809016994), T24, VFNMS(LDK(KP951056516), VADD(T29, T2a), VMUL(LDK(KP309016994), T1X)))), T26));
Chris@42 718 ST(&(xo[WS(os, 17)]), VSUB(T2g, T2h), ovs, &(xo[WS(os, 1)]));
Chris@42 719 ST(&(xo[WS(os, 8)]), VADD(T2g, T2h), ovs, &(xo[0]));
Chris@42 720 }
Chris@42 721 }
Chris@42 722 {
Chris@42 723 V T1p, T1u, T1w, T1q, T1B, T1C, T1D, T1L, T1M, T1N, T1I, T1J, T1K, T1E, T1F;
Chris@42 724 V T1G, T1n, T1r, T1s, Tb, T1c, T1v, T1x, T1t, T1y;
Chris@42 725 T1p = VSUB(TF, Tq);
Chris@42 726 T1u = VSUB(T1i, T1h);
Chris@42 727 T1w = VSUB(T1l, T1k);
Chris@42 728 T1q = VSUB(TV, T1a);
Chris@42 729 T1B = VFMA(LDK(KP1_688655851), Tp, VMUL(LDK(KP535826794), Tm));
Chris@42 730 T1C = VFMA(LDK(KP1_541026485), TE, VMUL(LDK(KP637423989), TB));
Chris@42 731 T1D = VSUB(T1B, T1C);
Chris@42 732 T1L = VFMA(LDK(KP851558583), TU, VMUL(LDK(KP904827052), TR));
Chris@42 733 T1M = VFMA(LDK(KP1_984229402), T19, VMUL(LDK(KP125333233), T16));
Chris@42 734 T1N = VADD(T1L, T1M);
Chris@42 735 T1I = VFNMS(LDK(KP844327925), Tm, VMUL(LDK(KP1_071653589), Tp));
Chris@42 736 T1J = VFNMS(LDK(KP1_274847979), TE, VMUL(LDK(KP770513242), TB));
Chris@42 737 T1K = VADD(T1I, T1J);
Chris@42 738 T1E = VFNMS(LDK(KP425779291), TR, VMUL(LDK(KP1_809654104), TU));
Chris@42 739 T1F = VFNMS(LDK(KP992114701), T16, VMUL(LDK(KP250666467), T19));
Chris@42 740 T1G = VADD(T1E, T1F);
Chris@42 741 {
Chris@42 742 V T1j, T1m, TG, T1b;
Chris@42 743 T1j = VADD(T1h, T1i);
Chris@42 744 T1m = VADD(T1k, T1l);
Chris@42 745 T1n = VADD(T1j, T1m);
Chris@42 746 T1r = VFMS(LDK(KP250000000), T1n, T1g);
Chris@42 747 T1s = VMUL(LDK(KP559016994), VSUB(T1m, T1j));
Chris@42 748 Tb = VADD(T7, Ta);
Chris@42 749 TG = VADD(Tq, TF);
Chris@42 750 T1b = VADD(TV, T1a);
Chris@42 751 T1c = VADD(TG, T1b);
Chris@42 752 T1v = VFNMS(LDK(KP250000000), T1c, Tb);
Chris@42 753 T1x = VMUL(LDK(KP559016994), VSUB(TG, T1b));
Chris@42 754 }
Chris@42 755 {
Chris@42 756 V T1d, T1o, T1H, T1O;
Chris@42 757 T1d = VADD(Tb, T1c);
Chris@42 758 T1o = VBYI(VADD(T1g, T1n));
Chris@42 759 ST(&(xo[WS(os, 1)]), VSUB(T1d, T1o), ovs, &(xo[WS(os, 1)]));
Chris@42 760 ST(&(xo[WS(os, 24)]), VADD(T1d, T1o), ovs, &(xo[0]));
Chris@42 761 T1H = VADD(Tb, VADD(T1D, T1G));
Chris@42 762 T1O = VBYI(VADD(T1g, VSUB(T1K, T1N)));
Chris@42 763 ST(&(xo[WS(os, 21)]), VSUB(T1H, T1O), ovs, &(xo[WS(os, 1)]));
Chris@42 764 ST(&(xo[WS(os, 4)]), VADD(T1H, T1O), ovs, &(xo[0]));
Chris@42 765 }
Chris@42 766 T1t = VBYI(VADD(VFMA(LDK(KP587785252), T1p, VMUL(LDK(KP951056516), T1q)), VSUB(T1r, T1s)));
Chris@42 767 T1y = VFMA(LDK(KP587785252), T1u, VFNMS(LDK(KP951056516), T1w, VSUB(T1v, T1x)));
Chris@42 768 ST(&(xo[WS(os, 11)]), VADD(T1t, T1y), ovs, &(xo[WS(os, 1)]));
Chris@42 769 ST(&(xo[WS(os, 14)]), VSUB(T1y, T1t), ovs, &(xo[0]));
Chris@42 770 {
Chris@42 771 V T1z, T1A, T1P, T1Q;
Chris@42 772 T1z = VBYI(VADD(VFNMS(LDK(KP587785252), T1q, VMUL(LDK(KP951056516), T1p)), VADD(T1r, T1s)));
Chris@42 773 T1A = VFMA(LDK(KP951056516), T1u, VADD(T1x, VFMA(LDK(KP587785252), T1w, T1v)));
Chris@42 774 ST(&(xo[WS(os, 6)]), VADD(T1z, T1A), ovs, &(xo[0]));
Chris@42 775 ST(&(xo[WS(os, 19)]), VSUB(T1A, T1z), ovs, &(xo[WS(os, 1)]));
Chris@42 776 T1P = VBYI(VADD(T1g, VFMA(LDK(KP309016994), T1K, VFMA(LDK(KP587785252), VSUB(T1F, T1E), VFNMS(LDK(KP951056516), VADD(T1B, T1C), VMUL(LDK(KP809016994), T1N))))));
Chris@42 777 T1Q = VFMA(LDK(KP309016994), T1D, VFMA(LDK(KP951056516), VSUB(T1I, T1J), VFMA(LDK(KP587785252), VSUB(T1M, T1L), VFNMS(LDK(KP809016994), T1G, Tb))));
Chris@42 778 ST(&(xo[WS(os, 9)]), VADD(T1P, T1Q), ovs, &(xo[WS(os, 1)]));
Chris@42 779 ST(&(xo[WS(os, 16)]), VSUB(T1Q, T1P), ovs, &(xo[0]));
Chris@42 780 }
Chris@42 781 }
Chris@42 782 }
Chris@42 783 }
Chris@42 784 VLEAVE();
Chris@42 785 }
Chris@42 786
/* Descriptor for the n1fv_25 codelet; its address is passed to kdft_register
 * by the registration function that follows.
 * The leading 25 matches the generator's "-n 25" argument and the string is
 * the codelet's name as wrapped by XSIMD_STRING.
 * NOTE(review): {146, 62, 78, 0} is presumably the operation-count record and
 * the four trailing zeros presumably stride fields -- confirm against the
 * kdft_desc declaration pulled in through codelet-dft.h. */
Chris@42 787 static const kdft_desc desc = { 25, XSIMD_STRING("n1fv_25"), {146, 62, 78, 0}, &GENUS, 0, 0, 0, 0 };
Chris@42 788
/* Registration entry point for this codelet: hands the n1fv_25 function and
 * its descriptor (desc) to kdft_register for the given planner p.
 * Returns nothing; the only visible action is the single kdft_register call. */
Chris@42 789 void XSIMD(codelet_n1fv_25) (planner *p) {
Chris@42 790 X(kdft_register) (p, n1fv_25, &desc);
Chris@42 791 }
Chris@42 792
Chris@42 793 #endif /* HAVE_FMA */