annotate src/fftw-3.3.8/dft/simd/common/n1bv_15.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:58 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw_c.native -fma -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 15 -name n1bv_15 -include dft/simd/n1b.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 78 FP additions, 49 FP multiplications,
Chris@82 32 * (or, 36 additions, 7 multiplications, 42 fused multiply/add),
Chris@82 33 * 53 stack variables, 8 constants, and 30 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/simd/n1b.h"
Chris@82 36
/*
 * n1bv_15, FMA variant (this copy is compiled when ARCH_PREFERS_FMA or
 * ISA_EXTENSION_PREFERS_FMA is defined).
 *
 * Machine-generated by genfft (flags: -fma -simd -sign 1 -n 15); presumably a
 * size-15 DFT kernel with sign +1 -- confirm against the genfft documentation.
 *
 * Parameters, as used by the visible code:
 *   ri, ro   - not referenced in the text of this function.
 *   ii, io   - base pointers; copied into the running cursors xi and xo.
 *   is, os   - strides handed to WS() to address the 15 inputs / 15 outputs.
 *   v        - loop count; the loop runs while i > 0, decreasing i by VL.
 *   ivs, ovs - per-iteration cursor advance (xi += VL * ivs, xo += VL * ovs);
 *              also forwarded to every LD / ST call.
 *
 * Each iteration loads xi[WS(is, k)] for k = 0..14 exactly once and stores
 * xo[WS(os, k)] for k = 0..14 exactly once.  LD, ST, LDK, DVK, VADD, VSUB,
 * VMUL, VFMA, VFNMS, VFMAI, VFNMSI, VL, WS, MAKE_VOLATILE_STRIDE and VLEAVE
 * are defined in project headers not visible here.
 */
Chris@82 37 static void n1bv_15(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
Chris@82 39 DVK(KP910592997, +0.910592997310029334643087372129977886038870291);
Chris@82 40 DVK(KP823639103, +0.823639103546331925877420039278190003029660514);
Chris@82 41 DVK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
Chris@82 42 DVK(KP618033988, +0.618033988749894848204586834365638117720309180); /* numerically (sqrt(5)-1)/2 */
Chris@82 43 DVK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
Chris@82 44 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 45 DVK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@82 46 DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 47 {
Chris@82 48 INT i;
Chris@82 49 const R *xi;
Chris@82 50 R *xo;
Chris@82 51 xi = ii; /* only the ii / io pointers are used as bases */
Chris@82 52 xo = io;
Chris@82 53 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(30, is), MAKE_VOLATILE_STRIDE(30, os)) {
Chris@82 54 V T5, T11, TH, Ty, TE, TF, TB, Tg, Tr, Ts, T12, T13, T14, T15, T16;
Chris@82 55 V T17, TK, TM, TZ, T10;
Chris@82 56 { /* inputs 0, 5, 10: sum of 5 and 10, their difference, and two combinations with input 0 */
Chris@82 57 V T1, T2, T3, T4;
Chris@82 58 T1 = LD(&(xi[0]), ivs, &(xi[0]));
Chris@82 59 T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@82 60 T3 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@82 61 T4 = VADD(T2, T3);
Chris@82 62 T5 = VFNMS(LDK(KP500000000), T4, T1);
Chris@82 63 T11 = VADD(T1, T4);
Chris@82 64 TH = VSUB(T2, T3);
Chris@82 65 }
Chris@82 66 { /* remaining 12 inputs, handled as four triples with the same load/add/VFNMS/sub pattern */
Chris@82 67 V T6, T9, Ta, Tw, Tm, Tp, Tq, TA, Tb, Te, Tf, Tx, Th, Tk, Tl;
Chris@82 68 V Tz, TI, TJ;
Chris@82 69 { /* triples (3, 8, 13) and (9, 14, 4) */
Chris@82 70 V T7, T8, Tn, To;
Chris@82 71 T6 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@82 72 T7 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@82 73 T8 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@82 74 T9 = VADD(T7, T8);
Chris@82 75 Ta = VFNMS(LDK(KP500000000), T9, T6);
Chris@82 76 Tw = VSUB(T7, T8);
Chris@82 77 Tm = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@82 78 Tn = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
Chris@82 79 To = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@82 80 Tp = VADD(Tn, To);
Chris@82 81 Tq = VFNMS(LDK(KP500000000), Tp, Tm);
Chris@82 82 TA = VSUB(Tn, To);
Chris@82 83 }
Chris@82 84 { /* triples (12, 2, 7) and (6, 11, 1) */
Chris@82 85 V Tc, Td, Ti, Tj;
Chris@82 86 Tb = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@82 87 Tc = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@82 88 Td = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@82 89 Te = VADD(Tc, Td);
Chris@82 90 Tf = VFNMS(LDK(KP500000000), Te, Tb);
Chris@82 91 Tx = VSUB(Tc, Td);
Chris@82 92 Th = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@82 93 Ti = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@82 94 Tj = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@82 95 Tk = VADD(Ti, Tj);
Chris@82 96 Tl = VFNMS(LDK(KP500000000), Tk, Th);
Chris@82 97 Tz = VSUB(Ti, Tj);
Chris@82 98 }
Chris@82 99 Ty = VSUB(Tw, Tx); /* pairwise sums and differences of the per-triple results, consumed by the store blocks below */
Chris@82 100 TE = VSUB(Ta, Tf);
Chris@82 101 TF = VSUB(Tl, Tq);
Chris@82 102 TB = VSUB(Tz, TA);
Chris@82 103 Tg = VADD(Ta, Tf);
Chris@82 104 Tr = VADD(Tl, Tq);
Chris@82 105 Ts = VADD(Tg, Tr);
Chris@82 106 T12 = VADD(T6, T9);
Chris@82 107 T13 = VADD(Tb, Te);
Chris@82 108 T14 = VADD(T12, T13);
Chris@82 109 T15 = VADD(Th, Tk);
Chris@82 110 T16 = VADD(Tm, Tp);
Chris@82 111 T17 = VADD(T15, T16);
Chris@82 112 TI = VADD(Tw, Tx);
Chris@82 113 TJ = VADD(Tz, TA);
Chris@82 114 TK = VADD(TI, TJ);
Chris@82 115 TM = VSUB(TI, TJ);
Chris@82 116 }
Chris@82 117 TZ = VADD(T5, Ts); /* outputs 5 and 10 are formed from the same pair (TZ, T10) */
Chris@82 118 T10 = VMUL(LDK(KP866025403), VADD(TH, TK));
Chris@82 119 ST(&(xo[WS(os, 5)]), VFNMSI(T10, TZ), ovs, &(xo[WS(os, 1)]));
Chris@82 120 ST(&(xo[WS(os, 10)]), VFMAI(T10, TZ), ovs, &(xo[0]));
Chris@82 121 { /* outputs 0, 6, 9, 3, 12: built only from the full triple sums T11..T17 */
Chris@82 122 V T1a, T18, T19, T1e, T1g, T1c, T1d, T1f, T1b;
Chris@82 123 T1a = VSUB(T14, T17);
Chris@82 124 T18 = VADD(T14, T17);
Chris@82 125 T19 = VFNMS(LDK(KP250000000), T18, T11);
Chris@82 126 T1c = VSUB(T15, T16);
Chris@82 127 T1d = VSUB(T12, T13);
Chris@82 128 T1e = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T1d, T1c));
Chris@82 129 T1g = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T1c, T1d));
Chris@82 130 ST(&(xo[0]), VADD(T11, T18), ovs, &(xo[0])); /* output 0 = T11 + T18, i.e. the sum of all 15 loaded inputs */
Chris@82 131 T1f = VFMA(LDK(KP559016994), T1a, T19);
Chris@82 132 ST(&(xo[WS(os, 6)]), VFMAI(T1g, T1f), ovs, &(xo[0]));
Chris@82 133 ST(&(xo[WS(os, 9)]), VFNMSI(T1g, T1f), ovs, &(xo[WS(os, 1)]));
Chris@82 134 T1b = VFNMS(LDK(KP559016994), T1a, T19);
Chris@82 135 ST(&(xo[WS(os, 3)]), VFMAI(T1e, T1b), ovs, &(xo[WS(os, 1)]));
Chris@82 136 ST(&(xo[WS(os, 12)]), VFNMSI(T1e, T1b), ovs, &(xo[0]));
Chris@82 137 }
Chris@82 138 { /* remaining outputs 1, 14, 7, 8, 4, 11, 2, 13 */
Chris@82 139 V TC, TG, TU, TS, TN, TV, Tv, TR, TL, Tt, Tu;
Chris@82 140 TC = VFMA(LDK(KP618033988), TB, Ty);
Chris@82 141 TG = VFMA(LDK(KP618033988), TF, TE);
Chris@82 142 TU = VFNMS(LDK(KP618033988), TE, TF);
Chris@82 143 TS = VFNMS(LDK(KP618033988), Ty, TB);
Chris@82 144 TL = VFNMS(LDK(KP250000000), TK, TH);
Chris@82 145 TN = VFMA(LDK(KP559016994), TM, TL);
Chris@82 146 TV = VFNMS(LDK(KP559016994), TM, TL);
Chris@82 147 Tt = VFNMS(LDK(KP250000000), Ts, T5);
Chris@82 148 Tu = VSUB(Tg, Tr);
Chris@82 149 Tv = VFMA(LDK(KP559016994), Tu, Tt);
Chris@82 150 TR = VFNMS(LDK(KP559016994), Tu, Tt);
Chris@82 151 { /* outputs (1, 14) from (TO, TD); outputs (7, 8) from (TY, TX) */
Chris@82 152 V TD, TO, TX, TY;
Chris@82 153 TD = VFNMS(LDK(KP823639103), TC, Tv);
Chris@82 154 TO = VMUL(LDK(KP951056516), VFMA(LDK(KP910592997), TN, TG));
Chris@82 155 ST(&(xo[WS(os, 1)]), VFMAI(TO, TD), ovs, &(xo[WS(os, 1)]));
Chris@82 156 ST(&(xo[WS(os, 14)]), VFNMSI(TO, TD), ovs, &(xo[0]));
Chris@82 157 TX = VFMA(LDK(KP823639103), TS, TR);
Chris@82 158 TY = VMUL(LDK(KP951056516), VFNMS(LDK(KP910592997), TV, TU));
Chris@82 159 ST(&(xo[WS(os, 7)]), VFNMSI(TY, TX), ovs, &(xo[WS(os, 1)]));
Chris@82 160 ST(&(xo[WS(os, 8)]), VFMAI(TY, TX), ovs, &(xo[0]));
Chris@82 161 }
Chris@82 162 { /* outputs (4, 11) from (TQ, TP); outputs (2, 13) from (TW, TT) */
Chris@82 163 V TP, TQ, TT, TW;
Chris@82 164 TP = VFMA(LDK(KP823639103), TC, Tv);
Chris@82 165 TQ = VMUL(LDK(KP951056516), VFNMS(LDK(KP910592997), TN, TG));
Chris@82 166 ST(&(xo[WS(os, 4)]), VFNMSI(TQ, TP), ovs, &(xo[0]));
Chris@82 167 ST(&(xo[WS(os, 11)]), VFMAI(TQ, TP), ovs, &(xo[WS(os, 1)]));
Chris@82 168 TT = VFNMS(LDK(KP823639103), TS, TR);
Chris@82 169 TW = VMUL(LDK(KP951056516), VFMA(LDK(KP910592997), TV, TU));
Chris@82 170 ST(&(xo[WS(os, 2)]), VFNMSI(TW, TT), ovs, &(xo[0]));
Chris@82 171 ST(&(xo[WS(os, 13)]), VFMAI(TW, TT), ovs, &(xo[WS(os, 1)]));
Chris@82 172 }
Chris@82 173 }
Chris@82 174 }
Chris@82 175 }
Chris@82 176 VLEAVE(); /* invoked once, after the loop; defined elsewhere */
Chris@82 177 }
Chris@82 178
/*
 * Descriptor handed to kdft_register for the FMA variant: first field 15,
 * name string "n1bv_15", and the counts {36, 7, 42, 0}, whose first three
 * entries match the "36 additions, 7 multiplications, 42 fused multiply/add"
 * figures in the generated header comment.  The meaning of the fourth count,
 * of GENUS and of the trailing zeros is defined by kdft_desc elsewhere.
 */
Chris@82 179 static const kdft_desc desc = { 15, XSIMD_STRING("n1bv_15"), {36, 7, 42, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 180
/*
 * Registration entry point (FMA variant): passes the planner p, the n1bv_15
 * function and its descriptor desc to X(kdft_register).
 */
Chris@82 181 void XSIMD(codelet_n1bv_15) (planner *p) {
Chris@82 182 X(kdft_register) (p, n1bv_15, &desc);
Chris@82 183 }
Chris@82 184
Chris@82 185 #else
Chris@82 186
Chris@82 187 /* Generated by: ../../../genfft/gen_notw_c.native -simd -compact -variables 4 -pipeline-latency 8 -sign 1 -n 15 -name n1bv_15 -include dft/simd/n1b.h */
Chris@82 188
Chris@82 189 /*
Chris@82 190 * This function contains 78 FP additions, 25 FP multiplications,
Chris@82 191 * (or, 64 additions, 11 multiplications, 14 fused multiply/add),
Chris@82 192 * 55 stack variables, 10 constants, and 30 memory accesses
Chris@82 193 */
Chris@82 194 #include "dft/simd/n1b.h"
Chris@82 195
/*
 * n1bv_15, non-FMA variant (this copy sits in the #else branch, i.e. it is
 * compiled when neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is
 * defined).
 *
 * Machine-generated by genfft (flags: -simd -sign 1 -n 15); presumably a
 * size-15 DFT kernel with sign +1 -- confirm against the genfft documentation.
 *
 * Parameters, as used by the visible code:
 *   ri, ro   - not referenced in the text of this function.
 *   ii, io   - base pointers; copied into the running cursors xi and xo.
 *   is, os   - strides handed to WS() to address the 15 inputs / 15 outputs.
 *   v        - loop count; the loop runs while i > 0, decreasing i by VL.
 *   ivs, ovs - per-iteration cursor advance (xi += VL * ivs, xo += VL * ovs);
 *              also forwarded to every LD / ST call.
 *
 * Each iteration loads xi[WS(is, k)] for k = 0..14 exactly once and stores
 * xo[WS(os, k)] for k = 0..14 exactly once.  LD, ST, LDK, DVK, VADD, VSUB,
 * VMUL, VFMA, VFNMS, VBYI, VL, WS, MAKE_VOLATILE_STRIDE and VLEAVE are defined
 * in project headers not visible here.
 */
Chris@82 196 static void n1bv_15(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 197 {
Chris@82 198 DVK(KP216506350, +0.216506350946109661690930792688234045867850657); /* numerically sqrt(3)/8 */
Chris@82 199 DVK(KP509036960, +0.509036960455127183450980863393907648510733164);
Chris@82 200 DVK(KP823639103, +0.823639103546331925877420039278190003029660514);
Chris@82 201 DVK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
Chris@82 202 DVK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
Chris@82 203 DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
Chris@82 204 DVK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
Chris@82 205 DVK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
Chris@82 206 DVK(KP484122918, +0.484122918275927110647408174972799951354115213); /* numerically sqrt(15)/8 */
Chris@82 207 DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 208 {
Chris@82 209 INT i;
Chris@82 210 const R *xi;
Chris@82 211 R *xo;
Chris@82 212 xi = ii; /* only the ii / io pointers are used as bases */
Chris@82 213 xo = io;
Chris@82 214 for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(30, is), MAKE_VOLATILE_STRIDE(30, os)) {
Chris@82 215 V Ti, T11, TH, Ts, TL, TM, Tz, TC, TD, TI, T12, T13, T14, T15, T16;
Chris@82 216 V T17, Tf, Tj, TZ, T10;
Chris@82 217 { /* inputs 0, 5, 10: sum of 5 and 10, their difference, and two combinations with input 0 */
Chris@82 218 V TF, Tg, Th, TG;
Chris@82 219 TF = LD(&(xi[0]), ivs, &(xi[0]));
Chris@82 220 Tg = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Chris@82 221 Th = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Chris@82 222 TG = VADD(Tg, Th);
Chris@82 223 Ti = VSUB(Tg, Th);
Chris@82 224 T11 = VADD(TF, TG);
Chris@82 225 TH = VFNMS(LDK(KP500000000), TG, TF);
Chris@82 226 }
Chris@82 227 { /* remaining 12 inputs, handled as four triples with the same load/add/sub/VFNMS pattern */
Chris@82 228 V Tm, Tn, T3, To, Tw, Tx, Td, Ty, Tp, Tq, T6, Tr, Tt, Tu, Ta;
Chris@82 229 V Tv, T7, Te;
Chris@82 230 { /* triples (3, 8, 13) and (9, 14, 4) */
Chris@82 231 V T1, T2, Tb, Tc;
Chris@82 232 Tm = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Chris@82 233 T1 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Chris@82 234 T2 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Chris@82 235 Tn = VADD(T1, T2);
Chris@82 236 T3 = VSUB(T1, T2);
Chris@82 237 To = VFNMS(LDK(KP500000000), Tn, Tm);
Chris@82 238 Tw = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Chris@82 239 Tb = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
Chris@82 240 Tc = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Chris@82 241 Tx = VADD(Tb, Tc);
Chris@82 242 Td = VSUB(Tb, Tc);
Chris@82 243 Ty = VFNMS(LDK(KP500000000), Tx, Tw);
Chris@82 244 }
Chris@82 245 { /* triples (12, 2, 7) and (6, 11, 1) */
Chris@82 246 V T4, T5, T8, T9;
Chris@82 247 Tp = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Chris@82 248 T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Chris@82 249 T5 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Chris@82 250 Tq = VADD(T4, T5);
Chris@82 251 T6 = VSUB(T4, T5);
Chris@82 252 Tr = VFNMS(LDK(KP500000000), Tq, Tp);
Chris@82 253 Tt = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Chris@82 254 T8 = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Chris@82 255 T9 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Chris@82 256 Tu = VADD(T8, T9);
Chris@82 257 Ta = VSUB(T8, T9);
Chris@82 258 Tv = VFNMS(LDK(KP500000000), Tu, Tt);
Chris@82 259 }
Chris@82 260 Ts = VSUB(To, Tr); /* pairwise sums and differences of the per-triple results, consumed by the store blocks below */
Chris@82 261 TL = VSUB(T3, T6);
Chris@82 262 TM = VSUB(Ta, Td);
Chris@82 263 Tz = VSUB(Tv, Ty);
Chris@82 264 TC = VADD(To, Tr);
Chris@82 265 TD = VADD(Tv, Ty);
Chris@82 266 TI = VADD(TC, TD);
Chris@82 267 T12 = VADD(Tm, Tn);
Chris@82 268 T13 = VADD(Tp, Tq);
Chris@82 269 T14 = VADD(T12, T13);
Chris@82 270 T15 = VADD(Tt, Tu);
Chris@82 271 T16 = VADD(Tw, Tx);
Chris@82 272 T17 = VADD(T15, T16);
Chris@82 273 T7 = VADD(T3, T6);
Chris@82 274 Te = VADD(Ta, Td);
Chris@82 275 Tf = VMUL(LDK(KP484122918), VSUB(T7, Te));
Chris@82 276 Tj = VADD(T7, Te);
Chris@82 277 }
Chris@82 278 TZ = VADD(TH, TI); /* outputs 5 and 10 are TZ - T10 and T10 + TZ */
Chris@82 279 T10 = VBYI(VMUL(LDK(KP866025403), VADD(Ti, Tj)));
Chris@82 280 ST(&(xo[WS(os, 5)]), VSUB(TZ, T10), ovs, &(xo[WS(os, 1)]));
Chris@82 281 ST(&(xo[WS(os, 10)]), VADD(T10, TZ), ovs, &(xo[0]));
Chris@82 282 { /* outputs 0, 6, 9, 3, 12: built only from the full triple sums T11..T17 */
Chris@82 283 V T1a, T18, T19, T1e, T1f, T1c, T1d, T1g, T1b;
Chris@82 284 T1a = VMUL(LDK(KP559016994), VSUB(T14, T17));
Chris@82 285 T18 = VADD(T14, T17);
Chris@82 286 T19 = VFNMS(LDK(KP250000000), T18, T11);
Chris@82 287 T1c = VSUB(T12, T13);
Chris@82 288 T1d = VSUB(T15, T16);
Chris@82 289 T1e = VBYI(VFNMS(LDK(KP951056516), T1d, VMUL(LDK(KP587785252), T1c)));
Chris@82 290 T1f = VBYI(VFMA(LDK(KP951056516), T1c, VMUL(LDK(KP587785252), T1d)));
Chris@82 291 ST(&(xo[0]), VADD(T11, T18), ovs, &(xo[0])); /* output 0 = T11 + T18, i.e. the sum of all 15 loaded inputs */
Chris@82 292 T1g = VADD(T1a, T19);
Chris@82 293 ST(&(xo[WS(os, 6)]), VADD(T1f, T1g), ovs, &(xo[0]));
Chris@82 294 ST(&(xo[WS(os, 9)]), VSUB(T1g, T1f), ovs, &(xo[WS(os, 1)]));
Chris@82 295 T1b = VSUB(T19, T1a);
Chris@82 296 ST(&(xo[WS(os, 3)]), VSUB(T1b, T1e), ovs, &(xo[WS(os, 1)]));
Chris@82 297 ST(&(xo[WS(os, 12)]), VADD(T1e, T1b), ovs, &(xo[0]));
Chris@82 298 }
Chris@82 299 { /* remaining outputs 1, 14, 7, 8, 4, 11, 2, 13 */
Chris@82 300 V TA, TN, TU, TS, Tl, TR, TK, TV, Tk, TE, TJ;
Chris@82 301 TA = VFMA(LDK(KP951056516), Ts, VMUL(LDK(KP587785252), Tz));
Chris@82 302 TN = VFMA(LDK(KP823639103), TL, VMUL(LDK(KP509036960), TM));
Chris@82 303 TU = VFNMS(LDK(KP823639103), TM, VMUL(LDK(KP509036960), TL));
Chris@82 304 TS = VFNMS(LDK(KP951056516), Tz, VMUL(LDK(KP587785252), Ts));
Chris@82 305 Tk = VFNMS(LDK(KP216506350), Tj, VMUL(LDK(KP866025403), Ti));
Chris@82 306 Tl = VADD(Tf, Tk);
Chris@82 307 TR = VSUB(Tf, Tk);
Chris@82 308 TE = VMUL(LDK(KP559016994), VSUB(TC, TD));
Chris@82 309 TJ = VFNMS(LDK(KP250000000), TI, TH);
Chris@82 310 TK = VADD(TE, TJ);
Chris@82 311 TV = VSUB(TJ, TE);
Chris@82 312 { /* outputs (1, 14) from (TB, TO); outputs (7, 8) from (TX, TY) */
Chris@82 313 V TB, TO, TX, TY;
Chris@82 314 TB = VBYI(VADD(Tl, TA));
Chris@82 315 TO = VSUB(TK, TN);
Chris@82 316 ST(&(xo[WS(os, 1)]), VADD(TB, TO), ovs, &(xo[WS(os, 1)]));
Chris@82 317 ST(&(xo[WS(os, 14)]), VSUB(TO, TB), ovs, &(xo[0]));
Chris@82 318 TX = VBYI(VSUB(TS, TR));
Chris@82 319 TY = VSUB(TV, TU);
Chris@82 320 ST(&(xo[WS(os, 7)]), VADD(TX, TY), ovs, &(xo[WS(os, 1)]));
Chris@82 321 ST(&(xo[WS(os, 8)]), VSUB(TY, TX), ovs, &(xo[0]));
Chris@82 322 }
Chris@82 323 { /* outputs (4, 11) from (TP, TQ); outputs (2, 13) from (TT, TW) */
Chris@82 324 V TP, TQ, TT, TW;
Chris@82 325 TP = VBYI(VSUB(Tl, TA));
Chris@82 326 TQ = VADD(TN, TK);
Chris@82 327 ST(&(xo[WS(os, 4)]), VADD(TP, TQ), ovs, &(xo[0]));
Chris@82 328 ST(&(xo[WS(os, 11)]), VSUB(TQ, TP), ovs, &(xo[WS(os, 1)]));
Chris@82 329 TT = VBYI(VADD(TR, TS));
Chris@82 330 TW = VADD(TU, TV);
Chris@82 331 ST(&(xo[WS(os, 2)]), VADD(TT, TW), ovs, &(xo[0]));
Chris@82 332 ST(&(xo[WS(os, 13)]), VSUB(TW, TT), ovs, &(xo[WS(os, 1)]));
Chris@82 333 }
Chris@82 334 }
Chris@82 335 }
Chris@82 336 }
Chris@82 337 VLEAVE(); /* invoked once, after the loop; defined elsewhere */
Chris@82 338 }
Chris@82 339
/*
 * Descriptor handed to kdft_register for the non-FMA variant: first field 15,
 * name string "n1bv_15", and the counts {64, 11, 14, 0}, whose first three
 * entries match the "64 additions, 11 multiplications, 14 fused multiply/add"
 * figures in the generated header comment.  The meaning of the fourth count,
 * of GENUS and of the trailing zeros is defined by kdft_desc elsewhere.
 */
Chris@82 340 static const kdft_desc desc = { 15, XSIMD_STRING("n1bv_15"), {64, 11, 14, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 341
/*
 * Registration entry point (non-FMA variant): passes the planner p, the
 * n1bv_15 function and its descriptor desc to X(kdft_register).
 */
Chris@82 342 void XSIMD(codelet_n1bv_15) (planner *p) {
Chris@82 343 X(kdft_register) (p, n1bv_15, &desc);
Chris@82 344 }
Chris@82 345
Chris@82 346 #endif