annotate src/fftw-3.3.8/dft/scalar/codelets/n1_13.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:10 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 13 -name n1_13 -include dft/scalar/n.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 176 FP additions, 114 FP multiplications,
Chris@82 32 * (or, 62 additions, 0 multiplications, 114 fused multiply/add),
Chris@82 33 * 76 stack variables, 25 constants, and 52 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/n.h"
Chris@82 36
/*
 * n1_13 (FMA variant): codelet for a DFT of size 13, emitted by genfft's
 * gen_notw with the -fma option (see the "Generated by" comment above).
 *
 * ri, ii   real and imaginary input arrays, read at WS(is, k), k = 0..12
 * ro, io   real and imaginary output arrays, written at WS(os, k), k = 0..12
 * is, os   input / output strides; only ever used through WS()
 * v        number of loop iterations (one 13-point transform per iteration)
 * ivs, ovs amounts added to the input / output pointers after each iteration
 *
 * NOTE(review): DK, E, R, INT, stride, WS, FMA, FMS, FNMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file (presumably reached
 * through dft/codelet-dft.h); the comments below do not rely on their
 * exact definitions -- confirm against that header before editing.
 */
Chris@82 37 static void n1_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* Numeric constants of the butterfly network.  Read off the literals:
 * KP500000000 is 1/2, KP866025403 is sqrt(3)/2, KP083333333 is 1/12. */
Chris@82 39 DK(KP875502302, +0.875502302409147941146295545768755143177842006);
Chris@82 40 DK(KP520028571, +0.520028571888864619117130500499232802493238139);
Chris@82 41 DK(KP968287244, +0.968287244361984016049539446938120421179794516);
Chris@82 42 DK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@82 43 DK(KP600477271, +0.600477271932665282925769253334763009352012849);
Chris@82 44 DK(KP957805992, +0.957805992594665126462521754605754580515587217);
Chris@82 45 DK(KP516520780, +0.516520780623489722840901288569017135705033622);
Chris@82 46 DK(KP581704778, +0.581704778510515730456870384989698884939833902);
Chris@82 47 DK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@82 48 DK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@82 49 DK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@82 50 DK(KP301479260, +0.301479260047709873958013540496673347309208464);
Chris@82 51 DK(KP083333333, +0.083333333333333333333333333333333333333333333);
Chris@82 52 DK(KP859542535, +0.859542535098774820163672132761689612766401925);
Chris@82 53 DK(KP514918778, +0.514918778086315755491789696138117261566051239);
Chris@82 54 DK(KP522026385, +0.522026385161275033714027226654165028300441940);
Chris@82 55 DK(KP853480001, +0.853480001859823990758994934970528322872359049);
Chris@82 56 DK(KP612264650, +0.612264650376756543746494474777125408779395514);
Chris@82 57 DK(KP038632954, +0.038632954644348171955506895830342264440241080);
Chris@82 58 DK(KP302775637, +0.302775637731994646559610633735247973125648287);
Chris@82 59 DK(KP769338817, +0.769338817572980603471413688209101117038278899);
Chris@82 60 DK(KP686558370, +0.686558370781754340655719594850823015421401653);
Chris@82 61 DK(KP226109445, +0.226109445035782405468510155372505010481906348);
Chris@82 62 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 63 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 64 {
Chris@82 65 INT i;
/* i counts down from v; after every pass the input pointers advance by
 * ivs and the output pointers by ovs. */
Chris@82 66 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(52, is), MAKE_VOLATILE_STRIDE(52, os)) {
/* Intermediates that outlive the two input-gathering blocks below. */
Chris@82 67 E T1, T1P, T2n, T2o, To, TH, T2h, T2k, TB, TE, Tw, TF, T2c, T2j, T1j;
Chris@82 68 E T1m, T12, T1f, T21, T24, T1U, T27, T1d, T1g, T1Y, T25;
/* Element 0 is kept apart: it feeds ro[0] / io[0] directly and enters the
 * other outputs through Tp and T1V. */
Chris@82 69 T1 = ri[0];
Chris@82 70 T1P = ii[0];
/* Real inputs 1..12: load them and form the sums and differences that
 * both output sections consume. */
Chris@82 71 {
Chris@82 72 E Tf, T2d, Tb, Ty, Tq, T6, Tx, Tr, Ti, Tt, Tl, Tu, Tm, T2e, Td;
Chris@82 73 E Te, Tc, Tn;
Chris@82 74 Td = ri[WS(is, 8)];
Chris@82 75 Te = ri[WS(is, 5)];
Chris@82 76 Tf = Td + Te;
Chris@82 77 T2d = Td - Te;
Chris@82 78 {
Chris@82 79 E T7, T8, T9, Ta;
Chris@82 80 T7 = ri[WS(is, 12)];
Chris@82 81 T8 = ri[WS(is, 10)];
Chris@82 82 T9 = ri[WS(is, 4)];
Chris@82 83 Ta = T8 + T9;
Chris@82 84 Tb = T7 + Ta;
Chris@82 85 Ty = FMS(KP500000000, Ta, T7);
Chris@82 86 Tq = T8 - T9;
Chris@82 87 }
Chris@82 88 {
Chris@82 89 E T2, T3, T4, T5;
Chris@82 90 T2 = ri[WS(is, 1)];
Chris@82 91 T3 = ri[WS(is, 3)];
Chris@82 92 T4 = ri[WS(is, 9)];
Chris@82 93 T5 = T3 + T4;
Chris@82 94 T6 = T2 + T5;
Chris@82 95 Tx = FNMS(KP500000000, T5, T2);
Chris@82 96 Tr = T4 - T3;
Chris@82 97 }
Chris@82 98 {
Chris@82 99 E Tg, Th, Tj, Tk;
Chris@82 100 Tg = ri[WS(is, 11)];
Chris@82 101 Th = ri[WS(is, 6)];
Chris@82 102 Ti = Tg + Th;
Chris@82 103 Tt = Tg - Th;
Chris@82 104 Tj = ri[WS(is, 7)];
Chris@82 105 Tk = ri[WS(is, 2)];
Chris@82 106 Tl = Tj + Tk;
Chris@82 107 Tu = Tj - Tk;
Chris@82 108 }
Chris@82 109 Tm = Ti + Tl;
Chris@82 110 T2e = Tt + Tu;
Chris@82 111 T2n = T6 - Tb;
Chris@82 112 T2o = T2d + T2e;
Chris@82 113 Tc = T6 + Tb;
Chris@82 114 Tn = Tf + Tm;
/* To is the total of real inputs 1..12. */
Chris@82 115 To = Tc + Tn;
Chris@82 116 TH = Tc - Tn;
Chris@82 117 {
Chris@82 118 E T2f, T2g, Tz, TA;
Chris@82 119 T2f = FNMS(KP500000000, T2e, T2d);
Chris@82 120 T2g = Tr + Tq;
Chris@82 121 T2h = FMA(KP866025403, T2g, T2f);
Chris@82 122 T2k = FNMS(KP866025403, T2g, T2f);
Chris@82 123 Tz = Tx - Ty;
Chris@82 124 TA = FNMS(KP500000000, Tm, Tf);
Chris@82 125 TB = Tz + TA;
Chris@82 126 TE = Tz - TA;
Chris@82 127 }
Chris@82 128 {
Chris@82 129 E Ts, Tv, T2a, T2b;
Chris@82 130 Ts = Tq - Tr;
Chris@82 131 Tv = Tt - Tu;
Chris@82 132 Tw = Ts + Tv;
Chris@82 133 TF = Ts - Tv;
Chris@82 134 T2a = Tx + Ty;
Chris@82 135 T2b = Ti - Tl;
Chris@82 136 T2c = FMA(KP866025403, T2b, T2a);
Chris@82 137 T2j = FNMS(KP866025403, T2b, T2a);
Chris@82 138 }
Chris@82 139 }
/* Imaginary inputs 1..12, reduced to the corresponding set of
 * intermediates. */
Chris@82 140 {
Chris@82 141 E TM, T1R, T10, T1l, T18, TX, T1k, T15, TP, T1a, TS, T1b, TT, T1S, TK;
Chris@82 142 E TL, TU, T11;
Chris@82 143 TK = ii[WS(is, 8)];
Chris@82 144 TL = ii[WS(is, 5)];
Chris@82 145 TM = TK - TL;
Chris@82 146 T1R = TK + TL;
Chris@82 147 {
Chris@82 148 E T16, TY, TZ, T17;
Chris@82 149 T16 = ii[WS(is, 12)];
Chris@82 150 TY = ii[WS(is, 10)];
Chris@82 151 TZ = ii[WS(is, 4)];
Chris@82 152 T17 = TY + TZ;
Chris@82 153 T10 = TY - TZ;
Chris@82 154 T1l = T16 + T17;
Chris@82 155 T18 = FMS(KP500000000, T17, T16);
Chris@82 156 }
Chris@82 157 {
Chris@82 158 E T13, TV, TW, T14;
Chris@82 159 T13 = ii[WS(is, 1)];
Chris@82 160 TV = ii[WS(is, 9)];
Chris@82 161 TW = ii[WS(is, 3)];
Chris@82 162 T14 = TW + TV;
Chris@82 163 TX = TV - TW;
Chris@82 164 T1k = T13 + T14;
Chris@82 165 T15 = FNMS(KP500000000, T14, T13);
Chris@82 166 }
Chris@82 167 {
Chris@82 168 E TN, TO, TQ, TR;
Chris@82 169 TN = ii[WS(is, 11)];
Chris@82 170 TO = ii[WS(is, 6)];
Chris@82 171 TP = TN - TO;
Chris@82 172 T1a = TN + TO;
Chris@82 173 TQ = ii[WS(is, 7)];
Chris@82 174 TR = ii[WS(is, 2)];
Chris@82 175 TS = TQ - TR;
Chris@82 176 T1b = TQ + TR;
Chris@82 177 }
Chris@82 178 TT = TP + TS;
Chris@82 179 T1S = T1a + T1b;
Chris@82 180 T1j = TM + TT;
Chris@82 181 T1m = T1k - T1l;
Chris@82 182 TU = FNMS(KP500000000, TT, TM);
Chris@82 183 T11 = TX + T10;
Chris@82 184 T12 = FMA(KP866025403, T11, TU);
Chris@82 185 T1f = FNMS(KP866025403, T11, TU);
Chris@82 186 {
Chris@82 187 E T1Z, T20, T1Q, T1T;
Chris@82 188 T1Z = T15 - T18;
Chris@82 189 T20 = FNMS(KP500000000, T1S, T1R);
Chris@82 190 T21 = T1Z + T20;
Chris@82 191 T24 = T1Z - T20;
Chris@82 192 T1Q = T1k + T1l;
Chris@82 193 T1T = T1R + T1S;
/* T1U is the total of imaginary inputs 1..12. */
Chris@82 194 T1U = T1Q + T1T;
Chris@82 195 T27 = T1Q - T1T;
Chris@82 196 }
Chris@82 197 {
Chris@82 198 E T19, T1c, T1W, T1X;
Chris@82 199 T19 = T15 + T18;
Chris@82 200 T1c = T1a - T1b;
Chris@82 201 T1d = FMA(KP866025403, T1c, T19);
Chris@82 202 T1g = FNMS(KP866025403, T1c, T19);
Chris@82 203 T1W = T10 - TX;
Chris@82 204 T1X = TP - TS;
Chris@82 205 T1Y = T1W + T1X;
Chris@82 206 T25 = T1W - T1X;
Chris@82 207 }
Chris@82 208 }
/* Output 0 is the plain sum of all 13 inputs of the respective part. */
Chris@82 209 ro[0] = T1 + To;
Chris@82 210 io[0] = T1P + T1U;
/* Real outputs 1..12.  They are stored two at a time: both members of a
 * pair use the same constant and operands and differ only in FNMS vs FMA. */
Chris@82 211 {
Chris@82 212 E T1z, T1J, T1G, T1H, T1w, T1I, T1n, T1i, T1s, T1E, TD, T1D, TI, T1r, T1e;
Chris@82 213 E T1h;
Chris@82 214 {
Chris@82 215 E T1x, T1y, T1u, T1v;
Chris@82 216 T1x = FNMS(KP226109445, Tw, TB);
Chris@82 217 T1y = FMA(KP686558370, TE, TF);
Chris@82 218 T1z = FNMS(KP769338817, T1y, T1x);
Chris@82 219 T1J = FMA(KP769338817, T1y, T1x);
Chris@82 220 T1G = FMA(KP302775637, T1j, T1m);
Chris@82 221 T1u = FNMS(KP038632954, T12, T1d);
Chris@82 222 T1v = FNMS(KP612264650, T1f, T1g);
Chris@82 223 T1H = FNMS(KP853480001, T1v, T1u);
Chris@82 224 T1w = FMA(KP853480001, T1v, T1u);
Chris@82 225 T1I = FNMS(KP522026385, T1H, T1G);
Chris@82 226 }
Chris@82 227 T1n = FNMS(KP302775637, T1m, T1j);
Chris@82 228 T1e = FMA(KP038632954, T1d, T12);
Chris@82 229 T1h = FMA(KP612264650, T1g, T1f);
Chris@82 230 T1i = FNMS(KP853480001, T1h, T1e);
Chris@82 231 T1s = FNMS(KP522026385, T1i, T1n);
Chris@82 232 T1E = FMA(KP853480001, T1h, T1e);
Chris@82 233 {
Chris@82 234 E TG, T1q, Tp, TC, T1p;
Chris@82 235 TG = FNMS(KP514918778, TF, TE);
Chris@82 236 T1q = FNMS(KP859542535, TG, TH);
Chris@82 237 Tp = FNMS(KP083333333, To, T1);
Chris@82 238 TC = FMA(KP301479260, TB, Tw);
Chris@82 239 T1p = FNMS(KP251768516, TC, Tp);
Chris@82 240 TD = FMA(KP503537032, TC, Tp);
Chris@82 241 T1D = FNMS(KP300462606, T1q, T1p);
Chris@82 242 TI = FMA(KP581704778, TH, TG);
Chris@82 243 T1r = FMA(KP300462606, T1q, T1p);
Chris@82 244 }
Chris@82 245 {
Chris@82 246 E TJ, T1o, T1L, T1M;
Chris@82 247 TJ = FMA(KP516520780, TI, TD);
Chris@82 248 T1o = FMA(KP957805992, T1n, T1i);
Chris@82 249 ro[WS(os, 1)] = FNMS(KP600477271, T1o, TJ);
Chris@82 250 ro[WS(os, 12)] = FMA(KP600477271, T1o, TJ);
Chris@82 251 {
Chris@82 252 E T1t, T1A, T1N, T1O;
Chris@82 253 T1t = FNMS(KP575140729, T1s, T1r);
Chris@82 254 T1A = FMA(KP968287244, T1z, T1w);
Chris@82 255 ro[WS(os, 9)] = FNMS(KP520028571, T1A, T1t);
Chris@82 256 ro[WS(os, 3)] = FMA(KP520028571, T1A, T1t);
Chris@82 257 T1N = FNMS(KP516520780, TI, TD);
Chris@82 258 T1O = FMA(KP957805992, T1G, T1H);
Chris@82 259 ro[WS(os, 8)] = FNMS(KP600477271, T1O, T1N);
Chris@82 260 ro[WS(os, 5)] = FMA(KP600477271, T1O, T1N);
Chris@82 261 }
Chris@82 262 T1L = FNMS(KP520028571, T1E, T1D);
Chris@82 263 T1M = FNMS(KP875502302, T1J, T1I);
Chris@82 264 ro[WS(os, 11)] = FNMS(KP575140729, T1M, T1L);
Chris@82 265 ro[WS(os, 6)] = FMA(KP575140729, T1M, T1L);
Chris@82 266 {
Chris@82 267 E T1F, T1K, T1B, T1C;
Chris@82 268 T1F = FMA(KP520028571, T1E, T1D);
Chris@82 269 T1K = FMA(KP875502302, T1J, T1I);
Chris@82 270 ro[WS(os, 7)] = FNMS(KP575140729, T1K, T1F);
Chris@82 271 ro[WS(os, 2)] = FMA(KP575140729, T1K, T1F);
Chris@82 272 T1B = FMA(KP575140729, T1s, T1r);
Chris@82 273 T1C = FNMS(KP968287244, T1z, T1w);
Chris@82 274 ro[WS(os, 10)] = FNMS(KP520028571, T1C, T1B);
Chris@82 275 ro[WS(os, 4)] = FMA(KP520028571, T1C, T1B);
Chris@82 276 }
Chris@82 277 }
Chris@82 278 }
/* Imaginary outputs 1..12, stored with the same FNMS / FMA pairing. */
Chris@82 279 {
Chris@82 280 E T2F, T2N, T2v, T2u, T2A, T2K, T2p, T2m, T2C, T2M, T23, T2J, T28, T2z, T2i;
Chris@82 281 E T2l;
Chris@82 282 {
Chris@82 283 E T2D, T2E, T2s, T2t;
Chris@82 284 T2D = FNMS(KP226109445, T1Y, T21);
Chris@82 285 T2E = FMA(KP686558370, T24, T25);
Chris@82 286 T2F = FNMS(KP769338817, T2E, T2D);
Chris@82 287 T2N = FMA(KP769338817, T2E, T2D);
Chris@82 288 T2v = FNMS(KP302775637, T2n, T2o);
Chris@82 289 T2s = FMA(KP038632954, T2c, T2h);
Chris@82 290 T2t = FMA(KP612264650, T2j, T2k);
Chris@82 291 T2u = FNMS(KP853480001, T2t, T2s);
Chris@82 292 T2A = FNMS(KP522026385, T2u, T2v);
Chris@82 293 T2K = FMA(KP853480001, T2t, T2s);
Chris@82 294 }
Chris@82 295 T2p = FMA(KP302775637, T2o, T2n);
Chris@82 296 T2i = FNMS(KP038632954, T2h, T2c);
Chris@82 297 T2l = FNMS(KP612264650, T2k, T2j);
Chris@82 298 T2m = FNMS(KP853480001, T2l, T2i);
Chris@82 299 T2C = FMA(KP853480001, T2l, T2i);
Chris@82 300 T2M = FNMS(KP522026385, T2m, T2p);
Chris@82 301 {
Chris@82 302 E T26, T2y, T1V, T22, T2x;
Chris@82 303 T26 = FNMS(KP514918778, T25, T24);
Chris@82 304 T2y = FNMS(KP859542535, T26, T27);
Chris@82 305 T1V = FNMS(KP083333333, T1U, T1P);
Chris@82 306 T22 = FMA(KP301479260, T21, T1Y);
Chris@82 307 T2x = FNMS(KP251768516, T22, T1V);
Chris@82 308 T23 = FMA(KP503537032, T22, T1V);
Chris@82 309 T2J = FNMS(KP300462606, T2y, T2x);
Chris@82 310 T28 = FMA(KP581704778, T27, T26);
Chris@82 311 T2z = FMA(KP300462606, T2y, T2x);
Chris@82 312 }
Chris@82 313 {
Chris@82 314 E T29, T2q, T2L, T2O;
Chris@82 315 T29 = FNMS(KP516520780, T28, T23);
Chris@82 316 T2q = FMA(KP957805992, T2p, T2m);
Chris@82 317 io[WS(os, 5)] = FNMS(KP600477271, T2q, T29);
Chris@82 318 io[WS(os, 8)] = FMA(KP600477271, T2q, T29);
Chris@82 319 {
Chris@82 320 E T2r, T2w, T2P, T2Q;
Chris@82 321 T2r = FMA(KP516520780, T28, T23);
Chris@82 322 T2w = FMA(KP957805992, T2v, T2u);
Chris@82 323 io[WS(os, 1)] = FMA(KP600477271, T2w, T2r);
Chris@82 324 io[WS(os, 12)] = FNMS(KP600477271, T2w, T2r);
Chris@82 325 T2P = FMA(KP520028571, T2K, T2J);
Chris@82 326 T2Q = FMA(KP875502302, T2N, T2M);
Chris@82 327 io[WS(os, 6)] = FNMS(KP575140729, T2Q, T2P);
Chris@82 328 io[WS(os, 11)] = FMA(KP575140729, T2Q, T2P);
Chris@82 329 }
Chris@82 330 T2L = FNMS(KP520028571, T2K, T2J);
Chris@82 331 T2O = FNMS(KP875502302, T2N, T2M);
Chris@82 332 io[WS(os, 2)] = FNMS(KP575140729, T2O, T2L);
Chris@82 333 io[WS(os, 7)] = FMA(KP575140729, T2O, T2L);
Chris@82 334 {
Chris@82 335 E T2H, T2I, T2B, T2G;
Chris@82 336 T2H = FNMS(KP575140729, T2A, T2z);
Chris@82 337 T2I = FMA(KP968287244, T2F, T2C);
Chris@82 338 io[WS(os, 4)] = FNMS(KP520028571, T2I, T2H);
Chris@82 339 io[WS(os, 10)] = FMA(KP520028571, T2I, T2H);
Chris@82 340 T2B = FMA(KP575140729, T2A, T2z);
Chris@82 341 T2G = FNMS(KP968287244, T2F, T2C);
Chris@82 342 io[WS(os, 3)] = FNMS(KP520028571, T2G, T2B);
Chris@82 343 io[WS(os, 9)] = FMA(KP520028571, T2G, T2B);
Chris@82 344 }
Chris@82 345 }
Chris@82 346 }
Chris@82 347 }
Chris@82 348 }
Chris@82 349 }
Chris@82 350
/* Descriptor handed to kdft_register together with n1_13 (FMA variant):
 * transform size 13 and the codelet name.  The {62, 0, 114, 0} tuple
 * matches the "62 additions, 0 multiplications, 114 fused multiply/add"
 * counts quoted in the generator comment above; the meaning of the fourth
 * entry and of the trailing zeros is not visible here -- TODO confirm
 * against the kdft_desc declaration. */
Chris@82 351 static const kdft_desc desc = { 13, "n1_13", {62, 0, 114, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 352
/* Registration entry point (FMA variant): passes the n1_13 kernel and its
 * descriptor to kdft_register for planner p.
 * NOTE(review): X() is a macro defined elsewhere that wraps both
 * identifiers; presumably it applies a library name prefix -- confirm. */
Chris@82 353 void X(codelet_n1_13) (planner *p) {
Chris@82 354 X(kdft_register) (p, n1_13, &desc);
Chris@82 355 }
Chris@82 356
Chris@82 357 #else
Chris@82 358
Chris@82 359 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 13 -name n1_13 -include dft/scalar/n.h */
Chris@82 360
Chris@82 361 /*
Chris@82 362 * This function contains 176 FP additions, 68 FP multiplications,
Chris@82 363 * (or, 138 additions, 30 multiplications, 38 fused multiply/add),
Chris@82 364 * 71 stack variables, 20 constants, and 52 memory accesses
Chris@82 365 */
Chris@82 366 #include "dft/scalar/n.h"
Chris@82 367
/*
 * n1_13 (non-FMA variant): codelet for a DFT of size 13, emitted by
 * genfft's gen_notw without the -fma option (see the "Generated by"
 * comment above).  Compiled only when neither ARCH_PREFERS_FMA nor
 * ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * ri, ii   real and imaginary input arrays, read at WS(is, k), k = 0..12
 * ro, io   real and imaginary output arrays, written at WS(os, k), k = 0..12
 * is, os   input / output strides; only ever used through WS()
 * v        number of loop iterations (one 13-point transform per iteration)
 * ivs, ovs amounts added to the input / output pointers after each iteration
 *
 * NOTE(review): DK, E, R, INT, stride, WS, FMA, FMS, FNMS and
 * MAKE_VOLATILE_STRIDE are defined outside this file (presumably reached
 * through dft/codelet-dft.h); the comments below do not rely on their
 * exact definitions -- confirm against that header before editing.
 */
Chris@82 368 static void n1_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 369 {
/* Numeric constants of the butterfly network.  Read off the literals:
 * KP2_000000000 is 2, KP1_732050807 is sqrt(3), KP866025403 is sqrt(3)/2,
 * KP500000000 is 1/2 and KP083333333 is 1/12. */
Chris@82 370 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@82 371 DK(KP083333333, +0.083333333333333333333333333333333333333333333);
Chris@82 372 DK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@82 373 DK(KP075902986, +0.075902986037193865983102897245103540356428373);
Chris@82 374 DK(KP132983124, +0.132983124607418643793760531921092974399165133);
Chris@82 375 DK(KP258260390, +0.258260390311744861420450644284508567852516811);
Chris@82 376 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
Chris@82 377 DK(KP300238635, +0.300238635966332641462884626667381504676006424);
Chris@82 378 DK(KP011599105, +0.011599105605768290721655456654083252189827041);
Chris@82 379 DK(KP156891391, +0.156891391051584611046832726756003269660212636);
Chris@82 380 DK(KP256247671, +0.256247671582936600958684654061725059144125175);
Chris@82 381 DK(KP174138601, +0.174138601152135905005660794929264742616964676);
Chris@82 382 DK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@82 383 DK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@82 384 DK(KP113854479, +0.113854479055790798974654345867655310534642560);
Chris@82 385 DK(KP265966249, +0.265966249214837287587521063842185948798330267);
Chris@82 386 DK(KP387390585, +0.387390585467617292130675966426762851778775217);
Chris@82 387 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 388 DK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@82 389 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 390 {
Chris@82 391 INT i;
/* i counts down from v; after every pass the input pointers advance by
 * ivs and the output pointers by ovs. */
Chris@82 392 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(52, is), MAKE_VOLATILE_STRIDE(52, os)) {
/* Intermediates that outlive the two input-gathering blocks below. */
Chris@82 393 E T1, T1q, Tt, Tu, To, T22, T20, T24, TF, TH, TA, TI, T1X, T25, T2a;
Chris@82 394 E T2d, T18, T1n, T2k, T2n, T1l, T1r, T1f, T1o, T2h, T2m;
/* Element 0 is kept apart: it feeds ro[0] / io[0] directly and enters the
 * other outputs through T27 and T1s. */
Chris@82 395 T1 = ri[0];
Chris@82 396 T1q = ii[0];
/* Real inputs 1..12: load them and form the sums and differences that
 * both output sections consume. */
Chris@82 397 {
Chris@82 398 E Tf, Tp, Tb, TC, Tx, T6, TB, Tw, Ti, Tq, Tl, Tr, Tm, Ts, Td;
Chris@82 399 E Te, Tc, Tn;
Chris@82 400 Td = ri[WS(is, 8)];
Chris@82 401 Te = ri[WS(is, 5)];
Chris@82 402 Tf = Td + Te;
Chris@82 403 Tp = Td - Te;
Chris@82 404 {
Chris@82 405 E T7, T8, T9, Ta;
Chris@82 406 T7 = ri[WS(is, 12)];
Chris@82 407 T8 = ri[WS(is, 10)];
Chris@82 408 T9 = ri[WS(is, 4)];
Chris@82 409 Ta = T8 + T9;
Chris@82 410 Tb = T7 + Ta;
Chris@82 411 TC = T8 - T9;
Chris@82 412 Tx = FNMS(KP500000000, Ta, T7);
Chris@82 413 }
Chris@82 414 {
Chris@82 415 E T2, T3, T4, T5;
Chris@82 416 T2 = ri[WS(is, 1)];
Chris@82 417 T3 = ri[WS(is, 3)];
Chris@82 418 T4 = ri[WS(is, 9)];
Chris@82 419 T5 = T3 + T4;
Chris@82 420 T6 = T2 + T5;
Chris@82 421 TB = T3 - T4;
Chris@82 422 Tw = FNMS(KP500000000, T5, T2);
Chris@82 423 }
Chris@82 424 {
Chris@82 425 E Tg, Th, Tj, Tk;
Chris@82 426 Tg = ri[WS(is, 11)];
Chris@82 427 Th = ri[WS(is, 6)];
Chris@82 428 Ti = Tg + Th;
Chris@82 429 Tq = Tg - Th;
Chris@82 430 Tj = ri[WS(is, 7)];
Chris@82 431 Tk = ri[WS(is, 2)];
Chris@82 432 Tl = Tj + Tk;
Chris@82 433 Tr = Tj - Tk;
Chris@82 434 }
Chris@82 435 Tm = Ti + Tl;
Chris@82 436 Ts = Tq + Tr;
Chris@82 437 Tt = Tp + Ts;
Chris@82 438 Tu = T6 - Tb;
Chris@82 439 Tc = T6 + Tb;
Chris@82 440 Tn = Tf + Tm;
/* To is the total of real inputs 1..12. */
Chris@82 441 To = Tc + Tn;
Chris@82 442 T22 = KP300462606 * (Tc - Tn);
Chris@82 443 {
Chris@82 444 E T1Y, T1Z, TD, TE;
Chris@82 445 T1Y = TB + TC;
Chris@82 446 T1Z = Tq - Tr;
Chris@82 447 T20 = T1Y - T1Z;
Chris@82 448 T24 = T1Y + T1Z;
Chris@82 449 TD = KP866025403 * (TB - TC);
Chris@82 450 TE = FNMS(KP500000000, Ts, Tp);
Chris@82 451 TF = TD - TE;
Chris@82 452 TH = TD + TE;
Chris@82 453 }
Chris@82 454 {
Chris@82 455 E Ty, Tz, T1V, T1W;
Chris@82 456 Ty = Tw - Tx;
Chris@82 457 Tz = KP866025403 * (Ti - Tl);
Chris@82 458 TA = Ty + Tz;
Chris@82 459 TI = Ty - Tz;
Chris@82 460 T1V = Tw + Tx;
Chris@82 461 T1W = FNMS(KP500000000, Tm, Tf);
Chris@82 462 T1X = T1V - T1W;
Chris@82 463 T25 = T1V + T1W;
Chris@82 464 }
Chris@82 465 }
/* Imaginary inputs 1..12, reduced to the corresponding set of
 * intermediates. */
Chris@82 466 {
Chris@82 467 E TZ, T2b, TV, T1i, T1a, TQ, T1h, T19, T12, T1d, T15, T1c, T16, T2c, TX;
Chris@82 468 E TY, TW, T17;
Chris@82 469 TX = ii[WS(is, 8)];
Chris@82 470 TY = ii[WS(is, 5)];
Chris@82 471 TZ = TX + TY;
Chris@82 472 T2b = TX - TY;
Chris@82 473 {
Chris@82 474 E TR, TS, TT, TU;
Chris@82 475 TR = ii[WS(is, 12)];
Chris@82 476 TS = ii[WS(is, 10)];
Chris@82 477 TT = ii[WS(is, 4)];
Chris@82 478 TU = TS + TT;
Chris@82 479 TV = FNMS(KP500000000, TU, TR);
Chris@82 480 T1i = TR + TU;
Chris@82 481 T1a = TS - TT;
Chris@82 482 }
Chris@82 483 {
Chris@82 484 E TM, TN, TO, TP;
Chris@82 485 TM = ii[WS(is, 1)];
Chris@82 486 TN = ii[WS(is, 3)];
Chris@82 487 TO = ii[WS(is, 9)];
Chris@82 488 TP = TN + TO;
Chris@82 489 TQ = FNMS(KP500000000, TP, TM);
Chris@82 490 T1h = TM + TP;
Chris@82 491 T19 = TN - TO;
Chris@82 492 }
Chris@82 493 {
Chris@82 494 E T10, T11, T13, T14;
Chris@82 495 T10 = ii[WS(is, 11)];
Chris@82 496 T11 = ii[WS(is, 6)];
Chris@82 497 T12 = T10 + T11;
Chris@82 498 T1d = T10 - T11;
Chris@82 499 T13 = ii[WS(is, 7)];
Chris@82 500 T14 = ii[WS(is, 2)];
Chris@82 501 T15 = T13 + T14;
Chris@82 502 T1c = T13 - T14;
Chris@82 503 }
Chris@82 504 T16 = T12 + T15;
Chris@82 505 T2c = T1d + T1c;
Chris@82 506 T2a = T1h - T1i;
Chris@82 507 T2d = T2b + T2c;
Chris@82 508 TW = TQ + TV;
Chris@82 509 T17 = FNMS(KP500000000, T16, TZ);
Chris@82 510 T18 = TW - T17;
Chris@82 511 T1n = TW + T17;
Chris@82 512 {
Chris@82 513 E T2i, T2j, T1j, T1k;
Chris@82 514 T2i = TQ - TV;
Chris@82 515 T2j = KP866025403 * (T15 - T12);
Chris@82 516 T2k = T2i + T2j;
Chris@82 517 T2n = T2i - T2j;
Chris@82 518 T1j = T1h + T1i;
Chris@82 519 T1k = TZ + T16;
Chris@82 520 T1l = KP300462606 * (T1j - T1k);
/* T1r is the total of imaginary inputs 1..12. */
Chris@82 521 T1r = T1j + T1k;
Chris@82 522 }
Chris@82 523 {
Chris@82 524 E T1b, T1e, T2f, T2g;
Chris@82 525 T1b = T19 + T1a;
Chris@82 526 T1e = T1c - T1d;
Chris@82 527 T1f = T1b + T1e;
Chris@82 528 T1o = T1e - T1b;
Chris@82 529 T2f = FNMS(KP500000000, T2c, T2b);
Chris@82 530 T2g = KP866025403 * (T1a - T19);
Chris@82 531 T2h = T2f - T2g;
Chris@82 532 T2m = T2g + T2f;
Chris@82 533 }
Chris@82 534 }
/* Output 0 is the plain sum of all 13 inputs of the respective part. */
Chris@82 535 ro[0] = T1 + To;
Chris@82 536 io[0] = T1q + T1r;
/* Imaginary outputs 1..12.  They are stored two at a time: each pair is
 * the sum and the difference of the same two intermediates. */
Chris@82 537 {
Chris@82 538 E T1D, T1N, T1y, T1x, T1E, T1O, Tv, TK, T1J, T1Q, T1m, T1R, T1t, T1I, TG;
Chris@82 539 E TJ;
Chris@82 540 {
Chris@82 541 E T1B, T1C, T1v, T1w;
Chris@82 542 T1B = FMA(KP387390585, T1f, KP265966249 * T18);
Chris@82 543 T1C = FMA(KP113854479, T1o, KP503537032 * T1n);
Chris@82 544 T1D = T1B + T1C;
Chris@82 545 T1N = T1C - T1B;
Chris@82 546 T1y = FMA(KP575140729, Tu, KP174138601 * Tt);
Chris@82 547 T1v = FNMS(KP156891391, TH, KP256247671 * TI);
Chris@82 548 T1w = FMA(KP011599105, TF, KP300238635 * TA);
Chris@82 549 T1x = T1v - T1w;
Chris@82 550 T1E = T1y + T1x;
Chris@82 551 T1O = KP1_732050807 * (T1v + T1w);
Chris@82 552 }
Chris@82 553 Tv = FNMS(KP174138601, Tu, KP575140729 * Tt);
Chris@82 554 TG = FNMS(KP300238635, TF, KP011599105 * TA);
Chris@82 555 TJ = FMA(KP256247671, TH, KP156891391 * TI);
Chris@82 556 TK = TG - TJ;
Chris@82 557 T1J = KP1_732050807 * (TJ + TG);
Chris@82 558 T1Q = Tv - TK;
Chris@82 559 {
Chris@82 560 E T1g, T1H, T1p, T1s, T1G;
Chris@82 561 T1g = FNMS(KP132983124, T1f, KP258260390 * T18);
Chris@82 562 T1H = T1l - T1g;
Chris@82 563 T1p = FNMS(KP251768516, T1o, KP075902986 * T1n);
Chris@82 564 T1s = FNMS(KP083333333, T1r, T1q);
Chris@82 565 T1G = T1s - T1p;
Chris@82 566 T1m = FMA(KP2_000000000, T1g, T1l);
Chris@82 567 T1R = T1H + T1G;
Chris@82 568 T1t = FMA(KP2_000000000, T1p, T1s);
Chris@82 569 T1I = T1G - T1H;
Chris@82 570 }
Chris@82 571 {
Chris@82 572 E TL, T1u, T1P, T1S;
Chris@82 573 TL = FMA(KP2_000000000, TK, Tv);
Chris@82 574 T1u = T1m + T1t;
Chris@82 575 io[WS(os, 1)] = TL + T1u;
Chris@82 576 io[WS(os, 12)] = T1u - TL;
Chris@82 577 {
Chris@82 578 E T1z, T1A, T1T, T1U;
Chris@82 579 T1z = FMS(KP2_000000000, T1x, T1y);
Chris@82 580 T1A = T1t - T1m;
Chris@82 581 io[WS(os, 5)] = T1z + T1A;
Chris@82 582 io[WS(os, 8)] = T1A - T1z;
Chris@82 583 T1T = T1R - T1Q;
Chris@82 584 T1U = T1O + T1N;
Chris@82 585 io[WS(os, 4)] = T1T - T1U;
Chris@82 586 io[WS(os, 10)] = T1U + T1T;
Chris@82 587 }
Chris@82 588 T1P = T1N - T1O;
Chris@82 589 T1S = T1Q + T1R;
Chris@82 590 io[WS(os, 3)] = T1P + T1S;
Chris@82 591 io[WS(os, 9)] = T1S - T1P;
Chris@82 592 {
Chris@82 593 E T1L, T1M, T1F, T1K;
Chris@82 594 T1L = T1J + T1I;
Chris@82 595 T1M = T1E + T1D;
Chris@82 596 io[WS(os, 6)] = T1L - T1M;
Chris@82 597 io[WS(os, 11)] = T1M + T1L;
Chris@82 598 T1F = T1D - T1E;
Chris@82 599 T1K = T1I - T1J;
Chris@82 600 io[WS(os, 2)] = T1F + T1K;
Chris@82 601 io[WS(os, 7)] = T1K - T1F;
Chris@82 602 }
Chris@82 603 }
Chris@82 604 }
/* Real outputs 1..12, stored with the same sum / difference pairing. */
Chris@82 605 {
Chris@82 606 E T2y, T2I, T2J, T2K, T2B, T2L, T2e, T2p, T2u, T2G, T23, T2F, T28, T2t, T2l;
Chris@82 607 E T2o;
Chris@82 608 {
Chris@82 609 E T2w, T2x, T2z, T2A;
Chris@82 610 T2w = FMA(KP387390585, T20, KP265966249 * T1X);
Chris@82 611 T2x = FNMS(KP503537032, T25, KP113854479 * T24);
Chris@82 612 T2y = T2w + T2x;
Chris@82 613 T2I = T2w - T2x;
Chris@82 614 T2J = FMA(KP575140729, T2a, KP174138601 * T2d);
Chris@82 615 T2z = FNMS(KP300238635, T2n, KP011599105 * T2m);
Chris@82 616 T2A = FNMS(KP156891391, T2h, KP256247671 * T2k);
Chris@82 617 T2K = T2z + T2A;
Chris@82 618 T2B = KP1_732050807 * (T2z - T2A);
Chris@82 619 T2L = T2J + T2K;
Chris@82 620 }
Chris@82 621 T2e = FNMS(KP575140729, T2d, KP174138601 * T2a);
Chris@82 622 T2l = FMA(KP256247671, T2h, KP156891391 * T2k);
Chris@82 623 T2o = FMA(KP300238635, T2m, KP011599105 * T2n);
Chris@82 624 T2p = T2l - T2o;
Chris@82 625 T2u = T2e - T2p;
Chris@82 626 T2G = KP1_732050807 * (T2o + T2l);
Chris@82 627 {
Chris@82 628 E T21, T2r, T26, T27, T2s;
Chris@82 629 T21 = FNMS(KP132983124, T20, KP258260390 * T1X);
Chris@82 630 T2r = T22 - T21;
Chris@82 631 T26 = FMA(KP251768516, T24, KP075902986 * T25);
Chris@82 632 T27 = FNMS(KP083333333, To, T1);
Chris@82 633 T2s = T27 - T26;
Chris@82 634 T23 = FMA(KP2_000000000, T21, T22);
Chris@82 635 T2F = T2s - T2r;
Chris@82 636 T28 = FMA(KP2_000000000, T26, T27);
Chris@82 637 T2t = T2r + T2s;
Chris@82 638 }
Chris@82 639 {
Chris@82 640 E T29, T2q, T2N, T2O;
Chris@82 641 T29 = T23 + T28;
Chris@82 642 T2q = FMA(KP2_000000000, T2p, T2e);
Chris@82 643 ro[WS(os, 12)] = T29 - T2q;
Chris@82 644 ro[WS(os, 1)] = T29 + T2q;
Chris@82 645 {
Chris@82 646 E T2v, T2C, T2P, T2Q;
Chris@82 647 T2v = T2t - T2u;
Chris@82 648 T2C = T2y - T2B;
Chris@82 649 ro[WS(os, 10)] = T2v - T2C;
Chris@82 650 ro[WS(os, 4)] = T2v + T2C;
Chris@82 651 T2P = T28 - T23;
Chris@82 652 T2Q = FMS(KP2_000000000, T2K, T2J);
Chris@82 653 ro[WS(os, 5)] = T2P - T2Q;
Chris@82 654 ro[WS(os, 8)] = T2P + T2Q;
Chris@82 655 }
Chris@82 656 T2N = T2F - T2G;
Chris@82 657 T2O = T2L - T2I;
Chris@82 658 ro[WS(os, 11)] = T2N - T2O;
Chris@82 659 ro[WS(os, 6)] = T2N + T2O;
Chris@82 660 {
Chris@82 661 E T2H, T2M, T2D, T2E;
Chris@82 662 T2H = T2F + T2G;
Chris@82 663 T2M = T2I + T2L;
Chris@82 664 ro[WS(os, 7)] = T2H - T2M;
Chris@82 665 ro[WS(os, 2)] = T2H + T2M;
Chris@82 666 T2D = T2t + T2u;
Chris@82 667 T2E = T2y + T2B;
Chris@82 668 ro[WS(os, 3)] = T2D - T2E;
Chris@82 669 ro[WS(os, 9)] = T2D + T2E;
Chris@82 670 }
Chris@82 671 }
Chris@82 672 }
Chris@82 673 }
Chris@82 674 }
Chris@82 675 }
Chris@82 676
/* Descriptor handed to kdft_register together with n1_13 (non-FMA
 * variant): transform size 13 and the codelet name.  The {138, 30, 38, 0}
 * tuple matches the "138 additions, 30 multiplications, 38 fused
 * multiply/add" counts quoted in the generator comment above; the meaning
 * of the fourth entry and of the trailing zeros is not visible here --
 * TODO confirm against the kdft_desc declaration. */
Chris@82 677 static const kdft_desc desc = { 13, "n1_13", {138, 30, 38, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 678
/* Registration entry point (non-FMA variant): passes the n1_13 kernel and
 * its descriptor to kdft_register for planner p.
 * NOTE(review): X() is a macro defined elsewhere that wraps both
 * identifiers; presumably it applies a library name prefix -- confirm. */
Chris@82 679 void X(codelet_n1_13) (planner *p) {
Chris@82 680 X(kdft_register) (p, n1_13, &desc);
Chris@82 681 }
Chris@82 682
Chris@82 683 #endif