annotate src/fftw-3.3.3/dft/scalar/codelets/n1_13.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@10 22 /* Generated on Sun Nov 25 07:35:43 EST 2012 */
Chris@10 23
Chris@10 24 #include "codelet-dft.h"
Chris@10 25
Chris@10 26 #ifdef HAVE_FMA
Chris@10 27
Chris@10 28 /* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 13 -name n1_13 -include n.h */
Chris@10 29
Chris@10 30 /*
Chris@10 31 * This function contains 176 FP additions, 114 FP multiplications,
Chris@10 32 * (or, 62 additions, 0 multiplications, 114 fused multiply/add),
Chris@10 33 * 87 stack variables, 25 constants, and 52 memory accesses
Chris@10 34 */
Chris@10 35 #include "n.h"
Chris@10 36
/*
 * n1_13, HAVE_FMA variant.  Emitted by genfft's gen_notw for n = 13 (see the
 * "Generated by" line earlier in this branch of the #ifdef).
 *
 * What the code visibly does, per loop iteration:
 *   - reads ri[0], ii[0] and ri[WS(is, k)], ii[WS(is, k)] for k = 1..12;
 *   - writes ro[0], io[0] and ro[WS(os, k)], io[WS(os, k)] for k = 1..12.
 * The loop runs while the counter, started at v, is > 0; after each pass
 * ri/ii are advanced by ivs and ro/io by ovs.
 *
 * NOTE(review): ri/ii and ro/io are presumably the real and imaginary parts
 * of the complex input and output of a 13-point DFT -- inferred from the
 * names, the file path and the descriptor; confirm against codelet-dft.h.
 * R, E, INT, stride, DK, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE come
 * from headers that are not visible here.
 */
Chris@10 37 static void n1_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@10 38 {
/* Numeric constants used by the arithmetic below; DK is a project macro
 * (presumably declares a named constant -- definition not visible here). */
Chris@10 39 DK(KP875502302, +0.875502302409147941146295545768755143177842006);
Chris@10 40 DK(KP520028571, +0.520028571888864619117130500499232802493238139);
Chris@10 41 DK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@10 42 DK(KP600477271, +0.600477271932665282925769253334763009352012849);
Chris@10 43 DK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@10 44 DK(KP516520780, +0.516520780623489722840901288569017135705033622);
Chris@10 45 DK(KP968287244, +0.968287244361984016049539446938120421179794516);
Chris@10 46 DK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@10 47 DK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@10 48 DK(KP581704778, +0.581704778510515730456870384989698884939833902);
Chris@10 49 DK(KP859542535, +0.859542535098774820163672132761689612766401925);
Chris@10 50 DK(KP083333333, +0.083333333333333333333333333333333333333333333);
Chris@10 51 DK(KP957805992, +0.957805992594665126462521754605754580515587217);
Chris@10 52 DK(KP522026385, +0.522026385161275033714027226654165028300441940);
Chris@10 53 DK(KP853480001, +0.853480001859823990758994934970528322872359049);
Chris@10 54 DK(KP769338817, +0.769338817572980603471413688209101117038278899);
Chris@10 55 DK(KP612264650, +0.612264650376756543746494474777125408779395514);
Chris@10 56 DK(KP038632954, +0.038632954644348171955506895830342264440241080);
Chris@10 57 DK(KP302775637, +0.302775637731994646559610633735247973125648287);
Chris@10 58 DK(KP514918778, +0.514918778086315755491789696138117261566051239);
Chris@10 59 DK(KP686558370, +0.686558370781754340655719594850823015421401653);
Chris@10 60 DK(KP226109445, +0.226109445035782405468510155372505010481906348);
Chris@10 61 DK(KP301479260, +0.301479260047709873958013540496673347309208464);
Chris@10 62 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@10 63 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@10 64 {
Chris@10 65 INT i;
/* One pass per counter value from v down to 1; all four pointers are
 * stepped in the for-increment, so the body always indexes from the
 * current base of each array. */
Chris@10 66 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(52, is), MAKE_VOLATILE_STRIDE(52, os)) {
/* Declared at loop scope because they are assigned inside the nested
 * block below but consumed by the last four io stores after it closes. */
Chris@10 67 E T2B, T2H, T2I, T2G;
Chris@10 68 {
Chris@10 69 E T1, T1P, T2n, T2o, To, TH, T2h, T2k, TE, TB, TF, Tw, T2j, T2c, T1m;
Chris@10 70 E T1W, T1X, T1c, T19, T1j, T12, T1f, T21, T24, T27, T1U;
/* Element 0 of each input array is loaded on its own; it is used
 * directly in the ro[0] / io[0] stores further down. */
Chris@10 71 T1 = ri[0];
Chris@10 72 T1P = ii[0];
Chris@10 73 {
Chris@10 74 E T2b, Tv, Ts, T2a;
Chris@10 75 {
Chris@10 76 E T2d, Tf, Tq, Ty, Tb, Tr, T6, Tx, Ti, Tt, Tu, Tl;
Chris@10 77 {
Chris@10 78 E T7, T8, T9, Td, Te;
/* Load ri[WS(is, 1..12)] and form sums/differences of them. */
Chris@10 79 Td = ri[WS(is, 8)];
Chris@10 80 Te = ri[WS(is, 5)];
Chris@10 81 T7 = ri[WS(is, 12)];
Chris@10 82 T8 = ri[WS(is, 10)];
Chris@10 83 T9 = ri[WS(is, 4)];
Chris@10 84 T2d = Td - Te;
Chris@10 85 Tf = Td + Te;
Chris@10 86 {
Chris@10 87 E T2, Ta, T3, T4;
Chris@10 88 T2 = ri[WS(is, 1)];
Chris@10 89 Ta = T8 + T9;
Chris@10 90 Tq = T8 - T9;
Chris@10 91 T3 = ri[WS(is, 3)];
Chris@10 92 T4 = ri[WS(is, 9)];
Chris@10 93 {
Chris@10 94 E Tg, T5, Th, Tj, Tk;
Chris@10 95 Tg = ri[WS(is, 11)];
Chris@10 96 Ty = FMS(KP500000000, Ta, T7);
Chris@10 97 Tb = T7 + Ta;
Chris@10 98 Tr = T4 - T3;
Chris@10 99 T5 = T3 + T4;
Chris@10 100 Th = ri[WS(is, 6)];
Chris@10 101 Tj = ri[WS(is, 7)];
Chris@10 102 Tk = ri[WS(is, 2)];
Chris@10 103 T6 = T2 + T5;
Chris@10 104 Tx = FNMS(KP500000000, T5, T2);
Chris@10 105 Ti = Tg + Th;
Chris@10 106 Tt = Tg - Th;
Chris@10 107 Tu = Tj - Tk;
Chris@10 108 Tl = Tj + Tk;
Chris@10 109 }
Chris@10 110 }
Chris@10 111 }
Chris@10 112 {
Chris@10 113 E Tc, Tm, T2e, T2g;
Chris@10 114 Tc = T6 + Tb;
Chris@10 115 T2n = T6 - Tb;
Chris@10 116 T2b = Ti - Tl;
Chris@10 117 Tm = Ti + Tl;
Chris@10 118 T2e = Tt + Tu;
Chris@10 119 Tv = Tt - Tu;
Chris@10 120 Ts = Tq - Tr;
Chris@10 121 T2g = Tr + Tq;
Chris@10 122 {
Chris@10 123 E Tz, TA, Tn, T2f;
Chris@10 124 Tz = Tx - Ty;
Chris@10 125 T2a = Tx + Ty;
Chris@10 126 TA = FNMS(KP500000000, Tm, Tf);
Chris@10 127 Tn = Tf + Tm;
Chris@10 128 T2f = FNMS(KP500000000, T2e, T2d);
Chris@10 129 T2o = T2d + T2e;
Chris@10 130 To = Tc + Tn;
Chris@10 131 TH = Tc - Tn;
Chris@10 132 T2h = FMA(KP866025403, T2g, T2f);
Chris@10 133 T2k = FNMS(KP866025403, T2g, T2f);
Chris@10 134 TE = Tz - TA;
Chris@10 135 TB = Tz + TA;
Chris@10 136 }
Chris@10 137 }
Chris@10 138 }
Chris@10 139 {
Chris@10 140 E T1R, TM, T10, T18, T1l, TX, T1k, T15, TP, T1a, T1b, TS;
Chris@10 141 {
Chris@10 142 E T16, TY, TZ, TK, TL;
/* Load ii[WS(is, 1..12)] and combine them the same way.  The
 * TF / Tw / T2j / T2c assignments just below still belong to the
 * ri-derived terms; they are interleaved with the ii loads
 * (presumably by the generator's instruction scheduling). */
Chris@10 143 TK = ii[WS(is, 8)];
Chris@10 144 TF = Ts - Tv;
Chris@10 145 Tw = Ts + Tv;
Chris@10 146 T2j = FNMS(KP866025403, T2b, T2a);
Chris@10 147 T2c = FMA(KP866025403, T2b, T2a);
Chris@10 148 TL = ii[WS(is, 5)];
Chris@10 149 T16 = ii[WS(is, 12)];
Chris@10 150 TY = ii[WS(is, 10)];
Chris@10 151 TZ = ii[WS(is, 4)];
Chris@10 152 T1R = TK + TL;
Chris@10 153 TM = TK - TL;
Chris@10 154 {
Chris@10 155 E T13, T17, TV, TW;
Chris@10 156 T13 = ii[WS(is, 1)];
Chris@10 157 T17 = TY + TZ;
Chris@10 158 T10 = TY - TZ;
Chris@10 159 TV = ii[WS(is, 9)];
Chris@10 160 TW = ii[WS(is, 3)];
Chris@10 161 {
Chris@10 162 E TN, T14, TO, TQ, TR;
Chris@10 163 TN = ii[WS(is, 11)];
Chris@10 164 T18 = FMS(KP500000000, T17, T16);
Chris@10 165 T1l = T16 + T17;
Chris@10 166 TX = TV - TW;
Chris@10 167 T14 = TW + TV;
Chris@10 168 TO = ii[WS(is, 6)];
Chris@10 169 TQ = ii[WS(is, 7)];
Chris@10 170 TR = ii[WS(is, 2)];
Chris@10 171 T1k = T13 + T14;
Chris@10 172 T15 = FNMS(KP500000000, T14, T13);
Chris@10 173 TP = TN - TO;
Chris@10 174 T1a = TN + TO;
Chris@10 175 T1b = TQ + TR;
Chris@10 176 TS = TQ - TR;
Chris@10 177 }
Chris@10 178 }
Chris@10 179 }
Chris@10 180 {
Chris@10 181 E T1Q, T11, TT, T1S;
Chris@10 182 T1Q = T1k + T1l;
Chris@10 183 T1m = T1k - T1l;
Chris@10 184 T11 = TX + T10;
Chris@10 185 T1W = T10 - TX;
Chris@10 186 T1X = TP - TS;
Chris@10 187 TT = TP + TS;
Chris@10 188 T1S = T1a + T1b;
Chris@10 189 T1c = T1a - T1b;
Chris@10 190 {
Chris@10 191 E T1Z, TU, T1T, T20;
Chris@10 192 T19 = T15 + T18;
Chris@10 193 T1Z = T15 - T18;
Chris@10 194 T1j = TM + TT;
Chris@10 195 TU = FNMS(KP500000000, TT, TM);
Chris@10 196 T1T = T1R + T1S;
Chris@10 197 T20 = FNMS(KP500000000, T1S, T1R);
Chris@10 198 T12 = FMA(KP866025403, T11, TU);
Chris@10 199 T1f = FNMS(KP866025403, T11, TU);
Chris@10 200 T21 = T1Z + T20;
Chris@10 201 T24 = T1Z - T20;
Chris@10 202 T27 = T1Q - T1T;
Chris@10 203 T1U = T1Q + T1T;
Chris@10 204 }
Chris@10 205 }
Chris@10 206 }
Chris@10 207 }
Chris@10 208 {
Chris@10 209 E T1g, T1d, T25, T1Y;
/* ro[0] and io[0] (stored a few lines below) are element 0 plus the
 * sum of the other twelve inputs of the same array: To accumulates
 * ri[WS(is, 1..12)] and T1U accumulates ii[WS(is, 1..12)]. */
Chris@10 210 ro[0] = T1 + To;
Chris@10 211 T1g = FNMS(KP866025403, T1c, T19);
Chris@10 212 T1d = FMA(KP866025403, T1c, T19);
Chris@10 213 T25 = T1W - T1X;
Chris@10 214 T1Y = T1W + T1X;
Chris@10 215 io[0] = T1P + T1U;
Chris@10 216 {
Chris@10 217 E T1C, T1B, T1F, T1K;
Chris@10 218 {
Chris@10 219 E TC, T1J, T1z, T1w, T1I, T1O, Tp, T1E, T1q, TI, T1o, T1s;
Chris@10 220 {
Chris@10 221 E TG, T1n, T1G, T1u, T1e, T1h, T1v, T1x, T1y, T1H, T1i;
Chris@10 222 TC = FMA(KP301479260, TB, Tw);
Chris@10 223 T1x = FNMS(KP226109445, Tw, TB);
Chris@10 224 T1y = FMA(KP686558370, TE, TF);
Chris@10 225 TG = FNMS(KP514918778, TF, TE);
Chris@10 226 T1n = FNMS(KP302775637, T1m, T1j);
Chris@10 227 T1G = FMA(KP302775637, T1j, T1m);
Chris@10 228 T1u = FNMS(KP038632954, T12, T1d);
Chris@10 229 T1e = FMA(KP038632954, T1d, T12);
Chris@10 230 T1h = FMA(KP612264650, T1g, T1f);
Chris@10 231 T1v = FNMS(KP612264650, T1f, T1g);
Chris@10 232 T1J = FMA(KP769338817, T1y, T1x);
Chris@10 233 T1z = FNMS(KP769338817, T1y, T1x);
Chris@10 234 T1H = FNMS(KP853480001, T1v, T1u);
Chris@10 235 T1w = FMA(KP853480001, T1v, T1u);
Chris@10 236 T1I = FNMS(KP522026385, T1H, T1G);
Chris@10 237 T1O = FMA(KP957805992, T1G, T1H);
Chris@10 238 Tp = FNMS(KP083333333, To, T1);
Chris@10 239 T1E = FMA(KP853480001, T1h, T1e);
Chris@10 240 T1i = FNMS(KP853480001, T1h, T1e);
Chris@10 241 T1q = FNMS(KP859542535, TG, TH);
Chris@10 242 TI = FMA(KP581704778, TH, TG);
Chris@10 243 T1o = FMA(KP957805992, T1n, T1i);
Chris@10 244 T1s = FNMS(KP522026385, T1i, T1n);
Chris@10 245 }
Chris@10 246 {
Chris@10 247 E T1A, T1D, T1t, T1L, T1M;
Chris@10 248 {
Chris@10 249 E T1p, TD, TJ, T1N, T1r;
Chris@10 250 T1p = FNMS(KP251768516, TC, Tp);
Chris@10 251 TD = FMA(KP503537032, TC, Tp);
Chris@10 252 T1C = FNMS(KP968287244, T1z, T1w);
Chris@10 253 T1A = FMA(KP968287244, T1z, T1w);
Chris@10 254 TJ = FMA(KP516520780, TI, TD);
Chris@10 255 T1N = FNMS(KP516520780, TI, TD);
Chris@10 256 T1D = FNMS(KP300462606, T1q, T1p);
Chris@10 257 T1r = FMA(KP300462606, T1q, T1p);
Chris@10 258 ro[WS(os, 8)] = FNMS(KP600477271, T1O, T1N);
Chris@10 259 ro[WS(os, 12)] = FMA(KP600477271, T1o, TJ);
Chris@10 260 ro[WS(os, 1)] = FNMS(KP600477271, T1o, TJ);
Chris@10 261 T1t = FNMS(KP575140729, T1s, T1r);
Chris@10 262 T1B = FMA(KP575140729, T1s, T1r);
Chris@10 263 ro[WS(os, 5)] = FMA(KP600477271, T1O, T1N);
Chris@10 264 }
Chris@10 265 T1L = FNMS(KP520028571, T1E, T1D);
Chris@10 266 T1F = FMA(KP520028571, T1E, T1D);
Chris@10 267 T1K = FMA(KP875502302, T1J, T1I);
Chris@10 268 T1M = FNMS(KP875502302, T1J, T1I);
Chris@10 269 ro[WS(os, 3)] = FMA(KP520028571, T1A, T1t);
Chris@10 270 ro[WS(os, 9)] = FNMS(KP520028571, T1A, T1t);
Chris@10 271 ro[WS(os, 6)] = FMA(KP575140729, T1M, T1L);
Chris@10 272 ro[WS(os, 11)] = FNMS(KP575140729, T1M, T1L);
Chris@10 273 }
Chris@10 274 }
Chris@10 275 {
Chris@10 276 E T22, T2F, T2N, T2K, T2w, T2A, T1V, T2C, T28, T2y, T2M, T2q;
Chris@10 277 {
Chris@10 278 E T26, T2v, T2p, T2i, T2s, T2t, T2l, T2D, T2E, T2u, T2m;
/* The remaining four ro stores (indices 2, 7, 4, 10) are
 * emitted here, in between the first terms of the io half. */
Chris@10 279 T2D = FNMS(KP226109445, T1Y, T21);
Chris@10 280 T22 = FMA(KP301479260, T21, T1Y);
Chris@10 281 ro[WS(os, 2)] = FMA(KP575140729, T1K, T1F);
Chris@10 282 ro[WS(os, 7)] = FNMS(KP575140729, T1K, T1F);
Chris@10 283 ro[WS(os, 4)] = FMA(KP520028571, T1C, T1B);
Chris@10 284 ro[WS(os, 10)] = FNMS(KP520028571, T1C, T1B);
Chris@10 285 T26 = FNMS(KP514918778, T25, T24);
Chris@10 286 T2E = FMA(KP686558370, T24, T25);
Chris@10 287 T2v = FNMS(KP302775637, T2n, T2o);
Chris@10 288 T2p = FMA(KP302775637, T2o, T2n);
Chris@10 289 T2i = FNMS(KP038632954, T2h, T2c);
Chris@10 290 T2s = FMA(KP038632954, T2c, T2h);
Chris@10 291 T2t = FMA(KP612264650, T2j, T2k);
Chris@10 292 T2l = FNMS(KP612264650, T2k, T2j);
Chris@10 293 T2F = FNMS(KP769338817, T2E, T2D);
Chris@10 294 T2N = FMA(KP769338817, T2E, T2D);
Chris@10 295 T2K = FMA(KP853480001, T2t, T2s);
Chris@10 296 T2u = FNMS(KP853480001, T2t, T2s);
Chris@10 297 T2w = FMA(KP957805992, T2v, T2u);
Chris@10 298 T2A = FNMS(KP522026385, T2u, T2v);
Chris@10 299 T1V = FNMS(KP083333333, T1U, T1P);
Chris@10 300 T2m = FNMS(KP853480001, T2l, T2i);
Chris@10 301 T2C = FMA(KP853480001, T2l, T2i);
Chris@10 302 T28 = FMA(KP581704778, T27, T26);
Chris@10 303 T2y = FNMS(KP859542535, T26, T27);
Chris@10 304 T2M = FNMS(KP522026385, T2m, T2p);
Chris@10 305 T2q = FMA(KP957805992, T2p, T2m);
Chris@10 306 }
Chris@10 307 {
Chris@10 308 E T2O, T2Q, T2z, T2P, T2L;
Chris@10 309 {
Chris@10 310 E T23, T2x, T2r, T29, T2J;
Chris@10 311 T23 = FMA(KP503537032, T22, T1V);
Chris@10 312 T2x = FNMS(KP251768516, T22, T1V);
Chris@10 313 T2O = FNMS(KP875502302, T2N, T2M);
Chris@10 314 T2Q = FMA(KP875502302, T2N, T2M);
Chris@10 315 T2r = FMA(KP516520780, T28, T23);
Chris@10 316 T29 = FNMS(KP516520780, T28, T23);
Chris@10 317 T2z = FMA(KP300462606, T2y, T2x);
Chris@10 318 T2J = FNMS(KP300462606, T2y, T2x);
Chris@10 319 io[WS(os, 12)] = FNMS(KP600477271, T2w, T2r);
Chris@10 320 io[WS(os, 1)] = FMA(KP600477271, T2w, T2r);
Chris@10 321 io[WS(os, 8)] = FMA(KP600477271, T2q, T29);
Chris@10 322 io[WS(os, 5)] = FNMS(KP600477271, T2q, T29);
Chris@10 323 T2P = FMA(KP520028571, T2K, T2J);
Chris@10 324 T2L = FNMS(KP520028571, T2K, T2J);
Chris@10 325 }
Chris@10 326 T2B = FMA(KP575140729, T2A, T2z);
Chris@10 327 T2H = FNMS(KP575140729, T2A, T2z);
Chris@10 328 io[WS(os, 11)] = FMA(KP575140729, T2Q, T2P);
Chris@10 329 io[WS(os, 6)] = FNMS(KP575140729, T2Q, T2P);
Chris@10 330 io[WS(os, 7)] = FMA(KP575140729, T2O, T2L);
Chris@10 331 io[WS(os, 2)] = FNMS(KP575140729, T2O, T2L);
Chris@10 332 T2I = FMA(KP968287244, T2F, T2C);
Chris@10 333 T2G = FNMS(KP968287244, T2F, T2C);
Chris@10 334 }
Chris@10 335 }
Chris@10 336 }
Chris@10 337 }
Chris@10 338 }
/* Last four io stores, built from the values carried out of the nested
 * block in T2B / T2H / T2I / T2G. */
Chris@10 339 io[WS(os, 10)] = FMA(KP520028571, T2I, T2H);
Chris@10 340 io[WS(os, 4)] = FNMS(KP520028571, T2I, T2H);
Chris@10 341 io[WS(os, 9)] = FMA(KP520028571, T2G, T2B);
Chris@10 342 io[WS(os, 3)] = FNMS(KP520028571, T2G, T2B);
Chris@10 343 }
Chris@10 344 }
Chris@10 345 }
Chris@10 346
/*
 * Descriptor handed to the registration call for the HAVE_FMA kernel.
 * 13 and "n1_13" match the generator's -n / -name options, and the first
 * three entries of {62, 0, 114, 0} repeat the "62 additions, 0
 * multiplications, 114 fused multiply/add" figures from the operation-count
 * comment above n1_13.  The meaning of the fourth count, of &GENUS and of the
 * trailing zero fields is fixed by the kdft_desc definition, which is not
 * visible here.
 */
Chris@10 347 static const kdft_desc desc = { 13, "n1_13", {62, 0, 114, 0}, &GENUS, 0, 0, 0, 0 };
Chris@10 348
/*
 * Registration entry point for the HAVE_FMA kernel: passes n1_13 and its
 * descriptor to X(kdft_register) for planner p.  X() is a project macro
 * (presumably symbol-name mangling -- definition not visible here).
 */
Chris@10 349 void X(codelet_n1_13) (planner *p) {
Chris@10 350 X(kdft_register) (p, n1_13, &desc);
Chris@10 351 }
Chris@10 352
Chris@10 353 #else /* HAVE_FMA */
Chris@10 354
Chris@10 355 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 13 -name n1_13 -include n.h */
Chris@10 356
Chris@10 357 /*
Chris@10 358 * This function contains 176 FP additions, 68 FP multiplications,
Chris@10 359 * (or, 138 additions, 30 multiplications, 38 fused multiply/add),
Chris@10 360 * 71 stack variables, 20 constants, and 52 memory accesses
Chris@10 361 */
Chris@10 362 #include "n.h"
Chris@10 363
/*
 * n1_13, variant compiled when HAVE_FMA is not defined.  Emitted by genfft's
 * gen_notw for n = 13 (see the "Generated by" line earlier in this branch of
 * the #ifdef).
 *
 * What the code visibly does, per loop iteration:
 *   - reads ri[0], ii[0] and ri[WS(is, k)], ii[WS(is, k)] for k = 1..12;
 *   - writes ro[0], io[0] and ro[WS(os, k)], io[WS(os, k)] for k = 1..12.
 * The loop runs while the counter, started at v, is > 0; after each pass
 * ri/ii are advanced by ivs and ro/io by ovs.
 *
 * NOTE(review): ri/ii and ro/io are presumably the real and imaginary parts
 * of the complex input and output of a 13-point DFT -- inferred from the
 * names, the file path and the descriptor; confirm against codelet-dft.h.
 * R, E, INT, stride, DK, WS, FMA, FNMS, FMS and MAKE_VOLATILE_STRIDE come
 * from headers that are not visible here.
 */
Chris@10 364 static void n1_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@10 365 {
/* Numeric constants used by the arithmetic below; DK is a project macro
 * (presumably declares a named constant -- definition not visible here). */
Chris@10 366 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@10 367 DK(KP083333333, +0.083333333333333333333333333333333333333333333);
Chris@10 368 DK(KP251768516, +0.251768516431883313623436926934233488546674281);
Chris@10 369 DK(KP075902986, +0.075902986037193865983102897245103540356428373);
Chris@10 370 DK(KP132983124, +0.132983124607418643793760531921092974399165133);
Chris@10 371 DK(KP258260390, +0.258260390311744861420450644284508567852516811);
Chris@10 372 DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
Chris@10 373 DK(KP300238635, +0.300238635966332641462884626667381504676006424);
Chris@10 374 DK(KP011599105, +0.011599105605768290721655456654083252189827041);
Chris@10 375 DK(KP156891391, +0.156891391051584611046832726756003269660212636);
Chris@10 376 DK(KP256247671, +0.256247671582936600958684654061725059144125175);
Chris@10 377 DK(KP174138601, +0.174138601152135905005660794929264742616964676);
Chris@10 378 DK(KP575140729, +0.575140729474003121368385547455453388461001608);
Chris@10 379 DK(KP503537032, +0.503537032863766627246873853868466977093348562);
Chris@10 380 DK(KP113854479, +0.113854479055790798974654345867655310534642560);
Chris@10 381 DK(KP265966249, +0.265966249214837287587521063842185948798330267);
Chris@10 382 DK(KP387390585, +0.387390585467617292130675966426762851778775217);
Chris@10 383 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@10 384 DK(KP300462606, +0.300462606288665774426601772289207995520941381);
Chris@10 385 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@10 386 {
Chris@10 387 INT i;
/* One pass per counter value from v down to 1; all four pointers are
 * stepped in the for-increment, so the body always indexes from the
 * current base of each array. */
Chris@10 388 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(52, is), MAKE_VOLATILE_STRIDE(52, os)) {
Chris@10 389 E T1, T1q, Tt, Tu, To, T22, T20, T24, TF, TH, TA, TI, T1X, T25, T2a;
Chris@10 390 E T2d, T18, T1n, T2k, T2n, T1l, T1r, T1f, T1o, T2h, T2m;
/* Element 0 of each input array is loaded on its own; it is used
 * directly in the ro[0] / io[0] stores further down. */
Chris@10 391 T1 = ri[0];
Chris@10 392 T1q = ii[0];
Chris@10 393 {
Chris@10 394 E Tf, Tp, Tb, TC, Tx, T6, TB, Tw, Ti, Tq, Tl, Tr, Tm, Ts, Td;
Chris@10 395 E Te, Tc, Tn;
/* Load ri[WS(is, 1..12)] and form sums/differences of them. */
Chris@10 396 Td = ri[WS(is, 8)];
Chris@10 397 Te = ri[WS(is, 5)];
Chris@10 398 Tf = Td + Te;
Chris@10 399 Tp = Td - Te;
Chris@10 400 {
Chris@10 401 E T7, T8, T9, Ta;
Chris@10 402 T7 = ri[WS(is, 12)];
Chris@10 403 T8 = ri[WS(is, 10)];
Chris@10 404 T9 = ri[WS(is, 4)];
Chris@10 405 Ta = T8 + T9;
Chris@10 406 Tb = T7 + Ta;
Chris@10 407 TC = T8 - T9;
Chris@10 408 Tx = FNMS(KP500000000, Ta, T7);
Chris@10 409 }
Chris@10 410 {
Chris@10 411 E T2, T3, T4, T5;
Chris@10 412 T2 = ri[WS(is, 1)];
Chris@10 413 T3 = ri[WS(is, 3)];
Chris@10 414 T4 = ri[WS(is, 9)];
Chris@10 415 T5 = T3 + T4;
Chris@10 416 T6 = T2 + T5;
Chris@10 417 TB = T3 - T4;
Chris@10 418 Tw = FNMS(KP500000000, T5, T2);
Chris@10 419 }
Chris@10 420 {
Chris@10 421 E Tg, Th, Tj, Tk;
Chris@10 422 Tg = ri[WS(is, 11)];
Chris@10 423 Th = ri[WS(is, 6)];
Chris@10 424 Ti = Tg + Th;
Chris@10 425 Tq = Tg - Th;
Chris@10 426 Tj = ri[WS(is, 7)];
Chris@10 427 Tk = ri[WS(is, 2)];
Chris@10 428 Tl = Tj + Tk;
Chris@10 429 Tr = Tj - Tk;
Chris@10 430 }
Chris@10 431 Tm = Ti + Tl;
Chris@10 432 Ts = Tq + Tr;
Chris@10 433 Tt = Tp + Ts;
Chris@10 434 Tu = T6 - Tb;
Chris@10 435 Tc = T6 + Tb;
Chris@10 436 Tn = Tf + Tm;
Chris@10 437 To = Tc + Tn;
Chris@10 438 T22 = KP300462606 * (Tc - Tn);
Chris@10 439 {
Chris@10 440 E T1Y, T1Z, TD, TE;
Chris@10 441 T1Y = TB + TC;
Chris@10 442 T1Z = Tq - Tr;
Chris@10 443 T20 = T1Y - T1Z;
Chris@10 444 T24 = T1Y + T1Z;
Chris@10 445 TD = KP866025403 * (TB - TC);
Chris@10 446 TE = FNMS(KP500000000, Ts, Tp);
Chris@10 447 TF = TD - TE;
Chris@10 448 TH = TD + TE;
Chris@10 449 }
Chris@10 450 {
Chris@10 451 E Ty, Tz, T1V, T1W;
Chris@10 452 Ty = Tw - Tx;
Chris@10 453 Tz = KP866025403 * (Ti - Tl);
Chris@10 454 TA = Ty + Tz;
Chris@10 455 TI = Ty - Tz;
Chris@10 456 T1V = Tw + Tx;
Chris@10 457 T1W = FNMS(KP500000000, Tm, Tf);
Chris@10 458 T1X = T1V - T1W;
Chris@10 459 T25 = T1V + T1W;
Chris@10 460 }
Chris@10 461 }
Chris@10 462 {
Chris@10 463 E TZ, T2b, TV, T1i, T1a, TQ, T1h, T19, T12, T1d, T15, T1c, T16, T2c, TX;
Chris@10 464 E TY, TW, T17;
/* Load ii[WS(is, 1..12)] and form sums/differences of them. */
Chris@10 465 TX = ii[WS(is, 8)];
Chris@10 466 TY = ii[WS(is, 5)];
Chris@10 467 TZ = TX + TY;
Chris@10 468 T2b = TX - TY;
Chris@10 469 {
Chris@10 470 E TR, TS, TT, TU;
Chris@10 471 TR = ii[WS(is, 12)];
Chris@10 472 TS = ii[WS(is, 10)];
Chris@10 473 TT = ii[WS(is, 4)];
Chris@10 474 TU = TS + TT;
Chris@10 475 TV = FNMS(KP500000000, TU, TR);
Chris@10 476 T1i = TR + TU;
Chris@10 477 T1a = TS - TT;
Chris@10 478 }
Chris@10 479 {
Chris@10 480 E TM, TN, TO, TP;
Chris@10 481 TM = ii[WS(is, 1)];
Chris@10 482 TN = ii[WS(is, 3)];
Chris@10 483 TO = ii[WS(is, 9)];
Chris@10 484 TP = TN + TO;
Chris@10 485 TQ = FNMS(KP500000000, TP, TM);
Chris@10 486 T1h = TM + TP;
Chris@10 487 T19 = TN - TO;
Chris@10 488 }
Chris@10 489 {
Chris@10 490 E T10, T11, T13, T14;
Chris@10 491 T10 = ii[WS(is, 11)];
Chris@10 492 T11 = ii[WS(is, 6)];
Chris@10 493 T12 = T10 + T11;
Chris@10 494 T1d = T10 - T11;
Chris@10 495 T13 = ii[WS(is, 7)];
Chris@10 496 T14 = ii[WS(is, 2)];
Chris@10 497 T15 = T13 + T14;
Chris@10 498 T1c = T13 - T14;
Chris@10 499 }
Chris@10 500 T16 = T12 + T15;
Chris@10 501 T2c = T1d + T1c;
Chris@10 502 T2a = T1h - T1i;
Chris@10 503 T2d = T2b + T2c;
Chris@10 504 TW = TQ + TV;
Chris@10 505 T17 = FNMS(KP500000000, T16, TZ);
Chris@10 506 T18 = TW - T17;
Chris@10 507 T1n = TW + T17;
Chris@10 508 {
Chris@10 509 E T2i, T2j, T1j, T1k;
Chris@10 510 T2i = TQ - TV;
Chris@10 511 T2j = KP866025403 * (T15 - T12);
Chris@10 512 T2k = T2i + T2j;
Chris@10 513 T2n = T2i - T2j;
Chris@10 514 T1j = T1h + T1i;
Chris@10 515 T1k = TZ + T16;
Chris@10 516 T1l = KP300462606 * (T1j - T1k);
Chris@10 517 T1r = T1j + T1k;
Chris@10 518 }
Chris@10 519 {
Chris@10 520 E T1b, T1e, T2f, T2g;
Chris@10 521 T1b = T19 + T1a;
Chris@10 522 T1e = T1c - T1d;
Chris@10 523 T1f = T1b + T1e;
Chris@10 524 T1o = T1e - T1b;
Chris@10 525 T2f = FNMS(KP500000000, T2c, T2b);
Chris@10 526 T2g = KP866025403 * (T1a - T19);
Chris@10 527 T2h = T2f - T2g;
Chris@10 528 T2m = T2g + T2f;
Chris@10 529 }
Chris@10 530 }
/* ro[0] and io[0] are element 0 plus the sum of the other twelve inputs
 * of the same array: To accumulates ri[WS(is, 1..12)] and T1r
 * accumulates ii[WS(is, 1..12)]. */
Chris@10 531 ro[0] = T1 + To;
Chris@10 532 io[0] = T1q + T1r;
/* This block produces the twelve stores io[WS(os, 1..12)]. */
Chris@10 533 {
Chris@10 534 E T1D, T1N, T1y, T1x, T1E, T1O, Tv, TK, T1J, T1Q, T1m, T1R, T1t, T1I, TG;
Chris@10 535 E TJ;
Chris@10 536 {
Chris@10 537 E T1B, T1C, T1v, T1w;
Chris@10 538 T1B = FMA(KP387390585, T1f, KP265966249 * T18);
Chris@10 539 T1C = FMA(KP113854479, T1o, KP503537032 * T1n);
Chris@10 540 T1D = T1B + T1C;
Chris@10 541 T1N = T1C - T1B;
Chris@10 542 T1y = FMA(KP575140729, Tu, KP174138601 * Tt);
Chris@10 543 T1v = FNMS(KP156891391, TH, KP256247671 * TI);
Chris@10 544 T1w = FMA(KP011599105, TF, KP300238635 * TA);
Chris@10 545 T1x = T1v - T1w;
Chris@10 546 T1E = T1y + T1x;
Chris@10 547 T1O = KP1_732050807 * (T1v + T1w);
Chris@10 548 }
Chris@10 549 Tv = FNMS(KP174138601, Tu, KP575140729 * Tt);
Chris@10 550 TG = FNMS(KP300238635, TF, KP011599105 * TA);
Chris@10 551 TJ = FMA(KP256247671, TH, KP156891391 * TI);
Chris@10 552 TK = TG - TJ;
Chris@10 553 T1J = KP1_732050807 * (TJ + TG);
Chris@10 554 T1Q = Tv - TK;
Chris@10 555 {
Chris@10 556 E T1g, T1H, T1p, T1s, T1G;
Chris@10 557 T1g = FNMS(KP132983124, T1f, KP258260390 * T18);
Chris@10 558 T1H = T1l - T1g;
Chris@10 559 T1p = FNMS(KP251768516, T1o, KP075902986 * T1n);
Chris@10 560 T1s = FNMS(KP083333333, T1r, T1q);
Chris@10 561 T1G = T1s - T1p;
Chris@10 562 T1m = FMA(KP2_000000000, T1g, T1l);
Chris@10 563 T1R = T1H + T1G;
Chris@10 564 T1t = FMA(KP2_000000000, T1p, T1s);
Chris@10 565 T1I = T1G - T1H;
Chris@10 566 }
Chris@10 567 {
Chris@10 568 E TL, T1u, T1P, T1S;
Chris@10 569 TL = FMA(KP2_000000000, TK, Tv);
Chris@10 570 T1u = T1m + T1t;
Chris@10 571 io[WS(os, 1)] = TL + T1u;
Chris@10 572 io[WS(os, 12)] = T1u - TL;
Chris@10 573 {
Chris@10 574 E T1z, T1A, T1T, T1U;
Chris@10 575 T1z = FMS(KP2_000000000, T1x, T1y);
Chris@10 576 T1A = T1t - T1m;
Chris@10 577 io[WS(os, 5)] = T1z + T1A;
Chris@10 578 io[WS(os, 8)] = T1A - T1z;
Chris@10 579 T1T = T1R - T1Q;
Chris@10 580 T1U = T1O + T1N;
Chris@10 581 io[WS(os, 4)] = T1T - T1U;
Chris@10 582 io[WS(os, 10)] = T1U + T1T;
Chris@10 583 }
Chris@10 584 T1P = T1N - T1O;
Chris@10 585 T1S = T1Q + T1R;
Chris@10 586 io[WS(os, 3)] = T1P + T1S;
Chris@10 587 io[WS(os, 9)] = T1S - T1P;
Chris@10 588 {
Chris@10 589 E T1L, T1M, T1F, T1K;
Chris@10 590 T1L = T1J + T1I;
Chris@10 591 T1M = T1E + T1D;
Chris@10 592 io[WS(os, 6)] = T1L - T1M;
Chris@10 593 io[WS(os, 11)] = T1M + T1L;
Chris@10 594 T1F = T1D - T1E;
Chris@10 595 T1K = T1I - T1J;
Chris@10 596 io[WS(os, 2)] = T1F + T1K;
Chris@10 597 io[WS(os, 7)] = T1K - T1F;
Chris@10 598 }
Chris@10 599 }
Chris@10 600 }
/* This block produces the twelve stores ro[WS(os, 1..12)]. */
Chris@10 601 {
Chris@10 602 E T2y, T2I, T2J, T2K, T2B, T2L, T2e, T2p, T2u, T2G, T23, T2F, T28, T2t, T2l;
Chris@10 603 E T2o;
Chris@10 604 {
Chris@10 605 E T2w, T2x, T2z, T2A;
Chris@10 606 T2w = FMA(KP387390585, T20, KP265966249 * T1X);
Chris@10 607 T2x = FNMS(KP503537032, T25, KP113854479 * T24);
Chris@10 608 T2y = T2w + T2x;
Chris@10 609 T2I = T2w - T2x;
Chris@10 610 T2J = FMA(KP575140729, T2a, KP174138601 * T2d);
Chris@10 611 T2z = FNMS(KP300238635, T2n, KP011599105 * T2m);
Chris@10 612 T2A = FNMS(KP156891391, T2h, KP256247671 * T2k);
Chris@10 613 T2K = T2z + T2A;
Chris@10 614 T2B = KP1_732050807 * (T2z - T2A);
Chris@10 615 T2L = T2J + T2K;
Chris@10 616 }
Chris@10 617 T2e = FNMS(KP575140729, T2d, KP174138601 * T2a);
Chris@10 618 T2l = FMA(KP256247671, T2h, KP156891391 * T2k);
Chris@10 619 T2o = FMA(KP300238635, T2m, KP011599105 * T2n);
Chris@10 620 T2p = T2l - T2o;
Chris@10 621 T2u = T2e - T2p;
Chris@10 622 T2G = KP1_732050807 * (T2o + T2l);
Chris@10 623 {
Chris@10 624 E T21, T2r, T26, T27, T2s;
Chris@10 625 T21 = FNMS(KP132983124, T20, KP258260390 * T1X);
Chris@10 626 T2r = T22 - T21;
Chris@10 627 T26 = FMA(KP251768516, T24, KP075902986 * T25);
Chris@10 628 T27 = FNMS(KP083333333, To, T1);
Chris@10 629 T2s = T27 - T26;
Chris@10 630 T23 = FMA(KP2_000000000, T21, T22);
Chris@10 631 T2F = T2s - T2r;
Chris@10 632 T28 = FMA(KP2_000000000, T26, T27);
Chris@10 633 T2t = T2r + T2s;
Chris@10 634 }
Chris@10 635 {
Chris@10 636 E T29, T2q, T2N, T2O;
Chris@10 637 T29 = T23 + T28;
Chris@10 638 T2q = FMA(KP2_000000000, T2p, T2e);
Chris@10 639 ro[WS(os, 12)] = T29 - T2q;
Chris@10 640 ro[WS(os, 1)] = T29 + T2q;
Chris@10 641 {
Chris@10 642 E T2v, T2C, T2P, T2Q;
Chris@10 643 T2v = T2t - T2u;
Chris@10 644 T2C = T2y - T2B;
Chris@10 645 ro[WS(os, 10)] = T2v - T2C;
Chris@10 646 ro[WS(os, 4)] = T2v + T2C;
Chris@10 647 T2P = T28 - T23;
Chris@10 648 T2Q = FMS(KP2_000000000, T2K, T2J);
Chris@10 649 ro[WS(os, 5)] = T2P - T2Q;
Chris@10 650 ro[WS(os, 8)] = T2P + T2Q;
Chris@10 651 }
Chris@10 652 T2N = T2F - T2G;
Chris@10 653 T2O = T2L - T2I;
Chris@10 654 ro[WS(os, 11)] = T2N - T2O;
Chris@10 655 ro[WS(os, 6)] = T2N + T2O;
Chris@10 656 {
Chris@10 657 E T2H, T2M, T2D, T2E;
Chris@10 658 T2H = T2F + T2G;
Chris@10 659 T2M = T2I + T2L;
Chris@10 660 ro[WS(os, 7)] = T2H - T2M;
Chris@10 661 ro[WS(os, 2)] = T2H + T2M;
Chris@10 662 T2D = T2t + T2u;
Chris@10 663 T2E = T2y + T2B;
Chris@10 664 ro[WS(os, 3)] = T2D - T2E;
Chris@10 665 ro[WS(os, 9)] = T2D + T2E;
Chris@10 666 }
Chris@10 667 }
Chris@10 668 }
Chris@10 669 }
Chris@10 670 }
Chris@10 671 }
Chris@10 672
/*
 * Descriptor handed to the registration call for the non-FMA kernel.
 * 13 and "n1_13" match the generator's -n / -name options, and the first
 * three entries of {138, 30, 38, 0} repeat the "138 additions, 30
 * multiplications, 38 fused multiply/add" figures from the operation-count
 * comment above n1_13.  The meaning of the fourth count, of &GENUS and of the
 * trailing zero fields is fixed by the kdft_desc definition, which is not
 * visible here.
 */
Chris@10 673 static const kdft_desc desc = { 13, "n1_13", {138, 30, 38, 0}, &GENUS, 0, 0, 0, 0 };
Chris@10 674
/*
 * Registration entry point for the non-FMA kernel: passes n1_13 and its
 * descriptor to X(kdft_register) for planner p.  X() is a project macro
 * (presumably symbol-name mangling -- definition not visible here).
 */
Chris@10 675 void X(codelet_n1_13) (planner *p) {
Chris@10 676 X(kdft_register) (p, n1_13, &desc);
Chris@10 677 }
Chris@10 678
Chris@10 679 #endif /* HAVE_FMA */