annotate src/fftw-3.3.5/rdft/scalar/r2cb/r2cbIII_64.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:50:47 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-rdft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_r2cb.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cbIII_64 -dft-III -include r2cbIII.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 434 FP additions, 260 FP multiplications,
Chris@42 32 * (or, 238 additions, 64 multiplications, 196 fused multiply/add),
Chris@42 33 * 165 stack variables, 36 constants, and 128 memory accesses
Chris@42 34 */
Chris@42 35 #include "r2cbIII.h"
Chris@42 36
/*
 * r2cbIII_64 -- HAVE_FMA build of the generated size-64 kernel (see the
 * "Generated by: ... gen_r2cb.native ... -n 64 ... -dft-III" line above).
 *
 * Each loop iteration reads 32 values from Cr (offsets WS(csr, 0..31)) and
 * 32 values from Ci (offsets WS(csi, 0..31)), and stores 32 values into R0
 * and 32 values into R1 (offsets WS(rs, 0..31)); offset 0 is written as a
 * plain [0] index.
 *
 * Parameters:
 *   R0, R1    output arrays, indexed through stride rs; advanced by ovs
 *             after every iteration.
 *   Cr, Ci    input arrays, indexed through strides csr and csi; advanced
 *             by ivs after every iteration.
 *   v         number of loop iterations (the loop counts down from v to 1).
 *   ivs, ovs  per-iteration pointer increments for inputs / outputs.
 *
 * Returns nothing; the only visible effect is the stores to R0 and R1.
 *
 * NOTE(review): R, E, INT, stride, WS, DK, FMA, FNMS and
 * MAKE_VOLATILE_STRIDE come from the included project headers and are not
 * visible here. FMA(a, b, c) / FNMS(a, b, c) are presumably a*b + c and
 * c - a*b -- confirm against the headers.
 */
Chris@42 37 static void r2cbIII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 38 {
/* Constants declared through DK; each name spells the leading digits of its
 * value (KP707106781 = 0.707106781..., KP1_414213562 = 1.414213562...). */
Chris@42 39 DK(KP357805721, +0.357805721314524104672487743774474392487532769);
Chris@42 40 DK(KP1_883088130, +1.883088130366041556825018805199004714371179592);
Chris@42 41 DK(KP472964775, +0.472964775891319928124438237972992463904131113);
Chris@42 42 DK(KP1_807978586, +1.807978586246886663172400594461074097420264050);
Chris@42 43 DK(KP049126849, +0.049126849769467254105343321271313617079695752);
Chris@42 44 DK(KP1_997590912, +1.997590912410344785429543209518201388886407229);
Chris@42 45 DK(KP906347169, +0.906347169019147157946142717268914412664134293);
Chris@42 46 DK(KP1_481902250, +1.481902250709918182351233794990325459457910619);
Chris@42 47 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 48 DK(KP250486960, +0.250486960191305461595702160124721208578685568);
Chris@42 49 DK(KP1_940062506, +1.940062506389087985207968414572200502913731924);
Chris@42 50 DK(KP599376933, +0.599376933681923766271389869014404232837890546);
Chris@42 51 DK(KP1_715457220, +1.715457220000544139804539968569540274084981599);
Chris@42 52 DK(KP148335987, +0.148335987538347428753676511486911367000625355);
Chris@42 53 DK(KP1_978353019, +1.978353019929561946903347476032486127967379067);
Chris@42 54 DK(KP741650546, +0.741650546272035369581266691172079863842265220);
Chris@42 55 DK(KP1_606415062, +1.606415062961289819613353025926283847759138854);
Chris@42 56 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 57 DK(KP303346683, +0.303346683607342391675883946941299872384187453);
Chris@42 58 DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
Chris@42 59 DK(KP534511135, +0.534511135950791641089685961295362908582039528);
Chris@42 60 DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
Chris@42 61 DK(KP098491403, +0.098491403357164253077197521291327432293052451);
Chris@42 62 DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
Chris@42 63 DK(KP820678790, +0.820678790828660330972281985331011598767386482);
Chris@42 64 DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
Chris@42 65 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 66 DK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 67 DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
Chris@42 68 DK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 69 DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
Chris@42 70 DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
Chris@42 71 DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
Chris@42 72 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 73 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 74 DK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 75 {
Chris@42 76 INT i;
/* One pass per iteration: the increment clause steps R0/R1 by ovs and Cr/Ci
 * by ivs. The effect of MAKE_VOLATILE_STRIDE is not visible from this file. */
Chris@42 77 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
/* These eight temporaries are declared at loop scope because they are
 * assigned deep inside the nested block below and consumed by the final
 * group of R1 stores at the end of the loop body. */
Chris@42 78 E T43, T4b, T49, T4e, T3T, T46, T40, T4a;
Chris@42 79 {
Chris@42 80 E T3t, T15, T2E, T3U, T6b, Tf, T6Q, T6u, T5J, T4L, T3V, T1g, T5U, T5q, T3u;
Chris@42 81 E T2H, T6v, Tu, T5r, T4V, T6R, T6e, T2K, T1s, T2J, T1D, T3X, T3B, T5s, T4Q;
Chris@42 82 E T3Y, T3y, T6g, TK, T5M, T57, T6N, T6j, T35, T1W, T34, T25, T4i, T3J, T5N;
Chris@42 83 E T52, T4j, T3G, T6l, TZ, T3L, T5P, T5i, T6M, T6o, T3M, T38, T2n, T37, T2w;
Chris@42 84 E T4l, T3Q, T5Q, T5d;
Chris@42 85 {
Chris@42 86 E T3x, T3w, T3E, T3F;
Chris@42 87 {
Chris@42 88 E T5p, T5o, T2G, T2F;
Chris@42 89 {
Chris@42 90 E T11, T3, T5m, T2D, T2A, T6, T5n, T14, Tb, T16, Ta, T4I, T19, Tc, T1c;
Chris@42 91 E T1d;
Chris@42 92 {
Chris@42 93 E T4, T5, T12, T13;
Chris@42 94 {
Chris@42 95 E T1, T2, T2B, T2C;
/* Input stage: values are loaded from Cr/Ci in pairs and each pair is
 * combined into a sum and a difference, interleaved with later loads. */
Chris@42 96 T1 = Cr[0];
Chris@42 97 T2 = Cr[WS(csr, 31)];
Chris@42 98 T2B = Ci[0];
Chris@42 99 T2C = Ci[WS(csi, 31)];
Chris@42 100 T4 = Cr[WS(csr, 16)];
Chris@42 101 T11 = T1 - T2;
Chris@42 102 T3 = T1 + T2;
Chris@42 103 T5m = T2C - T2B;
Chris@42 104 T2D = T2B + T2C;
Chris@42 105 T5 = Cr[WS(csr, 15)];
Chris@42 106 T12 = Ci[WS(csi, 16)];
Chris@42 107 T13 = Ci[WS(csi, 15)];
Chris@42 108 }
Chris@42 109 {
Chris@42 110 E T8, T9, T17, T18;
Chris@42 111 T8 = Cr[WS(csr, 8)];
Chris@42 112 T2A = T4 - T5;
Chris@42 113 T6 = T4 + T5;
Chris@42 114 T5n = T13 - T12;
Chris@42 115 T14 = T12 + T13;
Chris@42 116 T9 = Cr[WS(csr, 23)];
Chris@42 117 T17 = Ci[WS(csi, 8)];
Chris@42 118 T18 = Ci[WS(csi, 23)];
Chris@42 119 Tb = Cr[WS(csr, 7)];
Chris@42 120 T16 = T8 - T9;
Chris@42 121 Ta = T8 + T9;
Chris@42 122 T4I = T18 - T17;
Chris@42 123 T19 = T17 + T18;
Chris@42 124 Tc = Cr[WS(csr, 24)];
Chris@42 125 T1c = Ci[WS(csi, 7)];
Chris@42 126 T1d = Ci[WS(csi, 24)];
Chris@42 127 }
Chris@42 128 }
Chris@42 129 {
Chris@42 130 E T1b, T4J, T1e, T4H, T7, Te, Td;
Chris@42 131 T3t = T11 + T14;
Chris@42 132 T15 = T11 - T14;
Chris@42 133 T1b = Tb - Tc;
Chris@42 134 Td = Tb + Tc;
Chris@42 135 T4J = T1c - T1d;
Chris@42 136 T1e = T1c + T1d;
Chris@42 137 T2E = T2A + T2D;
Chris@42 138 T3U = T2A - T2D;
Chris@42 139 T4H = T3 - T6;
Chris@42 140 T7 = T3 + T6;
Chris@42 141 Te = Ta + Td;
Chris@42 142 T5p = Ta - Td;
Chris@42 143 {
Chris@42 144 E T4K, T6s, T6t, T1a, T1f;
Chris@42 145 T5o = T5m - T5n;
Chris@42 146 T6s = T5n + T5m;
Chris@42 147 T6t = T4I + T4J;
Chris@42 148 T4K = T4I - T4J;
Chris@42 149 T6b = T7 - Te;
Chris@42 150 Tf = T7 + Te;
Chris@42 151 T6Q = T6t + T6s;
Chris@42 152 T6u = T6s - T6t;
Chris@42 153 T2G = T16 + T19;
Chris@42 154 T1a = T16 - T19;
Chris@42 155 T1f = T1b - T1e;
Chris@42 156 T2F = T1b + T1e;
Chris@42 157 T5J = T4H - T4K;
Chris@42 158 T4L = T4H + T4K;
Chris@42 159 T3V = T1a - T1f;
Chris@42 160 T1g = T1a + T1f;
Chris@42 161 }
Chris@42 162 }
Chris@42 163 }
Chris@42 164 {
Chris@42 165 E T1i, Ti, T4O, T1q, T1n, Tl, T4N, T1l, Tq, T1t, Tp, T4T, T1A, Tr, T1u;
Chris@42 166 E T1v;
Chris@42 167 {
Chris@42 168 E Tj, Tk, T1j, T1k;
Chris@42 169 {
Chris@42 170 E Tg, Th, T1o, T1p;
Chris@42 171 Tg = Cr[WS(csr, 4)];
Chris@42 172 T5U = T5p + T5o;
Chris@42 173 T5q = T5o - T5p;
Chris@42 174 T3u = T2G + T2F;
Chris@42 175 T2H = T2F - T2G;
Chris@42 176 Th = Cr[WS(csr, 27)];
Chris@42 177 T1o = Ci[WS(csi, 4)];
Chris@42 178 T1p = Ci[WS(csi, 27)];
Chris@42 179 Tj = Cr[WS(csr, 20)];
Chris@42 180 T1i = Tg - Th;
Chris@42 181 Ti = Tg + Th;
Chris@42 182 T4O = T1p - T1o;
Chris@42 183 T1q = T1o + T1p;
Chris@42 184 Tk = Cr[WS(csr, 11)];
Chris@42 185 T1j = Ci[WS(csi, 20)];
Chris@42 186 T1k = Ci[WS(csi, 11)];
Chris@42 187 }
Chris@42 188 {
Chris@42 189 E Tn, To, T1y, T1z;
Chris@42 190 Tn = Cr[WS(csr, 3)];
Chris@42 191 T1n = Tj - Tk;
Chris@42 192 Tl = Tj + Tk;
Chris@42 193 T4N = T1k - T1j;
Chris@42 194 T1l = T1j + T1k;
Chris@42 195 To = Cr[WS(csr, 28)];
Chris@42 196 T1y = Ci[WS(csi, 3)];
Chris@42 197 T1z = Ci[WS(csi, 28)];
Chris@42 198 Tq = Cr[WS(csr, 12)];
Chris@42 199 T1t = Tn - To;
Chris@42 200 Tp = Tn + To;
Chris@42 201 T4T = T1y - T1z;
Chris@42 202 T1A = T1y + T1z;
Chris@42 203 Tr = Cr[WS(csr, 19)];
Chris@42 204 T1u = Ci[WS(csi, 12)];
Chris@42 205 T1v = Ci[WS(csi, 19)];
Chris@42 206 }
Chris@42 207 }
Chris@42 208 {
Chris@42 209 E T4M, T1B, T1w, T4P, T1m, T1r, Tm, Ts, T4S;
Chris@42 210 T4M = Ti - Tl;
Chris@42 211 Tm = Ti + Tl;
Chris@42 212 T1B = Tq - Tr;
Chris@42 213 Ts = Tq + Tr;
Chris@42 214 T4S = T1v - T1u;
Chris@42 215 T1w = T1u + T1v;
Chris@42 216 {
Chris@42 217 E T6c, Tt, T4R, T6d, T4U;
Chris@42 218 T6c = T4N + T4O;
Chris@42 219 T4P = T4N - T4O;
Chris@42 220 Tt = Tp + Ts;
Chris@42 221 T4R = Tp - Ts;
Chris@42 222 T6d = T4S + T4T;
Chris@42 223 T4U = T4S - T4T;
Chris@42 224 T3x = T1i + T1l;
Chris@42 225 T1m = T1i - T1l;
Chris@42 226 T6v = Tm - Tt;
Chris@42 227 Tu = Tm + Tt;
Chris@42 228 T5r = T4R - T4U;
Chris@42 229 T4V = T4R + T4U;
Chris@42 230 T6R = T6c + T6d;
Chris@42 231 T6e = T6c - T6d;
Chris@42 232 T1r = T1n + T1q;
Chris@42 233 T3w = T1n - T1q;
Chris@42 234 }
Chris@42 235 {
Chris@42 236 E T3A, T3z, T1x, T1C;
Chris@42 237 T3A = T1t + T1w;
Chris@42 238 T1x = T1t - T1w;
Chris@42 239 T1C = T1A - T1B;
Chris@42 240 T3z = T1B + T1A;
Chris@42 241 T2K = FMA(KP414213562, T1m, T1r);
Chris@42 242 T1s = FNMS(KP414213562, T1r, T1m);
Chris@42 243 T2J = FMA(KP414213562, T1x, T1C);
Chris@42 244 T1D = FNMS(KP414213562, T1C, T1x);
Chris@42 245 T3X = FMA(KP414213562, T3z, T3A);
Chris@42 246 T3B = FNMS(KP414213562, T3A, T3z);
Chris@42 247 T5s = T4M + T4P;
Chris@42 248 T4Q = T4M - T4P;
Chris@42 249 }
Chris@42 250 }
Chris@42 251 }
Chris@42 252 }
Chris@42 253 {
Chris@42 254 E T1G, Ty, T54, T20, T1X, TB, T53, T1J, TI, T4Z, T1L, TF, T22, T1U, T50;
Chris@42 255 E T1O;
Chris@42 256 {
Chris@42 257 E T1Y, T1Z, Tz, TA, Tw, Tx, T1H, T1I;
Chris@42 258 Tw = Cr[WS(csr, 2)];
Chris@42 259 Tx = Cr[WS(csr, 29)];
Chris@42 260 T1Y = Ci[WS(csi, 2)];
Chris@42 261 T3Y = FNMS(KP414213562, T3w, T3x);
Chris@42 262 T3y = FMA(KP414213562, T3x, T3w);
Chris@42 263 T1G = Tw - Tx;
Chris@42 264 Ty = Tw + Tx;
Chris@42 265 T1Z = Ci[WS(csi, 29)];
Chris@42 266 Tz = Cr[WS(csr, 18)];
Chris@42 267 TA = Cr[WS(csr, 13)];
Chris@42 268 T1H = Ci[WS(csi, 18)];
Chris@42 269 T54 = T1Y - T1Z;
Chris@42 270 T20 = T1Y + T1Z;
Chris@42 271 T1X = Tz - TA;
Chris@42 272 TB = Tz + TA;
Chris@42 273 T1I = Ci[WS(csi, 13)];
Chris@42 274 {
Chris@42 275 E T1R, T1Q, T1S, TG, TH;
Chris@42 276 TG = Cr[WS(csr, 5)];
Chris@42 277 TH = Cr[WS(csr, 26)];
Chris@42 278 T1R = Ci[WS(csi, 5)];
Chris@42 279 T53 = T1H - T1I;
Chris@42 280 T1J = T1H + T1I;
Chris@42 281 T1Q = TG - TH;
Chris@42 282 TI = TG + TH;
Chris@42 283 T1S = Ci[WS(csi, 26)];
Chris@42 284 {
Chris@42 285 E T1M, T1N, TD, TE, T1T;
Chris@42 286 TD = Cr[WS(csr, 10)];
Chris@42 287 TE = Cr[WS(csr, 21)];
Chris@42 288 T1T = T1R + T1S;
Chris@42 289 T4Z = T1S - T1R;
Chris@42 290 T1M = Ci[WS(csi, 10)];
Chris@42 291 T1L = TD - TE;
Chris@42 292 TF = TD + TE;
Chris@42 293 T1N = Ci[WS(csi, 21)];
Chris@42 294 T22 = T1Q + T1T;
Chris@42 295 T1U = T1Q - T1T;
Chris@42 296 T50 = T1M - T1N;
Chris@42 297 T1O = T1M + T1N;
Chris@42 298 }
Chris@42 299 }
Chris@42 300 }
Chris@42 301 {
Chris@42 302 E T4Y, T23, T51, T1K, T1V, T3I, T3H, T21, T24;
Chris@42 303 {
Chris@42 304 E T56, T1P, T6h, T55, TC, TJ, T6i;
Chris@42 305 T4Y = Ty - TB;
Chris@42 306 TC = Ty + TB;
Chris@42 307 TJ = TF + TI;
Chris@42 308 T56 = TF - TI;
Chris@42 309 T1P = T1L - T1O;
Chris@42 310 T23 = T1L + T1O;
Chris@42 311 T6h = T53 + T54;
Chris@42 312 T55 = T53 - T54;
Chris@42 313 T6g = TC - TJ;
Chris@42 314 TK = TC + TJ;
Chris@42 315 T6i = T50 + T4Z;
Chris@42 316 T51 = T4Z - T50;
Chris@42 317 T3E = T1G + T1J;
Chris@42 318 T1K = T1G - T1J;
Chris@42 319 T5M = T56 + T55;
Chris@42 320 T57 = T55 - T56;
Chris@42 321 T6N = T6i + T6h;
Chris@42 322 T6j = T6h - T6i;
Chris@42 323 T1V = T1P + T1U;
Chris@42 324 T3I = T1P - T1U;
Chris@42 325 }
Chris@42 326 T3H = T1X - T20;
Chris@42 327 T21 = T1X + T20;
Chris@42 328 T24 = T22 - T23;
Chris@42 329 T3F = T23 + T22;
Chris@42 330 T35 = FNMS(KP707106781, T1V, T1K);
Chris@42 331 T1W = FMA(KP707106781, T1V, T1K);
Chris@42 332 T34 = FMA(KP707106781, T24, T21);
Chris@42 333 T25 = FNMS(KP707106781, T24, T21);
Chris@42 334 T4i = FMA(KP707106781, T3I, T3H);
Chris@42 335 T3J = FNMS(KP707106781, T3I, T3H);
Chris@42 336 T5N = T4Y - T51;
Chris@42 337 T52 = T4Y + T51;
Chris@42 338 }
Chris@42 339 }
Chris@42 340 {
Chris@42 341 E T27, TN, T5f, T2q, T2r, TQ, T5e, T2a, TX, T5a, T2c, TU, T2t, T2l, T5b;
Chris@42 342 E T2f;
Chris@42 343 {
Chris@42 344 E T2o, T2p, TO, TP, TL, TM, T28, T29;
Chris@42 345 TL = Cr[WS(csr, 1)];
Chris@42 346 TM = Cr[WS(csr, 30)];
Chris@42 347 T2o = Ci[WS(csi, 1)];
Chris@42 348 T4j = FMA(KP707106781, T3F, T3E);
Chris@42 349 T3G = FNMS(KP707106781, T3F, T3E);
Chris@42 350 T27 = TL - TM;
Chris@42 351 TN = TL + TM;
Chris@42 352 T2p = Ci[WS(csi, 30)];
Chris@42 353 TO = Cr[WS(csr, 14)];
Chris@42 354 TP = Cr[WS(csr, 17)];
Chris@42 355 T28 = Ci[WS(csi, 14)];
Chris@42 356 T5f = T2p - T2o;
Chris@42 357 T2q = T2o + T2p;
Chris@42 358 T2r = TO - TP;
Chris@42 359 TQ = TO + TP;
Chris@42 360 T29 = Ci[WS(csi, 17)];
Chris@42 361 {
Chris@42 362 E T2i, T2h, T2j, TV, TW;
Chris@42 363 TV = Cr[WS(csr, 9)];
Chris@42 364 TW = Cr[WS(csr, 22)];
Chris@42 365 T2i = Ci[WS(csi, 9)];
Chris@42 366 T5e = T28 - T29;
Chris@42 367 T2a = T28 + T29;
Chris@42 368 T2h = TV - TW;
Chris@42 369 TX = TV + TW;
Chris@42 370 T2j = Ci[WS(csi, 22)];
Chris@42 371 {
Chris@42 372 E T2d, T2e, TS, TT, T2k;
Chris@42 373 TS = Cr[WS(csr, 6)];
Chris@42 374 TT = Cr[WS(csr, 25)];
Chris@42 375 T2k = T2i + T2j;
Chris@42 376 T5a = T2j - T2i;
Chris@42 377 T2d = Ci[WS(csi, 6)];
Chris@42 378 T2c = TS - TT;
Chris@42 379 TU = TS + TT;
Chris@42 380 T2e = Ci[WS(csi, 25)];
Chris@42 381 T2t = T2h + T2k;
Chris@42 382 T2l = T2h - T2k;
Chris@42 383 T5b = T2d - T2e;
Chris@42 384 T2f = T2d + T2e;
Chris@42 385 }
Chris@42 386 }
Chris@42 387 }
Chris@42 388 {
Chris@42 389 E T59, T2u, T5c, T2b, T2m, T3P, T3O, T2s, T2v;
Chris@42 390 {
Chris@42 391 E T5h, T2g, T6m, T5g, TR, TY, T6n;
Chris@42 392 T59 = TN - TQ;
Chris@42 393 TR = TN + TQ;
Chris@42 394 TY = TU + TX;
Chris@42 395 T5h = TU - TX;
Chris@42 396 T2g = T2c - T2f;
Chris@42 397 T2u = T2c + T2f;
Chris@42 398 T6m = T5e + T5f;
Chris@42 399 T5g = T5e - T5f;
Chris@42 400 T6l = TR - TY;
Chris@42 401 TZ = TR + TY;
Chris@42 402 T6n = T5b + T5a;
Chris@42 403 T5c = T5a - T5b;
Chris@42 404 T3L = T27 + T2a;
Chris@42 405 T2b = T27 - T2a;
Chris@42 406 T5P = T5h + T5g;
Chris@42 407 T5i = T5g - T5h;
Chris@42 408 T6M = T6n + T6m;
Chris@42 409 T6o = T6m - T6n;
Chris@42 410 T2m = T2g + T2l;
Chris@42 411 T3P = T2g - T2l;
Chris@42 412 }
Chris@42 413 T3O = T2r + T2q;
Chris@42 414 T2s = T2q - T2r;
Chris@42 415 T2v = T2t - T2u;
Chris@42 416 T3M = T2u + T2t;
Chris@42 417 T38 = FNMS(KP707106781, T2m, T2b);
Chris@42 418 T2n = FMA(KP707106781, T2m, T2b);
Chris@42 419 T37 = FNMS(KP707106781, T2v, T2s);
Chris@42 420 T2w = FMA(KP707106781, T2v, T2s);
Chris@42 421 T4l = FMA(KP707106781, T3P, T3O);
Chris@42 422 T3Q = FNMS(KP707106781, T3P, T3O);
Chris@42 423 T5Q = T59 - T5c;
Chris@42 424 T5d = T59 + T5c;
Chris@42 425 }
Chris@42 426 }
Chris@42 427 }
Chris@42 428 {
Chris@42 429 E T4m, T3N, T5t, T5L, T63, T4W, T5Y, T5X, T66, T5W, T67, T5S;
Chris@42 430 {
Chris@42 431 E T6T, T6S, T6W, T6P;
Chris@42 432 {
Chris@42 433 E T6L, T6O, T6Y, T6X, T6Z, Tv, T10, T70;
Chris@42 434 T6L = Tf - Tu;
Chris@42 435 Tv = Tf + Tu;
Chris@42 436 T10 = TK + TZ;
Chris@42 437 T6T = TK - TZ;
Chris@42 438 T6O = T6M - T6N;
Chris@42 439 T6Y = T6N + T6M;
Chris@42 440 T4m = FMA(KP707106781, T3M, T3L);
Chris@42 441 T3N = FNMS(KP707106781, T3M, T3L);
Chris@42 442 T6X = Tv - T10;
Chris@42 443 T6S = T6Q - T6R;
Chris@42 444 T6Z = T6R + T6Q;
/* All Cr/Ci loads are done by this point; the stores to R0 begin here and
 * are interleaved with the remaining arithmetic. */
Chris@42 445 R0[0] = KP2_000000000 * (Tv + T10);
Chris@42 446 R0[WS(rs, 16)] = KP2_000000000 * (T6Z - T6Y);
Chris@42 447 T70 = T6Y + T6Z;
Chris@42 448 T6W = T6L - T6O;
Chris@42 449 T6P = T6L + T6O;
Chris@42 450 R0[WS(rs, 24)] = KP1_414213562 * (T70 - T6X);
Chris@42 451 R0[WS(rs, 8)] = KP1_414213562 * (T6X + T70);
Chris@42 452 }
Chris@42 453 {
Chris@42 454 E T6D, T6f, T6w, T6G, T6p, T6x, T6y, T6k, T6V, T6U;
Chris@42 455 T6D = T6b - T6e;
Chris@42 456 T6f = T6b + T6e;
Chris@42 457 T6w = T6u - T6v;
Chris@42 458 T6G = T6v + T6u;
Chris@42 459 T6V = T6T + T6S;
Chris@42 460 T6U = T6S - T6T;
Chris@42 461 T6p = T6l + T6o;
Chris@42 462 T6x = T6l - T6o;
Chris@42 463 R0[WS(rs, 12)] = KP1_847759065 * (FMA(KP414213562, T6W, T6V));
Chris@42 464 R0[WS(rs, 28)] = -(KP1_847759065 * (FNMS(KP414213562, T6V, T6W)));
Chris@42 465 R0[WS(rs, 20)] = KP1_847759065 * (FNMS(KP414213562, T6P, T6U));
Chris@42 466 R0[WS(rs, 4)] = KP1_847759065 * (FMA(KP414213562, T6U, T6P));
Chris@42 467 T6y = T6g + T6j;
Chris@42 468 T6k = T6g - T6j;
Chris@42 469 {
Chris@42 470 E T5V, T5K, T5O, T5R;
Chris@42 471 T5t = T5r - T5s;
Chris@42 472 T5K = T5s + T5r;
Chris@42 473 {
Chris@42 474 E T6E, T6z, T6H, T6q;
Chris@42 475 T6E = T6y + T6x;
Chris@42 476 T6z = T6x - T6y;
Chris@42 477 T6H = T6k - T6p;
Chris@42 478 T6q = T6k + T6p;
Chris@42 479 {
Chris@42 480 E T6F, T6K, T6B, T6A;
Chris@42 481 T6F = FNMS(KP707106781, T6E, T6D);
Chris@42 482 T6K = FMA(KP707106781, T6E, T6D);
Chris@42 483 T6B = FNMS(KP707106781, T6z, T6w);
Chris@42 484 T6A = FMA(KP707106781, T6z, T6w);
Chris@42 485 {
Chris@42 486 E T6I, T6J, T6C, T6r;
Chris@42 487 T6I = FNMS(KP707106781, T6H, T6G);
Chris@42 488 T6J = FMA(KP707106781, T6H, T6G);
Chris@42 489 T6C = FNMS(KP707106781, T6q, T6f);
Chris@42 490 T6r = FMA(KP707106781, T6q, T6f);
Chris@42 491 R0[WS(rs, 22)] = KP1_662939224 * (FNMS(KP668178637, T6F, T6I));
Chris@42 492 R0[WS(rs, 6)] = KP1_662939224 * (FMA(KP668178637, T6I, T6F));
Chris@42 493 R0[WS(rs, 30)] = -(KP1_961570560 * (FNMS(KP198912367, T6J, T6K)));
Chris@42 494 R0[WS(rs, 14)] = KP1_961570560 * (FMA(KP198912367, T6K, T6J));
Chris@42 495 R0[WS(rs, 26)] = -(KP1_662939224 * (FNMS(KP668178637, T6B, T6C)));
Chris@42 496 R0[WS(rs, 10)] = KP1_662939224 * (FMA(KP668178637, T6C, T6B));
Chris@42 497 R0[WS(rs, 18)] = KP1_961570560 * (FNMS(KP198912367, T6r, T6A));
Chris@42 498 R0[WS(rs, 2)] = KP1_961570560 * (FMA(KP198912367, T6A, T6r));
Chris@42 499 T5L = FNMS(KP707106781, T5K, T5J);
Chris@42 500 T63 = FMA(KP707106781, T5K, T5J);
Chris@42 501 }
Chris@42 502 }
Chris@42 503 }
Chris@42 504 T5V = T4Q - T4V;
Chris@42 505 T4W = T4Q + T4V;
Chris@42 506 T5Y = FNMS(KP414213562, T5M, T5N);
Chris@42 507 T5O = FMA(KP414213562, T5N, T5M);
Chris@42 508 T5R = FNMS(KP414213562, T5Q, T5P);
Chris@42 509 T5X = FMA(KP414213562, T5P, T5Q);
Chris@42 510 T66 = FMA(KP707106781, T5V, T5U);
Chris@42 511 T5W = FNMS(KP707106781, T5V, T5U);
Chris@42 512 T67 = T5O + T5R;
Chris@42 513 T5S = T5O - T5R;
Chris@42 514 }
Chris@42 515 }
Chris@42 516 }
Chris@42 517 {
Chris@42 518 E T1h, T2L, T2I, T3h, T3p, T1E, T3n, T3s, T3b, T3k, T3e, T3o;
Chris@42 519 {
Chris@42 520 E T4X, T5B, T5v, T5w, T5E, T5u, T5F, T5k, T58, T5j;
Chris@42 521 {
Chris@42 522 E T68, T69, T62, T5T, T64, T5Z;
Chris@42 523 T68 = FNMS(KP923879532, T67, T66);
Chris@42 524 T69 = FMA(KP923879532, T67, T66);
Chris@42 525 T62 = FNMS(KP923879532, T5S, T5L);
Chris@42 526 T5T = FMA(KP923879532, T5S, T5L);
Chris@42 527 T64 = T5Y + T5X;
Chris@42 528 T5Z = T5X - T5Y;
Chris@42 529 T4X = FMA(KP707106781, T4W, T4L);
Chris@42 530 T5B = FNMS(KP707106781, T4W, T4L);
Chris@42 531 {
Chris@42 532 E T65, T6a, T61, T60;
Chris@42 533 T65 = FNMS(KP923879532, T64, T63);
Chris@42 534 T6a = FMA(KP923879532, T64, T63);
Chris@42 535 T61 = FNMS(KP923879532, T5Z, T5W);
Chris@42 536 T60 = FMA(KP923879532, T5Z, T5W);
Chris@42 537 R0[WS(rs, 23)] = KP1_546020906 * (FNMS(KP820678790, T65, T68));
Chris@42 538 R0[WS(rs, 7)] = KP1_546020906 * (FMA(KP820678790, T68, T65));
Chris@42 539 R0[WS(rs, 31)] = -(KP1_990369453 * (FNMS(KP098491403, T69, T6a)));
Chris@42 540 R0[WS(rs, 15)] = KP1_990369453 * (FMA(KP098491403, T6a, T69));
Chris@42 541 R0[WS(rs, 27)] = -(KP1_763842528 * (FNMS(KP534511135, T61, T62)));
Chris@42 542 R0[WS(rs, 11)] = KP1_763842528 * (FMA(KP534511135, T62, T61));
Chris@42 543 R0[WS(rs, 19)] = KP1_913880671 * (FNMS(KP303346683, T5T, T60));
Chris@42 544 R0[WS(rs, 3)] = KP1_913880671 * (FMA(KP303346683, T60, T5T));
Chris@42 545 }
Chris@42 546 }
Chris@42 547 T5v = FNMS(KP414213562, T52, T57);
Chris@42 548 T58 = FMA(KP414213562, T57, T52);
Chris@42 549 T5j = FNMS(KP414213562, T5i, T5d);
Chris@42 550 T5w = FMA(KP414213562, T5d, T5i);
Chris@42 551 T5E = FNMS(KP707106781, T5t, T5q);
Chris@42 552 T5u = FMA(KP707106781, T5t, T5q);
Chris@42 553 T5F = T58 - T5j;
Chris@42 554 T5k = T58 + T5j;
Chris@42 555 {
Chris@42 556 E T3l, T33, T3c, T3m, T3a, T3d;
Chris@42 557 {
Chris@42 558 E T39, T3f, T3g, T36;
Chris@42 559 {
Chris@42 560 E T31, T5G, T5H, T5A, T5l, T5C, T5x, T32;
Chris@42 561 T1h = FMA(KP707106781, T1g, T15);
Chris@42 562 T31 = FNMS(KP707106781, T1g, T15);
Chris@42 563 T5G = FNMS(KP923879532, T5F, T5E);
Chris@42 564 T5H = FMA(KP923879532, T5F, T5E);
Chris@42 565 T5A = FNMS(KP923879532, T5k, T4X);
Chris@42 566 T5l = FMA(KP923879532, T5k, T4X);
Chris@42 567 T5C = T5w - T5v;
Chris@42 568 T5x = T5v + T5w;
Chris@42 569 T32 = T2K + T2J;
Chris@42 570 T2L = T2J - T2K;
Chris@42 571 T39 = FNMS(KP668178637, T38, T37);
Chris@42 572 T3f = FMA(KP668178637, T37, T38);
Chris@42 573 {
Chris@42 574 E T5D, T5I, T5z, T5y;
Chris@42 575 T5D = FNMS(KP923879532, T5C, T5B);
Chris@42 576 T5I = FMA(KP923879532, T5C, T5B);
Chris@42 577 T5z = FNMS(KP923879532, T5x, T5u);
Chris@42 578 T5y = FMA(KP923879532, T5x, T5u);
Chris@42 579 T3l = FMA(KP923879532, T32, T31);
Chris@42 580 T33 = FNMS(KP923879532, T32, T31);
Chris@42 581 R0[WS(rs, 21)] = KP1_763842528 * (FNMS(KP534511135, T5D, T5G));
Chris@42 582 R0[WS(rs, 5)] = KP1_763842528 * (FMA(KP534511135, T5G, T5D));
Chris@42 583 R0[WS(rs, 29)] = -(KP1_913880671 * (FNMS(KP303346683, T5H, T5I)));
Chris@42 584 R0[WS(rs, 13)] = KP1_913880671 * (FMA(KP303346683, T5I, T5H));
Chris@42 585 R0[WS(rs, 25)] = -(KP1_546020906 * (FNMS(KP820678790, T5z, T5A)));
Chris@42 586 R0[WS(rs, 9)] = KP1_546020906 * (FMA(KP820678790, T5A, T5z));
Chris@42 587 R0[WS(rs, 17)] = KP1_990369453 * (FNMS(KP098491403, T5l, T5y));
Chris@42 588 R0[WS(rs, 1)] = KP1_990369453 * (FMA(KP098491403, T5y, T5l));
Chris@42 589 T3g = FMA(KP668178637, T34, T35);
Chris@42 590 T36 = FNMS(KP668178637, T35, T34);
Chris@42 591 }
Chris@42 592 }
Chris@42 593 T2I = FNMS(KP707106781, T2H, T2E);
Chris@42 594 T3c = FMA(KP707106781, T2H, T2E);
Chris@42 595 T3m = T3g + T3f;
Chris@42 596 T3h = T3f - T3g;
Chris@42 597 T3p = T39 - T36;
Chris@42 598 T3a = T36 + T39;
Chris@42 599 T3d = T1s - T1D;
Chris@42 600 T1E = T1s + T1D;
Chris@42 601 }
Chris@42 602 T3n = FNMS(KP831469612, T3m, T3l);
Chris@42 603 T3s = FMA(KP831469612, T3m, T3l);
Chris@42 604 T3b = FNMS(KP831469612, T3a, T33);
Chris@42 605 T3k = FMA(KP831469612, T3a, T33);
Chris@42 606 T3e = FMA(KP923879532, T3d, T3c);
Chris@42 607 T3o = FNMS(KP923879532, T3d, T3c);
Chris@42 608 }
Chris@42 609 }
Chris@42 610 {
Chris@42 611 E T3v, T3Z, T3W, T4v, T4D, T3C, T4B, T4G, T4p, T4y, T4s, T4C;
Chris@42 612 {
Chris@42 613 E T4z, T4h, T4q, T4A, T4o, T4r;
Chris@42 614 {
Chris@42 615 E T4n, T4t, T4u, T4k, T4f, T4g;
Chris@42 616 T3v = FNMS(KP707106781, T3u, T3t);
Chris@42 617 T4f = FMA(KP707106781, T3u, T3t);
Chris@42 618 T4g = T3Y + T3X;
Chris@42 619 T3Z = T3X - T3Y;
Chris@42 620 {
Chris@42 621 E T3r, T3q, T3i, T3j;
Chris@42 622 T3r = FNMS(KP831469612, T3p, T3o);
Chris@42 623 T3q = FMA(KP831469612, T3p, T3o);
Chris@42 624 T3i = FNMS(KP831469612, T3h, T3e);
Chris@42 625 T3j = FMA(KP831469612, T3h, T3e);
/* The stores to R1 begin here; every R0 output has already been written. */
Chris@42 626 R1[WS(rs, 22)] = -(KP1_606415062 * (FMA(KP741650546, T3n, T3q)));
Chris@42 627 R1[WS(rs, 6)] = KP1_606415062 * (FNMS(KP741650546, T3q, T3n));
Chris@42 628 R1[WS(rs, 30)] = -(KP1_978353019 * (FMA(KP148335987, T3r, T3s)));
Chris@42 629 R1[WS(rs, 14)] = -(KP1_978353019 * (FNMS(KP148335987, T3s, T3r)));
Chris@42 630 R1[WS(rs, 26)] = -(KP1_715457220 * (FMA(KP599376933, T3j, T3k)));
Chris@42 631 R1[WS(rs, 10)] = -(KP1_715457220 * (FNMS(KP599376933, T3k, T3j)));
Chris@42 632 R1[WS(rs, 18)] = -(KP1_940062506 * (FMA(KP250486960, T3b, T3i)));
Chris@42 633 R1[WS(rs, 2)] = KP1_940062506 * (FNMS(KP250486960, T3i, T3b));
Chris@42 634 T4z = FMA(KP923879532, T4g, T4f);
Chris@42 635 T4h = FNMS(KP923879532, T4g, T4f);
Chris@42 636 }
Chris@42 637 T4n = FNMS(KP198912367, T4m, T4l);
Chris@42 638 T4t = FMA(KP198912367, T4l, T4m);
Chris@42 639 T4u = FNMS(KP198912367, T4i, T4j);
Chris@42 640 T4k = FMA(KP198912367, T4j, T4i);
Chris@42 641 T3W = FNMS(KP707106781, T3V, T3U);
Chris@42 642 T4q = FMA(KP707106781, T3V, T3U);
Chris@42 643 T4A = T4u + T4t;
Chris@42 644 T4v = T4t - T4u;
Chris@42 645 T4D = T4k + T4n;
Chris@42 646 T4o = T4k - T4n;
Chris@42 647 T4r = T3y + T3B;
Chris@42 648 T3C = T3y - T3B;
Chris@42 649 }
Chris@42 650 T4B = FNMS(KP980785280, T4A, T4z);
Chris@42 651 T4G = FMA(KP980785280, T4A, T4z);
Chris@42 652 T4p = FMA(KP980785280, T4o, T4h);
Chris@42 653 T4y = FNMS(KP980785280, T4o, T4h);
Chris@42 654 T4s = FNMS(KP923879532, T4r, T4q);
Chris@42 655 T4C = FMA(KP923879532, T4r, T4q);
Chris@42 656 }
Chris@42 657 {
Chris@42 658 E T2P, T2X, T2V, T30, T2z, T2S, T2M, T2W;
Chris@42 659 {
Chris@42 660 E T2T, T1F, T2U, T2y;
Chris@42 661 {
Chris@42 662 E T2x, T2N, T2O, T26;
Chris@42 663 {
Chris@42 664 E T4F, T4E, T4w, T4x;
Chris@42 665 T4F = FMA(KP980785280, T4D, T4C);
Chris@42 666 T4E = FNMS(KP980785280, T4D, T4C);
Chris@42 667 T4w = FMA(KP980785280, T4v, T4s);
Chris@42 668 T4x = FNMS(KP980785280, T4v, T4s);
Chris@42 669 R1[WS(rs, 23)] = KP1_481902250 * (FNMS(KP906347169, T4B, T4E));
Chris@42 670 R1[WS(rs, 7)] = KP1_481902250 * (FMA(KP906347169, T4E, T4B));
Chris@42 671 R1[WS(rs, 31)] = -(KP1_997590912 * (FNMS(KP049126849, T4F, T4G)));
Chris@42 672 R1[WS(rs, 15)] = KP1_997590912 * (FMA(KP049126849, T4G, T4F));
Chris@42 673 R1[WS(rs, 27)] = -(KP1_807978586 * (FNMS(KP472964775, T4x, T4y)));
Chris@42 674 R1[WS(rs, 11)] = KP1_807978586 * (FMA(KP472964775, T4y, T4x));
Chris@42 675 R1[WS(rs, 19)] = KP1_883088130 * (FNMS(KP357805721, T4p, T4w));
Chris@42 676 R1[WS(rs, 3)] = KP1_883088130 * (FMA(KP357805721, T4w, T4p));
Chris@42 677 T2T = FNMS(KP923879532, T1E, T1h);
Chris@42 678 T1F = FMA(KP923879532, T1E, T1h);
Chris@42 679 }
Chris@42 680 T2x = FNMS(KP198912367, T2w, T2n);
Chris@42 681 T2N = FMA(KP198912367, T2n, T2w);
Chris@42 682 T2O = FMA(KP198912367, T1W, T25);
Chris@42 683 T26 = FNMS(KP198912367, T25, T1W);
Chris@42 684 T2U = T2O + T2N;
Chris@42 685 T2P = T2N - T2O;
Chris@42 686 T2X = T26 - T2x;
Chris@42 687 T2y = T26 + T2x;
Chris@42 688 }
Chris@42 689 T2V = FNMS(KP980785280, T2U, T2T);
Chris@42 690 T30 = FMA(KP980785280, T2U, T2T);
Chris@42 691 T2z = FMA(KP980785280, T2y, T1F);
Chris@42 692 T2S = FNMS(KP980785280, T2y, T1F);
Chris@42 693 T2M = FNMS(KP923879532, T2L, T2I);
Chris@42 694 T2W = FMA(KP923879532, T2L, T2I);
Chris@42 695 }
Chris@42 696 {
Chris@42 697 E T47, T3D, T48, T3S;
Chris@42 698 {
Chris@42 699 E T3K, T41, T42, T3R;
Chris@42 700 {
Chris@42 701 E T2Z, T2Y, T2Q, T2R;
Chris@42 702 T2Z = FNMS(KP980785280, T2X, T2W);
Chris@42 703 T2Y = FMA(KP980785280, T2X, T2W);
Chris@42 704 T2Q = FNMS(KP980785280, T2P, T2M);
Chris@42 705 T2R = FMA(KP980785280, T2P, T2M);
Chris@42 706 R1[WS(rs, 20)] = -(KP1_807978586 * (FMA(KP472964775, T2V, T2Y)));
Chris@42 707 R1[WS(rs, 4)] = KP1_807978586 * (FNMS(KP472964775, T2Y, T2V));
Chris@42 708 R1[WS(rs, 28)] = -(KP1_883088130 * (FMA(KP357805721, T2Z, T30)));
Chris@42 709 R1[WS(rs, 12)] = -(KP1_883088130 * (FNMS(KP357805721, T30, T2Z)));
Chris@42 710 R1[WS(rs, 24)] = -(KP1_481902250 * (FMA(KP906347169, T2R, T2S)));
Chris@42 711 R1[WS(rs, 8)] = -(KP1_481902250 * (FNMS(KP906347169, T2S, T2R)));
Chris@42 712 R1[WS(rs, 16)] = -(KP1_997590912 * (FMA(KP049126849, T2z, T2Q)));
Chris@42 713 R1[0] = KP1_997590912 * (FNMS(KP049126849, T2Q, T2z));
Chris@42 714 T47 = FNMS(KP923879532, T3C, T3v);
Chris@42 715 T3D = FMA(KP923879532, T3C, T3v);
Chris@42 716 }
Chris@42 717 T3K = FMA(KP668178637, T3J, T3G);
Chris@42 718 T41 = FNMS(KP668178637, T3G, T3J);
Chris@42 719 T42 = FMA(KP668178637, T3N, T3Q);
Chris@42 720 T3R = FNMS(KP668178637, T3Q, T3N);
Chris@42 721 T48 = T42 - T41;
Chris@42 722 T43 = T41 + T42;
Chris@42 723 T4b = T3K - T3R;
Chris@42 724 T3S = T3K + T3R;
Chris@42 725 }
Chris@42 726 T49 = FNMS(KP831469612, T48, T47);
Chris@42 727 T4e = FMA(KP831469612, T48, T47);
Chris@42 728 T3T = FMA(KP831469612, T3S, T3D);
Chris@42 729 T46 = FNMS(KP831469612, T3S, T3D);
Chris@42 730 T40 = FMA(KP923879532, T3Z, T3W);
Chris@42 731 T4a = FNMS(KP923879532, T3Z, T3W);
Chris@42 732 }
Chris@42 733 }
Chris@42 734 }
Chris@42 735 }
Chris@42 736 }
Chris@42 737 }
Chris@42 738 {
Chris@42 739 E T4d, T4c, T44, T45;
/* Final group of R1 stores, built from the loop-scope temporaries that were
 * filled in inside the nested block above. */
Chris@42 740 T4d = FMA(KP831469612, T4b, T4a);
Chris@42 741 T4c = FNMS(KP831469612, T4b, T4a);
Chris@42 742 T44 = FMA(KP831469612, T43, T40);
Chris@42 743 T45 = FNMS(KP831469612, T43, T40);
Chris@42 744 R1[WS(rs, 21)] = KP1_715457220 * (FNMS(KP599376933, T49, T4c));
Chris@42 745 R1[WS(rs, 5)] = KP1_715457220 * (FMA(KP599376933, T4c, T49));
Chris@42 746 R1[WS(rs, 29)] = -(KP1_940062506 * (FNMS(KP250486960, T4d, T4e)));
Chris@42 747 R1[WS(rs, 13)] = KP1_940062506 * (FMA(KP250486960, T4e, T4d));
Chris@42 748 R1[WS(rs, 25)] = -(KP1_606415062 * (FNMS(KP741650546, T45, T46)));
Chris@42 749 R1[WS(rs, 9)] = KP1_606415062 * (FMA(KP741650546, T46, T45));
Chris@42 750 R1[WS(rs, 17)] = KP1_978353019 * (FNMS(KP148335987, T3T, T44));
Chris@42 751 R1[WS(rs, 1)] = KP1_978353019 * (FMA(KP148335987, T44, T3T));
Chris@42 752 }
Chris@42 753 }
Chris@42 754 }
Chris@42 755 }
Chris@42 756
/*
 * Descriptor passed alongside the r2cbIII_64 kernel when it is registered.
 * 64 matches the "-n 64" of the generator command line, and {238, 64, 196, 0}
 * repeats the "238 additions, 64 multiplications, 196 fused multiply/add"
 * counts quoted in this variant's header comment.
 * NOTE(review): the field layout of kr2c_desc and the definition of GENUS
 * live in headers not shown here -- confirm the field meanings there.
 */
Chris@42 757 static const kr2c_desc desc = { 64, "r2cbIII_64", {238, 64, 196, 0}, &GENUS };
Chris@42 758
/*
 * Registration entry point: hands the r2cbIII_64 kernel and its descriptor
 * to X(kr2c_register) for planner p.
 * NOTE(review): X() is a project macro not defined in this file (presumably
 * a symbol-prefixing wrapper) -- confirm in the headers.
 */
Chris@42 759 void X(codelet_r2cbIII_64) (planner *p) {
Chris@42 760 X(kr2c_register) (p, r2cbIII_64, &desc);
Chris@42 761 }
Chris@42 762
Chris@42 763 #else /* HAVE_FMA */
Chris@42 764
Chris@42 765 /* Generated by: ../../../genfft/gen_r2cb.native -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cbIII_64 -dft-III -include r2cbIII.h */
Chris@42 766
Chris@42 767 /*
Chris@42 768 * This function contains 434 FP additions, 208 FP multiplications,
Chris@42 769 * (or, 342 additions, 116 multiplications, 92 fused multiply/add),
Chris@42 770 * 130 stack variables, 39 constants, and 128 memory accesses
Chris@42 771 */
Chris@42 772 #include "r2cbIII.h"
Chris@42 773
/*
 * r2cbIII_64 -- non-FMA variant (this definition is in the #else branch of
 * HAVE_FMA).  Machine-generated by genfft's gen_r2cb with "-n 64 -dft-III
 * -sign 1" (see the generator command line above); do not hand-edit the
 * arithmetic, regenerate instead.
 *
 * Parameters, as used by the code below:
 *   R0, R1   output arrays; each receives 32 values per iteration, written at
 *            R0[0] / R1[0] and at WS(rs, k) for k = 1..31.
 *   Cr, Ci   input arrays; each supplies 32 values per iteration, read at
 *            index 0 and at WS(csr, k) (Cr) / WS(csi, k) (Ci) for k = 1..31.
 *   rs       stride handed to WS() for R0/R1 indexing.
 *   csr, csi strides handed to WS() for Cr and Ci indexing respectively.
 *   v        number of loop iterations (the loop runs while the counter > 0).
 *   ivs      amount added to Cr and Ci after each iteration.
 *   ovs      amount added to R0 and R1 after each iteration.
 *
 * The function returns nothing; its only visible effect is the 64 stores to
 * R0/R1 per iteration.  Inputs are only read.
 * NOTE(review): R, E, INT, stride, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE
 * are declared outside this file; their exact definitions are not visible
 * here.  The mathematical definition of the "type III" transform is likewise
 * not stated in this file -- consult the FFTW documentation.
 */
Chris@42 774 static void r2cbIII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
Chris@42 775 {
/* The 39 named constants referenced by the arithmetic below. */
Chris@42 776 DK(KP1_343117909, +1.343117909694036801250753700854843606457501264);
Chris@42 777 DK(KP1_481902250, +1.481902250709918182351233794990325459457910619);
Chris@42 778 DK(KP1_807978586, +1.807978586246886663172400594461074097420264050);
Chris@42 779 DK(KP855110186, +0.855110186860564188641933713777597068609157259);
Chris@42 780 DK(KP1_997590912, +1.997590912410344785429543209518201388886407229);
Chris@42 781 DK(KP098135348, +0.098135348654836028509909953885365316629490726);
Chris@42 782 DK(KP673779706, +0.673779706784440101378506425238295140955533559);
Chris@42 783 DK(KP1_883088130, +1.883088130366041556825018805199004714371179592);
Chris@42 784 DK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@42 785 DK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 786 DK(KP1_191398608, +1.191398608984866686934073057659939779023852677);
Chris@42 787 DK(KP1_606415062, +1.606415062961289819613353025926283847759138854);
Chris@42 788 DK(KP1_715457220, +1.715457220000544139804539968569540274084981599);
Chris@42 789 DK(KP1_028205488, +1.028205488386443453187387677937631545216098241);
Chris@42 790 DK(KP1_978353019, +1.978353019929561946903347476032486127967379067);
Chris@42 791 DK(KP293460948, +0.293460948910723503317700259293435639412430633);
Chris@42 792 DK(KP485960359, +0.485960359806527779896548324154942236641981567);
Chris@42 793 DK(KP1_940062506, +1.940062506389087985207968414572200502913731924);
Chris@42 794 DK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@42 795 DK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 796 DK(KP1_268786568, +1.268786568327290996430343226450986741351374190);
Chris@42 797 DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
Chris@42 798 DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
Chris@42 799 DK(KP942793473, +0.942793473651995297112775251810508755314920638);
Chris@42 800 DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
Chris@42 801 DK(KP196034280, +0.196034280659121203988391127777283691722273346);
Chris@42 802 DK(KP580569354, +0.580569354508924735272384751634790549382952557);
Chris@42 803 DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
Chris@42 804 DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
Chris@42 805 DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);
Chris@42 806 DK(KP390180644, +0.390180644032256535696569736954044481855383236);
Chris@42 807 DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
Chris@42 808 DK(KP765366864, +0.765366864730179543456919968060797733522689125);
Chris@42 809 DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
Chris@42 810 DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
Chris@42 811 DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
Chris@42 812 DK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@42 813 DK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 814 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 815 {
Chris@42 816 INT i;
/* One pass per transform: counts i down from v, stepping the output pointers
 * by ovs and the input pointers by ivs after each pass. */
Chris@42 817 for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(256, rs), MAKE_VOLATILE_STRIDE(256, csr), MAKE_VOLATILE_STRIDE(256, csi)) {
/* Intermediates that are computed in the four load stages and consumed by
 * the output stages further down. */
Chris@42 818 E T15, T3t, T3U, T2N, Tf, T6b, T6u, T6R, T4L, T5J, T1g, T3V, T5q, T5U, T2I;
Chris@42 819 E T3u, Tu, T6v, T4V, T5s, T6e, T6Q, T1s, T2D, T1D, T2E, T3B, T3Y, T4Q, T5r;
Chris@42 820 E T3y, T3X, TK, T6g, T57, T5N, T6j, T6N, T1W, T34, T25, T35, T3J, T4j, T52;
Chris@42 821 E T5M, T3G, T4i, TZ, T6l, T5i, T5Q, T6o, T6M, T2n, T37, T2w, T38, T3Q, T4m;
Chris@42 822 E T5d, T5P, T3N, T4l;
/* Load stage 1: reads Cr/Ci at indices 0, 31, 16, 15, 8, 23, 7, 24 and
 * combines them into sums and differences. */
Chris@42 823 {
Chris@42 824 E T3, T11, T2M, T5n, T6, T2J, T14, T5m, Ta, T16, T19, T4J, Td, T1b, T1e;
Chris@42 825 E T4I;
Chris@42 826 {
Chris@42 827 E T1, T2, T2K, T2L;
Chris@42 828 T1 = Cr[0];
Chris@42 829 T2 = Cr[WS(csr, 31)];
Chris@42 830 T3 = T1 + T2;
Chris@42 831 T11 = T1 - T2;
Chris@42 832 T2K = Ci[0];
Chris@42 833 T2L = Ci[WS(csi, 31)];
Chris@42 834 T2M = T2K + T2L;
Chris@42 835 T5n = T2L - T2K;
Chris@42 836 }
Chris@42 837 {
Chris@42 838 E T4, T5, T12, T13;
Chris@42 839 T4 = Cr[WS(csr, 16)];
Chris@42 840 T5 = Cr[WS(csr, 15)];
Chris@42 841 T6 = T4 + T5;
Chris@42 842 T2J = T4 - T5;
Chris@42 843 T12 = Ci[WS(csi, 16)];
Chris@42 844 T13 = Ci[WS(csi, 15)];
Chris@42 845 T14 = T12 + T13;
Chris@42 846 T5m = T12 - T13;
Chris@42 847 }
Chris@42 848 {
Chris@42 849 E T8, T9, T17, T18;
Chris@42 850 T8 = Cr[WS(csr, 8)];
Chris@42 851 T9 = Cr[WS(csr, 23)];
Chris@42 852 Ta = T8 + T9;
Chris@42 853 T16 = T8 - T9;
Chris@42 854 T17 = Ci[WS(csi, 8)];
Chris@42 855 T18 = Ci[WS(csi, 23)];
Chris@42 856 T19 = T17 + T18;
Chris@42 857 T4J = T17 - T18;
Chris@42 858 }
Chris@42 859 {
Chris@42 860 E Tb, Tc, T1c, T1d;
Chris@42 861 Tb = Cr[WS(csr, 7)];
Chris@42 862 Tc = Cr[WS(csr, 24)];
Chris@42 863 Td = Tb + Tc;
Chris@42 864 T1b = Tb - Tc;
Chris@42 865 T1c = Ci[WS(csi, 7)];
Chris@42 866 T1d = Ci[WS(csi, 24)];
Chris@42 867 T1e = T1c + T1d;
Chris@42 868 T4I = T1d - T1c;
Chris@42 869 }
Chris@42 870 {
Chris@42 871 E T7, Te, T1a, T1f;
Chris@42 872 T15 = T11 - T14;
Chris@42 873 T3t = T11 + T14;
Chris@42 874 T3U = T2J - T2M;
Chris@42 875 T2N = T2J + T2M;
Chris@42 876 T7 = T3 + T6;
Chris@42 877 Te = Ta + Td;
Chris@42 878 Tf = T7 + Te;
Chris@42 879 T6b = T7 - Te;
Chris@42 880 {
Chris@42 881 E T6s, T6t, T4H, T4K;
Chris@42 882 T6s = T4J + T4I;
Chris@42 883 T6t = T5n - T5m;
Chris@42 884 T6u = T6s + T6t;
Chris@42 885 T6R = T6t - T6s;
Chris@42 886 T4H = T3 - T6;
Chris@42 887 T4K = T4I - T4J;
Chris@42 888 T4L = T4H + T4K;
Chris@42 889 T5J = T4H - T4K;
Chris@42 890 }
Chris@42 891 T1a = T16 - T19;
Chris@42 892 T1f = T1b - T1e;
Chris@42 893 T1g = KP707106781 * (T1a + T1f);
Chris@42 894 T3V = KP707106781 * (T1a - T1f);
Chris@42 895 {
Chris@42 896 E T5o, T5p, T2G, T2H;
Chris@42 897 T5o = T5m + T5n;
Chris@42 898 T5p = Ta - Td;
Chris@42 899 T5q = T5o - T5p;
Chris@42 900 T5U = T5p + T5o;
Chris@42 901 T2G = T16 + T19;
Chris@42 902 T2H = T1b + T1e;
Chris@42 903 T2I = KP707106781 * (T2G - T2H);
Chris@42 904 T3u = KP707106781 * (T2G + T2H);
Chris@42 905 }
Chris@42 906 }
Chris@42 907 }
/* Load stage 2: reads Cr/Ci at indices 4, 27, 20, 11, 3, 28, 12, 19. */
Chris@42 908 {
Chris@42 909 E Ti, T1i, T1q, T4N, Tl, T1n, T1l, T4O, Tp, T1t, T1B, T4S, Ts, T1y, T1w;
Chris@42 910 E T4T;
Chris@42 911 {
Chris@42 912 E Tg, Th, T1o, T1p;
Chris@42 913 Tg = Cr[WS(csr, 4)];
Chris@42 914 Th = Cr[WS(csr, 27)];
Chris@42 915 Ti = Tg + Th;
Chris@42 916 T1i = Tg - Th;
Chris@42 917 T1o = Ci[WS(csi, 4)];
Chris@42 918 T1p = Ci[WS(csi, 27)];
Chris@42 919 T1q = T1o + T1p;
Chris@42 920 T4N = T1o - T1p;
Chris@42 921 }
Chris@42 922 {
Chris@42 923 E Tj, Tk, T1j, T1k;
Chris@42 924 Tj = Cr[WS(csr, 20)];
Chris@42 925 Tk = Cr[WS(csr, 11)];
Chris@42 926 Tl = Tj + Tk;
Chris@42 927 T1n = Tj - Tk;
Chris@42 928 T1j = Ci[WS(csi, 20)];
Chris@42 929 T1k = Ci[WS(csi, 11)];
Chris@42 930 T1l = T1j + T1k;
Chris@42 931 T4O = T1j - T1k;
Chris@42 932 }
Chris@42 933 {
Chris@42 934 E Tn, To, T1z, T1A;
Chris@42 935 Tn = Cr[WS(csr, 3)];
Chris@42 936 To = Cr[WS(csr, 28)];
Chris@42 937 Tp = Tn + To;
Chris@42 938 T1t = Tn - To;
Chris@42 939 T1z = Ci[WS(csi, 3)];
Chris@42 940 T1A = Ci[WS(csi, 28)];
Chris@42 941 T1B = T1z + T1A;
Chris@42 942 T4S = T1A - T1z;
Chris@42 943 }
Chris@42 944 {
Chris@42 945 E Tq, Tr, T1u, T1v;
Chris@42 946 Tq = Cr[WS(csr, 12)];
Chris@42 947 Tr = Cr[WS(csr, 19)];
Chris@42 948 Ts = Tq + Tr;
Chris@42 949 T1y = Tq - Tr;
Chris@42 950 T1u = Ci[WS(csi, 12)];
Chris@42 951 T1v = Ci[WS(csi, 19)];
Chris@42 952 T1w = T1u + T1v;
Chris@42 953 T4T = T1u - T1v;
Chris@42 954 }
Chris@42 955 {
Chris@42 956 E Tm, Tt, T4R, T4U;
Chris@42 957 Tm = Ti + Tl;
Chris@42 958 Tt = Tp + Ts;
Chris@42 959 Tu = Tm + Tt;
Chris@42 960 T6v = Tm - Tt;
Chris@42 961 T4R = Tp - Ts;
Chris@42 962 T4U = T4S - T4T;
Chris@42 963 T4V = T4R + T4U;
Chris@42 964 T5s = T4U - T4R;
Chris@42 965 }
Chris@42 966 {
Chris@42 967 E T6c, T6d, T1m, T1r;
Chris@42 968 T6c = T4T + T4S;
Chris@42 969 T6d = T4O + T4N;
Chris@42 970 T6e = T6c - T6d;
Chris@42 971 T6Q = T6d + T6c;
Chris@42 972 T1m = T1i - T1l;
Chris@42 973 T1r = T1n + T1q;
Chris@42 974 T1s = FNMS(KP382683432, T1r, KP923879532 * T1m);
Chris@42 975 T2D = FMA(KP382683432, T1m, KP923879532 * T1r);
Chris@42 976 }
Chris@42 977 {
Chris@42 978 E T1x, T1C, T3z, T3A;
Chris@42 979 T1x = T1t - T1w;
Chris@42 980 T1C = T1y - T1B;
Chris@42 981 T1D = FMA(KP923879532, T1x, KP382683432 * T1C);
Chris@42 982 T2E = FNMS(KP382683432, T1x, KP923879532 * T1C);
Chris@42 983 T3z = T1t + T1w;
Chris@42 984 T3A = T1y + T1B;
Chris@42 985 T3B = FNMS(KP923879532, T3A, KP382683432 * T3z);
Chris@42 986 T3Y = FMA(KP923879532, T3z, KP382683432 * T3A);
Chris@42 987 }
Chris@42 988 {
Chris@42 989 E T4M, T4P, T3w, T3x;
Chris@42 990 T4M = Ti - Tl;
Chris@42 991 T4P = T4N - T4O;
Chris@42 992 T4Q = T4M - T4P;
Chris@42 993 T5r = T4M + T4P;
Chris@42 994 T3w = T1i + T1l;
Chris@42 995 T3x = T1q - T1n;
Chris@42 996 T3y = FNMS(KP923879532, T3x, KP382683432 * T3w);
Chris@42 997 T3X = FMA(KP923879532, T3w, KP382683432 * T3x);
Chris@42 998 }
Chris@42 999 }
/* Load stage 3: reads Cr/Ci at indices 2, 29, 18, 13, 5, 26, 10, 21. */
Chris@42 1000 {
Chris@42 1001 E Ty, T1G, T23, T54, TB, T20, T1J, T55, TI, T4Z, T1U, T1Y, TF, T50, T1P;
Chris@42 1002 E T1X;
Chris@42 1003 {
Chris@42 1004 E Tw, Tx, T1H, T1I;
Chris@42 1005 Tw = Cr[WS(csr, 2)];
Chris@42 1006 Tx = Cr[WS(csr, 29)];
Chris@42 1007 Ty = Tw + Tx;
Chris@42 1008 T1G = Tw - Tx;
Chris@42 1009 {
Chris@42 1010 E T21, T22, Tz, TA;
Chris@42 1011 T21 = Ci[WS(csi, 2)];
Chris@42 1012 T22 = Ci[WS(csi, 29)];
Chris@42 1013 T23 = T21 + T22;
Chris@42 1014 T54 = T21 - T22;
Chris@42 1015 Tz = Cr[WS(csr, 18)];
Chris@42 1016 TA = Cr[WS(csr, 13)];
Chris@42 1017 TB = Tz + TA;
Chris@42 1018 T20 = Tz - TA;
Chris@42 1019 }
Chris@42 1020 T1H = Ci[WS(csi, 18)];
Chris@42 1021 T1I = Ci[WS(csi, 13)];
Chris@42 1022 T1J = T1H + T1I;
Chris@42 1023 T55 = T1H - T1I;
Chris@42 1024 {
Chris@42 1025 E TG, TH, T1Q, T1R, T1S, T1T;
Chris@42 1026 TG = Cr[WS(csr, 5)];
Chris@42 1027 TH = Cr[WS(csr, 26)];
Chris@42 1028 T1Q = TG - TH;
Chris@42 1029 T1R = Ci[WS(csi, 5)];
Chris@42 1030 T1S = Ci[WS(csi, 26)];
Chris@42 1031 T1T = T1R + T1S;
Chris@42 1032 TI = TG + TH;
Chris@42 1033 T4Z = T1S - T1R;
Chris@42 1034 T1U = T1Q - T1T;
Chris@42 1035 T1Y = T1Q + T1T;
Chris@42 1036 }
Chris@42 1037 {
Chris@42 1038 E TD, TE, T1L, T1M, T1N, T1O;
Chris@42 1039 TD = Cr[WS(csr, 10)];
Chris@42 1040 TE = Cr[WS(csr, 21)];
Chris@42 1041 T1L = TD - TE;
Chris@42 1042 T1M = Ci[WS(csi, 10)];
Chris@42 1043 T1N = Ci[WS(csi, 21)];
Chris@42 1044 T1O = T1M + T1N;
Chris@42 1045 TF = TD + TE;
Chris@42 1046 T50 = T1M - T1N;
Chris@42 1047 T1P = T1L - T1O;
Chris@42 1048 T1X = T1L + T1O;
Chris@42 1049 }
Chris@42 1050 }
Chris@42 1051 {
Chris@42 1052 E TC, TJ, T53, T56;
Chris@42 1053 TC = Ty + TB;
Chris@42 1054 TJ = TF + TI;
Chris@42 1055 TK = TC + TJ;
Chris@42 1056 T6g = TC - TJ;
Chris@42 1057 T53 = TF - TI;
Chris@42 1058 T56 = T54 - T55;
Chris@42 1059 T57 = T53 + T56;
Chris@42 1060 T5N = T56 - T53;
Chris@42 1061 }
Chris@42 1062 {
Chris@42 1063 E T6h, T6i, T1K, T1V;
Chris@42 1064 T6h = T55 + T54;
Chris@42 1065 T6i = T50 + T4Z;
Chris@42 1066 T6j = T6h - T6i;
Chris@42 1067 T6N = T6i + T6h;
Chris@42 1068 T1K = T1G - T1J;
Chris@42 1069 T1V = KP707106781 * (T1P + T1U);
Chris@42 1070 T1W = T1K + T1V;
Chris@42 1071 T34 = T1K - T1V;
Chris@42 1072 }
Chris@42 1073 {
Chris@42 1074 E T1Z, T24, T3H, T3I;
Chris@42 1075 T1Z = KP707106781 * (T1X - T1Y);
Chris@42 1076 T24 = T20 + T23;
Chris@42 1077 T25 = T1Z + T24;
Chris@42 1078 T35 = T24 - T1Z;
Chris@42 1079 T3H = KP707106781 * (T1P - T1U);
Chris@42 1080 T3I = T23 - T20;
Chris@42 1081 T3J = T3H + T3I;
Chris@42 1082 T4j = T3I - T3H;
Chris@42 1083 }
Chris@42 1084 {
Chris@42 1085 E T4Y, T51, T3E, T3F;
Chris@42 1086 T4Y = Ty - TB;
Chris@42 1087 T51 = T4Z - T50;
Chris@42 1088 T52 = T4Y + T51;
Chris@42 1089 T5M = T4Y - T51;
Chris@42 1090 T3E = T1G + T1J;
Chris@42 1091 T3F = KP707106781 * (T1X + T1Y);
Chris@42 1092 T3G = T3E - T3F;
Chris@42 1093 T4i = T3E + T3F;
Chris@42 1094 }
Chris@42 1095 }
/* Load stage 4: reads Cr/Ci at indices 1, 30, 14, 17, 9, 22, 6, 25. */
Chris@42 1096 {
Chris@42 1097 E TN, T27, T2u, T5f, TQ, T2r, T2a, T5g, TX, T5a, T2l, T2p, TU, T5b, T2g;
Chris@42 1098 E T2o;
Chris@42 1099 {
Chris@42 1100 E TL, TM, T28, T29;
Chris@42 1101 TL = Cr[WS(csr, 1)];
Chris@42 1102 TM = Cr[WS(csr, 30)];
Chris@42 1103 TN = TL + TM;
Chris@42 1104 T27 = TL - TM;
Chris@42 1105 {
Chris@42 1106 E T2s, T2t, TO, TP;
Chris@42 1107 T2s = Ci[WS(csi, 1)];
Chris@42 1108 T2t = Ci[WS(csi, 30)];
Chris@42 1109 T2u = T2s + T2t;
Chris@42 1110 T5f = T2t - T2s;
Chris@42 1111 TO = Cr[WS(csr, 14)];
Chris@42 1112 TP = Cr[WS(csr, 17)];
Chris@42 1113 TQ = TO + TP;
Chris@42 1114 T2r = TO - TP;
Chris@42 1115 }
Chris@42 1116 T28 = Ci[WS(csi, 14)];
Chris@42 1117 T29 = Ci[WS(csi, 17)];
Chris@42 1118 T2a = T28 + T29;
Chris@42 1119 T5g = T28 - T29;
Chris@42 1120 {
Chris@42 1121 E TV, TW, T2h, T2i, T2j, T2k;
Chris@42 1122 TV = Cr[WS(csr, 9)];
Chris@42 1123 TW = Cr[WS(csr, 22)];
Chris@42 1124 T2h = TV - TW;
Chris@42 1125 T2i = Ci[WS(csi, 9)];
Chris@42 1126 T2j = Ci[WS(csi, 22)];
Chris@42 1127 T2k = T2i + T2j;
Chris@42 1128 TX = TV + TW;
Chris@42 1129 T5a = T2j - T2i;
Chris@42 1130 T2l = T2h - T2k;
Chris@42 1131 T2p = T2h + T2k;
Chris@42 1132 }
Chris@42 1133 {
Chris@42 1134 E TS, TT, T2c, T2d, T2e, T2f;
Chris@42 1135 TS = Cr[WS(csr, 6)];
Chris@42 1136 TT = Cr[WS(csr, 25)];
Chris@42 1137 T2c = TS - TT;
Chris@42 1138 T2d = Ci[WS(csi, 6)];
Chris@42 1139 T2e = Ci[WS(csi, 25)];
Chris@42 1140 T2f = T2d + T2e;
Chris@42 1141 TU = TS + TT;
Chris@42 1142 T5b = T2d - T2e;
Chris@42 1143 T2g = T2c - T2f;
Chris@42 1144 T2o = T2c + T2f;
Chris@42 1145 }
Chris@42 1146 }
Chris@42 1147 {
Chris@42 1148 E TR, TY, T5e, T5h;
Chris@42 1149 TR = TN + TQ;
Chris@42 1150 TY = TU + TX;
Chris@42 1151 TZ = TR + TY;
Chris@42 1152 T6l = TR - TY;
Chris@42 1153 T5e = TU - TX;
Chris@42 1154 T5h = T5f - T5g;
Chris@42 1155 T5i = T5e + T5h;
Chris@42 1156 T5Q = T5h - T5e;
Chris@42 1157 }
Chris@42 1158 {
Chris@42 1159 E T6m, T6n, T2b, T2m;
Chris@42 1160 T6m = T5g + T5f;
Chris@42 1161 T6n = T5b + T5a;
Chris@42 1162 T6o = T6m - T6n;
Chris@42 1163 T6M = T6n + T6m;
Chris@42 1164 T2b = T27 - T2a;
Chris@42 1165 T2m = KP707106781 * (T2g + T2l);
Chris@42 1166 T2n = T2b + T2m;
Chris@42 1167 T37 = T2b - T2m;
Chris@42 1168 }
Chris@42 1169 {
Chris@42 1170 E T2q, T2v, T3O, T3P;
Chris@42 1171 T2q = KP707106781 * (T2o - T2p);
Chris@42 1172 T2v = T2r - T2u;
Chris@42 1173 T2w = T2q + T2v;
Chris@42 1174 T38 = T2v - T2q;
Chris@42 1175 T3O = KP707106781 * (T2g - T2l);
Chris@42 1176 T3P = T2r + T2u;
Chris@42 1177 T3Q = T3O - T3P;
Chris@42 1178 T4m = T3O + T3P;
Chris@42 1179 }
Chris@42 1180 {
Chris@42 1181 E T59, T5c, T3L, T3M;
Chris@42 1182 T59 = TN - TQ;
Chris@42 1183 T5c = T5a - T5b;
Chris@42 1184 T5d = T59 + T5c;
Chris@42 1185 T5P = T59 - T5c;
Chris@42 1186 T3L = T27 + T2a;
Chris@42 1187 T3M = KP707106781 * (T2o + T2p);
Chris@42 1188 T3N = T3L - T3M;
Chris@42 1189 T4l = T3L + T3M;
Chris@42 1190 }
Chris@42 1191 }
/* Output stage: stores R0[0], R0[16], R0[8], R0[24]. */
Chris@42 1192 {
Chris@42 1193 E Tv, T10, T6X, T6Y, T6Z, T70;
Chris@42 1194 Tv = Tf + Tu;
Chris@42 1195 T10 = TK + TZ;
Chris@42 1196 T6X = Tv - T10;
Chris@42 1197 T6Y = T6N + T6M;
Chris@42 1198 T6Z = T6R - T6Q;
Chris@42 1199 T70 = T6Y + T6Z;
Chris@42 1200 R0[0] = KP2_000000000 * (Tv + T10);
Chris@42 1201 R0[WS(rs, 16)] = KP2_000000000 * (T6Z - T6Y);
Chris@42 1202 R0[WS(rs, 8)] = KP1_414213562 * (T6X + T70);
Chris@42 1203 R0[WS(rs, 24)] = KP1_414213562 * (T70 - T6X);
Chris@42 1204 }
/* Output stage: stores R0 at indices 4, 28, 20, 12. */
Chris@42 1205 {
Chris@42 1206 E T6P, T6V, T6U, T6W;
Chris@42 1207 {
Chris@42 1208 E T6L, T6O, T6S, T6T;
Chris@42 1209 T6L = Tf - Tu;
Chris@42 1210 T6O = T6M - T6N;
Chris@42 1211 T6P = T6L + T6O;
Chris@42 1212 T6V = T6L - T6O;
Chris@42 1213 T6S = T6Q + T6R;
Chris@42 1214 T6T = TK - TZ;
Chris@42 1215 T6U = T6S - T6T;
Chris@42 1216 T6W = T6T + T6S;
Chris@42 1217 }
Chris@42 1218 R0[WS(rs, 4)] = FMA(KP1_847759065, T6P, KP765366864 * T6U);
Chris@42 1219 R0[WS(rs, 28)] = FNMS(KP1_847759065, T6V, KP765366864 * T6W);
Chris@42 1220 R0[WS(rs, 20)] = FNMS(KP765366864, T6P, KP1_847759065 * T6U);
Chris@42 1221 R0[WS(rs, 12)] = FMA(KP765366864, T6V, KP1_847759065 * T6W);
Chris@42 1222 }
/* Output stage: stores R0 at indices 2, 18, 14, 30, 10, 26, 6, 22. */
Chris@42 1223 {
Chris@42 1224 E T6f, T6w, T6G, T6D, T6z, T6E, T6q, T6H;
Chris@42 1225 T6f = T6b + T6e;
Chris@42 1226 T6w = T6u - T6v;
Chris@42 1227 T6G = T6v + T6u;
Chris@42 1228 T6D = T6b - T6e;
Chris@42 1229 {
Chris@42 1230 E T6x, T6y, T6k, T6p;
Chris@42 1231 T6x = T6g + T6j;
Chris@42 1232 T6y = T6o - T6l;
Chris@42 1233 T6z = KP707106781 * (T6x + T6y);
Chris@42 1234 T6E = KP707106781 * (T6y - T6x);
Chris@42 1235 T6k = T6g - T6j;
Chris@42 1236 T6p = T6l + T6o;
Chris@42 1237 T6q = KP707106781 * (T6k + T6p);
Chris@42 1238 T6H = KP707106781 * (T6k - T6p);
Chris@42 1239 }
Chris@42 1240 {
Chris@42 1241 E T6r, T6A, T6J, T6K;
Chris@42 1242 T6r = T6f + T6q;
Chris@42 1243 T6A = T6w - T6z;
Chris@42 1244 R0[WS(rs, 2)] = FMA(KP1_961570560, T6r, KP390180644 * T6A);
Chris@42 1245 R0[WS(rs, 18)] = FNMS(KP390180644, T6r, KP1_961570560 * T6A);
Chris@42 1246 T6J = T6D - T6E;
Chris@42 1247 T6K = T6H + T6G;
Chris@42 1248 R0[WS(rs, 14)] = FMA(KP390180644, T6J, KP1_961570560 * T6K);
Chris@42 1249 R0[WS(rs, 30)] = FNMS(KP1_961570560, T6J, KP390180644 * T6K);
Chris@42 1250 }
Chris@42 1251 {
Chris@42 1252 E T6B, T6C, T6F, T6I;
Chris@42 1253 T6B = T6f - T6q;
Chris@42 1254 T6C = T6z + T6w;
Chris@42 1255 R0[WS(rs, 10)] = FMA(KP1_111140466, T6B, KP1_662939224 * T6C);
Chris@42 1256 R0[WS(rs, 26)] = FNMS(KP1_662939224, T6B, KP1_111140466 * T6C);
Chris@42 1257 T6F = T6D + T6E;
Chris@42 1258 T6I = T6G - T6H;
Chris@42 1259 R0[WS(rs, 6)] = FMA(KP1_662939224, T6F, KP1_111140466 * T6I);
Chris@42 1260 R0[WS(rs, 22)] = FNMS(KP1_111140466, T6F, KP1_662939224 * T6I);
Chris@42 1261 }
Chris@42 1262 }
/* Output stage: stores R0 at indices 3, 19, 15, 31, 11, 27, 7, 23. */
Chris@42 1263 {
Chris@42 1264 E T5L, T63, T5W, T66, T5S, T67, T5Z, T64, T5K, T5V;
Chris@42 1265 T5K = KP707106781 * (T5s - T5r);
Chris@42 1266 T5L = T5J + T5K;
Chris@42 1267 T63 = T5J - T5K;
Chris@42 1268 T5V = KP707106781 * (T4Q - T4V);
Chris@42 1269 T5W = T5U - T5V;
Chris@42 1270 T66 = T5V + T5U;
Chris@42 1271 {
Chris@42 1272 E T5O, T5R, T5X, T5Y;
Chris@42 1273 T5O = FNMS(KP923879532, T5N, KP382683432 * T5M);
Chris@42 1274 T5R = FMA(KP382683432, T5P, KP923879532 * T5Q);
Chris@42 1275 T5S = T5O + T5R;
Chris@42 1276 T67 = T5O - T5R;
Chris@42 1277 T5X = FMA(KP923879532, T5M, KP382683432 * T5N);
Chris@42 1278 T5Y = FNMS(KP923879532, T5P, KP382683432 * T5Q);
Chris@42 1279 T5Z = T5X + T5Y;
Chris@42 1280 T64 = T5Y - T5X;
Chris@42 1281 }
Chris@42 1282 {
Chris@42 1283 E T5T, T60, T69, T6a;
Chris@42 1284 T5T = T5L + T5S;
Chris@42 1285 T60 = T5W - T5Z;
Chris@42 1286 R0[WS(rs, 3)] = FMA(KP1_913880671, T5T, KP580569354 * T60);
Chris@42 1287 R0[WS(rs, 19)] = FNMS(KP580569354, T5T, KP1_913880671 * T60);
Chris@42 1288 T69 = T63 - T64;
Chris@42 1289 T6a = T67 + T66;
Chris@42 1290 R0[WS(rs, 15)] = FMA(KP196034280, T69, KP1_990369453 * T6a);
Chris@42 1291 R0[WS(rs, 31)] = FNMS(KP1_990369453, T69, KP196034280 * T6a);
Chris@42 1292 }
Chris@42 1293 {
Chris@42 1294 E T61, T62, T65, T68;
Chris@42 1295 T61 = T5L - T5S;
Chris@42 1296 T62 = T5Z + T5W;
Chris@42 1297 R0[WS(rs, 11)] = FMA(KP942793473, T61, KP1_763842528 * T62);
Chris@42 1298 R0[WS(rs, 27)] = FNMS(KP1_763842528, T61, KP942793473 * T62);
Chris@42 1299 T65 = T63 + T64;
Chris@42 1300 T68 = T66 - T67;
Chris@42 1301 R0[WS(rs, 7)] = FMA(KP1_546020906, T65, KP1_268786568 * T68);
Chris@42 1302 R0[WS(rs, 23)] = FNMS(KP1_268786568, T65, KP1_546020906 * T68);
Chris@42 1303 }
Chris@42 1304 }
/* Output stage: stores R0 at indices 1, 17, 13, 29, 9, 25, 5, 21. */
Chris@42 1305 {
Chris@42 1306 E T4X, T5B, T5u, T5E, T5k, T5F, T5x, T5C, T4W, T5t;
Chris@42 1307 T4W = KP707106781 * (T4Q + T4V);
Chris@42 1308 T4X = T4L + T4W;
Chris@42 1309 T5B = T4L - T4W;
Chris@42 1310 T5t = KP707106781 * (T5r + T5s);
Chris@42 1311 T5u = T5q - T5t;
Chris@42 1312 T5E = T5t + T5q;
Chris@42 1313 {
Chris@42 1314 E T58, T5j, T5v, T5w;
Chris@42 1315 T58 = FNMS(KP382683432, T57, KP923879532 * T52);
Chris@42 1316 T5j = FMA(KP923879532, T5d, KP382683432 * T5i);
Chris@42 1317 T5k = T58 + T5j;
Chris@42 1318 T5F = T58 - T5j;
Chris@42 1319 T5v = FMA(KP382683432, T52, KP923879532 * T57);
Chris@42 1320 T5w = FNMS(KP382683432, T5d, KP923879532 * T5i);
Chris@42 1321 T5x = T5v + T5w;
Chris@42 1322 T5C = T5w - T5v;
Chris@42 1323 }
Chris@42 1324 {
Chris@42 1325 E T5l, T5y, T5H, T5I;
Chris@42 1326 T5l = T4X + T5k;
Chris@42 1327 T5y = T5u - T5x;
Chris@42 1328 R0[WS(rs, 1)] = FMA(KP1_990369453, T5l, KP196034280 * T5y);
Chris@42 1329 R0[WS(rs, 17)] = FNMS(KP196034280, T5l, KP1_990369453 * T5y);
Chris@42 1330 T5H = T5B - T5C;
Chris@42 1331 T5I = T5F + T5E;
Chris@42 1332 R0[WS(rs, 13)] = FMA(KP580569354, T5H, KP1_913880671 * T5I);
Chris@42 1333 R0[WS(rs, 29)] = FNMS(KP1_913880671, T5H, KP580569354 * T5I);
Chris@42 1334 }
Chris@42 1335 {
Chris@42 1336 E T5z, T5A, T5D, T5G;
Chris@42 1337 T5z = T4X - T5k;
Chris@42 1338 T5A = T5x + T5u;
Chris@42 1339 R0[WS(rs, 9)] = FMA(KP1_268786568, T5z, KP1_546020906 * T5A);
Chris@42 1340 R0[WS(rs, 25)] = FNMS(KP1_546020906, T5z, KP1_268786568 * T5A);
Chris@42 1341 T5D = T5B + T5C;
Chris@42 1342 T5G = T5E - T5F;
Chris@42 1343 R0[WS(rs, 5)] = FMA(KP1_763842528, T5D, KP942793473 * T5G);
Chris@42 1344 R0[WS(rs, 21)] = FNMS(KP942793473, T5D, KP1_763842528 * T5G);
Chris@42 1345 }
Chris@42 1346 }
/* Output stage: stores R1 at indices 2, 18, 14, 30, 10, 26, 6, 22. */
Chris@42 1347 {
Chris@42 1348 E T33, T3l, T3h, T3m, T3a, T3p, T3e, T3o;
Chris@42 1349 {
Chris@42 1350 E T31, T32, T3f, T3g;
Chris@42 1351 T31 = T15 - T1g;
Chris@42 1352 T32 = T2E - T2D;
Chris@42 1353 T33 = T31 + T32;
Chris@42 1354 T3l = T31 - T32;
Chris@42 1355 T3f = FMA(KP831469612, T34, KP555570233 * T35);
Chris@42 1356 T3g = FNMS(KP831469612, T37, KP555570233 * T38);
Chris@42 1357 T3h = T3f + T3g;
Chris@42 1358 T3m = T3g - T3f;
Chris@42 1359 }
Chris@42 1360 {
Chris@42 1361 E T36, T39, T3c, T3d;
Chris@42 1362 T36 = FNMS(KP831469612, T35, KP555570233 * T34);
Chris@42 1363 T39 = FMA(KP555570233, T37, KP831469612 * T38);
Chris@42 1364 T3a = T36 + T39;
Chris@42 1365 T3p = T36 - T39;
Chris@42 1366 T3c = T2I - T2N;
Chris@42 1367 T3d = T1s - T1D;
Chris@42 1368 T3e = T3c - T3d;
Chris@42 1369 T3o = T3d + T3c;
Chris@42 1370 }
Chris@42 1371 {
Chris@42 1372 E T3b, T3i, T3r, T3s;
Chris@42 1373 T3b = T33 + T3a;
Chris@42 1374 T3i = T3e - T3h;
Chris@42 1375 R1[WS(rs, 2)] = FMA(KP1_940062506, T3b, KP485960359 * T3i);
Chris@42 1376 R1[WS(rs, 18)] = FNMS(KP485960359, T3b, KP1_940062506 * T3i);
Chris@42 1377 T3r = T3l - T3m;
Chris@42 1378 T3s = T3p + T3o;
Chris@42 1379 R1[WS(rs, 14)] = FMA(KP293460948, T3r, KP1_978353019 * T3s);
Chris@42 1380 R1[WS(rs, 30)] = FNMS(KP1_978353019, T3r, KP293460948 * T3s);
Chris@42 1381 }
Chris@42 1382 {
Chris@42 1383 E T3j, T3k, T3n, T3q;
Chris@42 1384 T3j = T33 - T3a;
Chris@42 1385 T3k = T3h + T3e;
Chris@42 1386 R1[WS(rs, 10)] = FMA(KP1_028205488, T3j, KP1_715457220 * T3k);
Chris@42 1387 R1[WS(rs, 26)] = FNMS(KP1_715457220, T3j, KP1_028205488 * T3k);
Chris@42 1388 T3n = T3l + T3m;
Chris@42 1389 T3q = T3o - T3p;
Chris@42 1390 R1[WS(rs, 6)] = FMA(KP1_606415062, T3n, KP1_191398608 * T3q);
Chris@42 1391 R1[WS(rs, 22)] = FNMS(KP1_191398608, T3n, KP1_606415062 * T3q);
Chris@42 1392 }
Chris@42 1393 }
/* Output stage: stores R1 at indices 3, 19, 15, 31, 11, 27, 7, 23. */
Chris@42 1394 {
Chris@42 1395 E T4h, T4z, T4v, T4A, T4o, T4D, T4s, T4C;
Chris@42 1396 {
Chris@42 1397 E T4f, T4g, T4t, T4u;
Chris@42 1398 T4f = T3t + T3u;
Chris@42 1399 T4g = T3X + T3Y;
Chris@42 1400 T4h = T4f - T4g;
Chris@42 1401 T4z = T4f + T4g;
Chris@42 1402 T4t = FMA(KP980785280, T4i, KP195090322 * T4j);
Chris@42 1403 T4u = FMA(KP980785280, T4l, KP195090322 * T4m);
Chris@42 1404 T4v = T4t - T4u;
Chris@42 1405 T4A = T4t + T4u;
Chris@42 1406 }
Chris@42 1407 {
Chris@42 1408 E T4k, T4n, T4q, T4r;
Chris@42 1409 T4k = FNMS(KP980785280, T4j, KP195090322 * T4i);
Chris@42 1410 T4n = FNMS(KP980785280, T4m, KP195090322 * T4l);
Chris@42 1411 T4o = T4k + T4n;
Chris@42 1412 T4D = T4k - T4n;
Chris@42 1413 T4q = T3V + T3U;
Chris@42 1414 T4r = T3y - T3B;
Chris@42 1415 T4s = T4q - T4r;
Chris@42 1416 T4C = T4r + T4q;
Chris@42 1417 }
Chris@42 1418 {
Chris@42 1419 E T4p, T4w, T4F, T4G;
Chris@42 1420 T4p = T4h + T4o;
Chris@42 1421 T4w = T4s - T4v;
Chris@42 1422 R1[WS(rs, 3)] = FMA(KP1_883088130, T4p, KP673779706 * T4w);
Chris@42 1423 R1[WS(rs, 19)] = FNMS(KP673779706, T4p, KP1_883088130 * T4w);
Chris@42 1424 T4F = T4z + T4A;
Chris@42 1425 T4G = T4D + T4C;
Chris@42 1426 R1[WS(rs, 15)] = FMA(KP098135348, T4F, KP1_997590912 * T4G);
Chris@42 1427 R1[WS(rs, 31)] = FNMS(KP1_997590912, T4F, KP098135348 * T4G);
Chris@42 1428 }
Chris@42 1429 {
Chris@42 1430 E T4x, T4y, T4B, T4E;
Chris@42 1431 T4x = T4h - T4o;
Chris@42 1432 T4y = T4v + T4s;
Chris@42 1433 R1[WS(rs, 11)] = FMA(KP855110186, T4x, KP1_807978586 * T4y);
Chris@42 1434 R1[WS(rs, 27)] = FNMS(KP1_807978586, T4x, KP855110186 * T4y);
Chris@42 1435 T4B = T4z - T4A;
Chris@42 1436 T4E = T4C - T4D;
Chris@42 1437 R1[WS(rs, 7)] = FMA(KP1_481902250, T4B, KP1_343117909 * T4E);
Chris@42 1438 R1[WS(rs, 23)] = FNMS(KP1_343117909, T4B, KP1_481902250 * T4E);
Chris@42 1439 }
Chris@42 1440 }
/* Output stage: stores R1 at indices 0, 16, 12, 28, 8, 24, 4, 20. */
Chris@42 1441 {
Chris@42 1442 E T1F, T2T, T2P, T2W, T2y, T2X, T2C, T2U;
Chris@42 1443 {
Chris@42 1444 E T1h, T1E, T2F, T2O;
Chris@42 1445 T1h = T15 + T1g;
Chris@42 1446 T1E = T1s + T1D;
Chris@42 1447 T1F = T1h + T1E;
Chris@42 1448 T2T = T1h - T1E;
Chris@42 1449 T2F = T2D + T2E;
Chris@42 1450 T2O = T2I + T2N;
Chris@42 1451 T2P = T2F + T2O;
Chris@42 1452 T2W = T2F - T2O;
Chris@42 1453 }
Chris@42 1454 {
Chris@42 1455 E T26, T2x, T2A, T2B;
Chris@42 1456 T26 = FNMS(KP195090322, T25, KP980785280 * T1W);
Chris@42 1457 T2x = FMA(KP980785280, T2n, KP195090322 * T2w);
Chris@42 1458 T2y = T26 + T2x;
Chris@42 1459 T2X = T26 - T2x;
Chris@42 1460 T2A = FMA(KP195090322, T1W, KP980785280 * T25);
Chris@42 1461 T2B = FNMS(KP195090322, T2n, KP980785280 * T2w);
Chris@42 1462 T2C = T2A + T2B;
Chris@42 1463 T2U = T2B - T2A;
Chris@42 1464 }
Chris@42 1465 {
Chris@42 1466 E T2z, T2Q, T2Z, T30;
Chris@42 1467 T2z = T1F + T2y;
Chris@42 1468 T2Q = T2C + T2P;
Chris@42 1469 R1[0] = FNMS(KP098135348, T2Q, KP1_997590912 * T2z);
Chris@42 1470 R1[WS(rs, 16)] = -(FMA(KP098135348, T2z, KP1_997590912 * T2Q));
Chris@42 1471 T2Z = T2T - T2U;
Chris@42 1472 T30 = T2X + T2W;
Chris@42 1473 R1[WS(rs, 12)] = FMA(KP673779706, T2Z, KP1_883088130 * T30);
Chris@42 1474 R1[WS(rs, 28)] = FNMS(KP1_883088130, T2Z, KP673779706 * T30);
Chris@42 1475 }
Chris@42 1476 {
Chris@42 1477 E T2R, T2S, T2V, T2Y;
Chris@42 1478 T2R = T1F - T2y;
Chris@42 1479 T2S = T2C - T2P;
Chris@42 1480 R1[WS(rs, 8)] = FMA(KP1_343117909, T2R, KP1_481902250 * T2S);
Chris@42 1481 R1[WS(rs, 24)] = FNMS(KP1_481902250, T2R, KP1_343117909 * T2S);
Chris@42 1482 T2V = T2T + T2U;
Chris@42 1483 T2Y = T2W - T2X;
Chris@42 1484 R1[WS(rs, 4)] = FMA(KP1_807978586, T2V, KP855110186 * T2Y);
Chris@42 1485 R1[WS(rs, 20)] = FNMS(KP855110186, T2V, KP1_807978586 * T2Y);
Chris@42 1486 }
Chris@42 1487 }
/* Output stage: stores R1 at indices 1, 17, 13, 29, 9, 25, 5, 21. */
Chris@42 1488 {
Chris@42 1489 E T3D, T47, T43, T48, T3S, T4b, T40, T4a;
Chris@42 1490 {
Chris@42 1491 E T3v, T3C, T41, T42;
Chris@42 1492 T3v = T3t - T3u;
Chris@42 1493 T3C = T3y + T3B;
Chris@42 1494 T3D = T3v + T3C;
Chris@42 1495 T47 = T3v - T3C;
Chris@42 1496 T41 = FMA(KP555570233, T3G, KP831469612 * T3J);
Chris@42 1497 T42 = FNMS(KP555570233, T3N, KP831469612 * T3Q);
Chris@42 1498 T43 = T41 + T42;
Chris@42 1499 T48 = T42 - T41;
Chris@42 1500 }
Chris@42 1501 {
Chris@42 1502 E T3K, T3R, T3W, T3Z;
Chris@42 1503 T3K = FNMS(KP555570233, T3J, KP831469612 * T3G);
Chris@42 1504 T3R = FMA(KP831469612, T3N, KP555570233 * T3Q);
Chris@42 1505 T3S = T3K + T3R;
Chris@42 1506 T4b = T3K - T3R;
Chris@42 1507 T3W = T3U - T3V;
Chris@42 1508 T3Z = T3X - T3Y;
Chris@42 1509 T40 = T3W - T3Z;
Chris@42 1510 T4a = T3Z + T3W;
Chris@42 1511 }
Chris@42 1512 {
Chris@42 1513 E T3T, T44, T4d, T4e;
Chris@42 1514 T3T = T3D + T3S;
Chris@42 1515 T44 = T40 - T43;
Chris@42 1516 R1[WS(rs, 1)] = FMA(KP1_978353019, T3T, KP293460948 * T44);
Chris@42 1517 R1[WS(rs, 17)] = FNMS(KP293460948, T3T, KP1_978353019 * T44);
Chris@42 1518 T4d = T47 - T48;
Chris@42 1519 T4e = T4b + T4a;
Chris@42 1520 R1[WS(rs, 13)] = FMA(KP485960359, T4d, KP1_940062506 * T4e);
Chris@42 1521 R1[WS(rs, 29)] = FNMS(KP1_940062506, T4d, KP485960359 * T4e);
Chris@42 1522 }
Chris@42 1523 {
Chris@42 1524 E T45, T46, T49, T4c;
Chris@42 1525 T45 = T3D - T3S;
Chris@42 1526 T46 = T43 + T40;
Chris@42 1527 R1[WS(rs, 9)] = FMA(KP1_191398608, T45, KP1_606415062 * T46);
Chris@42 1528 R1[WS(rs, 25)] = FNMS(KP1_606415062, T45, KP1_191398608 * T46);
Chris@42 1529 T49 = T47 + T48;
Chris@42 1530 T4c = T4a - T4b;
Chris@42 1531 R1[WS(rs, 5)] = FMA(KP1_715457220, T49, KP1_028205488 * T4c);
Chris@42 1532 R1[WS(rs, 21)] = FNMS(KP1_028205488, T49, KP1_715457220 * T4c);
Chris@42 1533 }
Chris@42 1534 }
Chris@42 1535 }
Chris@42 1536 }
Chris@42 1537 }
Chris@42 1538
/* Codelet descriptor used by the non-FMA build of this file.
 * Fields as written: 64 (matches the generator's "-n 64"), the kernel name
 * string, and a four-entry count tuple whose first three values (342, 116, 92)
 * equal the additions / multiplications / fused multiply-add counts quoted in
 * the generated comment preceding the non-FMA kernel.
 * NOTE(review): the meaning of the fourth entry (0) and of GENUS is not visible
 * in this file (presumably declared via codelet-rdft.h / r2cbIII.h) -- confirm. */
Chris@42 1539 static const kr2c_desc desc = { 64, "r2cbIII_64", {342, 116, 92, 0}, &GENUS };
Chris@42 1540
/* Registration entry point (non-FMA build): hands the r2cbIII_64 kernel and
 * its descriptor `desc` to kr2c_register for the planner `p`.
 * NOTE(review): X() is a project macro applied to both identifiers; its
 * expansion is not visible here (presumably a symbol-prefix helper). */
Chris@42 1541 void X(codelet_r2cbIII_64) (planner *p) {
Chris@42 1542 X(kr2c_register) (p, r2cbIII_64, &desc);
Chris@42 1543 }
Chris@42 1544
Chris@42 1545 #endif /* HAVE_FMA */