annotate src/fftw-3.3.5/dft/scalar/codelets/q1_8.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:37:39 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-dft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_twidsq.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 8 -name q1_8 -include q.h */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 528 FP additions, 288 FP multiplications,
Chris@42 32 * (or, 352 additions, 112 multiplications, 176 fused multiply/add),
Chris@42 33 * 190 stack variables, 1 constants, and 256 memory accesses
Chris@42 34 */
Chris@42 35 #include "q.h"
Chris@42 36
Chris@42 37 static void q1_8(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
Chris@42 38 {
Chris@42 39 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 40 {
Chris@42 41 INT m;
Chris@42 42 for (m = mb, W = W + (mb * 14); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
Chris@42 43 E T9C, T9N, T9l, T9E, T9D, T9O;
Chris@42 44 {
Chris@42 45 E TV, Tk, T1d, T7, T18, T1t, TQ, TD, T5t, T4S, T5L, T4F, T5G, T61, T5o;
Chris@42 46 E T5b, T6Z, T6o, T7h, T6b, T7c, T7x, T6U, T6H, Tbx, TaW, TbP, TaJ, TbK, Tc5;
Chris@42 47 E Tbs, Tbf, T2r, T1Q, T2J, T1D, T2E, T2Z, T2m, T29, T3X, T3m, T4f, T39, T4a;
Chris@42 48 E T4v, T3S, T3F, T8v, T7U, T8N, T7H, T8I, T93, T8q, T8d, Ta1, T9q, Taj, T9d;
Chris@42 49 E Tae, Taz, T9W, T9J, Te, T19, T1u, T1g, Tv, TR, TG, TW, T5H, T4M, T5O;
Chris@42 50 E T62, T5p, T53, T5u, T5e, T6i, T7d, T7y, T7k, T6z, T6V, T6K, T70, TbL, TaQ;
Chris@42 51 E TbS, Tc6, Tbt, Tb7, Tby, Tbi, T1K, T2F, T30, T2M, T21, T2n, T2c, T2s, T4b;
Chris@42 52 E T3g, T4i, T4w, T3T, T3x, T3Y, T3I, T7O, T8J, T94, T8Q, T85, T8r, T8g, T8w;
Chris@42 53 E Tak, T9r, T9K, T9A, Taf, T9k, Tal, T9u;
Chris@42 54 {
Chris@42 55 E T9a, T9F, T99, Tac, T9p, T9b, T9G, T9H;
Chris@42 56 {
Chris@42 57 E TaG, Tbb, TaF, TbI, TaV, TaH, Tbc, Tbd;
Chris@42 58 {
Chris@42 59 E T4C, T57, T4B, T5E, T4R, T4D, T58, T59;
Chris@42 60 {
Chris@42 61 E T4, Tz, T3, T16, Tj, T5, TA, TB;
Chris@42 62 {
Chris@42 63 E T1, T2, Th, Ti;
Chris@42 64 T1 = rio[0];
Chris@42 65 T2 = rio[WS(rs, 4)];
Chris@42 66 Th = iio[0];
Chris@42 67 Ti = iio[WS(rs, 4)];
Chris@42 68 T4 = rio[WS(rs, 2)];
Chris@42 69 Tz = T1 - T2;
Chris@42 70 T3 = T1 + T2;
Chris@42 71 T16 = Th + Ti;
Chris@42 72 Tj = Th - Ti;
Chris@42 73 T5 = rio[WS(rs, 6)];
Chris@42 74 TA = iio[WS(rs, 2)];
Chris@42 75 TB = iio[WS(rs, 6)];
Chris@42 76 }
Chris@42 77 {
Chris@42 78 E T4z, T4A, T4P, T4Q;
Chris@42 79 T4z = rio[WS(vs, 3)];
Chris@42 80 {
Chris@42 81 E Tg, T6, T17, TC;
Chris@42 82 Tg = T4 - T5;
Chris@42 83 T6 = T4 + T5;
Chris@42 84 T17 = TA + TB;
Chris@42 85 TC = TA - TB;
Chris@42 86 TV = Tj - Tg;
Chris@42 87 Tk = Tg + Tj;
Chris@42 88 T1d = T3 - T6;
Chris@42 89 T7 = T3 + T6;
Chris@42 90 T18 = T16 - T17;
Chris@42 91 T1t = T16 + T17;
Chris@42 92 TQ = Tz + TC;
Chris@42 93 TD = Tz - TC;
Chris@42 94 T4A = rio[WS(vs, 3) + WS(rs, 4)];
Chris@42 95 }
Chris@42 96 T4P = iio[WS(vs, 3)];
Chris@42 97 T4Q = iio[WS(vs, 3) + WS(rs, 4)];
Chris@42 98 T4C = rio[WS(vs, 3) + WS(rs, 2)];
Chris@42 99 T57 = T4z - T4A;
Chris@42 100 T4B = T4z + T4A;
Chris@42 101 T5E = T4P + T4Q;
Chris@42 102 T4R = T4P - T4Q;
Chris@42 103 T4D = rio[WS(vs, 3) + WS(rs, 6)];
Chris@42 104 T58 = iio[WS(vs, 3) + WS(rs, 2)];
Chris@42 105 T59 = iio[WS(vs, 3) + WS(rs, 6)];
Chris@42 106 }
Chris@42 107 }
Chris@42 108 {
Chris@42 109 E T68, T6D, T67, T7a, T6n, T69, T6E, T6F;
Chris@42 110 {
Chris@42 111 E T65, T66, T6l, T6m;
Chris@42 112 T65 = rio[WS(vs, 4)];
Chris@42 113 {
Chris@42 114 E T4O, T4E, T5F, T5a;
Chris@42 115 T4O = T4C - T4D;
Chris@42 116 T4E = T4C + T4D;
Chris@42 117 T5F = T58 + T59;
Chris@42 118 T5a = T58 - T59;
Chris@42 119 T5t = T4R - T4O;
Chris@42 120 T4S = T4O + T4R;
Chris@42 121 T5L = T4B - T4E;
Chris@42 122 T4F = T4B + T4E;
Chris@42 123 T5G = T5E - T5F;
Chris@42 124 T61 = T5E + T5F;
Chris@42 125 T5o = T57 + T5a;
Chris@42 126 T5b = T57 - T5a;
Chris@42 127 T66 = rio[WS(vs, 4) + WS(rs, 4)];
Chris@42 128 }
Chris@42 129 T6l = iio[WS(vs, 4)];
Chris@42 130 T6m = iio[WS(vs, 4) + WS(rs, 4)];
Chris@42 131 T68 = rio[WS(vs, 4) + WS(rs, 2)];
Chris@42 132 T6D = T65 - T66;
Chris@42 133 T67 = T65 + T66;
Chris@42 134 T7a = T6l + T6m;
Chris@42 135 T6n = T6l - T6m;
Chris@42 136 T69 = rio[WS(vs, 4) + WS(rs, 6)];
Chris@42 137 T6E = iio[WS(vs, 4) + WS(rs, 2)];
Chris@42 138 T6F = iio[WS(vs, 4) + WS(rs, 6)];
Chris@42 139 }
Chris@42 140 {
Chris@42 141 E TaD, TaE, TaT, TaU;
Chris@42 142 TaD = rio[WS(vs, 7)];
Chris@42 143 {
Chris@42 144 E T6k, T6a, T7b, T6G;
Chris@42 145 T6k = T68 - T69;
Chris@42 146 T6a = T68 + T69;
Chris@42 147 T7b = T6E + T6F;
Chris@42 148 T6G = T6E - T6F;
Chris@42 149 T6Z = T6n - T6k;
Chris@42 150 T6o = T6k + T6n;
Chris@42 151 T7h = T67 - T6a;
Chris@42 152 T6b = T67 + T6a;
Chris@42 153 T7c = T7a - T7b;
Chris@42 154 T7x = T7a + T7b;
Chris@42 155 T6U = T6D + T6G;
Chris@42 156 T6H = T6D - T6G;
Chris@42 157 TaE = rio[WS(vs, 7) + WS(rs, 4)];
Chris@42 158 }
Chris@42 159 TaT = iio[WS(vs, 7)];
Chris@42 160 TaU = iio[WS(vs, 7) + WS(rs, 4)];
Chris@42 161 TaG = rio[WS(vs, 7) + WS(rs, 2)];
Chris@42 162 Tbb = TaD - TaE;
Chris@42 163 TaF = TaD + TaE;
Chris@42 164 TbI = TaT + TaU;
Chris@42 165 TaV = TaT - TaU;
Chris@42 166 TaH = rio[WS(vs, 7) + WS(rs, 6)];
Chris@42 167 Tbc = iio[WS(vs, 7) + WS(rs, 2)];
Chris@42 168 Tbd = iio[WS(vs, 7) + WS(rs, 6)];
Chris@42 169 }
Chris@42 170 }
Chris@42 171 }
Chris@42 172 {
Chris@42 173 E T36, T3B, T35, T48, T3l, T37, T3C, T3D;
Chris@42 174 {
Chris@42 175 E T1A, T25, T1z, T2C, T1P, T1B, T26, T27;
Chris@42 176 {
Chris@42 177 E T1x, T1y, T1N, T1O;
Chris@42 178 T1x = rio[WS(vs, 1)];
Chris@42 179 {
Chris@42 180 E TaS, TaI, TbJ, Tbe;
Chris@42 181 TaS = TaG - TaH;
Chris@42 182 TaI = TaG + TaH;
Chris@42 183 TbJ = Tbc + Tbd;
Chris@42 184 Tbe = Tbc - Tbd;
Chris@42 185 Tbx = TaV - TaS;
Chris@42 186 TaW = TaS + TaV;
Chris@42 187 TbP = TaF - TaI;
Chris@42 188 TaJ = TaF + TaI;
Chris@42 189 TbK = TbI - TbJ;
Chris@42 190 Tc5 = TbI + TbJ;
Chris@42 191 Tbs = Tbb + Tbe;
Chris@42 192 Tbf = Tbb - Tbe;
Chris@42 193 T1y = rio[WS(vs, 1) + WS(rs, 4)];
Chris@42 194 }
Chris@42 195 T1N = iio[WS(vs, 1)];
Chris@42 196 T1O = iio[WS(vs, 1) + WS(rs, 4)];
Chris@42 197 T1A = rio[WS(vs, 1) + WS(rs, 2)];
Chris@42 198 T25 = T1x - T1y;
Chris@42 199 T1z = T1x + T1y;
Chris@42 200 T2C = T1N + T1O;
Chris@42 201 T1P = T1N - T1O;
Chris@42 202 T1B = rio[WS(vs, 1) + WS(rs, 6)];
Chris@42 203 T26 = iio[WS(vs, 1) + WS(rs, 2)];
Chris@42 204 T27 = iio[WS(vs, 1) + WS(rs, 6)];
Chris@42 205 }
Chris@42 206 {
Chris@42 207 E T33, T34, T3j, T3k;
Chris@42 208 T33 = rio[WS(vs, 2)];
Chris@42 209 {
Chris@42 210 E T1M, T1C, T2D, T28;
Chris@42 211 T1M = T1A - T1B;
Chris@42 212 T1C = T1A + T1B;
Chris@42 213 T2D = T26 + T27;
Chris@42 214 T28 = T26 - T27;
Chris@42 215 T2r = T1P - T1M;
Chris@42 216 T1Q = T1M + T1P;
Chris@42 217 T2J = T1z - T1C;
Chris@42 218 T1D = T1z + T1C;
Chris@42 219 T2E = T2C - T2D;
Chris@42 220 T2Z = T2C + T2D;
Chris@42 221 T2m = T25 + T28;
Chris@42 222 T29 = T25 - T28;
Chris@42 223 T34 = rio[WS(vs, 2) + WS(rs, 4)];
Chris@42 224 }
Chris@42 225 T3j = iio[WS(vs, 2)];
Chris@42 226 T3k = iio[WS(vs, 2) + WS(rs, 4)];
Chris@42 227 T36 = rio[WS(vs, 2) + WS(rs, 2)];
Chris@42 228 T3B = T33 - T34;
Chris@42 229 T35 = T33 + T34;
Chris@42 230 T48 = T3j + T3k;
Chris@42 231 T3l = T3j - T3k;
Chris@42 232 T37 = rio[WS(vs, 2) + WS(rs, 6)];
Chris@42 233 T3C = iio[WS(vs, 2) + WS(rs, 2)];
Chris@42 234 T3D = iio[WS(vs, 2) + WS(rs, 6)];
Chris@42 235 }
Chris@42 236 }
Chris@42 237 {
Chris@42 238 E T7E, T89, T7D, T8G, T7T, T7F, T8a, T8b;
Chris@42 239 {
Chris@42 240 E T7B, T7C, T7R, T7S;
Chris@42 241 T7B = rio[WS(vs, 5)];
Chris@42 242 {
Chris@42 243 E T3i, T38, T49, T3E;
Chris@42 244 T3i = T36 - T37;
Chris@42 245 T38 = T36 + T37;
Chris@42 246 T49 = T3C + T3D;
Chris@42 247 T3E = T3C - T3D;
Chris@42 248 T3X = T3l - T3i;
Chris@42 249 T3m = T3i + T3l;
Chris@42 250 T4f = T35 - T38;
Chris@42 251 T39 = T35 + T38;
Chris@42 252 T4a = T48 - T49;
Chris@42 253 T4v = T48 + T49;
Chris@42 254 T3S = T3B + T3E;
Chris@42 255 T3F = T3B - T3E;
Chris@42 256 T7C = rio[WS(vs, 5) + WS(rs, 4)];
Chris@42 257 }
Chris@42 258 T7R = iio[WS(vs, 5)];
Chris@42 259 T7S = iio[WS(vs, 5) + WS(rs, 4)];
Chris@42 260 T7E = rio[WS(vs, 5) + WS(rs, 2)];
Chris@42 261 T89 = T7B - T7C;
Chris@42 262 T7D = T7B + T7C;
Chris@42 263 T8G = T7R + T7S;
Chris@42 264 T7T = T7R - T7S;
Chris@42 265 T7F = rio[WS(vs, 5) + WS(rs, 6)];
Chris@42 266 T8a = iio[WS(vs, 5) + WS(rs, 2)];
Chris@42 267 T8b = iio[WS(vs, 5) + WS(rs, 6)];
Chris@42 268 }
Chris@42 269 {
Chris@42 270 E T97, T98, T9n, T9o;
Chris@42 271 T97 = rio[WS(vs, 6)];
Chris@42 272 {
Chris@42 273 E T7Q, T7G, T8H, T8c;
Chris@42 274 T7Q = T7E - T7F;
Chris@42 275 T7G = T7E + T7F;
Chris@42 276 T8H = T8a + T8b;
Chris@42 277 T8c = T8a - T8b;
Chris@42 278 T8v = T7T - T7Q;
Chris@42 279 T7U = T7Q + T7T;
Chris@42 280 T8N = T7D - T7G;
Chris@42 281 T7H = T7D + T7G;
Chris@42 282 T8I = T8G - T8H;
Chris@42 283 T93 = T8G + T8H;
Chris@42 284 T8q = T89 + T8c;
Chris@42 285 T8d = T89 - T8c;
Chris@42 286 T98 = rio[WS(vs, 6) + WS(rs, 4)];
Chris@42 287 }
Chris@42 288 T9n = iio[WS(vs, 6)];
Chris@42 289 T9o = iio[WS(vs, 6) + WS(rs, 4)];
Chris@42 290 T9a = rio[WS(vs, 6) + WS(rs, 2)];
Chris@42 291 T9F = T97 - T98;
Chris@42 292 T99 = T97 + T98;
Chris@42 293 Tac = T9n + T9o;
Chris@42 294 T9p = T9n - T9o;
Chris@42 295 T9b = rio[WS(vs, 6) + WS(rs, 6)];
Chris@42 296 T9G = iio[WS(vs, 6) + WS(rs, 2)];
Chris@42 297 T9H = iio[WS(vs, 6) + WS(rs, 6)];
Chris@42 298 }
Chris@42 299 }
Chris@42 300 }
Chris@42 301 }
Chris@42 302 {
Chris@42 303 E TbQ, TaX, Tbg, Tb6, TbR, Tb0;
Chris@42 304 {
Chris@42 305 E T5M, T4T, T5c, T52, T5N, T4W;
Chris@42 306 {
Chris@42 307 E Tu, TE, TF, Tp;
Chris@42 308 {
Chris@42 309 E Tb, Tq, Ta, T1e, Tt, Tc, Tm, Tn;
Chris@42 310 {
Chris@42 311 E T8, T9, Tr, Ts;
Chris@42 312 T8 = rio[WS(rs, 1)];
Chris@42 313 {
Chris@42 314 E T9m, T9c, Tad, T9I;
Chris@42 315 T9m = T9a - T9b;
Chris@42 316 T9c = T9a + T9b;
Chris@42 317 Tad = T9G + T9H;
Chris@42 318 T9I = T9G - T9H;
Chris@42 319 Ta1 = T9p - T9m;
Chris@42 320 T9q = T9m + T9p;
Chris@42 321 Taj = T99 - T9c;
Chris@42 322 T9d = T99 + T9c;
Chris@42 323 Tae = Tac - Tad;
Chris@42 324 Taz = Tac + Tad;
Chris@42 325 T9W = T9F + T9I;
Chris@42 326 T9J = T9F - T9I;
Chris@42 327 T9 = rio[WS(rs, 5)];
Chris@42 328 }
Chris@42 329 Tr = iio[WS(rs, 1)];
Chris@42 330 Ts = iio[WS(rs, 5)];
Chris@42 331 Tb = rio[WS(rs, 7)];
Chris@42 332 Tq = T8 - T9;
Chris@42 333 Ta = T8 + T9;
Chris@42 334 T1e = Tr + Ts;
Chris@42 335 Tt = Tr - Ts;
Chris@42 336 Tc = rio[WS(rs, 3)];
Chris@42 337 Tm = iio[WS(rs, 7)];
Chris@42 338 Tn = iio[WS(rs, 3)];
Chris@42 339 }
Chris@42 340 {
Chris@42 341 E Tl, Td, T1f, To;
Chris@42 342 Tu = Tq + Tt;
Chris@42 343 TE = Tt - Tq;
Chris@42 344 Tl = Tb - Tc;
Chris@42 345 Td = Tb + Tc;
Chris@42 346 T1f = Tm + Tn;
Chris@42 347 To = Tm - Tn;
Chris@42 348 Te = Ta + Td;
Chris@42 349 T19 = Td - Ta;
Chris@42 350 T1u = T1e + T1f;
Chris@42 351 T1g = T1e - T1f;
Chris@42 352 TF = Tl + To;
Chris@42 353 Tp = Tl - To;
Chris@42 354 }
Chris@42 355 }
Chris@42 356 {
Chris@42 357 E T4I, T4Y, T4U, T51, T4L, T4V;
Chris@42 358 {
Chris@42 359 E T4Z, T50, T4G, T4H, T4J, T4K;
Chris@42 360 T4G = rio[WS(vs, 3) + WS(rs, 1)];
Chris@42 361 T4H = rio[WS(vs, 3) + WS(rs, 5)];
Chris@42 362 Tv = Tp - Tu;
Chris@42 363 TR = Tu + Tp;
Chris@42 364 TG = TE - TF;
Chris@42 365 TW = TE + TF;
Chris@42 366 T4I = T4G + T4H;
Chris@42 367 T4Y = T4G - T4H;
Chris@42 368 T4Z = iio[WS(vs, 3) + WS(rs, 1)];
Chris@42 369 T50 = iio[WS(vs, 3) + WS(rs, 5)];
Chris@42 370 T4J = rio[WS(vs, 3) + WS(rs, 7)];
Chris@42 371 T4K = rio[WS(vs, 3) + WS(rs, 3)];
Chris@42 372 T4U = iio[WS(vs, 3) + WS(rs, 7)];
Chris@42 373 T51 = T4Z - T50;
Chris@42 374 T5M = T4Z + T50;
Chris@42 375 T4L = T4J + T4K;
Chris@42 376 T4T = T4J - T4K;
Chris@42 377 T4V = iio[WS(vs, 3) + WS(rs, 3)];
Chris@42 378 }
Chris@42 379 T5c = T51 - T4Y;
Chris@42 380 T52 = T4Y + T51;
Chris@42 381 T5H = T4L - T4I;
Chris@42 382 T4M = T4I + T4L;
Chris@42 383 T5N = T4U + T4V;
Chris@42 384 T4W = T4U - T4V;
Chris@42 385 }
Chris@42 386 }
Chris@42 387 {
Chris@42 388 E T7i, T6p, T6y, T6I, T6s, T7j;
Chris@42 389 {
Chris@42 390 E T6e, T6u, T6q, T6x, T6h, T6r;
Chris@42 391 {
Chris@42 392 E T6v, T6w, T6f, T6g;
Chris@42 393 {
Chris@42 394 E T4X, T5d, T6c, T6d;
Chris@42 395 T6c = rio[WS(vs, 4) + WS(rs, 1)];
Chris@42 396 T6d = rio[WS(vs, 4) + WS(rs, 5)];
Chris@42 397 T5O = T5M - T5N;
Chris@42 398 T62 = T5M + T5N;
Chris@42 399 T4X = T4T - T4W;
Chris@42 400 T5d = T4T + T4W;
Chris@42 401 T6e = T6c + T6d;
Chris@42 402 T6u = T6c - T6d;
Chris@42 403 T5p = T52 + T4X;
Chris@42 404 T53 = T4X - T52;
Chris@42 405 T5u = T5c + T5d;
Chris@42 406 T5e = T5c - T5d;
Chris@42 407 T6v = iio[WS(vs, 4) + WS(rs, 1)];
Chris@42 408 T6w = iio[WS(vs, 4) + WS(rs, 5)];
Chris@42 409 }
Chris@42 410 T6f = rio[WS(vs, 4) + WS(rs, 7)];
Chris@42 411 T6g = rio[WS(vs, 4) + WS(rs, 3)];
Chris@42 412 T6q = iio[WS(vs, 4) + WS(rs, 7)];
Chris@42 413 T7i = T6v + T6w;
Chris@42 414 T6x = T6v - T6w;
Chris@42 415 T6p = T6f - T6g;
Chris@42 416 T6h = T6f + T6g;
Chris@42 417 T6r = iio[WS(vs, 4) + WS(rs, 3)];
Chris@42 418 }
Chris@42 419 T6y = T6u + T6x;
Chris@42 420 T6I = T6x - T6u;
Chris@42 421 T6i = T6e + T6h;
Chris@42 422 T7d = T6h - T6e;
Chris@42 423 T6s = T6q - T6r;
Chris@42 424 T7j = T6q + T6r;
Chris@42 425 }
Chris@42 426 {
Chris@42 427 E Tb2, TaM, TaY, Tb5, TaP, TaZ;
Chris@42 428 {
Chris@42 429 E Tb3, Tb4, TaN, TaO;
Chris@42 430 {
Chris@42 431 E T6J, T6t, TaK, TaL;
Chris@42 432 TaK = rio[WS(vs, 7) + WS(rs, 1)];
Chris@42 433 TaL = rio[WS(vs, 7) + WS(rs, 5)];
Chris@42 434 T7y = T7i + T7j;
Chris@42 435 T7k = T7i - T7j;
Chris@42 436 T6J = T6p + T6s;
Chris@42 437 T6t = T6p - T6s;
Chris@42 438 Tb2 = TaK - TaL;
Chris@42 439 TaM = TaK + TaL;
Chris@42 440 T6z = T6t - T6y;
Chris@42 441 T6V = T6y + T6t;
Chris@42 442 T6K = T6I - T6J;
Chris@42 443 T70 = T6I + T6J;
Chris@42 444 Tb3 = iio[WS(vs, 7) + WS(rs, 1)];
Chris@42 445 Tb4 = iio[WS(vs, 7) + WS(rs, 5)];
Chris@42 446 }
Chris@42 447 TaN = rio[WS(vs, 7) + WS(rs, 7)];
Chris@42 448 TaO = rio[WS(vs, 7) + WS(rs, 3)];
Chris@42 449 TaY = iio[WS(vs, 7) + WS(rs, 7)];
Chris@42 450 Tb5 = Tb3 - Tb4;
Chris@42 451 TbQ = Tb3 + Tb4;
Chris@42 452 TaP = TaN + TaO;
Chris@42 453 TaX = TaN - TaO;
Chris@42 454 TaZ = iio[WS(vs, 7) + WS(rs, 3)];
Chris@42 455 }
Chris@42 456 Tbg = Tb5 - Tb2;
Chris@42 457 Tb6 = Tb2 + Tb5;
Chris@42 458 TbL = TaP - TaM;
Chris@42 459 TaQ = TaM + TaP;
Chris@42 460 TbR = TaY + TaZ;
Chris@42 461 Tb0 = TaY - TaZ;
Chris@42 462 }
Chris@42 463 }
Chris@42 464 }
Chris@42 465 {
Chris@42 466 E T4g, T3n, T3G, T3w, T4h, T3q;
Chris@42 467 {
Chris@42 468 E T2K, T1R, T20, T2a, T1U, T2L;
Chris@42 469 {
Chris@42 470 E T1G, T1W, T1S, T1Z, T1J, T1T;
Chris@42 471 {
Chris@42 472 E T1X, T1Y, T1H, T1I;
Chris@42 473 {
Chris@42 474 E Tb1, Tbh, T1E, T1F;
Chris@42 475 T1E = rio[WS(vs, 1) + WS(rs, 1)];
Chris@42 476 T1F = rio[WS(vs, 1) + WS(rs, 5)];
Chris@42 477 TbS = TbQ - TbR;
Chris@42 478 Tc6 = TbQ + TbR;
Chris@42 479 Tb1 = TaX - Tb0;
Chris@42 480 Tbh = TaX + Tb0;
Chris@42 481 T1G = T1E + T1F;
Chris@42 482 T1W = T1E - T1F;
Chris@42 483 Tbt = Tb6 + Tb1;
Chris@42 484 Tb7 = Tb1 - Tb6;
Chris@42 485 Tby = Tbg + Tbh;
Chris@42 486 Tbi = Tbg - Tbh;
Chris@42 487 T1X = iio[WS(vs, 1) + WS(rs, 1)];
Chris@42 488 T1Y = iio[WS(vs, 1) + WS(rs, 5)];
Chris@42 489 }
Chris@42 490 T1H = rio[WS(vs, 1) + WS(rs, 7)];
Chris@42 491 T1I = rio[WS(vs, 1) + WS(rs, 3)];
Chris@42 492 T1S = iio[WS(vs, 1) + WS(rs, 7)];
Chris@42 493 T2K = T1X + T1Y;
Chris@42 494 T1Z = T1X - T1Y;
Chris@42 495 T1R = T1H - T1I;
Chris@42 496 T1J = T1H + T1I;
Chris@42 497 T1T = iio[WS(vs, 1) + WS(rs, 3)];
Chris@42 498 }
Chris@42 499 T20 = T1W + T1Z;
Chris@42 500 T2a = T1Z - T1W;
Chris@42 501 T1K = T1G + T1J;
Chris@42 502 T2F = T1J - T1G;
Chris@42 503 T1U = T1S - T1T;
Chris@42 504 T2L = T1S + T1T;
Chris@42 505 }
Chris@42 506 {
Chris@42 507 E T3s, T3c, T3o, T3v, T3f, T3p;
Chris@42 508 {
Chris@42 509 E T3t, T3u, T3d, T3e;
Chris@42 510 {
Chris@42 511 E T2b, T1V, T3a, T3b;
Chris@42 512 T3a = rio[WS(vs, 2) + WS(rs, 1)];
Chris@42 513 T3b = rio[WS(vs, 2) + WS(rs, 5)];
Chris@42 514 T30 = T2K + T2L;
Chris@42 515 T2M = T2K - T2L;
Chris@42 516 T2b = T1R + T1U;
Chris@42 517 T1V = T1R - T1U;
Chris@42 518 T3s = T3a - T3b;
Chris@42 519 T3c = T3a + T3b;
Chris@42 520 T21 = T1V - T20;
Chris@42 521 T2n = T20 + T1V;
Chris@42 522 T2c = T2a - T2b;
Chris@42 523 T2s = T2a + T2b;
Chris@42 524 T3t = iio[WS(vs, 2) + WS(rs, 1)];
Chris@42 525 T3u = iio[WS(vs, 2) + WS(rs, 5)];
Chris@42 526 }
Chris@42 527 T3d = rio[WS(vs, 2) + WS(rs, 7)];
Chris@42 528 T3e = rio[WS(vs, 2) + WS(rs, 3)];
Chris@42 529 T3o = iio[WS(vs, 2) + WS(rs, 7)];
Chris@42 530 T3v = T3t - T3u;
Chris@42 531 T4g = T3t + T3u;
Chris@42 532 T3f = T3d + T3e;
Chris@42 533 T3n = T3d - T3e;
Chris@42 534 T3p = iio[WS(vs, 2) + WS(rs, 3)];
Chris@42 535 }
Chris@42 536 T3G = T3v - T3s;
Chris@42 537 T3w = T3s + T3v;
Chris@42 538 T4b = T3f - T3c;
Chris@42 539 T3g = T3c + T3f;
Chris@42 540 T4h = T3o + T3p;
Chris@42 541 T3q = T3o - T3p;
Chris@42 542 }
Chris@42 543 }
Chris@42 544 {
Chris@42 545 E T8O, T7V, T84, T8e, T7Y, T8P;
Chris@42 546 {
Chris@42 547 E T7K, T80, T7W, T83, T7N, T7X;
Chris@42 548 {
Chris@42 549 E T81, T82, T7L, T7M;
Chris@42 550 {
Chris@42 551 E T3r, T3H, T7I, T7J;
Chris@42 552 T7I = rio[WS(vs, 5) + WS(rs, 1)];
Chris@42 553 T7J = rio[WS(vs, 5) + WS(rs, 5)];
Chris@42 554 T4i = T4g - T4h;
Chris@42 555 T4w = T4g + T4h;
Chris@42 556 T3r = T3n - T3q;
Chris@42 557 T3H = T3n + T3q;
Chris@42 558 T7K = T7I + T7J;
Chris@42 559 T80 = T7I - T7J;
Chris@42 560 T3T = T3w + T3r;
Chris@42 561 T3x = T3r - T3w;
Chris@42 562 T3Y = T3G + T3H;
Chris@42 563 T3I = T3G - T3H;
Chris@42 564 T81 = iio[WS(vs, 5) + WS(rs, 1)];
Chris@42 565 T82 = iio[WS(vs, 5) + WS(rs, 5)];
Chris@42 566 }
Chris@42 567 T7L = rio[WS(vs, 5) + WS(rs, 7)];
Chris@42 568 T7M = rio[WS(vs, 5) + WS(rs, 3)];
Chris@42 569 T7W = iio[WS(vs, 5) + WS(rs, 7)];
Chris@42 570 T8O = T81 + T82;
Chris@42 571 T83 = T81 - T82;
Chris@42 572 T7V = T7L - T7M;
Chris@42 573 T7N = T7L + T7M;
Chris@42 574 T7X = iio[WS(vs, 5) + WS(rs, 3)];
Chris@42 575 }
Chris@42 576 T84 = T80 + T83;
Chris@42 577 T8e = T83 - T80;
Chris@42 578 T7O = T7K + T7N;
Chris@42 579 T8J = T7N - T7K;
Chris@42 580 T7Y = T7W - T7X;
Chris@42 581 T8P = T7W + T7X;
Chris@42 582 }
Chris@42 583 {
Chris@42 584 E T9w, T9g, T9s, T9z, T9j, T9t;
Chris@42 585 {
Chris@42 586 E T9x, T9y, T9h, T9i;
Chris@42 587 {
Chris@42 588 E T8f, T7Z, T9e, T9f;
Chris@42 589 T9e = rio[WS(vs, 6) + WS(rs, 1)];
Chris@42 590 T9f = rio[WS(vs, 6) + WS(rs, 5)];
Chris@42 591 T94 = T8O + T8P;
Chris@42 592 T8Q = T8O - T8P;
Chris@42 593 T8f = T7V + T7Y;
Chris@42 594 T7Z = T7V - T7Y;
Chris@42 595 T9w = T9e - T9f;
Chris@42 596 T9g = T9e + T9f;
Chris@42 597 T85 = T7Z - T84;
Chris@42 598 T8r = T84 + T7Z;
Chris@42 599 T8g = T8e - T8f;
Chris@42 600 T8w = T8e + T8f;
Chris@42 601 T9x = iio[WS(vs, 6) + WS(rs, 1)];
Chris@42 602 T9y = iio[WS(vs, 6) + WS(rs, 5)];
Chris@42 603 }
Chris@42 604 T9h = rio[WS(vs, 6) + WS(rs, 7)];
Chris@42 605 T9i = rio[WS(vs, 6) + WS(rs, 3)];
Chris@42 606 T9s = iio[WS(vs, 6) + WS(rs, 7)];
Chris@42 607 T9z = T9x - T9y;
Chris@42 608 Tak = T9x + T9y;
Chris@42 609 T9j = T9h + T9i;
Chris@42 610 T9r = T9h - T9i;
Chris@42 611 T9t = iio[WS(vs, 6) + WS(rs, 3)];
Chris@42 612 }
Chris@42 613 T9K = T9z - T9w;
Chris@42 614 T9A = T9w + T9z;
Chris@42 615 Taf = T9j - T9g;
Chris@42 616 T9k = T9g + T9j;
Chris@42 617 Tal = T9s + T9t;
Chris@42 618 T9u = T9s - T9t;
Chris@42 619 }
Chris@42 620 }
Chris@42 621 }
Chris@42 622 }
Chris@42 623 }
Chris@42 624 {
Chris@42 625 E T9X, T9B, Ta2, T9M, T2T, T2Q, TbT, TbH, TbO, TbN, TbU;
Chris@42 626 {
Chris@42 627 E Tam, TaA, T9v, T9L;
Chris@42 628 rio[0] = T7 + Te;
Chris@42 629 iio[0] = T1t + T1u;
Chris@42 630 Tam = Tak - Tal;
Chris@42 631 TaA = Tak + Tal;
Chris@42 632 T9v = T9r - T9u;
Chris@42 633 T9L = T9r + T9u;
Chris@42 634 rio[WS(rs, 1)] = T1D + T1K;
Chris@42 635 iio[WS(rs, 1)] = T2Z + T30;
Chris@42 636 T9X = T9A + T9v;
Chris@42 637 T9B = T9v - T9A;
Chris@42 638 Ta2 = T9K + T9L;
Chris@42 639 T9M = T9K - T9L;
Chris@42 640 rio[WS(rs, 2)] = T39 + T3g;
Chris@42 641 iio[WS(rs, 2)] = T4v + T4w;
Chris@42 642 rio[WS(rs, 3)] = T4F + T4M;
Chris@42 643 iio[WS(rs, 3)] = T61 + T62;
Chris@42 644 rio[WS(rs, 4)] = T6b + T6i;
Chris@42 645 iio[WS(rs, 4)] = T7x + T7y;
Chris@42 646 rio[WS(rs, 5)] = T7H + T7O;
Chris@42 647 iio[WS(rs, 5)] = T93 + T94;
Chris@42 648 rio[WS(rs, 6)] = T9d + T9k;
Chris@42 649 iio[WS(rs, 6)] = Taz + TaA;
Chris@42 650 rio[WS(rs, 7)] = TaJ + TaQ;
Chris@42 651 iio[WS(rs, 7)] = Tc5 + Tc6;
Chris@42 652 {
Chris@42 653 E T10, T13, T1h, T1a, Tat, Taq, TbC, TbF, TbE, TbG, TbD;
Chris@42 654 {
Chris@42 655 E T1q, T1v, T1s, T1w, T1r;
Chris@42 656 {
Chris@42 657 E T2N, T2B, T2I, T2H, T2O;
Chris@42 658 {
Chris@42 659 E TS, TX, TP, TU, T2G, TY, TT;
Chris@42 660 T10 = FMA(KP707106781, TR, TQ);
Chris@42 661 TS = FNMS(KP707106781, TR, TQ);
Chris@42 662 TX = FNMS(KP707106781, TW, TV);
Chris@42 663 T13 = FMA(KP707106781, TW, TV);
Chris@42 664 TP = W[8];
Chris@42 665 TU = W[9];
Chris@42 666 T2T = T2J + T2M;
Chris@42 667 T2N = T2J - T2M;
Chris@42 668 T2G = T2E - T2F;
Chris@42 669 T2Q = T2F + T2E;
Chris@42 670 TY = TP * TX;
Chris@42 671 TT = TP * TS;
Chris@42 672 T2B = W[10];
Chris@42 673 T2I = W[11];
Chris@42 674 iio[WS(vs, 5)] = FNMS(TU, TS, TY);
Chris@42 675 rio[WS(vs, 5)] = FMA(TU, TX, TT);
Chris@42 676 T2H = T2B * T2G;
Chris@42 677 T2O = T2I * T2G;
Chris@42 678 }
Chris@42 679 {
Chris@42 680 E T1n, T1k, T1j, T1m, T1l, T1o, T1p;
Chris@42 681 T1h = T1d - T1g;
Chris@42 682 T1n = T1d + T1g;
Chris@42 683 T1k = T19 + T18;
Chris@42 684 T1a = T18 - T19;
Chris@42 685 iio[WS(vs, 6) + WS(rs, 1)] = FNMS(T2I, T2N, T2H);
Chris@42 686 rio[WS(vs, 6) + WS(rs, 1)] = FMA(T2B, T2N, T2O);
Chris@42 687 T1j = W[2];
Chris@42 688 T1m = W[3];
Chris@42 689 T1q = T7 - Te;
Chris@42 690 T1v = T1t - T1u;
Chris@42 691 T1l = T1j * T1k;
Chris@42 692 T1o = T1m * T1k;
Chris@42 693 T1p = W[6];
Chris@42 694 T1s = W[7];
Chris@42 695 iio[WS(vs, 2)] = FNMS(T1m, T1n, T1l);
Chris@42 696 rio[WS(vs, 2)] = FMA(T1j, T1n, T1o);
Chris@42 697 T1w = T1p * T1v;
Chris@42 698 T1r = T1p * T1q;
Chris@42 699 }
Chris@42 700 }
Chris@42 701 {
Chris@42 702 E Tc2, Tc7, Tc4, Tc8, Tc3;
Chris@42 703 {
Chris@42 704 E Tan, Tag, Tab, Tai, Tah, Tao, Tc1;
Chris@42 705 Tat = Taj + Tam;
Chris@42 706 Tan = Taj - Tam;
Chris@42 707 Tag = Tae - Taf;
Chris@42 708 Taq = Taf + Tae;
Chris@42 709 iio[WS(vs, 4)] = FNMS(T1s, T1q, T1w);
Chris@42 710 rio[WS(vs, 4)] = FMA(T1s, T1v, T1r);
Chris@42 711 Tab = W[10];
Chris@42 712 Tai = W[11];
Chris@42 713 Tc2 = TaJ - TaQ;
Chris@42 714 Tc7 = Tc5 - Tc6;
Chris@42 715 Tah = Tab * Tag;
Chris@42 716 Tao = Tai * Tag;
Chris@42 717 Tc1 = W[6];
Chris@42 718 Tc4 = W[7];
Chris@42 719 iio[WS(vs, 6) + WS(rs, 6)] = FNMS(Tai, Tan, Tah);
Chris@42 720 rio[WS(vs, 6) + WS(rs, 6)] = FMA(Tab, Tan, Tao);
Chris@42 721 Tc8 = Tc1 * Tc7;
Chris@42 722 Tc3 = Tc1 * Tc2;
Chris@42 723 }
Chris@42 724 {
Chris@42 725 E Tbu, Tbz, Tbr, Tbw, TbA, Tbv, TbB;
Chris@42 726 TbC = FMA(KP707106781, Tbt, Tbs);
Chris@42 727 Tbu = FNMS(KP707106781, Tbt, Tbs);
Chris@42 728 Tbz = FNMS(KP707106781, Tby, Tbx);
Chris@42 729 TbF = FMA(KP707106781, Tby, Tbx);
Chris@42 730 iio[WS(vs, 4) + WS(rs, 7)] = FNMS(Tc4, Tc2, Tc8);
Chris@42 731 rio[WS(vs, 4) + WS(rs, 7)] = FMA(Tc4, Tc7, Tc3);
Chris@42 732 Tbr = W[8];
Chris@42 733 Tbw = W[9];
Chris@42 734 TbA = Tbr * Tbz;
Chris@42 735 Tbv = Tbr * Tbu;
Chris@42 736 TbB = W[0];
Chris@42 737 TbE = W[1];
Chris@42 738 iio[WS(vs, 5) + WS(rs, 7)] = FNMS(Tbw, Tbu, TbA);
Chris@42 739 rio[WS(vs, 5) + WS(rs, 7)] = FMA(Tbw, Tbz, Tbv);
Chris@42 740 TbG = TbB * TbF;
Chris@42 741 TbD = TbB * TbC;
Chris@42 742 }
Chris@42 743 }
Chris@42 744 }
Chris@42 745 {
Chris@42 746 E T2o, T2t, T2q, T2u, T2p;
Chris@42 747 {
Chris@42 748 E T2w, T2z, T2y, T2A, T2x;
Chris@42 749 {
Chris@42 750 E TZ, T12, T14, T11, T2v;
Chris@42 751 iio[WS(vs, 1) + WS(rs, 7)] = FNMS(TbE, TbC, TbG);
Chris@42 752 rio[WS(vs, 1) + WS(rs, 7)] = FMA(TbE, TbF, TbD);
Chris@42 753 TZ = W[0];
Chris@42 754 T12 = W[1];
Chris@42 755 T2o = FNMS(KP707106781, T2n, T2m);
Chris@42 756 T2w = FMA(KP707106781, T2n, T2m);
Chris@42 757 T2z = FMA(KP707106781, T2s, T2r);
Chris@42 758 T2t = FNMS(KP707106781, T2s, T2r);
Chris@42 759 T14 = TZ * T13;
Chris@42 760 T11 = TZ * T10;
Chris@42 761 T2v = W[0];
Chris@42 762 T2y = W[1];
Chris@42 763 iio[WS(vs, 1)] = FNMS(T12, T10, T14);
Chris@42 764 rio[WS(vs, 1)] = FMA(T12, T13, T11);
Chris@42 765 T2A = T2v * T2z;
Chris@42 766 T2x = T2v * T2w;
Chris@42 767 }
Chris@42 768 {
Chris@42 769 E T15, T1c, T1b, T1i, T2l;
Chris@42 770 iio[WS(vs, 1) + WS(rs, 1)] = FNMS(T2y, T2w, T2A);
Chris@42 771 rio[WS(vs, 1) + WS(rs, 1)] = FMA(T2y, T2z, T2x);
Chris@42 772 T15 = W[10];
Chris@42 773 T1c = W[11];
Chris@42 774 T1b = T15 * T1a;
Chris@42 775 T1i = T1c * T1a;
Chris@42 776 T2l = W[8];
Chris@42 777 T2q = W[9];
Chris@42 778 iio[WS(vs, 6)] = FNMS(T1c, T1h, T1b);
Chris@42 779 rio[WS(vs, 6)] = FMA(T15, T1h, T1i);
Chris@42 780 T2u = T2l * T2t;
Chris@42 781 T2p = T2l * T2o;
Chris@42 782 }
Chris@42 783 }
Chris@42 784 {
Chris@42 785 E TbZ, TbM, TbV, TbY, TbX, Tc0;
Chris@42 786 {
Chris@42 787 E Tap, Tas, TbW, Tar, Tau;
Chris@42 788 iio[WS(vs, 5) + WS(rs, 1)] = FNMS(T2q, T2o, T2u);
Chris@42 789 rio[WS(vs, 5) + WS(rs, 1)] = FMA(T2q, T2t, T2p);
Chris@42 790 Tap = W[2];
Chris@42 791 Tas = W[3];
Chris@42 792 TbT = TbP - TbS;
Chris@42 793 TbZ = TbP + TbS;
Chris@42 794 TbW = TbL + TbK;
Chris@42 795 TbM = TbK - TbL;
Chris@42 796 Tar = Tap * Taq;
Chris@42 797 Tau = Tas * Taq;
Chris@42 798 TbV = W[2];
Chris@42 799 TbY = W[3];
Chris@42 800 iio[WS(vs, 2) + WS(rs, 6)] = FNMS(Tas, Tat, Tar);
Chris@42 801 rio[WS(vs, 2) + WS(rs, 6)] = FMA(Tap, Tat, Tau);
Chris@42 802 TbX = TbV * TbW;
Chris@42 803 Tc0 = TbY * TbW;
Chris@42 804 }
Chris@42 805 {
Chris@42 806 E Taw, TaB, Tav, Tay, TaC, Tax;
Chris@42 807 Taw = T9d - T9k;
Chris@42 808 TaB = Taz - TaA;
Chris@42 809 iio[WS(vs, 2) + WS(rs, 7)] = FNMS(TbY, TbZ, TbX);
Chris@42 810 rio[WS(vs, 2) + WS(rs, 7)] = FMA(TbV, TbZ, Tc0);
Chris@42 811 Tav = W[6];
Chris@42 812 Tay = W[7];
Chris@42 813 TaC = Tav * TaB;
Chris@42 814 Tax = Tav * Taw;
Chris@42 815 TbH = W[10];
Chris@42 816 TbO = W[11];
Chris@42 817 iio[WS(vs, 4) + WS(rs, 6)] = FNMS(Tay, Taw, TaC);
Chris@42 818 rio[WS(vs, 4) + WS(rs, 6)] = FMA(Tay, TaB, Tax);
Chris@42 819 TbN = TbH * TbM;
Chris@42 820 TbU = TbO * TbM;
Chris@42 821 }
Chris@42 822 }
Chris@42 823 }
Chris@42 824 }
Chris@42 825 }
Chris@42 826 {
Chris@42 827 E T5q, T5v, T8R, T8K, T90, T95, T92, T96, T91;
Chris@42 828 {
Chris@42 829 E T3U, T3Z, T74, T77, T9Y, Ta3, T7l, T7e, T8X, T8T, T8W, T8V, T8Y;
Chris@42 830 {
Chris@42 831 E T5y, T5B, T5A, T5C, T5z;
Chris@42 832 {
Chris@42 833 E T5Y, T63, T60, T64, T5Z;
Chris@42 834 {
Chris@42 835 E T2P, T2S, T2R, T2U, T5X;
Chris@42 836 iio[WS(vs, 6) + WS(rs, 7)] = FNMS(TbO, TbT, TbN);
Chris@42 837 rio[WS(vs, 6) + WS(rs, 7)] = FMA(TbH, TbT, TbU);
Chris@42 838 T2P = W[2];
Chris@42 839 T2S = W[3];
Chris@42 840 T5Y = T4F - T4M;
Chris@42 841 T63 = T61 - T62;
Chris@42 842 T2R = T2P * T2Q;
Chris@42 843 T2U = T2S * T2Q;
Chris@42 844 T5X = W[6];
Chris@42 845 T60 = W[7];
Chris@42 846 iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T2S, T2T, T2R);
Chris@42 847 rio[WS(vs, 2) + WS(rs, 1)] = FMA(T2P, T2T, T2U);
Chris@42 848 T64 = T5X * T63;
Chris@42 849 T5Z = T5X * T5Y;
Chris@42 850 }
Chris@42 851 {
Chris@42 852 E T42, T45, T41, T44, T46, T43, T5x;
Chris@42 853 T3U = FNMS(KP707106781, T3T, T3S);
Chris@42 854 T42 = FMA(KP707106781, T3T, T3S);
Chris@42 855 T45 = FMA(KP707106781, T3Y, T3X);
Chris@42 856 T3Z = FNMS(KP707106781, T3Y, T3X);
Chris@42 857 iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T60, T5Y, T64);
Chris@42 858 rio[WS(vs, 4) + WS(rs, 3)] = FMA(T60, T63, T5Z);
Chris@42 859 T41 = W[0];
Chris@42 860 T44 = W[1];
Chris@42 861 T5q = FNMS(KP707106781, T5p, T5o);
Chris@42 862 T5y = FMA(KP707106781, T5p, T5o);
Chris@42 863 T5B = FMA(KP707106781, T5u, T5t);
Chris@42 864 T5v = FNMS(KP707106781, T5u, T5t);
Chris@42 865 T46 = T41 * T45;
Chris@42 866 T43 = T41 * T42;
Chris@42 867 T5x = W[0];
Chris@42 868 T5A = W[1];
Chris@42 869 iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T44, T42, T46);
Chris@42 870 rio[WS(vs, 1) + WS(rs, 2)] = FMA(T44, T45, T43);
Chris@42 871 T5C = T5x * T5B;
Chris@42 872 T5z = T5x * T5y;
Chris@42 873 }
Chris@42 874 }
Chris@42 875 {
Chris@42 876 E Ta6, Ta9, Ta8, Taa, Ta7;
Chris@42 877 {
Chris@42 878 E T6W, T71, T6T, T6Y, T72, T6X, Ta5;
Chris@42 879 T74 = FMA(KP707106781, T6V, T6U);
Chris@42 880 T6W = FNMS(KP707106781, T6V, T6U);
Chris@42 881 T71 = FNMS(KP707106781, T70, T6Z);
Chris@42 882 T77 = FMA(KP707106781, T70, T6Z);
Chris@42 883 iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T5A, T5y, T5C);
Chris@42 884 rio[WS(vs, 1) + WS(rs, 3)] = FMA(T5A, T5B, T5z);
Chris@42 885 T6T = W[8];
Chris@42 886 T6Y = W[9];
Chris@42 887 T9Y = FNMS(KP707106781, T9X, T9W);
Chris@42 888 Ta6 = FMA(KP707106781, T9X, T9W);
Chris@42 889 Ta9 = FMA(KP707106781, Ta2, Ta1);
Chris@42 890 Ta3 = FNMS(KP707106781, Ta2, Ta1);
Chris@42 891 T72 = T6T * T71;
Chris@42 892 T6X = T6T * T6W;
Chris@42 893 Ta5 = W[0];
Chris@42 894 Ta8 = W[1];
Chris@42 895 iio[WS(vs, 5) + WS(rs, 4)] = FNMS(T6Y, T6W, T72);
Chris@42 896 rio[WS(vs, 5) + WS(rs, 4)] = FMA(T6Y, T71, T6X);
Chris@42 897 Taa = Ta5 * Ta9;
Chris@42 898 Ta7 = Ta5 * Ta6;
Chris@42 899 }
Chris@42 900 {
Chris@42 901 E T7r, T7o, T7n, T7q, T8U, T7p, T7s;
Chris@42 902 T7l = T7h - T7k;
Chris@42 903 T7r = T7h + T7k;
Chris@42 904 T7o = T7d + T7c;
Chris@42 905 T7e = T7c - T7d;
Chris@42 906 iio[WS(vs, 1) + WS(rs, 6)] = FNMS(Ta8, Ta6, Taa);
Chris@42 907 rio[WS(vs, 1) + WS(rs, 6)] = FMA(Ta8, Ta9, Ta7);
Chris@42 908 T7n = W[2];
Chris@42 909 T7q = W[3];
Chris@42 910 T8R = T8N - T8Q;
Chris@42 911 T8X = T8N + T8Q;
Chris@42 912 T8U = T8J + T8I;
Chris@42 913 T8K = T8I - T8J;
Chris@42 914 T7p = T7n * T7o;
Chris@42 915 T7s = T7q * T7o;
Chris@42 916 T8T = W[2];
Chris@42 917 T8W = W[3];
Chris@42 918 iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T7q, T7r, T7p);
Chris@42 919 rio[WS(vs, 2) + WS(rs, 4)] = FMA(T7n, T7r, T7s);
Chris@42 920 T8V = T8T * T8U;
Chris@42 921 T8Y = T8W * T8U;
Chris@42 922 }
Chris@42 923 }
Chris@42 924 }
Chris@42 925 {
Chris@42 926 E T5P, T5D, T5K, T5J, T5Q, Ta0, Ta4, T9Z;
Chris@42 927 {
Chris@42 928 E T5V, T5I, T5R, T5U, T5T, T5W;
Chris@42 929 {
Chris@42 930 E T2W, T31, T2V, T2Y, T5S, T32, T2X;
Chris@42 931 T2W = T1D - T1K;
Chris@42 932 T31 = T2Z - T30;
Chris@42 933 iio[WS(vs, 2) + WS(rs, 5)] = FNMS(T8W, T8X, T8V);
Chris@42 934 rio[WS(vs, 2) + WS(rs, 5)] = FMA(T8T, T8X, T8Y);
Chris@42 935 T2V = W[6];
Chris@42 936 T2Y = W[7];
Chris@42 937 T5P = T5L - T5O;
Chris@42 938 T5V = T5L + T5O;
Chris@42 939 T5S = T5H + T5G;
Chris@42 940 T5I = T5G - T5H;
Chris@42 941 T32 = T2V * T31;
Chris@42 942 T2X = T2V * T2W;
Chris@42 943 T5R = W[2];
Chris@42 944 T5U = W[3];
Chris@42 945 iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T2Y, T2W, T32);
Chris@42 946 rio[WS(vs, 4) + WS(rs, 1)] = FMA(T2Y, T31, T2X);
Chris@42 947 T5T = T5R * T5S;
Chris@42 948 T5W = T5U * T5S;
Chris@42 949 }
Chris@42 950 {
Chris@42 951 E T3R, T3W, T40, T3V;
Chris@42 952 iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T5U, T5V, T5T);
Chris@42 953 rio[WS(vs, 2) + WS(rs, 3)] = FMA(T5R, T5V, T5W);
Chris@42 954 T3R = W[8];
Chris@42 955 T3W = W[9];
Chris@42 956 T40 = T3R * T3Z;
Chris@42 957 T3V = T3R * T3U;
Chris@42 958 T5D = W[10];
Chris@42 959 T5K = W[11];
Chris@42 960 iio[WS(vs, 5) + WS(rs, 2)] = FNMS(T3W, T3U, T40);
Chris@42 961 rio[WS(vs, 5) + WS(rs, 2)] = FMA(T3W, T3Z, T3V);
Chris@42 962 T5J = T5D * T5I;
Chris@42 963 T5Q = T5K * T5I;
Chris@42 964 }
Chris@42 965 }
Chris@42 966 {
Chris@42 967 E T73, T76, T78, T75, T9V;
Chris@42 968 iio[WS(vs, 6) + WS(rs, 3)] = FNMS(T5K, T5P, T5J);
Chris@42 969 rio[WS(vs, 6) + WS(rs, 3)] = FMA(T5D, T5P, T5Q);
Chris@42 970 T73 = W[0];
Chris@42 971 T76 = W[1];
Chris@42 972 T78 = T73 * T77;
Chris@42 973 T75 = T73 * T74;
Chris@42 974 T9V = W[8];
Chris@42 975 Ta0 = W[9];
Chris@42 976 iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T76, T74, T78);
Chris@42 977 rio[WS(vs, 1) + WS(rs, 4)] = FMA(T76, T77, T75);
Chris@42 978 Ta4 = T9V * Ta3;
Chris@42 979 T9Z = T9V * T9Y;
Chris@42 980 }
Chris@42 981 {
Chris@42 982 E T79, T7g, T7f, T7m, T8Z;
Chris@42 983 iio[WS(vs, 5) + WS(rs, 6)] = FNMS(Ta0, T9Y, Ta4);
Chris@42 984 rio[WS(vs, 5) + WS(rs, 6)] = FMA(Ta0, Ta3, T9Z);
Chris@42 985 T79 = W[10];
Chris@42 986 T7g = W[11];
Chris@42 987 T90 = T7H - T7O;
Chris@42 988 T95 = T93 - T94;
Chris@42 989 T7f = T79 * T7e;
Chris@42 990 T7m = T7g * T7e;
Chris@42 991 T8Z = W[6];
Chris@42 992 T92 = W[7];
Chris@42 993 iio[WS(vs, 6) + WS(rs, 4)] = FNMS(T7g, T7l, T7f);
Chris@42 994 rio[WS(vs, 6) + WS(rs, 4)] = FMA(T79, T7l, T7m);
Chris@42 995 T96 = T8Z * T95;
Chris@42 996 T91 = T8Z * T90;
Chris@42 997 }
Chris@42 998 }
Chris@42 999 }
Chris@42 1000 {
Chris@42 1001 E T8A, T8D, T8C, T8E, T8B;
Chris@42 1002 {
Chris@42 1003 E T4s, T4x, T4u, T4y, T4t;
Chris@42 1004 {
Chris@42 1005 E T4p, T4m, T5s, T5w, T5r;
Chris@42 1006 {
Chris@42 1007 E T4j, T4c, T47, T4e, T4d, T4k, T5n;
Chris@42 1008 T4p = T4f + T4i;
Chris@42 1009 T4j = T4f - T4i;
Chris@42 1010 T4c = T4a - T4b;
Chris@42 1011 T4m = T4b + T4a;
Chris@42 1012 iio[WS(vs, 4) + WS(rs, 5)] = FNMS(T92, T90, T96);
Chris@42 1013 rio[WS(vs, 4) + WS(rs, 5)] = FMA(T92, T95, T91);
Chris@42 1014 T47 = W[10];
Chris@42 1015 T4e = W[11];
Chris@42 1016 T4d = T47 * T4c;
Chris@42 1017 T4k = T4e * T4c;
Chris@42 1018 T5n = W[8];
Chris@42 1019 T5s = W[9];
Chris@42 1020 iio[WS(vs, 6) + WS(rs, 2)] = FNMS(T4e, T4j, T4d);
Chris@42 1021 rio[WS(vs, 6) + WS(rs, 2)] = FMA(T47, T4j, T4k);
Chris@42 1022 T5w = T5n * T5v;
Chris@42 1023 T5r = T5n * T5q;
Chris@42 1024 }
Chris@42 1025 {
Chris@42 1026 E T4l, T4o, T4n, T4q, T4r;
Chris@42 1027 iio[WS(vs, 5) + WS(rs, 3)] = FNMS(T5s, T5q, T5w);
Chris@42 1028 rio[WS(vs, 5) + WS(rs, 3)] = FMA(T5s, T5v, T5r);
Chris@42 1029 T4l = W[2];
Chris@42 1030 T4o = W[3];
Chris@42 1031 T4s = T39 - T3g;
Chris@42 1032 T4x = T4v - T4w;
Chris@42 1033 T4n = T4l * T4m;
Chris@42 1034 T4q = T4o * T4m;
Chris@42 1035 T4r = W[6];
Chris@42 1036 T4u = W[7];
Chris@42 1037 iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T4o, T4p, T4n);
Chris@42 1038 rio[WS(vs, 2) + WS(rs, 2)] = FMA(T4l, T4p, T4q);
Chris@42 1039 T4y = T4r * T4x;
Chris@42 1040 T4t = T4r * T4s;
Chris@42 1041 }
Chris@42 1042 }
Chris@42 1043 {
Chris@42 1044 E T8F, T8M, T8L, T8S;
Chris@42 1045 {
Chris@42 1046 E T7u, T7z, T7t, T7w, T7A, T7v;
Chris@42 1047 T7u = T6b - T6i;
Chris@42 1048 T7z = T7x - T7y;
Chris@42 1049 iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T4u, T4s, T4y);
Chris@42 1050 rio[WS(vs, 4) + WS(rs, 2)] = FMA(T4u, T4x, T4t);
Chris@42 1051 T7t = W[6];
Chris@42 1052 T7w = W[7];
Chris@42 1053 T7A = T7t * T7z;
Chris@42 1054 T7v = T7t * T7u;
Chris@42 1055 T8F = W[10];
Chris@42 1056 T8M = W[11];
Chris@42 1057 iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T7w, T7u, T7A);
Chris@42 1058 rio[WS(vs, 4) + WS(rs, 4)] = FMA(T7w, T7z, T7v);
Chris@42 1059 T8L = T8F * T8K;
Chris@42 1060 T8S = T8M * T8K;
Chris@42 1061 }
Chris@42 1062 {
Chris@42 1063 E T8s, T8x, T8p, T8u, T8y, T8t, T8z;
Chris@42 1064 T8A = FMA(KP707106781, T8r, T8q);
Chris@42 1065 T8s = FNMS(KP707106781, T8r, T8q);
Chris@42 1066 T8x = FNMS(KP707106781, T8w, T8v);
Chris@42 1067 T8D = FMA(KP707106781, T8w, T8v);
Chris@42 1068 iio[WS(vs, 6) + WS(rs, 5)] = FNMS(T8M, T8R, T8L);
Chris@42 1069 rio[WS(vs, 6) + WS(rs, 5)] = FMA(T8F, T8R, T8S);
Chris@42 1070 T8p = W[8];
Chris@42 1071 T8u = W[9];
Chris@42 1072 T8y = T8p * T8x;
Chris@42 1073 T8t = T8p * T8s;
Chris@42 1074 T8z = W[0];
Chris@42 1075 T8C = W[1];
Chris@42 1076 iio[WS(vs, 5) + WS(rs, 5)] = FNMS(T8u, T8s, T8y);
Chris@42 1077 rio[WS(vs, 5) + WS(rs, 5)] = FMA(T8u, T8x, T8t);
Chris@42 1078 T8E = T8z * T8D;
Chris@42 1079 T8B = T8z * T8A;
Chris@42 1080 }
Chris@42 1081 }
Chris@42 1082 }
Chris@42 1083 {
Chris@42 1084 E T3y, T3J, T3h, T3A, T3z, T3K;
Chris@42 1085 {
Chris@42 1086 E T54, T5f, T4N, T56, T55, T5g;
Chris@42 1087 {
Chris@42 1088 E Tw, TH, Tf, Ty, Tx, TI;
Chris@42 1089 {
Chris@42 1090 E TN, TJ, TM, TL, TO, TK;
Chris@42 1091 TK = FMA(KP707106781, Tv, Tk);
Chris@42 1092 Tw = FNMS(KP707106781, Tv, Tk);
Chris@42 1093 iio[WS(vs, 1) + WS(rs, 5)] = FNMS(T8C, T8A, T8E);
Chris@42 1094 rio[WS(vs, 1) + WS(rs, 5)] = FMA(T8C, T8D, T8B);
Chris@42 1095 TH = FNMS(KP707106781, TG, TD);
Chris@42 1096 TN = FMA(KP707106781, TG, TD);
Chris@42 1097 TJ = W[4];
Chris@42 1098 TM = W[5];
Chris@42 1099 Tf = W[12];
Chris@42 1100 TL = TJ * TK;
Chris@42 1101 TO = TM * TK;
Chris@42 1102 Ty = W[13];
Chris@42 1103 Tx = Tf * Tw;
Chris@42 1104 iio[WS(vs, 3)] = FNMS(TM, TN, TL);
Chris@42 1105 rio[WS(vs, 3)] = FMA(TJ, TN, TO);
Chris@42 1106 }
Chris@42 1107 TI = Ty * Tw;
Chris@42 1108 iio[WS(vs, 7)] = FNMS(Ty, TH, Tx);
Chris@42 1109 {
Chris@42 1110 E T5h, T5l, T5k, T5j, T5m, T5i;
Chris@42 1111 T5i = FMA(KP707106781, T53, T4S);
Chris@42 1112 T54 = FNMS(KP707106781, T53, T4S);
Chris@42 1113 rio[WS(vs, 7)] = FMA(Tf, TH, TI);
Chris@42 1114 T5h = W[4];
Chris@42 1115 T5f = FNMS(KP707106781, T5e, T5b);
Chris@42 1116 T5l = FMA(KP707106781, T5e, T5b);
Chris@42 1117 T5k = W[5];
Chris@42 1118 T5j = T5h * T5i;
Chris@42 1119 T4N = W[12];
Chris@42 1120 T5m = T5k * T5i;
Chris@42 1121 T56 = W[13];
Chris@42 1122 iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T5k, T5l, T5j);
Chris@42 1123 T55 = T4N * T54;
Chris@42 1124 rio[WS(vs, 3) + WS(rs, 3)] = FMA(T5h, T5l, T5m);
Chris@42 1125 }
Chris@42 1126 }
Chris@42 1127 T5g = T56 * T54;
Chris@42 1128 {
Chris@42 1129 E T22, T2d, T1L, T24, T23, T2e;
Chris@42 1130 {
Chris@42 1131 E T2j, T2f, T2i, T2h, T2k, T2g;
Chris@42 1132 iio[WS(vs, 7) + WS(rs, 3)] = FNMS(T56, T5f, T55);
Chris@42 1133 T22 = FNMS(KP707106781, T21, T1Q);
Chris@42 1134 T2g = FMA(KP707106781, T21, T1Q);
Chris@42 1135 rio[WS(vs, 7) + WS(rs, 3)] = FMA(T4N, T5f, T5g);
Chris@42 1136 T2d = FNMS(KP707106781, T2c, T29);
Chris@42 1137 T2j = FMA(KP707106781, T2c, T29);
Chris@42 1138 T2f = W[4];
Chris@42 1139 T2i = W[5];
Chris@42 1140 T1L = W[12];
Chris@42 1141 T2h = T2f * T2g;
Chris@42 1142 T2k = T2i * T2g;
Chris@42 1143 T24 = W[13];
Chris@42 1144 T23 = T1L * T22;
Chris@42 1145 iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T2i, T2j, T2h);
Chris@42 1146 rio[WS(vs, 3) + WS(rs, 1)] = FMA(T2f, T2j, T2k);
Chris@42 1147 }
Chris@42 1148 T2e = T24 * T22;
Chris@42 1149 iio[WS(vs, 7) + WS(rs, 1)] = FNMS(T24, T2d, T23);
Chris@42 1150 {
Chris@42 1151 E T3L, T3P, T3O, T3N, T3Q, T3M;
Chris@42 1152 T3M = FMA(KP707106781, T3x, T3m);
Chris@42 1153 T3y = FNMS(KP707106781, T3x, T3m);
Chris@42 1154 rio[WS(vs, 7) + WS(rs, 1)] = FMA(T1L, T2d, T2e);
Chris@42 1155 T3L = W[4];
Chris@42 1156 T3J = FNMS(KP707106781, T3I, T3F);
Chris@42 1157 T3P = FMA(KP707106781, T3I, T3F);
Chris@42 1158 T3O = W[5];
Chris@42 1159 T3N = T3L * T3M;
Chris@42 1160 T3h = W[12];
Chris@42 1161 T3Q = T3O * T3M;
Chris@42 1162 T3A = W[13];
Chris@42 1163 iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T3O, T3P, T3N);
Chris@42 1164 T3z = T3h * T3y;
Chris@42 1165 rio[WS(vs, 3) + WS(rs, 2)] = FMA(T3L, T3P, T3Q);
Chris@42 1166 }
Chris@42 1167 }
Chris@42 1168 }
Chris@42 1169 T3K = T3A * T3y;
Chris@42 1170 {
Chris@42 1171 E Tb8, Tbj, TaR, Tba, Tb9, Tbk;
Chris@42 1172 {
Chris@42 1173 E T6A, T6L, T6j, T6C, T6B, T6M;
Chris@42 1174 {
Chris@42 1175 E T6R, T6N, T6Q, T6P, T6S, T6O;
Chris@42 1176 iio[WS(vs, 7) + WS(rs, 2)] = FNMS(T3A, T3J, T3z);
Chris@42 1177 T6A = FNMS(KP707106781, T6z, T6o);
Chris@42 1178 T6O = FMA(KP707106781, T6z, T6o);
Chris@42 1179 rio[WS(vs, 7) + WS(rs, 2)] = FMA(T3h, T3J, T3K);
Chris@42 1180 T6L = FNMS(KP707106781, T6K, T6H);
Chris@42 1181 T6R = FMA(KP707106781, T6K, T6H);
Chris@42 1182 T6N = W[4];
Chris@42 1183 T6Q = W[5];
Chris@42 1184 T6j = W[12];
Chris@42 1185 T6P = T6N * T6O;
Chris@42 1186 T6S = T6Q * T6O;
Chris@42 1187 T6C = W[13];
Chris@42 1188 T6B = T6j * T6A;
Chris@42 1189 iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T6Q, T6R, T6P);
Chris@42 1190 rio[WS(vs, 3) + WS(rs, 4)] = FMA(T6N, T6R, T6S);
Chris@42 1191 }
Chris@42 1192 T6M = T6C * T6A;
Chris@42 1193 iio[WS(vs, 7) + WS(rs, 4)] = FNMS(T6C, T6L, T6B);
Chris@42 1194 {
Chris@42 1195 E Tbl, Tbp, Tbo, Tbn, Tbq, Tbm;
Chris@42 1196 Tbm = FMA(KP707106781, Tb7, TaW);
Chris@42 1197 Tb8 = FNMS(KP707106781, Tb7, TaW);
Chris@42 1198 rio[WS(vs, 7) + WS(rs, 4)] = FMA(T6j, T6L, T6M);
Chris@42 1199 Tbl = W[4];
Chris@42 1200 Tbj = FNMS(KP707106781, Tbi, Tbf);
Chris@42 1201 Tbp = FMA(KP707106781, Tbi, Tbf);
Chris@42 1202 Tbo = W[5];
Chris@42 1203 Tbn = Tbl * Tbm;
Chris@42 1204 TaR = W[12];
Chris@42 1205 Tbq = Tbo * Tbm;
Chris@42 1206 Tba = W[13];
Chris@42 1207 iio[WS(vs, 3) + WS(rs, 7)] = FNMS(Tbo, Tbp, Tbn);
Chris@42 1208 Tb9 = TaR * Tb8;
Chris@42 1209 rio[WS(vs, 3) + WS(rs, 7)] = FMA(Tbl, Tbp, Tbq);
Chris@42 1210 }
Chris@42 1211 }
Chris@42 1212 Tbk = Tba * Tb8;
Chris@42 1213 {
Chris@42 1214 E T86, T8h, T7P, T88, T87, T8i;
Chris@42 1215 {
Chris@42 1216 E T8n, T8j, T8m, T8l, T8o, T8k;
Chris@42 1217 iio[WS(vs, 7) + WS(rs, 7)] = FNMS(Tba, Tbj, Tb9);
Chris@42 1218 T86 = FNMS(KP707106781, T85, T7U);
Chris@42 1219 T8k = FMA(KP707106781, T85, T7U);
Chris@42 1220 rio[WS(vs, 7) + WS(rs, 7)] = FMA(TaR, Tbj, Tbk);
Chris@42 1221 T8h = FNMS(KP707106781, T8g, T8d);
Chris@42 1222 T8n = FMA(KP707106781, T8g, T8d);
Chris@42 1223 T8j = W[4];
Chris@42 1224 T8m = W[5];
Chris@42 1225 T7P = W[12];
Chris@42 1226 T8l = T8j * T8k;
Chris@42 1227 T8o = T8m * T8k;
Chris@42 1228 T88 = W[13];
Chris@42 1229 T87 = T7P * T86;
Chris@42 1230 iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T8m, T8n, T8l);
Chris@42 1231 rio[WS(vs, 3) + WS(rs, 5)] = FMA(T8j, T8n, T8o);
Chris@42 1232 }
Chris@42 1233 T8i = T88 * T86;
Chris@42 1234 iio[WS(vs, 7) + WS(rs, 5)] = FNMS(T88, T8h, T87);
Chris@42 1235 {
Chris@42 1236 E T9P, T9T, T9S, T9R, T9U, T9Q;
Chris@42 1237 T9Q = FMA(KP707106781, T9B, T9q);
Chris@42 1238 T9C = FNMS(KP707106781, T9B, T9q);
Chris@42 1239 rio[WS(vs, 7) + WS(rs, 5)] = FMA(T7P, T8h, T8i);
Chris@42 1240 T9P = W[4];
Chris@42 1241 T9N = FNMS(KP707106781, T9M, T9J);
Chris@42 1242 T9T = FMA(KP707106781, T9M, T9J);
Chris@42 1243 T9S = W[5];
Chris@42 1244 T9R = T9P * T9Q;
Chris@42 1245 T9l = W[12];
Chris@42 1246 T9U = T9S * T9Q;
Chris@42 1247 T9E = W[13];
Chris@42 1248 iio[WS(vs, 3) + WS(rs, 6)] = FNMS(T9S, T9T, T9R);
Chris@42 1249 T9D = T9l * T9C;
Chris@42 1250 rio[WS(vs, 3) + WS(rs, 6)] = FMA(T9P, T9T, T9U);
Chris@42 1251 }
Chris@42 1252 }
Chris@42 1253 }
Chris@42 1254 }
Chris@42 1255 }
Chris@42 1256 }
Chris@42 1257 }
Chris@42 1258 }
Chris@42 1259 T9O = T9E * T9C;
Chris@42 1260 iio[WS(vs, 7) + WS(rs, 6)] = FNMS(T9E, T9N, T9D);
Chris@42 1261 rio[WS(vs, 7) + WS(rs, 6)] = FMA(T9l, T9N, T9O);
Chris@42 1262 }
Chris@42 1263 }
Chris@42 1264 }
Chris@42 1265
/*
 * Twiddle-factor instruction list for this codelet; it is referenced from
 * `desc` below. The q1_8 kernel in this file indexes W[0] through W[13].
 * NOTE(review): TW_FULL / TW_NEXT and the field layout of tw_instr are
 * defined outside this file; presumably the first entry requests the full
 * twiddle set for size 8 and the second entry ends the list -- confirm
 * against the tw_instr definition.
 */
Chris@42 1266 static const tw_instr twinstr[] = {
Chris@42 1267 {TW_FULL, 0, 8},
Chris@42 1268 {TW_NEXT, 1, 0}
Chris@42 1269 };
Chris@42 1270
/*
 * Codelet descriptor passed to the planner by the registration function
 * below. Initializer fields, in order: 8 (presumably the radix, matching
 * "-n 8" on the generator command line), the codelet name "q1_8", the
 * twiddle instruction list, the address of GENUS (defined outside this
 * file), and the operation counts {352, 112, 176, 0}, which match the
 * "352 additions, 112 multiplications, 176 fused multiply/add" figures in
 * the header comment of this HAVE_FMA variant.
 * NOTE(review): the meaning of the three trailing zeros is fixed by the
 * ct_desc definition, which is not visible here -- confirm.
 */
Chris@42 1271 static const ct_desc desc = { 8, "q1_8", twinstr, &GENUS, {352, 112, 176, 0}, 0, 0, 0 };
Chris@42 1272
/*
 * Registers this codelet with planner `p` by passing the q1_8 kernel and
 * its descriptor `desc` to kdft_difsq_register.
 * NOTE(review): X() is a macro defined outside this file; presumably it
 * applies the library's symbol-name prefix -- confirm.
 */
Chris@42 1273 void X(codelet_q1_8) (planner *p) {
Chris@42 1274 X(kdft_difsq_register) (p, q1_8, &desc);
Chris@42 1275 }
Chris@42 1276 #else /* HAVE_FMA */
Chris@42 1277
Chris@42 1278 /* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 8 -name q1_8 -include q.h */
Chris@42 1279
Chris@42 1280 /*
Chris@42 1281 * This function contains 528 FP additions, 256 FP multiplications,
Chris@42 1282 * (or, 416 additions, 144 multiplications, 112 fused multiply/add),
Chris@42 1283 * 142 stack variables, 1 constants, and 256 memory accesses
Chris@42 1284 */
Chris@42 1285 #include "q.h"
Chris@42 1286
Chris@42 1287 static void q1_8(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
Chris@42 1288 {
Chris@42 1289 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 1290 {
Chris@42 1291 INT m;
Chris@42 1292 for (m = mb, W = W + (mb * 14); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
Chris@42 1293 E T7, T14, T1g, Tk, TC, TQ, T10, TM, T1w, T2p, T2z, T1H, T1M, T1W, T2j;
Chris@42 1294 E T1V, T7R, T8O, T90, T84, T8m, T8A, T8K, T8w, T9g, Ta9, Taj, T9r, T9w, T9G;
Chris@42 1295 E Ta3, T9F, Te, T17, T1h, Tp, Tu, TE, T11, TD, T1p, T2m, T2y, T1C, T1U;
Chris@42 1296 E T28, T2i, T24, T7Y, T8R, T91, T89, T8e, T8o, T8L, T8n, T99, Ta6, Tai, T9m;
Chris@42 1297 E T9E, T9S, Ta2, T9O, T2H, T3E, T3Q, T2U, T3c, T3q, T3A, T3m, T46, T4Z, T59;
Chris@42 1298 E T4h, T4m, T4w, T4T, T4v, T5h, T6e, T6q, T5u, T5M, T60, T6a, T5W, T6G, T7z;
Chris@42 1299 E T7J, T6R, T6W, T76, T7t, T75, T2O, T3H, T3R, T2Z, T34, T3e, T3B, T3d, T3Z;
Chris@42 1300 E T4W, T58, T4c, T4u, T4I, T4S, T4E, T5o, T6h, T6r, T5z, T5E, T5O, T6b, T5N;
Chris@42 1301 E T6z, T7w, T7I, T6M, T74, T7i, T7s, T7e;
Chris@42 1302 {
Chris@42 1303 E T3, Ty, Tj, TY, T6, Tg, TB, TZ;
Chris@42 1304 {
Chris@42 1305 E T1, T2, Th, Ti;
Chris@42 1306 T1 = rio[0];
Chris@42 1307 T2 = rio[WS(rs, 4)];
Chris@42 1308 T3 = T1 + T2;
Chris@42 1309 Ty = T1 - T2;
Chris@42 1310 Th = iio[0];
Chris@42 1311 Ti = iio[WS(rs, 4)];
Chris@42 1312 Tj = Th - Ti;
Chris@42 1313 TY = Th + Ti;
Chris@42 1314 }
Chris@42 1315 {
Chris@42 1316 E T4, T5, Tz, TA;
Chris@42 1317 T4 = rio[WS(rs, 2)];
Chris@42 1318 T5 = rio[WS(rs, 6)];
Chris@42 1319 T6 = T4 + T5;
Chris@42 1320 Tg = T4 - T5;
Chris@42 1321 Tz = iio[WS(rs, 2)];
Chris@42 1322 TA = iio[WS(rs, 6)];
Chris@42 1323 TB = Tz - TA;
Chris@42 1324 TZ = Tz + TA;
Chris@42 1325 }
Chris@42 1326 T7 = T3 + T6;
Chris@42 1327 T14 = T3 - T6;
Chris@42 1328 T1g = TY + TZ;
Chris@42 1329 Tk = Tg + Tj;
Chris@42 1330 TC = Ty - TB;
Chris@42 1331 TQ = Tj - Tg;
Chris@42 1332 T10 = TY - TZ;
Chris@42 1333 TM = Ty + TB;
Chris@42 1334 }
Chris@42 1335 {
Chris@42 1336 E T1s, T1I, T1L, T2n, T1v, T1D, T1G, T2o;
Chris@42 1337 {
Chris@42 1338 E T1q, T1r, T1J, T1K;
Chris@42 1339 T1q = rio[WS(vs, 1) + WS(rs, 1)];
Chris@42 1340 T1r = rio[WS(vs, 1) + WS(rs, 5)];
Chris@42 1341 T1s = T1q + T1r;
Chris@42 1342 T1I = T1q - T1r;
Chris@42 1343 T1J = iio[WS(vs, 1) + WS(rs, 1)];
Chris@42 1344 T1K = iio[WS(vs, 1) + WS(rs, 5)];
Chris@42 1345 T1L = T1J - T1K;
Chris@42 1346 T2n = T1J + T1K;
Chris@42 1347 }
Chris@42 1348 {
Chris@42 1349 E T1t, T1u, T1E, T1F;
Chris@42 1350 T1t = rio[WS(vs, 1) + WS(rs, 7)];
Chris@42 1351 T1u = rio[WS(vs, 1) + WS(rs, 3)];
Chris@42 1352 T1v = T1t + T1u;
Chris@42 1353 T1D = T1t - T1u;
Chris@42 1354 T1E = iio[WS(vs, 1) + WS(rs, 7)];
Chris@42 1355 T1F = iio[WS(vs, 1) + WS(rs, 3)];
Chris@42 1356 T1G = T1E - T1F;
Chris@42 1357 T2o = T1E + T1F;
Chris@42 1358 }
Chris@42 1359 T1w = T1s + T1v;
Chris@42 1360 T2p = T2n - T2o;
Chris@42 1361 T2z = T2n + T2o;
Chris@42 1362 T1H = T1D - T1G;
Chris@42 1363 T1M = T1I + T1L;
Chris@42 1364 T1W = T1D + T1G;
Chris@42 1365 T2j = T1v - T1s;
Chris@42 1366 T1V = T1L - T1I;
Chris@42 1367 }
Chris@42 1368 {
Chris@42 1369 E T7N, T8i, T83, T8I, T7Q, T80, T8l, T8J;
Chris@42 1370 {
Chris@42 1371 E T7L, T7M, T81, T82;
Chris@42 1372 T7L = rio[WS(vs, 6)];
Chris@42 1373 T7M = rio[WS(vs, 6) + WS(rs, 4)];
Chris@42 1374 T7N = T7L + T7M;
Chris@42 1375 T8i = T7L - T7M;
Chris@42 1376 T81 = iio[WS(vs, 6)];
Chris@42 1377 T82 = iio[WS(vs, 6) + WS(rs, 4)];
Chris@42 1378 T83 = T81 - T82;
Chris@42 1379 T8I = T81 + T82;
Chris@42 1380 }
Chris@42 1381 {
Chris@42 1382 E T7O, T7P, T8j, T8k;
Chris@42 1383 T7O = rio[WS(vs, 6) + WS(rs, 2)];
Chris@42 1384 T7P = rio[WS(vs, 6) + WS(rs, 6)];
Chris@42 1385 T7Q = T7O + T7P;
Chris@42 1386 T80 = T7O - T7P;
Chris@42 1387 T8j = iio[WS(vs, 6) + WS(rs, 2)];
Chris@42 1388 T8k = iio[WS(vs, 6) + WS(rs, 6)];
Chris@42 1389 T8l = T8j - T8k;
Chris@42 1390 T8J = T8j + T8k;
Chris@42 1391 }
Chris@42 1392 T7R = T7N + T7Q;
Chris@42 1393 T8O = T7N - T7Q;
Chris@42 1394 T90 = T8I + T8J;
Chris@42 1395 T84 = T80 + T83;
Chris@42 1396 T8m = T8i - T8l;
Chris@42 1397 T8A = T83 - T80;
Chris@42 1398 T8K = T8I - T8J;
Chris@42 1399 T8w = T8i + T8l;
Chris@42 1400 }
Chris@42 1401 {
Chris@42 1402 E T9c, T9s, T9v, Ta7, T9f, T9n, T9q, Ta8;
Chris@42 1403 {
Chris@42 1404 E T9a, T9b, T9t, T9u;
Chris@42 1405 T9a = rio[WS(vs, 7) + WS(rs, 1)];
Chris@42 1406 T9b = rio[WS(vs, 7) + WS(rs, 5)];
Chris@42 1407 T9c = T9a + T9b;
Chris@42 1408 T9s = T9a - T9b;
Chris@42 1409 T9t = iio[WS(vs, 7) + WS(rs, 1)];
Chris@42 1410 T9u = iio[WS(vs, 7) + WS(rs, 5)];
Chris@42 1411 T9v = T9t - T9u;
Chris@42 1412 Ta7 = T9t + T9u;
Chris@42 1413 }
Chris@42 1414 {
Chris@42 1415 E T9d, T9e, T9o, T9p;
Chris@42 1416 T9d = rio[WS(vs, 7) + WS(rs, 7)];
Chris@42 1417 T9e = rio[WS(vs, 7) + WS(rs, 3)];
Chris@42 1418 T9f = T9d + T9e;
Chris@42 1419 T9n = T9d - T9e;
Chris@42 1420 T9o = iio[WS(vs, 7) + WS(rs, 7)];
Chris@42 1421 T9p = iio[WS(vs, 7) + WS(rs, 3)];
Chris@42 1422 T9q = T9o - T9p;
Chris@42 1423 Ta8 = T9o + T9p;
Chris@42 1424 }
Chris@42 1425 T9g = T9c + T9f;
Chris@42 1426 Ta9 = Ta7 - Ta8;
Chris@42 1427 Taj = Ta7 + Ta8;
Chris@42 1428 T9r = T9n - T9q;
Chris@42 1429 T9w = T9s + T9v;
Chris@42 1430 T9G = T9n + T9q;
Chris@42 1431 Ta3 = T9f - T9c;
Chris@42 1432 T9F = T9v - T9s;
Chris@42 1433 }
Chris@42 1434 {
Chris@42 1435 E Ta, Tq, Tt, T15, Td, Tl, To, T16;
Chris@42 1436 {
Chris@42 1437 E T8, T9, Tr, Ts;
Chris@42 1438 T8 = rio[WS(rs, 1)];
Chris@42 1439 T9 = rio[WS(rs, 5)];
Chris@42 1440 Ta = T8 + T9;
Chris@42 1441 Tq = T8 - T9;
Chris@42 1442 Tr = iio[WS(rs, 1)];
Chris@42 1443 Ts = iio[WS(rs, 5)];
Chris@42 1444 Tt = Tr - Ts;
Chris@42 1445 T15 = Tr + Ts;
Chris@42 1446 }
Chris@42 1447 {
Chris@42 1448 E Tb, Tc, Tm, Tn;
Chris@42 1449 Tb = rio[WS(rs, 7)];
Chris@42 1450 Tc = rio[WS(rs, 3)];
Chris@42 1451 Td = Tb + Tc;
Chris@42 1452 Tl = Tb - Tc;
Chris@42 1453 Tm = iio[WS(rs, 7)];
Chris@42 1454 Tn = iio[WS(rs, 3)];
Chris@42 1455 To = Tm - Tn;
Chris@42 1456 T16 = Tm + Tn;
Chris@42 1457 }
Chris@42 1458 Te = Ta + Td;
Chris@42 1459 T17 = T15 - T16;
Chris@42 1460 T1h = T15 + T16;
Chris@42 1461 Tp = Tl - To;
Chris@42 1462 Tu = Tq + Tt;
Chris@42 1463 TE = Tl + To;
Chris@42 1464 T11 = Td - Ta;
Chris@42 1465 TD = Tt - Tq;
Chris@42 1466 }
Chris@42 1467 {
Chris@42 1468 E T1l, T1Q, T1B, T2g, T1o, T1y, T1T, T2h;
Chris@42 1469 {
Chris@42 1470 E T1j, T1k, T1z, T1A;
Chris@42 1471 T1j = rio[WS(vs, 1)];
Chris@42 1472 T1k = rio[WS(vs, 1) + WS(rs, 4)];
Chris@42 1473 T1l = T1j + T1k;
Chris@42 1474 T1Q = T1j - T1k;
Chris@42 1475 T1z = iio[WS(vs, 1)];
Chris@42 1476 T1A = iio[WS(vs, 1) + WS(rs, 4)];
Chris@42 1477 T1B = T1z - T1A;
Chris@42 1478 T2g = T1z + T1A;
Chris@42 1479 }
Chris@42 1480 {
Chris@42 1481 E T1m, T1n, T1R, T1S;
Chris@42 1482 T1m = rio[WS(vs, 1) + WS(rs, 2)];
Chris@42 1483 T1n = rio[WS(vs, 1) + WS(rs, 6)];
Chris@42 1484 T1o = T1m + T1n;
Chris@42 1485 T1y = T1m - T1n;
Chris@42 1486 T1R = iio[WS(vs, 1) + WS(rs, 2)];
Chris@42 1487 T1S = iio[WS(vs, 1) + WS(rs, 6)];
Chris@42 1488 T1T = T1R - T1S;
Chris@42 1489 T2h = T1R + T1S;
Chris@42 1490 }
Chris@42 1491 T1p = T1l + T1o;
Chris@42 1492 T2m = T1l - T1o;
Chris@42 1493 T2y = T2g + T2h;
Chris@42 1494 T1C = T1y + T1B;
Chris@42 1495 T1U = T1Q - T1T;
Chris@42 1496 T28 = T1B - T1y;
Chris@42 1497 T2i = T2g - T2h;
Chris@42 1498 T24 = T1Q + T1T;
Chris@42 1499 }
Chris@42 1500 {
Chris@42 1501 E T7U, T8a, T8d, T8P, T7X, T85, T88, T8Q;
Chris@42 1502 {
Chris@42 1503 E T7S, T7T, T8b, T8c;
Chris@42 1504 T7S = rio[WS(vs, 6) + WS(rs, 1)];
Chris@42 1505 T7T = rio[WS(vs, 6) + WS(rs, 5)];
Chris@42 1506 T7U = T7S + T7T;
Chris@42 1507 T8a = T7S - T7T;
Chris@42 1508 T8b = iio[WS(vs, 6) + WS(rs, 1)];
Chris@42 1509 T8c = iio[WS(vs, 6) + WS(rs, 5)];
Chris@42 1510 T8d = T8b - T8c;
Chris@42 1511 T8P = T8b + T8c;
Chris@42 1512 }
Chris@42 1513 {
Chris@42 1514 E T7V, T7W, T86, T87;
Chris@42 1515 T7V = rio[WS(vs, 6) + WS(rs, 7)];
Chris@42 1516 T7W = rio[WS(vs, 6) + WS(rs, 3)];
Chris@42 1517 T7X = T7V + T7W;
Chris@42 1518 T85 = T7V - T7W;
Chris@42 1519 T86 = iio[WS(vs, 6) + WS(rs, 7)];
Chris@42 1520 T87 = iio[WS(vs, 6) + WS(rs, 3)];
Chris@42 1521 T88 = T86 - T87;
Chris@42 1522 T8Q = T86 + T87;
Chris@42 1523 }
Chris@42 1524 T7Y = T7U + T7X;
Chris@42 1525 T8R = T8P - T8Q;
Chris@42 1526 T91 = T8P + T8Q;
Chris@42 1527 T89 = T85 - T88;
Chris@42 1528 T8e = T8a + T8d;
Chris@42 1529 T8o = T85 + T88;
Chris@42 1530 T8L = T7X - T7U;
Chris@42 1531 T8n = T8d - T8a;
Chris@42 1532 }
Chris@42 1533 {
Chris@42 1534 E T95, T9A, T9l, Ta0, T98, T9i, T9D, Ta1;
Chris@42 1535 {
Chris@42 1536 E T93, T94, T9j, T9k;
Chris@42 1537 T93 = rio[WS(vs, 7)];
Chris@42 1538 T94 = rio[WS(vs, 7) + WS(rs, 4)];
Chris@42 1539 T95 = T93 + T94;
Chris@42 1540 T9A = T93 - T94;
Chris@42 1541 T9j = iio[WS(vs, 7)];
Chris@42 1542 T9k = iio[WS(vs, 7) + WS(rs, 4)];
Chris@42 1543 T9l = T9j - T9k;
Chris@42 1544 Ta0 = T9j + T9k;
Chris@42 1545 }
Chris@42 1546 {
Chris@42 1547 E T96, T97, T9B, T9C;
Chris@42 1548 T96 = rio[WS(vs, 7) + WS(rs, 2)];
Chris@42 1549 T97 = rio[WS(vs, 7) + WS(rs, 6)];
Chris@42 1550 T98 = T96 + T97;
Chris@42 1551 T9i = T96 - T97;
Chris@42 1552 T9B = iio[WS(vs, 7) + WS(rs, 2)];
Chris@42 1553 T9C = iio[WS(vs, 7) + WS(rs, 6)];
Chris@42 1554 T9D = T9B - T9C;
Chris@42 1555 Ta1 = T9B + T9C;
Chris@42 1556 }
Chris@42 1557 T99 = T95 + T98;
Chris@42 1558 Ta6 = T95 - T98;
Chris@42 1559 Tai = Ta0 + Ta1;
Chris@42 1560 T9m = T9i + T9l;
Chris@42 1561 T9E = T9A - T9D;
Chris@42 1562 T9S = T9l - T9i;
Chris@42 1563 Ta2 = Ta0 - Ta1;
Chris@42 1564 T9O = T9A + T9D;
Chris@42 1565 }
Chris@42 1566 {
Chris@42 1567 E T2D, T38, T2T, T3y, T2G, T2Q, T3b, T3z;
Chris@42 1568 {
Chris@42 1569 E T2B, T2C, T2R, T2S;
Chris@42 1570 T2B = rio[WS(vs, 2)];
Chris@42 1571 T2C = rio[WS(vs, 2) + WS(rs, 4)];
Chris@42 1572 T2D = T2B + T2C;
Chris@42 1573 T38 = T2B - T2C;
Chris@42 1574 T2R = iio[WS(vs, 2)];
Chris@42 1575 T2S = iio[WS(vs, 2) + WS(rs, 4)];
Chris@42 1576 T2T = T2R - T2S;
Chris@42 1577 T3y = T2R + T2S;
Chris@42 1578 }
Chris@42 1579 {
Chris@42 1580 E T2E, T2F, T39, T3a;
Chris@42 1581 T2E = rio[WS(vs, 2) + WS(rs, 2)];
Chris@42 1582 T2F = rio[WS(vs, 2) + WS(rs, 6)];
Chris@42 1583 T2G = T2E + T2F;
Chris@42 1584 T2Q = T2E - T2F;
Chris@42 1585 T39 = iio[WS(vs, 2) + WS(rs, 2)];
Chris@42 1586 T3a = iio[WS(vs, 2) + WS(rs, 6)];
Chris@42 1587 T3b = T39 - T3a;
Chris@42 1588 T3z = T39 + T3a;
Chris@42 1589 }
Chris@42 1590 T2H = T2D + T2G;
Chris@42 1591 T3E = T2D - T2G;
Chris@42 1592 T3Q = T3y + T3z;
Chris@42 1593 T2U = T2Q + T2T;
Chris@42 1594 T3c = T38 - T3b;
Chris@42 1595 T3q = T2T - T2Q;
Chris@42 1596 T3A = T3y - T3z;
Chris@42 1597 T3m = T38 + T3b;
Chris@42 1598 }
Chris@42 1599 {
Chris@42 1600 E T42, T4i, T4l, T4X, T45, T4d, T4g, T4Y;
Chris@42 1601 {
Chris@42 1602 E T40, T41, T4j, T4k;
Chris@42 1603 T40 = rio[WS(vs, 3) + WS(rs, 1)];
Chris@42 1604 T41 = rio[WS(vs, 3) + WS(rs, 5)];
Chris@42 1605 T42 = T40 + T41;
Chris@42 1606 T4i = T40 - T41;
Chris@42 1607 T4j = iio[WS(vs, 3) + WS(rs, 1)];
Chris@42 1608 T4k = iio[WS(vs, 3) + WS(rs, 5)];
Chris@42 1609 T4l = T4j - T4k;
Chris@42 1610 T4X = T4j + T4k;
Chris@42 1611 }
Chris@42 1612 {
Chris@42 1613 E T43, T44, T4e, T4f;
Chris@42 1614 T43 = rio[WS(vs, 3) + WS(rs, 7)];
Chris@42 1615 T44 = rio[WS(vs, 3) + WS(rs, 3)];
Chris@42 1616 T45 = T43 + T44;
Chris@42 1617 T4d = T43 - T44;
Chris@42 1618 T4e = iio[WS(vs, 3) + WS(rs, 7)];
Chris@42 1619 T4f = iio[WS(vs, 3) + WS(rs, 3)];
Chris@42 1620 T4g = T4e - T4f;
Chris@42 1621 T4Y = T4e + T4f;
Chris@42 1622 }
Chris@42 1623 T46 = T42 + T45;
Chris@42 1624 T4Z = T4X - T4Y;
Chris@42 1625 T59 = T4X + T4Y;
Chris@42 1626 T4h = T4d - T4g;
Chris@42 1627 T4m = T4i + T4l;
Chris@42 1628 T4w = T4d + T4g;
Chris@42 1629 T4T = T45 - T42;
Chris@42 1630 T4v = T4l - T4i;
Chris@42 1631 }
Chris@42 1632 {
Chris@42 1633 E T5d, T5I, T5t, T68, T5g, T5q, T5L, T69;
Chris@42 1634 {
Chris@42 1635 E T5b, T5c, T5r, T5s;
Chris@42 1636 T5b = rio[WS(vs, 4)];
Chris@42 1637 T5c = rio[WS(vs, 4) + WS(rs, 4)];
Chris@42 1638 T5d = T5b + T5c;
Chris@42 1639 T5I = T5b - T5c;
Chris@42 1640 T5r = iio[WS(vs, 4)];
Chris@42 1641 T5s = iio[WS(vs, 4) + WS(rs, 4)];
Chris@42 1642 T5t = T5r - T5s;
Chris@42 1643 T68 = T5r + T5s;
Chris@42 1644 }
Chris@42 1645 {
Chris@42 1646 E T5e, T5f, T5J, T5K;
Chris@42 1647 T5e = rio[WS(vs, 4) + WS(rs, 2)];
Chris@42 1648 T5f = rio[WS(vs, 4) + WS(rs, 6)];
Chris@42 1649 T5g = T5e + T5f;
Chris@42 1650 T5q = T5e - T5f;
Chris@42 1651 T5J = iio[WS(vs, 4) + WS(rs, 2)];
Chris@42 1652 T5K = iio[WS(vs, 4) + WS(rs, 6)];
Chris@42 1653 T5L = T5J - T5K;
Chris@42 1654 T69 = T5J + T5K;
Chris@42 1655 }
Chris@42 1656 T5h = T5d + T5g;
Chris@42 1657 T6e = T5d - T5g;
Chris@42 1658 T6q = T68 + T69;
Chris@42 1659 T5u = T5q + T5t;
Chris@42 1660 T5M = T5I - T5L;
Chris@42 1661 T60 = T5t - T5q;
Chris@42 1662 T6a = T68 - T69;
Chris@42 1663 T5W = T5I + T5L;
Chris@42 1664 }
Chris@42 1665 {
Chris@42 1666 E T6C, T6S, T6V, T7x, T6F, T6N, T6Q, T7y;
Chris@42 1667 {
Chris@42 1668 E T6A, T6B, T6T, T6U;
Chris@42 1669 T6A = rio[WS(vs, 5) + WS(rs, 1)];
Chris@42 1670 T6B = rio[WS(vs, 5) + WS(rs, 5)];
Chris@42 1671 T6C = T6A + T6B;
Chris@42 1672 T6S = T6A - T6B;
Chris@42 1673 T6T = iio[WS(vs, 5) + WS(rs, 1)];
Chris@42 1674 T6U = iio[WS(vs, 5) + WS(rs, 5)];
Chris@42 1675 T6V = T6T - T6U;
Chris@42 1676 T7x = T6T + T6U;
Chris@42 1677 }
Chris@42 1678 {
Chris@42 1679 E T6D, T6E, T6O, T6P;
Chris@42 1680 T6D = rio[WS(vs, 5) + WS(rs, 7)];
Chris@42 1681 T6E = rio[WS(vs, 5) + WS(rs, 3)];
Chris@42 1682 T6F = T6D + T6E;
Chris@42 1683 T6N = T6D - T6E;
Chris@42 1684 T6O = iio[WS(vs, 5) + WS(rs, 7)];
Chris@42 1685 T6P = iio[WS(vs, 5) + WS(rs, 3)];
Chris@42 1686 T6Q = T6O - T6P;
Chris@42 1687 T7y = T6O + T6P;
Chris@42 1688 }
Chris@42 1689 T6G = T6C + T6F;
Chris@42 1690 T7z = T7x - T7y;
Chris@42 1691 T7J = T7x + T7y;
Chris@42 1692 T6R = T6N - T6Q;
Chris@42 1693 T6W = T6S + T6V;
Chris@42 1694 T76 = T6N + T6Q;
Chris@42 1695 T7t = T6F - T6C;
Chris@42 1696 T75 = T6V - T6S;
Chris@42 1697 }
Chris@42 1698 {
Chris@42 1699 E T2K, T30, T33, T3F, T2N, T2V, T2Y, T3G;
Chris@42 1700 {
Chris@42 1701 E T2I, T2J, T31, T32;
Chris@42 1702 T2I = rio[WS(vs, 2) + WS(rs, 1)];
Chris@42 1703 T2J = rio[WS(vs, 2) + WS(rs, 5)];
Chris@42 1704 T2K = T2I + T2J;
Chris@42 1705 T30 = T2I - T2J;
Chris@42 1706 T31 = iio[WS(vs, 2) + WS(rs, 1)];
Chris@42 1707 T32 = iio[WS(vs, 2) + WS(rs, 5)];
Chris@42 1708 T33 = T31 - T32;
Chris@42 1709 T3F = T31 + T32;
Chris@42 1710 }
Chris@42 1711 {
Chris@42 1712 E T2L, T2M, T2W, T2X;
Chris@42 1713 T2L = rio[WS(vs, 2) + WS(rs, 7)];
Chris@42 1714 T2M = rio[WS(vs, 2) + WS(rs, 3)];
Chris@42 1715 T2N = T2L + T2M;
Chris@42 1716 T2V = T2L - T2M;
Chris@42 1717 T2W = iio[WS(vs, 2) + WS(rs, 7)];
Chris@42 1718 T2X = iio[WS(vs, 2) + WS(rs, 3)];
Chris@42 1719 T2Y = T2W - T2X;
Chris@42 1720 T3G = T2W + T2X;
Chris@42 1721 }
Chris@42 1722 T2O = T2K + T2N;
Chris@42 1723 T3H = T3F - T3G;
Chris@42 1724 T3R = T3F + T3G;
Chris@42 1725 T2Z = T2V - T2Y;
Chris@42 1726 T34 = T30 + T33;
Chris@42 1727 T3e = T2V + T2Y;
Chris@42 1728 T3B = T2N - T2K;
Chris@42 1729 T3d = T33 - T30;
Chris@42 1730 }
Chris@42 1731 {
Chris@42 1732 E T3V, T4q, T4b, T4Q, T3Y, T48, T4t, T4R;
Chris@42 1733 {
Chris@42 1734 E T3T, T3U, T49, T4a;
Chris@42 1735 T3T = rio[WS(vs, 3)];
Chris@42 1736 T3U = rio[WS(vs, 3) + WS(rs, 4)];
Chris@42 1737 T3V = T3T + T3U;
Chris@42 1738 T4q = T3T - T3U;
Chris@42 1739 T49 = iio[WS(vs, 3)];
Chris@42 1740 T4a = iio[WS(vs, 3) + WS(rs, 4)];
Chris@42 1741 T4b = T49 - T4a;
Chris@42 1742 T4Q = T49 + T4a;
Chris@42 1743 }
Chris@42 1744 {
Chris@42 1745 E T3W, T3X, T4r, T4s;
Chris@42 1746 T3W = rio[WS(vs, 3) + WS(rs, 2)];
Chris@42 1747 T3X = rio[WS(vs, 3) + WS(rs, 6)];
Chris@42 1748 T3Y = T3W + T3X;
Chris@42 1749 T48 = T3W - T3X;
Chris@42 1750 T4r = iio[WS(vs, 3) + WS(rs, 2)];
Chris@42 1751 T4s = iio[WS(vs, 3) + WS(rs, 6)];
Chris@42 1752 T4t = T4r - T4s;
Chris@42 1753 T4R = T4r + T4s;
Chris@42 1754 }
Chris@42 1755 T3Z = T3V + T3Y;
Chris@42 1756 T4W = T3V - T3Y;
Chris@42 1757 T58 = T4Q + T4R;
Chris@42 1758 T4c = T48 + T4b;
Chris@42 1759 T4u = T4q - T4t;
Chris@42 1760 T4I = T4b - T48;
Chris@42 1761 T4S = T4Q - T4R;
Chris@42 1762 T4E = T4q + T4t;
Chris@42 1763 }
Chris@42 1764 {
Chris@42 1765 E T5k, T5A, T5D, T6f, T5n, T5v, T5y, T6g;
Chris@42 1766 {
Chris@42 1767 E T5i, T5j, T5B, T5C;
Chris@42 1768 T5i = rio[WS(vs, 4) + WS(rs, 1)];
Chris@42 1769 T5j = rio[WS(vs, 4) + WS(rs, 5)];
Chris@42 1770 T5k = T5i + T5j;
Chris@42 1771 T5A = T5i - T5j;
Chris@42 1772 T5B = iio[WS(vs, 4) + WS(rs, 1)];
Chris@42 1773 T5C = iio[WS(vs, 4) + WS(rs, 5)];
Chris@42 1774 T5D = T5B - T5C;
Chris@42 1775 T6f = T5B + T5C;
Chris@42 1776 }
Chris@42 1777 {
Chris@42 1778 E T5l, T5m, T5w, T5x;
Chris@42 1779 T5l = rio[WS(vs, 4) + WS(rs, 7)];
Chris@42 1780 T5m = rio[WS(vs, 4) + WS(rs, 3)];
Chris@42 1781 T5n = T5l + T5m;
Chris@42 1782 T5v = T5l - T5m;
Chris@42 1783 T5w = iio[WS(vs, 4) + WS(rs, 7)];
Chris@42 1784 T5x = iio[WS(vs, 4) + WS(rs, 3)];
Chris@42 1785 T5y = T5w - T5x;
Chris@42 1786 T6g = T5w + T5x;
Chris@42 1787 }
Chris@42 1788 T5o = T5k + T5n;
Chris@42 1789 T6h = T6f - T6g;
Chris@42 1790 T6r = T6f + T6g;
Chris@42 1791 T5z = T5v - T5y;
Chris@42 1792 T5E = T5A + T5D;
Chris@42 1793 T5O = T5v + T5y;
Chris@42 1794 T6b = T5n - T5k;
Chris@42 1795 T5N = T5D - T5A;
Chris@42 1796 }
Chris@42 1797 {
Chris@42 1798 E T6v, T70, T6L, T7q, T6y, T6I, T73, T7r;
Chris@42 1799 {
Chris@42 1800 E T6t, T6u, T6J, T6K;
Chris@42 1801 T6t = rio[WS(vs, 5)];
Chris@42 1802 T6u = rio[WS(vs, 5) + WS(rs, 4)];
Chris@42 1803 T6v = T6t + T6u;
Chris@42 1804 T70 = T6t - T6u;
Chris@42 1805 T6J = iio[WS(vs, 5)];
Chris@42 1806 T6K = iio[WS(vs, 5) + WS(rs, 4)];
Chris@42 1807 T6L = T6J - T6K;
Chris@42 1808 T7q = T6J + T6K;
Chris@42 1809 }
Chris@42 1810 {
Chris@42 1811 E T6w, T6x, T71, T72;
Chris@42 1812 T6w = rio[WS(vs, 5) + WS(rs, 2)];
Chris@42 1813 T6x = rio[WS(vs, 5) + WS(rs, 6)];
Chris@42 1814 T6y = T6w + T6x;
Chris@42 1815 T6I = T6w - T6x;
Chris@42 1816 T71 = iio[WS(vs, 5) + WS(rs, 2)];
Chris@42 1817 T72 = iio[WS(vs, 5) + WS(rs, 6)];
Chris@42 1818 T73 = T71 - T72;
Chris@42 1819 T7r = T71 + T72;
Chris@42 1820 }
Chris@42 1821 T6z = T6v + T6y;
Chris@42 1822 T7w = T6v - T6y;
Chris@42 1823 T7I = T7q + T7r;
Chris@42 1824 T6M = T6I + T6L;
Chris@42 1825 T74 = T70 - T73;
Chris@42 1826 T7i = T6L - T6I;
Chris@42 1827 T7s = T7q - T7r;
Chris@42 1828 T7e = T70 + T73;
Chris@42 1829 }
Chris@42 1830 rio[0] = T7 + Te;
Chris@42 1831 iio[0] = T1g + T1h;
Chris@42 1832 rio[WS(rs, 1)] = T1p + T1w;
Chris@42 1833 iio[WS(rs, 1)] = T2y + T2z;
Chris@42 1834 rio[WS(rs, 3)] = T3Z + T46;
Chris@42 1835 rio[WS(rs, 2)] = T2H + T2O;
Chris@42 1836 iio[WS(rs, 2)] = T3Q + T3R;
Chris@42 1837 iio[WS(rs, 3)] = T58 + T59;
Chris@42 1838 rio[WS(rs, 6)] = T7R + T7Y;
Chris@42 1839 iio[WS(rs, 6)] = T90 + T91;
Chris@42 1840 iio[WS(rs, 5)] = T7I + T7J;
Chris@42 1841 rio[WS(rs, 5)] = T6z + T6G;
Chris@42 1842 iio[WS(rs, 4)] = T6q + T6r;
Chris@42 1843 rio[WS(rs, 4)] = T5h + T5o;
Chris@42 1844 rio[WS(rs, 7)] = T99 + T9g;
Chris@42 1845 iio[WS(rs, 7)] = Tai + Taj;
Chris@42 1846 {
Chris@42 1847 E T12, T18, TX, T13;
Chris@42 1848 T12 = T10 - T11;
Chris@42 1849 T18 = T14 - T17;
Chris@42 1850 TX = W[10];
Chris@42 1851 T13 = W[11];
Chris@42 1852 iio[WS(vs, 6)] = FNMS(T13, T18, TX * T12);
Chris@42 1853 rio[WS(vs, 6)] = FMA(T13, T12, TX * T18);
Chris@42 1854 }
Chris@42 1855 {
Chris@42 1856 E Tag, Tak, Taf, Tah;
Chris@42 1857 Tag = T99 - T9g;
Chris@42 1858 Tak = Tai - Taj;
Chris@42 1859 Taf = W[6];
Chris@42 1860 Tah = W[7];
Chris@42 1861 rio[WS(vs, 4) + WS(rs, 7)] = FMA(Taf, Tag, Tah * Tak);
Chris@42 1862 iio[WS(vs, 4) + WS(rs, 7)] = FNMS(Tah, Tag, Taf * Tak);
Chris@42 1863 }
Chris@42 1864 {
Chris@42 1865 E T8M, T8S, T8H, T8N;
Chris@42 1866 T8M = T8K - T8L;
Chris@42 1867 T8S = T8O - T8R;
Chris@42 1868 T8H = W[10];
Chris@42 1869 T8N = W[11];
Chris@42 1870 iio[WS(vs, 6) + WS(rs, 6)] = FNMS(T8N, T8S, T8H * T8M);
Chris@42 1871 rio[WS(vs, 6) + WS(rs, 6)] = FMA(T8N, T8M, T8H * T8S);
Chris@42 1872 }
Chris@42 1873 {
Chris@42 1874 E T2k, T2q, T2f, T2l;
Chris@42 1875 T2k = T2i - T2j;
Chris@42 1876 T2q = T2m - T2p;
Chris@42 1877 T2f = W[10];
Chris@42 1878 T2l = W[11];
Chris@42 1879 iio[WS(vs, 6) + WS(rs, 1)] = FNMS(T2l, T2q, T2f * T2k);
Chris@42 1880 rio[WS(vs, 6) + WS(rs, 1)] = FMA(T2l, T2k, T2f * T2q);
Chris@42 1881 }
Chris@42 1882 {
Chris@42 1883 E Ta4, Taa, T9Z, Ta5;
Chris@42 1884 Ta4 = Ta2 - Ta3;
Chris@42 1885 Taa = Ta6 - Ta9;
Chris@42 1886 T9Z = W[10];
Chris@42 1887 Ta5 = W[11];
Chris@42 1888 iio[WS(vs, 6) + WS(rs, 7)] = FNMS(Ta5, Taa, T9Z * Ta4);
Chris@42 1889 rio[WS(vs, 6) + WS(rs, 7)] = FMA(Ta5, Ta4, T9Z * Taa);
Chris@42 1890 }
Chris@42 1891 {
Chris@42 1892 E T8Y, T92, T8X, T8Z;
Chris@42 1893 T8Y = T7R - T7Y;
Chris@42 1894 T92 = T90 - T91;
Chris@42 1895 T8X = W[6];
Chris@42 1896 T8Z = W[7];
Chris@42 1897 rio[WS(vs, 4) + WS(rs, 6)] = FMA(T8X, T8Y, T8Z * T92);
Chris@42 1898 iio[WS(vs, 4) + WS(rs, 6)] = FNMS(T8Z, T8Y, T8X * T92);
Chris@42 1899 }
Chris@42 1900 {
Chris@42 1901 E T2w, T2A, T2v, T2x;
Chris@42 1902 T2w = T1p - T1w;
Chris@42 1903 T2A = T2y - T2z;
Chris@42 1904 T2v = W[6];
Chris@42 1905 T2x = W[7];
Chris@42 1906 rio[WS(vs, 4) + WS(rs, 1)] = FMA(T2v, T2w, T2x * T2A);
Chris@42 1907 iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T2x, T2w, T2v * T2A);
Chris@42 1908 }
Chris@42 1909 {
Chris@42 1910 E Tac, Tae, Tab, Tad;
Chris@42 1911 Tac = Ta3 + Ta2;
Chris@42 1912 Tae = Ta6 + Ta9;
Chris@42 1913 Tab = W[2];
Chris@42 1914 Tad = W[3];
Chris@42 1915 iio[WS(vs, 2) + WS(rs, 7)] = FNMS(Tad, Tae, Tab * Tac);
Chris@42 1916 rio[WS(vs, 2) + WS(rs, 7)] = FMA(Tad, Tac, Tab * Tae);
Chris@42 1917 }
Chris@42 1918 {
Chris@42 1919 E T8U, T8W, T8T, T8V;
Chris@42 1920 T8U = T8L + T8K;
Chris@42 1921 T8W = T8O + T8R;
Chris@42 1922 T8T = W[2];
Chris@42 1923 T8V = W[3];
Chris@42 1924 iio[WS(vs, 2) + WS(rs, 6)] = FNMS(T8V, T8W, T8T * T8U);
Chris@42 1925 rio[WS(vs, 2) + WS(rs, 6)] = FMA(T8V, T8U, T8T * T8W);
Chris@42 1926 }
Chris@42 1927 {
Chris@42 1928 E T1a, T1c, T19, T1b;
Chris@42 1929 T1a = T11 + T10;
Chris@42 1930 T1c = T14 + T17;
Chris@42 1931 T19 = W[2];
Chris@42 1932 T1b = W[3];
Chris@42 1933 iio[WS(vs, 2)] = FNMS(T1b, T1c, T19 * T1a);
Chris@42 1934 rio[WS(vs, 2)] = FMA(T1b, T1a, T19 * T1c);
Chris@42 1935 }
Chris@42 1936 {
Chris@42 1937 E T1e, T1i, T1d, T1f;
Chris@42 1938 T1e = T7 - Te;
Chris@42 1939 T1i = T1g - T1h;
Chris@42 1940 T1d = W[6];
Chris@42 1941 T1f = W[7];
Chris@42 1942 rio[WS(vs, 4)] = FMA(T1d, T1e, T1f * T1i);
Chris@42 1943 iio[WS(vs, 4)] = FNMS(T1f, T1e, T1d * T1i);
Chris@42 1944 }
Chris@42 1945 {
Chris@42 1946 E T2s, T2u, T2r, T2t;
Chris@42 1947 T2s = T2j + T2i;
Chris@42 1948 T2u = T2m + T2p;
Chris@42 1949 T2r = W[2];
Chris@42 1950 T2t = W[3];
Chris@42 1951 iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T2t, T2u, T2r * T2s);
Chris@42 1952 rio[WS(vs, 2) + WS(rs, 1)] = FMA(T2t, T2s, T2r * T2u);
Chris@42 1953 }
Chris@42 1954 {
Chris@42 1955 E T3C, T3I, T3x, T3D;
Chris@42 1956 T3C = T3A - T3B;
Chris@42 1957 T3I = T3E - T3H;
Chris@42 1958 T3x = W[10];
Chris@42 1959 T3D = W[11];
Chris@42 1960 iio[WS(vs, 6) + WS(rs, 2)] = FNMS(T3D, T3I, T3x * T3C);
Chris@42 1961 rio[WS(vs, 6) + WS(rs, 2)] = FMA(T3D, T3C, T3x * T3I);
Chris@42 1962 }
Chris@42 1963 {
Chris@42 1964 E T4U, T50, T4P, T4V;
Chris@42 1965 T4U = T4S - T4T;
Chris@42 1966 T50 = T4W - T4Z;
Chris@42 1967 T4P = W[10];
Chris@42 1968 T4V = W[11];
Chris@42 1969 iio[WS(vs, 6) + WS(rs, 3)] = FNMS(T4V, T50, T4P * T4U);
Chris@42 1970 rio[WS(vs, 6) + WS(rs, 3)] = FMA(T4V, T4U, T4P * T50);
Chris@42 1971 }
Chris@42 1972 {
Chris@42 1973 E T56, T5a, T55, T57;
Chris@42 1974 T56 = T3Z - T46;
Chris@42 1975 T5a = T58 - T59;
Chris@42 1976 T55 = W[6];
Chris@42 1977 T57 = W[7];
Chris@42 1978 rio[WS(vs, 4) + WS(rs, 3)] = FMA(T55, T56, T57 * T5a);
Chris@42 1979 iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T57, T56, T55 * T5a);
Chris@42 1980 }
Chris@42 1981 {
Chris@42 1982 E T6o, T6s, T6n, T6p;
Chris@42 1983 T6o = T5h - T5o;
Chris@42 1984 T6s = T6q - T6r;
Chris@42 1985 T6n = W[6];
Chris@42 1986 T6p = W[7];
Chris@42 1987 rio[WS(vs, 4) + WS(rs, 4)] = FMA(T6n, T6o, T6p * T6s);
Chris@42 1988 iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T6p, T6o, T6n * T6s);
Chris@42 1989 }
Chris@42 1990 {
Chris@42 1991 E T7u, T7A, T7p, T7v;
Chris@42 1992 T7u = T7s - T7t;
Chris@42 1993 T7A = T7w - T7z;
Chris@42 1994 T7p = W[10];
Chris@42 1995 T7v = W[11];
Chris@42 1996 iio[WS(vs, 6) + WS(rs, 5)] = FNMS(T7v, T7A, T7p * T7u);
Chris@42 1997 rio[WS(vs, 6) + WS(rs, 5)] = FMA(T7v, T7u, T7p * T7A);
Chris@42 1998 }
Chris@42 1999 {
Chris@42 2000 E T6c, T6i, T67, T6d;
Chris@42 2001 T6c = T6a - T6b;
Chris@42 2002 T6i = T6e - T6h;
Chris@42 2003 T67 = W[10];
Chris@42 2004 T6d = W[11];
Chris@42 2005 iio[WS(vs, 6) + WS(rs, 4)] = FNMS(T6d, T6i, T67 * T6c);
Chris@42 2006 rio[WS(vs, 6) + WS(rs, 4)] = FMA(T6d, T6c, T67 * T6i);
Chris@42 2007 }
Chris@42 2008 {
Chris@42 2009 E T7G, T7K, T7F, T7H;
Chris@42 2010 T7G = T6z - T6G;
Chris@42 2011 T7K = T7I - T7J;
Chris@42 2012 T7F = W[6];
Chris@42 2013 T7H = W[7];
Chris@42 2014 rio[WS(vs, 4) + WS(rs, 5)] = FMA(T7F, T7G, T7H * T7K);
Chris@42 2015 iio[WS(vs, 4) + WS(rs, 5)] = FNMS(T7H, T7G, T7F * T7K);
Chris@42 2016 }
Chris@42 2017 {
Chris@42 2018 E T3O, T3S, T3N, T3P;
Chris@42 2019 T3O = T2H - T2O;
Chris@42 2020 T3S = T3Q - T3R;
Chris@42 2021 T3N = W[6];
Chris@42 2022 T3P = W[7];
Chris@42 2023 rio[WS(vs, 4) + WS(rs, 2)] = FMA(T3N, T3O, T3P * T3S);
Chris@42 2024 iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T3P, T3O, T3N * T3S);
Chris@42 2025 }
Chris@42 2026 {
Chris@42 2027 E T3K, T3M, T3J, T3L;
Chris@42 2028 T3K = T3B + T3A;
Chris@42 2029 T3M = T3E + T3H;
Chris@42 2030 T3J = W[2];
Chris@42 2031 T3L = W[3];
Chris@42 2032 iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T3L, T3M, T3J * T3K);
Chris@42 2033 rio[WS(vs, 2) + WS(rs, 2)] = FMA(T3L, T3K, T3J * T3M);
Chris@42 2034 }
Chris@42 2035 {
Chris@42 2036 E T7C, T7E, T7B, T7D;
Chris@42 2037 T7C = T7t + T7s;
Chris@42 2038 T7E = T7w + T7z;
Chris@42 2039 T7B = W[2];
Chris@42 2040 T7D = W[3];
Chris@42 2041 iio[WS(vs, 2) + WS(rs, 5)] = FNMS(T7D, T7E, T7B * T7C);
Chris@42 2042 rio[WS(vs, 2) + WS(rs, 5)] = FMA(T7D, T7C, T7B * T7E);
Chris@42 2043 }
Chris@42 2044 {
Chris@42 2045 E T6k, T6m, T6j, T6l;
Chris@42 2046 T6k = T6b + T6a;
Chris@42 2047 T6m = T6e + T6h;
Chris@42 2048 T6j = W[2];
Chris@42 2049 T6l = W[3];
Chris@42 2050 iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T6l, T6m, T6j * T6k);
Chris@42 2051 rio[WS(vs, 2) + WS(rs, 4)] = FMA(T6l, T6k, T6j * T6m);
Chris@42 2052 }
Chris@42 2053 {
Chris@42 2054 E T52, T54, T51, T53;
Chris@42 2055 T52 = T4T + T4S;
Chris@42 2056 T54 = T4W + T4Z;
Chris@42 2057 T51 = W[2];
Chris@42 2058 T53 = W[3];
Chris@42 2059 iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T53, T54, T51 * T52);
Chris@42 2060 rio[WS(vs, 2) + WS(rs, 3)] = FMA(T53, T52, T51 * T54);
Chris@42 2061 }
Chris@42 2062 {
Chris@42 2063 E T5G, T5S, T5Q, T5U, T5F, T5P;
Chris@42 2064 T5F = KP707106781 * (T5z - T5E);
Chris@42 2065 T5G = T5u - T5F;
Chris@42 2066 T5S = T5u + T5F;
Chris@42 2067 T5P = KP707106781 * (T5N - T5O);
Chris@42 2068 T5Q = T5M - T5P;
Chris@42 2069 T5U = T5M + T5P;
Chris@42 2070 {
Chris@42 2071 E T5p, T5H, T5R, T5T;
Chris@42 2072 T5p = W[12];
Chris@42 2073 T5H = W[13];
Chris@42 2074 iio[WS(vs, 7) + WS(rs, 4)] = FNMS(T5H, T5Q, T5p * T5G);
Chris@42 2075 rio[WS(vs, 7) + WS(rs, 4)] = FMA(T5H, T5G, T5p * T5Q);
Chris@42 2076 T5R = W[4];
Chris@42 2077 T5T = W[5];
Chris@42 2078 iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T5T, T5U, T5R * T5S);
Chris@42 2079 rio[WS(vs, 3) + WS(rs, 4)] = FMA(T5T, T5S, T5R * T5U);
Chris@42 2080 }
Chris@42 2081 }
Chris@42 2082 {
Chris@42 2083 E Tw, TI, TG, TK, Tv, TF;
Chris@42 2084 Tv = KP707106781 * (Tp - Tu);
Chris@42 2085 Tw = Tk - Tv;
Chris@42 2086 TI = Tk + Tv;
Chris@42 2087 TF = KP707106781 * (TD - TE);
Chris@42 2088 TG = TC - TF;
Chris@42 2089 TK = TC + TF;
Chris@42 2090 {
Chris@42 2091 E Tf, Tx, TH, TJ;
Chris@42 2092 Tf = W[12];
Chris@42 2093 Tx = W[13];
Chris@42 2094 iio[WS(vs, 7)] = FNMS(Tx, TG, Tf * Tw);
Chris@42 2095 rio[WS(vs, 7)] = FMA(Tx, Tw, Tf * TG);
Chris@42 2096 TH = W[4];
Chris@42 2097 TJ = W[5];
Chris@42 2098 iio[WS(vs, 3)] = FNMS(TJ, TK, TH * TI);
Chris@42 2099 rio[WS(vs, 3)] = FMA(TJ, TI, TH * TK);
Chris@42 2100 }
Chris@42 2101 }
Chris@42 2102 {
Chris@42 2103 E T9Q, T9W, T9U, T9Y, T9P, T9T;
Chris@42 2104 T9P = KP707106781 * (T9w + T9r);
Chris@42 2105 T9Q = T9O - T9P;
Chris@42 2106 T9W = T9O + T9P;
Chris@42 2107 T9T = KP707106781 * (T9F + T9G);
Chris@42 2108 T9U = T9S - T9T;
Chris@42 2109 T9Y = T9S + T9T;
Chris@42 2110 {
Chris@42 2111 E T9N, T9R, T9V, T9X;
Chris@42 2112 T9N = W[8];
Chris@42 2113 T9R = W[9];
Chris@42 2114 rio[WS(vs, 5) + WS(rs, 7)] = FMA(T9N, T9Q, T9R * T9U);
Chris@42 2115 iio[WS(vs, 5) + WS(rs, 7)] = FNMS(T9R, T9Q, T9N * T9U);
Chris@42 2116 T9V = W[0];
Chris@42 2117 T9X = W[1];
Chris@42 2118 rio[WS(vs, 1) + WS(rs, 7)] = FMA(T9V, T9W, T9X * T9Y);
Chris@42 2119 iio[WS(vs, 1) + WS(rs, 7)] = FNMS(T9X, T9W, T9V * T9Y);
Chris@42 2120 }
Chris@42 2121 }
Chris@42 2122 {
Chris@42 2123 E T36, T3i, T3g, T3k, T35, T3f;
Chris@42 2124 T35 = KP707106781 * (T2Z - T34);
Chris@42 2125 T36 = T2U - T35;
Chris@42 2126 T3i = T2U + T35;
Chris@42 2127 T3f = KP707106781 * (T3d - T3e);
Chris@42 2128 T3g = T3c - T3f;
Chris@42 2129 T3k = T3c + T3f;
Chris@42 2130 {
Chris@42 2131 E T2P, T37, T3h, T3j;
Chris@42 2132 T2P = W[12];
Chris@42 2133 T37 = W[13];
Chris@42 2134 iio[WS(vs, 7) + WS(rs, 2)] = FNMS(T37, T3g, T2P * T36);
Chris@42 2135 rio[WS(vs, 7) + WS(rs, 2)] = FMA(T37, T36, T2P * T3g);
Chris@42 2136 T3h = W[4];
Chris@42 2137 T3j = W[5];
Chris@42 2138 iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T3j, T3k, T3h * T3i);
Chris@42 2139 rio[WS(vs, 3) + WS(rs, 2)] = FMA(T3j, T3i, T3h * T3k);
Chris@42 2140 }
Chris@42 2141 }
Chris@42 2142 {
Chris@42 2143 E T5Y, T64, T62, T66, T5X, T61;
Chris@42 2144 T5X = KP707106781 * (T5E + T5z);
Chris@42 2145 T5Y = T5W - T5X;
Chris@42 2146 T64 = T5W + T5X;
Chris@42 2147 T61 = KP707106781 * (T5N + T5O);
Chris@42 2148 T62 = T60 - T61;
Chris@42 2149 T66 = T60 + T61;
Chris@42 2150 {
Chris@42 2151 E T5V, T5Z, T63, T65;
Chris@42 2152 T5V = W[8];
Chris@42 2153 T5Z = W[9];
Chris@42 2154 rio[WS(vs, 5) + WS(rs, 4)] = FMA(T5V, T5Y, T5Z * T62);
Chris@42 2155 iio[WS(vs, 5) + WS(rs, 4)] = FNMS(T5Z, T5Y, T5V * T62);
Chris@42 2156 T63 = W[0];
Chris@42 2157 T65 = W[1];
Chris@42 2158 rio[WS(vs, 1) + WS(rs, 4)] = FMA(T63, T64, T65 * T66);
Chris@42 2159 iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T65, T64, T63 * T66);
Chris@42 2160 }
Chris@42 2161 }
Chris@42 2162 {
Chris@42 2163 E T7g, T7m, T7k, T7o, T7f, T7j;
Chris@42 2164 T7f = KP707106781 * (T6W + T6R);
Chris@42 2165 T7g = T7e - T7f;
Chris@42 2166 T7m = T7e + T7f;
Chris@42 2167 T7j = KP707106781 * (T75 + T76);
Chris@42 2168 T7k = T7i - T7j;
Chris@42 2169 T7o = T7i + T7j;
Chris@42 2170 {
Chris@42 2171 E T7d, T7h, T7l, T7n;
Chris@42 2172 T7d = W[8];
Chris@42 2173 T7h = W[9];
Chris@42 2174 rio[WS(vs, 5) + WS(rs, 5)] = FMA(T7d, T7g, T7h * T7k);
Chris@42 2175 iio[WS(vs, 5) + WS(rs, 5)] = FNMS(T7h, T7g, T7d * T7k);
Chris@42 2176 T7l = W[0];
Chris@42 2177 T7n = W[1];
Chris@42 2178 rio[WS(vs, 1) + WS(rs, 5)] = FMA(T7l, T7m, T7n * T7o);
Chris@42 2179 iio[WS(vs, 1) + WS(rs, 5)] = FNMS(T7n, T7m, T7l * T7o);
Chris@42 2180 }
Chris@42 2181 }
Chris@42 2182 {
Chris@42 2183 E T8g, T8s, T8q, T8u, T8f, T8p;
Chris@42 2184 T8f = KP707106781 * (T89 - T8e);
Chris@42 2185 T8g = T84 - T8f;
Chris@42 2186 T8s = T84 + T8f;
Chris@42 2187 T8p = KP707106781 * (T8n - T8o);
Chris@42 2188 T8q = T8m - T8p;
Chris@42 2189 T8u = T8m + T8p;
Chris@42 2190 {
Chris@42 2191 E T7Z, T8h, T8r, T8t;
Chris@42 2192 T7Z = W[12];
Chris@42 2193 T8h = W[13];
Chris@42 2194 iio[WS(vs, 7) + WS(rs, 6)] = FNMS(T8h, T8q, T7Z * T8g);
Chris@42 2195 rio[WS(vs, 7) + WS(rs, 6)] = FMA(T8h, T8g, T7Z * T8q);
Chris@42 2196 T8r = W[4];
Chris@42 2197 T8t = W[5];
Chris@42 2198 iio[WS(vs, 3) + WS(rs, 6)] = FNMS(T8t, T8u, T8r * T8s);
Chris@42 2199 rio[WS(vs, 3) + WS(rs, 6)] = FMA(T8t, T8s, T8r * T8u);
Chris@42 2200 }
Chris@42 2201 }
Chris@42 2202 {
Chris@42 2203 E T4G, T4M, T4K, T4O, T4F, T4J;
Chris@42 2204 T4F = KP707106781 * (T4m + T4h);
Chris@42 2205 T4G = T4E - T4F;
Chris@42 2206 T4M = T4E + T4F;
Chris@42 2207 T4J = KP707106781 * (T4v + T4w);
Chris@42 2208 T4K = T4I - T4J;
Chris@42 2209 T4O = T4I + T4J;
Chris@42 2210 {
Chris@42 2211 E T4D, T4H, T4L, T4N;
Chris@42 2212 T4D = W[8];
Chris@42 2213 T4H = W[9];
Chris@42 2214 rio[WS(vs, 5) + WS(rs, 3)] = FMA(T4D, T4G, T4H * T4K);
Chris@42 2215 iio[WS(vs, 5) + WS(rs, 3)] = FNMS(T4H, T4G, T4D * T4K);
Chris@42 2216 T4L = W[0];
Chris@42 2217 T4N = W[1];
Chris@42 2218 rio[WS(vs, 1) + WS(rs, 3)] = FMA(T4L, T4M, T4N * T4O);
Chris@42 2219 iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T4N, T4M, T4L * T4O);
Chris@42 2220 }
Chris@42 2221 }
Chris@42 2222 {
Chris@42 2223 E TO, TU, TS, TW, TN, TR;
Chris@42 2224 TN = KP707106781 * (Tu + Tp);
Chris@42 2225 TO = TM - TN;
Chris@42 2226 TU = TM + TN;
Chris@42 2227 TR = KP707106781 * (TD + TE);
Chris@42 2228 TS = TQ - TR;
Chris@42 2229 TW = TQ + TR;
Chris@42 2230 {
Chris@42 2231 E TL, TP, TT, TV;
Chris@42 2232 TL = W[8];
Chris@42 2233 TP = W[9];
Chris@42 2234 rio[WS(vs, 5)] = FMA(TL, TO, TP * TS);
Chris@42 2235 iio[WS(vs, 5)] = FNMS(TP, TO, TL * TS);
Chris@42 2236 TT = W[0];
Chris@42 2237 TV = W[1];
Chris@42 2238 rio[WS(vs, 1)] = FMA(TT, TU, TV * TW);
Chris@42 2239 iio[WS(vs, 1)] = FNMS(TV, TU, TT * TW);
Chris@42 2240 }
Chris@42 2241 }
Chris@42 2242 {
Chris@42 2243 E T26, T2c, T2a, T2e, T25, T29;
Chris@42 2244 T25 = KP707106781 * (T1M + T1H);
Chris@42 2245 T26 = T24 - T25;
Chris@42 2246 T2c = T24 + T25;
Chris@42 2247 T29 = KP707106781 * (T1V + T1W);
Chris@42 2248 T2a = T28 - T29;
Chris@42 2249 T2e = T28 + T29;
Chris@42 2250 {
Chris@42 2251 E T23, T27, T2b, T2d;
Chris@42 2252 T23 = W[8];
Chris@42 2253 T27 = W[9];
Chris@42 2254 rio[WS(vs, 5) + WS(rs, 1)] = FMA(T23, T26, T27 * T2a);
Chris@42 2255 iio[WS(vs, 5) + WS(rs, 1)] = FNMS(T27, T26, T23 * T2a);
Chris@42 2256 T2b = W[0];
Chris@42 2257 T2d = W[1];
Chris@42 2258 rio[WS(vs, 1) + WS(rs, 1)] = FMA(T2b, T2c, T2d * T2e);
Chris@42 2259 iio[WS(vs, 1) + WS(rs, 1)] = FNMS(T2d, T2c, T2b * T2e);
Chris@42 2260 }
Chris@42 2261 }
Chris@42 2262 {
Chris@42 2263 E T9y, T9K, T9I, T9M, T9x, T9H;
Chris@42 2264 T9x = KP707106781 * (T9r - T9w);
Chris@42 2265 T9y = T9m - T9x;
Chris@42 2266 T9K = T9m + T9x;
Chris@42 2267 T9H = KP707106781 * (T9F - T9G);
Chris@42 2268 T9I = T9E - T9H;
Chris@42 2269 T9M = T9E + T9H;
Chris@42 2270 {
Chris@42 2271 E T9h, T9z, T9J, T9L;
Chris@42 2272 T9h = W[12];
Chris@42 2273 T9z = W[13];
Chris@42 2274 iio[WS(vs, 7) + WS(rs, 7)] = FNMS(T9z, T9I, T9h * T9y);
Chris@42 2275 rio[WS(vs, 7) + WS(rs, 7)] = FMA(T9z, T9y, T9h * T9I);
Chris@42 2276 T9J = W[4];
Chris@42 2277 T9L = W[5];
Chris@42 2278 iio[WS(vs, 3) + WS(rs, 7)] = FNMS(T9L, T9M, T9J * T9K);
Chris@42 2279 rio[WS(vs, 3) + WS(rs, 7)] = FMA(T9L, T9K, T9J * T9M);
Chris@42 2280 }
Chris@42 2281 }
Chris@42 2282 {
Chris@42 2283 E T6Y, T7a, T78, T7c, T6X, T77;
Chris@42 2284 T6X = KP707106781 * (T6R - T6W);
Chris@42 2285 T6Y = T6M - T6X;
Chris@42 2286 T7a = T6M + T6X;
Chris@42 2287 T77 = KP707106781 * (T75 - T76);
Chris@42 2288 T78 = T74 - T77;
Chris@42 2289 T7c = T74 + T77;
Chris@42 2290 {
Chris@42 2291 E T6H, T6Z, T79, T7b;
Chris@42 2292 T6H = W[12];
Chris@42 2293 T6Z = W[13];
Chris@42 2294 iio[WS(vs, 7) + WS(rs, 5)] = FNMS(T6Z, T78, T6H * T6Y);
Chris@42 2295 rio[WS(vs, 7) + WS(rs, 5)] = FMA(T6Z, T6Y, T6H * T78);
Chris@42 2296 T79 = W[4];
Chris@42 2297 T7b = W[5];
Chris@42 2298 iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T7b, T7c, T79 * T7a);
Chris@42 2299 rio[WS(vs, 3) + WS(rs, 5)] = FMA(T7b, T7a, T79 * T7c);
Chris@42 2300 }
Chris@42 2301 }
Chris@42 2302 {
Chris@42 2303 E T1O, T20, T1Y, T22, T1N, T1X;
Chris@42 2304 T1N = KP707106781 * (T1H - T1M);
Chris@42 2305 T1O = T1C - T1N;
Chris@42 2306 T20 = T1C + T1N;
Chris@42 2307 T1X = KP707106781 * (T1V - T1W);
Chris@42 2308 T1Y = T1U - T1X;
Chris@42 2309 T22 = T1U + T1X;
Chris@42 2310 {
Chris@42 2311 E T1x, T1P, T1Z, T21;
Chris@42 2312 T1x = W[12];
Chris@42 2313 T1P = W[13];
Chris@42 2314 iio[WS(vs, 7) + WS(rs, 1)] = FNMS(T1P, T1Y, T1x * T1O);
Chris@42 2315 rio[WS(vs, 7) + WS(rs, 1)] = FMA(T1P, T1O, T1x * T1Y);
Chris@42 2316 T1Z = W[4];
Chris@42 2317 T21 = W[5];
Chris@42 2318 iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T21, T22, T1Z * T20);
Chris@42 2319 rio[WS(vs, 3) + WS(rs, 1)] = FMA(T21, T20, T1Z * T22);
Chris@42 2320 }
Chris@42 2321 }
Chris@42 2322 {
Chris@42 2323 E T4o, T4A, T4y, T4C, T4n, T4x;
Chris@42 2324 T4n = KP707106781 * (T4h - T4m);
Chris@42 2325 T4o = T4c - T4n;
Chris@42 2326 T4A = T4c + T4n;
Chris@42 2327 T4x = KP707106781 * (T4v - T4w);
Chris@42 2328 T4y = T4u - T4x;
Chris@42 2329 T4C = T4u + T4x;
Chris@42 2330 {
Chris@42 2331 E T47, T4p, T4z, T4B;
Chris@42 2332 T47 = W[12];
Chris@42 2333 T4p = W[13];
Chris@42 2334 iio[WS(vs, 7) + WS(rs, 3)] = FNMS(T4p, T4y, T47 * T4o);
Chris@42 2335 rio[WS(vs, 7) + WS(rs, 3)] = FMA(T4p, T4o, T47 * T4y);
Chris@42 2336 T4z = W[4];
Chris@42 2337 T4B = W[5];
Chris@42 2338 iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T4B, T4C, T4z * T4A);
Chris@42 2339 rio[WS(vs, 3) + WS(rs, 3)] = FMA(T4B, T4A, T4z * T4C);
Chris@42 2340 }
Chris@42 2341 }
Chris@42 2342 {
Chris@42 2343 E T3o, T3u, T3s, T3w, T3n, T3r;
Chris@42 2344 T3n = KP707106781 * (T34 + T2Z);
Chris@42 2345 T3o = T3m - T3n;
Chris@42 2346 T3u = T3m + T3n;
Chris@42 2347 T3r = KP707106781 * (T3d + T3e);
Chris@42 2348 T3s = T3q - T3r;
Chris@42 2349 T3w = T3q + T3r;
Chris@42 2350 {
Chris@42 2351 E T3l, T3p, T3t, T3v;
Chris@42 2352 T3l = W[8];
Chris@42 2353 T3p = W[9];
Chris@42 2354 rio[WS(vs, 5) + WS(rs, 2)] = FMA(T3l, T3o, T3p * T3s);
Chris@42 2355 iio[WS(vs, 5) + WS(rs, 2)] = FNMS(T3p, T3o, T3l * T3s);
Chris@42 2356 T3t = W[0];
Chris@42 2357 T3v = W[1];
Chris@42 2358 rio[WS(vs, 1) + WS(rs, 2)] = FMA(T3t, T3u, T3v * T3w);
Chris@42 2359 iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T3v, T3u, T3t * T3w);
Chris@42 2360 }
Chris@42 2361 }
Chris@42 2362 {
Chris@42 2363 E T8y, T8E, T8C, T8G, T8x, T8B;
Chris@42 2364 T8x = KP707106781 * (T8e + T89);
Chris@42 2365 T8y = T8w - T8x;
Chris@42 2366 T8E = T8w + T8x;
Chris@42 2367 T8B = KP707106781 * (T8n + T8o);
Chris@42 2368 T8C = T8A - T8B;
Chris@42 2369 T8G = T8A + T8B;
Chris@42 2370 {
Chris@42 2371 E T8v, T8z, T8D, T8F;
Chris@42 2372 T8v = W[8];
Chris@42 2373 T8z = W[9];
Chris@42 2374 rio[WS(vs, 5) + WS(rs, 6)] = FMA(T8v, T8y, T8z * T8C);
Chris@42 2375 iio[WS(vs, 5) + WS(rs, 6)] = FNMS(T8z, T8y, T8v * T8C);
Chris@42 2376 T8D = W[0];
Chris@42 2377 T8F = W[1];
Chris@42 2378 rio[WS(vs, 1) + WS(rs, 6)] = FMA(T8D, T8E, T8F * T8G);
Chris@42 2379 iio[WS(vs, 1) + WS(rs, 6)] = FNMS(T8F, T8E, T8D * T8G);
Chris@42 2380 }
Chris@42 2381 }
Chris@42 2382 }
Chris@42 2383 }
Chris@42 2384 }
Chris@42 2385
Chris@42 2386 static const tw_instr twinstr[] = { /* Twiddle-instruction table for this codelet; it is referenced by the `desc` descriptor defined below. */
Chris@42 2387 {TW_FULL, 0, 8}, /* NOTE(review): opcode semantics are defined outside this file; presumably requests the full twiddle set for size 8 (the visible part of q1_8 reads W[0] through W[13]) -- confirm. */
Chris@42 2388 {TW_NEXT, 1, 0} /* Final entry of the table; presumably the terminating/advance record -- confirm against the tw_instr definition. */
Chris@42 2389 };
Chris@42 2390
Chris@42 2391 static const ct_desc desc = { 8, "q1_8", twinstr, &GENUS, {416, 144, 112, 0}, 0, 0, 0 }; /* Descriptor passed at registration: 8 and "q1_8" agree with the `-n 8 -name q1_8` generator options recorded near the top of the file; twinstr is the table above. NOTE(review): {416, 144, 112, 0} are presumably operation counts and the trailing zeros further ct_desc fields -- confirm against the ct_desc declaration. */
Chris@42 2392
Chris@42 2393 void X(codelet_q1_8) (planner *p) { /* Registration entry point for this codelet; p is the planner it is registered with. */
Chris@42 2394 X(kdft_difsq_register) (p, q1_8, &desc); /* Hands the q1_8 routine and its descriptor to the planner via kdft_difsq_register. */
Chris@42 2395 }
Chris@42 2396 #endif /* HAVE_FMA */