annotate src/fftw-3.3.8/dft/scalar/codelets/q1_8.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:30 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_twidsq.native -fma -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 8 -name q1_8 -include dft/scalar/q.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 528 FP additions, 288 FP multiplications,
Chris@82 32 * (or, 352 additions, 112 multiplications, 176 fused multiply/add),
Chris@82 33 * 152 stack variables, 1 constants, and 256 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/q.h"
Chris@82 36
Chris@82 37 static void q1_8(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
Chris@82 38 {
Chris@82 39 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 40 {
Chris@82 41 INT m;
Chris@82 42 for (m = mb, W = W + (mb * 14); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
Chris@82 43 E T7, T1d, T1t, Tk, TD, TV, T18, TQ, T4F, T5L, T61, T4S, T5b, T5t, T5G;
Chris@82 44 E T5o, T6b, T7h, T7x, T6o, T6H, T6Z, T7c, T6U, TaJ, TbP, Tc5, TaW, Tbf, Tbx;
Chris@82 45 E TbK, Tbs, T1D, T2J, T2Z, T1Q, T29, T2r, T2E, T2m, T39, T4f, T4v, T3m, T3F;
Chris@82 46 E T3X, T4a, T3S, T7H, T8N, T93, T7U, T8d, T8v, T8I, T8q, T9d, Taj, Taz, T9q;
Chris@82 47 E T9J, Ta1, Tae, T9W, Te, T19, T1u, T1g, TE, TF, TW, Tv, TR, T4M, T5H;
Chris@82 48 E T62, T5O, T5c, T5d, T5u, T53, T5p, T6i, T7d, T7y, T7k, T6I, T6J, T70, T6z;
Chris@82 49 E T6V, TaQ, TbL, Tc6, TbS, Tbg, Tbh, Tby, Tb7, Tbt, T1K, T2F, T30, T2M, T2a;
Chris@82 50 E T2b, T2s, T21, T2n, T3g, T4b, T4w, T4i, T3G, T3H, T3Y, T3x, T3T, T7O, T8J;
Chris@82 51 E T94, T8Q, T8e, T8f, T8w, T85, T8r, T9k, Taf, TaA, Tam, T9K, T9L, Ta2, T9B;
Chris@82 52 E T9X;
Chris@82 53 {
Chris@82 54 E T3, Tz, Tj, T16, T6, Tg, TC, T17;
Chris@82 55 {
Chris@82 56 E T1, T2, Th, Ti;
Chris@82 57 T1 = rio[0];
Chris@82 58 T2 = rio[WS(rs, 4)];
Chris@82 59 T3 = T1 + T2;
Chris@82 60 Tz = T1 - T2;
Chris@82 61 Th = iio[0];
Chris@82 62 Ti = iio[WS(rs, 4)];
Chris@82 63 Tj = Th - Ti;
Chris@82 64 T16 = Th + Ti;
Chris@82 65 }
Chris@82 66 {
Chris@82 67 E T4, T5, TA, TB;
Chris@82 68 T4 = rio[WS(rs, 2)];
Chris@82 69 T5 = rio[WS(rs, 6)];
Chris@82 70 T6 = T4 + T5;
Chris@82 71 Tg = T4 - T5;
Chris@82 72 TA = iio[WS(rs, 2)];
Chris@82 73 TB = iio[WS(rs, 6)];
Chris@82 74 TC = TA - TB;
Chris@82 75 T17 = TA + TB;
Chris@82 76 }
Chris@82 77 T7 = T3 + T6;
Chris@82 78 T1d = T3 - T6;
Chris@82 79 T1t = T16 + T17;
Chris@82 80 Tk = Tg + Tj;
Chris@82 81 TD = Tz - TC;
Chris@82 82 TV = Tj - Tg;
Chris@82 83 T18 = T16 - T17;
Chris@82 84 TQ = Tz + TC;
Chris@82 85 }
Chris@82 86 {
Chris@82 87 E T4B, T57, T4R, T5E, T4E, T4O, T5a, T5F;
Chris@82 88 {
Chris@82 89 E T4z, T4A, T4P, T4Q;
Chris@82 90 T4z = rio[WS(vs, 3)];
Chris@82 91 T4A = rio[WS(vs, 3) + WS(rs, 4)];
Chris@82 92 T4B = T4z + T4A;
Chris@82 93 T57 = T4z - T4A;
Chris@82 94 T4P = iio[WS(vs, 3)];
Chris@82 95 T4Q = iio[WS(vs, 3) + WS(rs, 4)];
Chris@82 96 T4R = T4P - T4Q;
Chris@82 97 T5E = T4P + T4Q;
Chris@82 98 }
Chris@82 99 {
Chris@82 100 E T4C, T4D, T58, T59;
Chris@82 101 T4C = rio[WS(vs, 3) + WS(rs, 2)];
Chris@82 102 T4D = rio[WS(vs, 3) + WS(rs, 6)];
Chris@82 103 T4E = T4C + T4D;
Chris@82 104 T4O = T4C - T4D;
Chris@82 105 T58 = iio[WS(vs, 3) + WS(rs, 2)];
Chris@82 106 T59 = iio[WS(vs, 3) + WS(rs, 6)];
Chris@82 107 T5a = T58 - T59;
Chris@82 108 T5F = T58 + T59;
Chris@82 109 }
Chris@82 110 T4F = T4B + T4E;
Chris@82 111 T5L = T4B - T4E;
Chris@82 112 T61 = T5E + T5F;
Chris@82 113 T4S = T4O + T4R;
Chris@82 114 T5b = T57 - T5a;
Chris@82 115 T5t = T4R - T4O;
Chris@82 116 T5G = T5E - T5F;
Chris@82 117 T5o = T57 + T5a;
Chris@82 118 }
Chris@82 119 {
Chris@82 120 E T67, T6D, T6n, T7a, T6a, T6k, T6G, T7b;
Chris@82 121 {
Chris@82 122 E T65, T66, T6l, T6m;
Chris@82 123 T65 = rio[WS(vs, 4)];
Chris@82 124 T66 = rio[WS(vs, 4) + WS(rs, 4)];
Chris@82 125 T67 = T65 + T66;
Chris@82 126 T6D = T65 - T66;
Chris@82 127 T6l = iio[WS(vs, 4)];
Chris@82 128 T6m = iio[WS(vs, 4) + WS(rs, 4)];
Chris@82 129 T6n = T6l - T6m;
Chris@82 130 T7a = T6l + T6m;
Chris@82 131 }
Chris@82 132 {
Chris@82 133 E T68, T69, T6E, T6F;
Chris@82 134 T68 = rio[WS(vs, 4) + WS(rs, 2)];
Chris@82 135 T69 = rio[WS(vs, 4) + WS(rs, 6)];
Chris@82 136 T6a = T68 + T69;
Chris@82 137 T6k = T68 - T69;
Chris@82 138 T6E = iio[WS(vs, 4) + WS(rs, 2)];
Chris@82 139 T6F = iio[WS(vs, 4) + WS(rs, 6)];
Chris@82 140 T6G = T6E - T6F;
Chris@82 141 T7b = T6E + T6F;
Chris@82 142 }
Chris@82 143 T6b = T67 + T6a;
Chris@82 144 T7h = T67 - T6a;
Chris@82 145 T7x = T7a + T7b;
Chris@82 146 T6o = T6k + T6n;
Chris@82 147 T6H = T6D - T6G;
Chris@82 148 T6Z = T6n - T6k;
Chris@82 149 T7c = T7a - T7b;
Chris@82 150 T6U = T6D + T6G;
Chris@82 151 }
Chris@82 152 {
Chris@82 153 E TaF, Tbb, TaV, TbI, TaI, TaS, Tbe, TbJ;
Chris@82 154 {
Chris@82 155 E TaD, TaE, TaT, TaU;
Chris@82 156 TaD = rio[WS(vs, 7)];
Chris@82 157 TaE = rio[WS(vs, 7) + WS(rs, 4)];
Chris@82 158 TaF = TaD + TaE;
Chris@82 159 Tbb = TaD - TaE;
Chris@82 160 TaT = iio[WS(vs, 7)];
Chris@82 161 TaU = iio[WS(vs, 7) + WS(rs, 4)];
Chris@82 162 TaV = TaT - TaU;
Chris@82 163 TbI = TaT + TaU;
Chris@82 164 }
Chris@82 165 {
Chris@82 166 E TaG, TaH, Tbc, Tbd;
Chris@82 167 TaG = rio[WS(vs, 7) + WS(rs, 2)];
Chris@82 168 TaH = rio[WS(vs, 7) + WS(rs, 6)];
Chris@82 169 TaI = TaG + TaH;
Chris@82 170 TaS = TaG - TaH;
Chris@82 171 Tbc = iio[WS(vs, 7) + WS(rs, 2)];
Chris@82 172 Tbd = iio[WS(vs, 7) + WS(rs, 6)];
Chris@82 173 Tbe = Tbc - Tbd;
Chris@82 174 TbJ = Tbc + Tbd;
Chris@82 175 }
Chris@82 176 TaJ = TaF + TaI;
Chris@82 177 TbP = TaF - TaI;
Chris@82 178 Tc5 = TbI + TbJ;
Chris@82 179 TaW = TaS + TaV;
Chris@82 180 Tbf = Tbb - Tbe;
Chris@82 181 Tbx = TaV - TaS;
Chris@82 182 TbK = TbI - TbJ;
Chris@82 183 Tbs = Tbb + Tbe;
Chris@82 184 }
Chris@82 185 {
Chris@82 186 E T1z, T25, T1P, T2C, T1C, T1M, T28, T2D;
Chris@82 187 {
Chris@82 188 E T1x, T1y, T1N, T1O;
Chris@82 189 T1x = rio[WS(vs, 1)];
Chris@82 190 T1y = rio[WS(vs, 1) + WS(rs, 4)];
Chris@82 191 T1z = T1x + T1y;
Chris@82 192 T25 = T1x - T1y;
Chris@82 193 T1N = iio[WS(vs, 1)];
Chris@82 194 T1O = iio[WS(vs, 1) + WS(rs, 4)];
Chris@82 195 T1P = T1N - T1O;
Chris@82 196 T2C = T1N + T1O;
Chris@82 197 }
Chris@82 198 {
Chris@82 199 E T1A, T1B, T26, T27;
Chris@82 200 T1A = rio[WS(vs, 1) + WS(rs, 2)];
Chris@82 201 T1B = rio[WS(vs, 1) + WS(rs, 6)];
Chris@82 202 T1C = T1A + T1B;
Chris@82 203 T1M = T1A - T1B;
Chris@82 204 T26 = iio[WS(vs, 1) + WS(rs, 2)];
Chris@82 205 T27 = iio[WS(vs, 1) + WS(rs, 6)];
Chris@82 206 T28 = T26 - T27;
Chris@82 207 T2D = T26 + T27;
Chris@82 208 }
Chris@82 209 T1D = T1z + T1C;
Chris@82 210 T2J = T1z - T1C;
Chris@82 211 T2Z = T2C + T2D;
Chris@82 212 T1Q = T1M + T1P;
Chris@82 213 T29 = T25 - T28;
Chris@82 214 T2r = T1P - T1M;
Chris@82 215 T2E = T2C - T2D;
Chris@82 216 T2m = T25 + T28;
Chris@82 217 }
Chris@82 218 {
Chris@82 219 E T35, T3B, T3l, T48, T38, T3i, T3E, T49;
Chris@82 220 {
Chris@82 221 E T33, T34, T3j, T3k;
Chris@82 222 T33 = rio[WS(vs, 2)];
Chris@82 223 T34 = rio[WS(vs, 2) + WS(rs, 4)];
Chris@82 224 T35 = T33 + T34;
Chris@82 225 T3B = T33 - T34;
Chris@82 226 T3j = iio[WS(vs, 2)];
Chris@82 227 T3k = iio[WS(vs, 2) + WS(rs, 4)];
Chris@82 228 T3l = T3j - T3k;
Chris@82 229 T48 = T3j + T3k;
Chris@82 230 }
Chris@82 231 {
Chris@82 232 E T36, T37, T3C, T3D;
Chris@82 233 T36 = rio[WS(vs, 2) + WS(rs, 2)];
Chris@82 234 T37 = rio[WS(vs, 2) + WS(rs, 6)];
Chris@82 235 T38 = T36 + T37;
Chris@82 236 T3i = T36 - T37;
Chris@82 237 T3C = iio[WS(vs, 2) + WS(rs, 2)];
Chris@82 238 T3D = iio[WS(vs, 2) + WS(rs, 6)];
Chris@82 239 T3E = T3C - T3D;
Chris@82 240 T49 = T3C + T3D;
Chris@82 241 }
Chris@82 242 T39 = T35 + T38;
Chris@82 243 T4f = T35 - T38;
Chris@82 244 T4v = T48 + T49;
Chris@82 245 T3m = T3i + T3l;
Chris@82 246 T3F = T3B - T3E;
Chris@82 247 T3X = T3l - T3i;
Chris@82 248 T4a = T48 - T49;
Chris@82 249 T3S = T3B + T3E;
Chris@82 250 }
Chris@82 251 {
Chris@82 252 E T7D, T89, T7T, T8G, T7G, T7Q, T8c, T8H;
Chris@82 253 {
Chris@82 254 E T7B, T7C, T7R, T7S;
Chris@82 255 T7B = rio[WS(vs, 5)];
Chris@82 256 T7C = rio[WS(vs, 5) + WS(rs, 4)];
Chris@82 257 T7D = T7B + T7C;
Chris@82 258 T89 = T7B - T7C;
Chris@82 259 T7R = iio[WS(vs, 5)];
Chris@82 260 T7S = iio[WS(vs, 5) + WS(rs, 4)];
Chris@82 261 T7T = T7R - T7S;
Chris@82 262 T8G = T7R + T7S;
Chris@82 263 }
Chris@82 264 {
Chris@82 265 E T7E, T7F, T8a, T8b;
Chris@82 266 T7E = rio[WS(vs, 5) + WS(rs, 2)];
Chris@82 267 T7F = rio[WS(vs, 5) + WS(rs, 6)];
Chris@82 268 T7G = T7E + T7F;
Chris@82 269 T7Q = T7E - T7F;
Chris@82 270 T8a = iio[WS(vs, 5) + WS(rs, 2)];
Chris@82 271 T8b = iio[WS(vs, 5) + WS(rs, 6)];
Chris@82 272 T8c = T8a - T8b;
Chris@82 273 T8H = T8a + T8b;
Chris@82 274 }
Chris@82 275 T7H = T7D + T7G;
Chris@82 276 T8N = T7D - T7G;
Chris@82 277 T93 = T8G + T8H;
Chris@82 278 T7U = T7Q + T7T;
Chris@82 279 T8d = T89 - T8c;
Chris@82 280 T8v = T7T - T7Q;
Chris@82 281 T8I = T8G - T8H;
Chris@82 282 T8q = T89 + T8c;
Chris@82 283 }
Chris@82 284 {
Chris@82 285 E T99, T9F, T9p, Tac, T9c, T9m, T9I, Tad;
Chris@82 286 {
Chris@82 287 E T97, T98, T9n, T9o;
Chris@82 288 T97 = rio[WS(vs, 6)];
Chris@82 289 T98 = rio[WS(vs, 6) + WS(rs, 4)];
Chris@82 290 T99 = T97 + T98;
Chris@82 291 T9F = T97 - T98;
Chris@82 292 T9n = iio[WS(vs, 6)];
Chris@82 293 T9o = iio[WS(vs, 6) + WS(rs, 4)];
Chris@82 294 T9p = T9n - T9o;
Chris@82 295 Tac = T9n + T9o;
Chris@82 296 }
Chris@82 297 {
Chris@82 298 E T9a, T9b, T9G, T9H;
Chris@82 299 T9a = rio[WS(vs, 6) + WS(rs, 2)];
Chris@82 300 T9b = rio[WS(vs, 6) + WS(rs, 6)];
Chris@82 301 T9c = T9a + T9b;
Chris@82 302 T9m = T9a - T9b;
Chris@82 303 T9G = iio[WS(vs, 6) + WS(rs, 2)];
Chris@82 304 T9H = iio[WS(vs, 6) + WS(rs, 6)];
Chris@82 305 T9I = T9G - T9H;
Chris@82 306 Tad = T9G + T9H;
Chris@82 307 }
Chris@82 308 T9d = T99 + T9c;
Chris@82 309 Taj = T99 - T9c;
Chris@82 310 Taz = Tac + Tad;
Chris@82 311 T9q = T9m + T9p;
Chris@82 312 T9J = T9F - T9I;
Chris@82 313 Ta1 = T9p - T9m;
Chris@82 314 Tae = Tac - Tad;
Chris@82 315 T9W = T9F + T9I;
Chris@82 316 }
Chris@82 317 {
Chris@82 318 E Ta, Tq, Tt, T1e, Td, Tl, To, T1f, Tp, Tu;
Chris@82 319 {
Chris@82 320 E T8, T9, Tr, Ts;
Chris@82 321 T8 = rio[WS(rs, 1)];
Chris@82 322 T9 = rio[WS(rs, 5)];
Chris@82 323 Ta = T8 + T9;
Chris@82 324 Tq = T8 - T9;
Chris@82 325 Tr = iio[WS(rs, 1)];
Chris@82 326 Ts = iio[WS(rs, 5)];
Chris@82 327 Tt = Tr - Ts;
Chris@82 328 T1e = Tr + Ts;
Chris@82 329 }
Chris@82 330 {
Chris@82 331 E Tb, Tc, Tm, Tn;
Chris@82 332 Tb = rio[WS(rs, 7)];
Chris@82 333 Tc = rio[WS(rs, 3)];
Chris@82 334 Td = Tb + Tc;
Chris@82 335 Tl = Tb - Tc;
Chris@82 336 Tm = iio[WS(rs, 7)];
Chris@82 337 Tn = iio[WS(rs, 3)];
Chris@82 338 To = Tm - Tn;
Chris@82 339 T1f = Tm + Tn;
Chris@82 340 }
Chris@82 341 Te = Ta + Td;
Chris@82 342 T19 = Td - Ta;
Chris@82 343 T1u = T1e + T1f;
Chris@82 344 T1g = T1e - T1f;
Chris@82 345 TE = Tt - Tq;
Chris@82 346 TF = Tl + To;
Chris@82 347 TW = TE + TF;
Chris@82 348 Tp = Tl - To;
Chris@82 349 Tu = Tq + Tt;
Chris@82 350 Tv = Tp - Tu;
Chris@82 351 TR = Tu + Tp;
Chris@82 352 }
Chris@82 353 {
Chris@82 354 E T4I, T4Y, T51, T5M, T4L, T4T, T4W, T5N, T4X, T52;
Chris@82 355 {
Chris@82 356 E T4G, T4H, T4Z, T50;
Chris@82 357 T4G = rio[WS(vs, 3) + WS(rs, 1)];
Chris@82 358 T4H = rio[WS(vs, 3) + WS(rs, 5)];
Chris@82 359 T4I = T4G + T4H;
Chris@82 360 T4Y = T4G - T4H;
Chris@82 361 T4Z = iio[WS(vs, 3) + WS(rs, 1)];
Chris@82 362 T50 = iio[WS(vs, 3) + WS(rs, 5)];
Chris@82 363 T51 = T4Z - T50;
Chris@82 364 T5M = T4Z + T50;
Chris@82 365 }
Chris@82 366 {
Chris@82 367 E T4J, T4K, T4U, T4V;
Chris@82 368 T4J = rio[WS(vs, 3) + WS(rs, 7)];
Chris@82 369 T4K = rio[WS(vs, 3) + WS(rs, 3)];
Chris@82 370 T4L = T4J + T4K;
Chris@82 371 T4T = T4J - T4K;
Chris@82 372 T4U = iio[WS(vs, 3) + WS(rs, 7)];
Chris@82 373 T4V = iio[WS(vs, 3) + WS(rs, 3)];
Chris@82 374 T4W = T4U - T4V;
Chris@82 375 T5N = T4U + T4V;
Chris@82 376 }
Chris@82 377 T4M = T4I + T4L;
Chris@82 378 T5H = T4L - T4I;
Chris@82 379 T62 = T5M + T5N;
Chris@82 380 T5O = T5M - T5N;
Chris@82 381 T5c = T51 - T4Y;
Chris@82 382 T5d = T4T + T4W;
Chris@82 383 T5u = T5c + T5d;
Chris@82 384 T4X = T4T - T4W;
Chris@82 385 T52 = T4Y + T51;
Chris@82 386 T53 = T4X - T52;
Chris@82 387 T5p = T52 + T4X;
Chris@82 388 }
Chris@82 389 {
Chris@82 390 E T6e, T6u, T6x, T7i, T6h, T6p, T6s, T7j, T6t, T6y;
Chris@82 391 {
Chris@82 392 E T6c, T6d, T6v, T6w;
Chris@82 393 T6c = rio[WS(vs, 4) + WS(rs, 1)];
Chris@82 394 T6d = rio[WS(vs, 4) + WS(rs, 5)];
Chris@82 395 T6e = T6c + T6d;
Chris@82 396 T6u = T6c - T6d;
Chris@82 397 T6v = iio[WS(vs, 4) + WS(rs, 1)];
Chris@82 398 T6w = iio[WS(vs, 4) + WS(rs, 5)];
Chris@82 399 T6x = T6v - T6w;
Chris@82 400 T7i = T6v + T6w;
Chris@82 401 }
Chris@82 402 {
Chris@82 403 E T6f, T6g, T6q, T6r;
Chris@82 404 T6f = rio[WS(vs, 4) + WS(rs, 7)];
Chris@82 405 T6g = rio[WS(vs, 4) + WS(rs, 3)];
Chris@82 406 T6h = T6f + T6g;
Chris@82 407 T6p = T6f - T6g;
Chris@82 408 T6q = iio[WS(vs, 4) + WS(rs, 7)];
Chris@82 409 T6r = iio[WS(vs, 4) + WS(rs, 3)];
Chris@82 410 T6s = T6q - T6r;
Chris@82 411 T7j = T6q + T6r;
Chris@82 412 }
Chris@82 413 T6i = T6e + T6h;
Chris@82 414 T7d = T6h - T6e;
Chris@82 415 T7y = T7i + T7j;
Chris@82 416 T7k = T7i - T7j;
Chris@82 417 T6I = T6x - T6u;
Chris@82 418 T6J = T6p + T6s;
Chris@82 419 T70 = T6I + T6J;
Chris@82 420 T6t = T6p - T6s;
Chris@82 421 T6y = T6u + T6x;
Chris@82 422 T6z = T6t - T6y;
Chris@82 423 T6V = T6y + T6t;
Chris@82 424 }
Chris@82 425 {
Chris@82 426 E TaM, Tb2, Tb5, TbQ, TaP, TaX, Tb0, TbR, Tb1, Tb6;
Chris@82 427 {
Chris@82 428 E TaK, TaL, Tb3, Tb4;
Chris@82 429 TaK = rio[WS(vs, 7) + WS(rs, 1)];
Chris@82 430 TaL = rio[WS(vs, 7) + WS(rs, 5)];
Chris@82 431 TaM = TaK + TaL;
Chris@82 432 Tb2 = TaK - TaL;
Chris@82 433 Tb3 = iio[WS(vs, 7) + WS(rs, 1)];
Chris@82 434 Tb4 = iio[WS(vs, 7) + WS(rs, 5)];
Chris@82 435 Tb5 = Tb3 - Tb4;
Chris@82 436 TbQ = Tb3 + Tb4;
Chris@82 437 }
Chris@82 438 {
Chris@82 439 E TaN, TaO, TaY, TaZ;
Chris@82 440 TaN = rio[WS(vs, 7) + WS(rs, 7)];
Chris@82 441 TaO = rio[WS(vs, 7) + WS(rs, 3)];
Chris@82 442 TaP = TaN + TaO;
Chris@82 443 TaX = TaN - TaO;
Chris@82 444 TaY = iio[WS(vs, 7) + WS(rs, 7)];
Chris@82 445 TaZ = iio[WS(vs, 7) + WS(rs, 3)];
Chris@82 446 Tb0 = TaY - TaZ;
Chris@82 447 TbR = TaY + TaZ;
Chris@82 448 }
Chris@82 449 TaQ = TaM + TaP;
Chris@82 450 TbL = TaP - TaM;
Chris@82 451 Tc6 = TbQ + TbR;
Chris@82 452 TbS = TbQ - TbR;
Chris@82 453 Tbg = Tb5 - Tb2;
Chris@82 454 Tbh = TaX + Tb0;
Chris@82 455 Tby = Tbg + Tbh;
Chris@82 456 Tb1 = TaX - Tb0;
Chris@82 457 Tb6 = Tb2 + Tb5;
Chris@82 458 Tb7 = Tb1 - Tb6;
Chris@82 459 Tbt = Tb6 + Tb1;
Chris@82 460 }
Chris@82 461 {
Chris@82 462 E T1G, T1W, T1Z, T2K, T1J, T1R, T1U, T2L, T1V, T20;
Chris@82 463 {
Chris@82 464 E T1E, T1F, T1X, T1Y;
Chris@82 465 T1E = rio[WS(vs, 1) + WS(rs, 1)];
Chris@82 466 T1F = rio[WS(vs, 1) + WS(rs, 5)];
Chris@82 467 T1G = T1E + T1F;
Chris@82 468 T1W = T1E - T1F;
Chris@82 469 T1X = iio[WS(vs, 1) + WS(rs, 1)];
Chris@82 470 T1Y = iio[WS(vs, 1) + WS(rs, 5)];
Chris@82 471 T1Z = T1X - T1Y;
Chris@82 472 T2K = T1X + T1Y;
Chris@82 473 }
Chris@82 474 {
Chris@82 475 E T1H, T1I, T1S, T1T;
Chris@82 476 T1H = rio[WS(vs, 1) + WS(rs, 7)];
Chris@82 477 T1I = rio[WS(vs, 1) + WS(rs, 3)];
Chris@82 478 T1J = T1H + T1I;
Chris@82 479 T1R = T1H - T1I;
Chris@82 480 T1S = iio[WS(vs, 1) + WS(rs, 7)];
Chris@82 481 T1T = iio[WS(vs, 1) + WS(rs, 3)];
Chris@82 482 T1U = T1S - T1T;
Chris@82 483 T2L = T1S + T1T;
Chris@82 484 }
Chris@82 485 T1K = T1G + T1J;
Chris@82 486 T2F = T1J - T1G;
Chris@82 487 T30 = T2K + T2L;
Chris@82 488 T2M = T2K - T2L;
Chris@82 489 T2a = T1Z - T1W;
Chris@82 490 T2b = T1R + T1U;
Chris@82 491 T2s = T2a + T2b;
Chris@82 492 T1V = T1R - T1U;
Chris@82 493 T20 = T1W + T1Z;
Chris@82 494 T21 = T1V - T20;
Chris@82 495 T2n = T20 + T1V;
Chris@82 496 }
Chris@82 497 {
Chris@82 498 E T3c, T3s, T3v, T4g, T3f, T3n, T3q, T4h, T3r, T3w;
Chris@82 499 {
Chris@82 500 E T3a, T3b, T3t, T3u;
Chris@82 501 T3a = rio[WS(vs, 2) + WS(rs, 1)];
Chris@82 502 T3b = rio[WS(vs, 2) + WS(rs, 5)];
Chris@82 503 T3c = T3a + T3b;
Chris@82 504 T3s = T3a - T3b;
Chris@82 505 T3t = iio[WS(vs, 2) + WS(rs, 1)];
Chris@82 506 T3u = iio[WS(vs, 2) + WS(rs, 5)];
Chris@82 507 T3v = T3t - T3u;
Chris@82 508 T4g = T3t + T3u;
Chris@82 509 }
Chris@82 510 {
Chris@82 511 E T3d, T3e, T3o, T3p;
Chris@82 512 T3d = rio[WS(vs, 2) + WS(rs, 7)];
Chris@82 513 T3e = rio[WS(vs, 2) + WS(rs, 3)];
Chris@82 514 T3f = T3d + T3e;
Chris@82 515 T3n = T3d - T3e;
Chris@82 516 T3o = iio[WS(vs, 2) + WS(rs, 7)];
Chris@82 517 T3p = iio[WS(vs, 2) + WS(rs, 3)];
Chris@82 518 T3q = T3o - T3p;
Chris@82 519 T4h = T3o + T3p;
Chris@82 520 }
Chris@82 521 T3g = T3c + T3f;
Chris@82 522 T4b = T3f - T3c;
Chris@82 523 T4w = T4g + T4h;
Chris@82 524 T4i = T4g - T4h;
Chris@82 525 T3G = T3v - T3s;
Chris@82 526 T3H = T3n + T3q;
Chris@82 527 T3Y = T3G + T3H;
Chris@82 528 T3r = T3n - T3q;
Chris@82 529 T3w = T3s + T3v;
Chris@82 530 T3x = T3r - T3w;
Chris@82 531 T3T = T3w + T3r;
Chris@82 532 }
Chris@82 533 {
Chris@82 534 E T7K, T80, T83, T8O, T7N, T7V, T7Y, T8P, T7Z, T84;
Chris@82 535 {
Chris@82 536 E T7I, T7J, T81, T82;
Chris@82 537 T7I = rio[WS(vs, 5) + WS(rs, 1)];
Chris@82 538 T7J = rio[WS(vs, 5) + WS(rs, 5)];
Chris@82 539 T7K = T7I + T7J;
Chris@82 540 T80 = T7I - T7J;
Chris@82 541 T81 = iio[WS(vs, 5) + WS(rs, 1)];
Chris@82 542 T82 = iio[WS(vs, 5) + WS(rs, 5)];
Chris@82 543 T83 = T81 - T82;
Chris@82 544 T8O = T81 + T82;
Chris@82 545 }
Chris@82 546 {
Chris@82 547 E T7L, T7M, T7W, T7X;
Chris@82 548 T7L = rio[WS(vs, 5) + WS(rs, 7)];
Chris@82 549 T7M = rio[WS(vs, 5) + WS(rs, 3)];
Chris@82 550 T7N = T7L + T7M;
Chris@82 551 T7V = T7L - T7M;
Chris@82 552 T7W = iio[WS(vs, 5) + WS(rs, 7)];
Chris@82 553 T7X = iio[WS(vs, 5) + WS(rs, 3)];
Chris@82 554 T7Y = T7W - T7X;
Chris@82 555 T8P = T7W + T7X;
Chris@82 556 }
Chris@82 557 T7O = T7K + T7N;
Chris@82 558 T8J = T7N - T7K;
Chris@82 559 T94 = T8O + T8P;
Chris@82 560 T8Q = T8O - T8P;
Chris@82 561 T8e = T83 - T80;
Chris@82 562 T8f = T7V + T7Y;
Chris@82 563 T8w = T8e + T8f;
Chris@82 564 T7Z = T7V - T7Y;
Chris@82 565 T84 = T80 + T83;
Chris@82 566 T85 = T7Z - T84;
Chris@82 567 T8r = T84 + T7Z;
Chris@82 568 }
Chris@82 569 {
Chris@82 570 E T9g, T9w, T9z, Tak, T9j, T9r, T9u, Tal, T9v, T9A;
Chris@82 571 {
Chris@82 572 E T9e, T9f, T9x, T9y;
Chris@82 573 T9e = rio[WS(vs, 6) + WS(rs, 1)];
Chris@82 574 T9f = rio[WS(vs, 6) + WS(rs, 5)];
Chris@82 575 T9g = T9e + T9f;
Chris@82 576 T9w = T9e - T9f;
Chris@82 577 T9x = iio[WS(vs, 6) + WS(rs, 1)];
Chris@82 578 T9y = iio[WS(vs, 6) + WS(rs, 5)];
Chris@82 579 T9z = T9x - T9y;
Chris@82 580 Tak = T9x + T9y;
Chris@82 581 }
Chris@82 582 {
Chris@82 583 E T9h, T9i, T9s, T9t;
Chris@82 584 T9h = rio[WS(vs, 6) + WS(rs, 7)];
Chris@82 585 T9i = rio[WS(vs, 6) + WS(rs, 3)];
Chris@82 586 T9j = T9h + T9i;
Chris@82 587 T9r = T9h - T9i;
Chris@82 588 T9s = iio[WS(vs, 6) + WS(rs, 7)];
Chris@82 589 T9t = iio[WS(vs, 6) + WS(rs, 3)];
Chris@82 590 T9u = T9s - T9t;
Chris@82 591 Tal = T9s + T9t;
Chris@82 592 }
Chris@82 593 T9k = T9g + T9j;
Chris@82 594 Taf = T9j - T9g;
Chris@82 595 TaA = Tak + Tal;
Chris@82 596 Tam = Tak - Tal;
Chris@82 597 T9K = T9z - T9w;
Chris@82 598 T9L = T9r + T9u;
Chris@82 599 Ta2 = T9K + T9L;
Chris@82 600 T9v = T9r - T9u;
Chris@82 601 T9A = T9w + T9z;
Chris@82 602 T9B = T9v - T9A;
Chris@82 603 T9X = T9A + T9v;
Chris@82 604 }
Chris@82 605 rio[0] = T7 + Te;
Chris@82 606 iio[0] = T1t + T1u;
Chris@82 607 rio[WS(rs, 1)] = T1D + T1K;
Chris@82 608 iio[WS(rs, 1)] = T2Z + T30;
Chris@82 609 rio[WS(rs, 2)] = T39 + T3g;
Chris@82 610 iio[WS(rs, 2)] = T4v + T4w;
Chris@82 611 rio[WS(rs, 3)] = T4F + T4M;
Chris@82 612 iio[WS(rs, 3)] = T61 + T62;
Chris@82 613 rio[WS(rs, 4)] = T6b + T6i;
Chris@82 614 iio[WS(rs, 4)] = T7x + T7y;
Chris@82 615 rio[WS(rs, 5)] = T7H + T7O;
Chris@82 616 iio[WS(rs, 5)] = T93 + T94;
Chris@82 617 rio[WS(rs, 6)] = T9d + T9k;
Chris@82 618 iio[WS(rs, 6)] = Taz + TaA;
Chris@82 619 rio[WS(rs, 7)] = TaJ + TaQ;
Chris@82 620 iio[WS(rs, 7)] = Tc5 + Tc6;
Chris@82 621 {
Chris@82 622 E TS, TX, TT, TY, TP, TU;
Chris@82 623 TS = FNMS(KP707106781, TR, TQ);
Chris@82 624 TX = FNMS(KP707106781, TW, TV);
Chris@82 625 TP = W[8];
Chris@82 626 TT = TP * TS;
Chris@82 627 TY = TP * TX;
Chris@82 628 TU = W[9];
Chris@82 629 rio[WS(vs, 5)] = FMA(TU, TX, TT);
Chris@82 630 iio[WS(vs, 5)] = FNMS(TU, TS, TY);
Chris@82 631 }
Chris@82 632 {
Chris@82 633 E T2N, T2B, T2H, T2I, T2O, T2G;
Chris@82 634 T2N = T2J - T2M;
Chris@82 635 T2G = T2E - T2F;
Chris@82 636 T2B = W[10];
Chris@82 637 T2H = T2B * T2G;
Chris@82 638 T2I = W[11];
Chris@82 639 T2O = T2I * T2G;
Chris@82 640 iio[WS(vs, 6) + WS(rs, 1)] = FNMS(T2I, T2N, T2H);
Chris@82 641 rio[WS(vs, 6) + WS(rs, 1)] = FMA(T2B, T2N, T2O);
Chris@82 642 }
Chris@82 643 {
Chris@82 644 E T1n, T1j, T1l, T1m, T1o, T1k;
Chris@82 645 T1n = T1d + T1g;
Chris@82 646 T1k = T19 + T18;
Chris@82 647 T1j = W[2];
Chris@82 648 T1l = T1j * T1k;
Chris@82 649 T1m = W[3];
Chris@82 650 T1o = T1m * T1k;
Chris@82 651 iio[WS(vs, 2)] = FNMS(T1m, T1n, T1l);
Chris@82 652 rio[WS(vs, 2)] = FMA(T1j, T1n, T1o);
Chris@82 653 }
Chris@82 654 {
Chris@82 655 E T1q, T1v, T1r, T1w, T1p, T1s;
Chris@82 656 T1q = T7 - Te;
Chris@82 657 T1v = T1t - T1u;
Chris@82 658 T1p = W[6];
Chris@82 659 T1r = T1p * T1q;
Chris@82 660 T1w = T1p * T1v;
Chris@82 661 T1s = W[7];
Chris@82 662 rio[WS(vs, 4)] = FMA(T1s, T1v, T1r);
Chris@82 663 iio[WS(vs, 4)] = FNMS(T1s, T1q, T1w);
Chris@82 664 }
Chris@82 665 {
Chris@82 666 E Tan, Tab, Tah, Tai, Tao, Tag;
Chris@82 667 Tan = Taj - Tam;
Chris@82 668 Tag = Tae - Taf;
Chris@82 669 Tab = W[10];
Chris@82 670 Tah = Tab * Tag;
Chris@82 671 Tai = W[11];
Chris@82 672 Tao = Tai * Tag;
Chris@82 673 iio[WS(vs, 6) + WS(rs, 6)] = FNMS(Tai, Tan, Tah);
Chris@82 674 rio[WS(vs, 6) + WS(rs, 6)] = FMA(Tab, Tan, Tao);
Chris@82 675 }
Chris@82 676 {
Chris@82 677 E Tc2, Tc7, Tc3, Tc8, Tc1, Tc4;
Chris@82 678 Tc2 = TaJ - TaQ;
Chris@82 679 Tc7 = Tc5 - Tc6;
Chris@82 680 Tc1 = W[6];
Chris@82 681 Tc3 = Tc1 * Tc2;
Chris@82 682 Tc8 = Tc1 * Tc7;
Chris@82 683 Tc4 = W[7];
Chris@82 684 rio[WS(vs, 4) + WS(rs, 7)] = FMA(Tc4, Tc7, Tc3);
Chris@82 685 iio[WS(vs, 4) + WS(rs, 7)] = FNMS(Tc4, Tc2, Tc8);
Chris@82 686 }
Chris@82 687 {
Chris@82 688 E Tbu, Tbz, Tbv, TbA, Tbr, Tbw;
Chris@82 689 Tbu = FNMS(KP707106781, Tbt, Tbs);
Chris@82 690 Tbz = FNMS(KP707106781, Tby, Tbx);
Chris@82 691 Tbr = W[8];
Chris@82 692 Tbv = Tbr * Tbu;
Chris@82 693 TbA = Tbr * Tbz;
Chris@82 694 Tbw = W[9];
Chris@82 695 rio[WS(vs, 5) + WS(rs, 7)] = FMA(Tbw, Tbz, Tbv);
Chris@82 696 iio[WS(vs, 5) + WS(rs, 7)] = FNMS(Tbw, Tbu, TbA);
Chris@82 697 }
Chris@82 698 {
Chris@82 699 E TbC, TbF, TbD, TbG, TbB, TbE;
Chris@82 700 TbC = FMA(KP707106781, Tbt, Tbs);
Chris@82 701 TbF = FMA(KP707106781, Tby, Tbx);
Chris@82 702 TbB = W[0];
Chris@82 703 TbD = TbB * TbC;
Chris@82 704 TbG = TbB * TbF;
Chris@82 705 TbE = W[1];
Chris@82 706 rio[WS(vs, 1) + WS(rs, 7)] = FMA(TbE, TbF, TbD);
Chris@82 707 iio[WS(vs, 1) + WS(rs, 7)] = FNMS(TbE, TbC, TbG);
Chris@82 708 }
Chris@82 709 {
Chris@82 710 E T10, T13, T11, T14, TZ, T12;
Chris@82 711 T10 = FMA(KP707106781, TR, TQ);
Chris@82 712 T13 = FMA(KP707106781, TW, TV);
Chris@82 713 TZ = W[0];
Chris@82 714 T11 = TZ * T10;
Chris@82 715 T14 = TZ * T13;
Chris@82 716 T12 = W[1];
Chris@82 717 rio[WS(vs, 1)] = FMA(T12, T13, T11);
Chris@82 718 iio[WS(vs, 1)] = FNMS(T12, T10, T14);
Chris@82 719 }
Chris@82 720 {
Chris@82 721 E T2w, T2z, T2x, T2A, T2v, T2y;
Chris@82 722 T2w = FMA(KP707106781, T2n, T2m);
Chris@82 723 T2z = FMA(KP707106781, T2s, T2r);
Chris@82 724 T2v = W[0];
Chris@82 725 T2x = T2v * T2w;
Chris@82 726 T2A = T2v * T2z;
Chris@82 727 T2y = W[1];
Chris@82 728 rio[WS(vs, 1) + WS(rs, 1)] = FMA(T2y, T2z, T2x);
Chris@82 729 iio[WS(vs, 1) + WS(rs, 1)] = FNMS(T2y, T2w, T2A);
Chris@82 730 }
Chris@82 731 {
Chris@82 732 E T1h, T15, T1b, T1c, T1i, T1a;
Chris@82 733 T1h = T1d - T1g;
Chris@82 734 T1a = T18 - T19;
Chris@82 735 T15 = W[10];
Chris@82 736 T1b = T15 * T1a;
Chris@82 737 T1c = W[11];
Chris@82 738 T1i = T1c * T1a;
Chris@82 739 iio[WS(vs, 6)] = FNMS(T1c, T1h, T1b);
Chris@82 740 rio[WS(vs, 6)] = FMA(T15, T1h, T1i);
Chris@82 741 }
Chris@82 742 {
Chris@82 743 E T2o, T2t, T2p, T2u, T2l, T2q;
Chris@82 744 T2o = FNMS(KP707106781, T2n, T2m);
Chris@82 745 T2t = FNMS(KP707106781, T2s, T2r);
Chris@82 746 T2l = W[8];
Chris@82 747 T2p = T2l * T2o;
Chris@82 748 T2u = T2l * T2t;
Chris@82 749 T2q = W[9];
Chris@82 750 rio[WS(vs, 5) + WS(rs, 1)] = FMA(T2q, T2t, T2p);
Chris@82 751 iio[WS(vs, 5) + WS(rs, 1)] = FNMS(T2q, T2o, T2u);
Chris@82 752 }
Chris@82 753 {
Chris@82 754 E Tat, Tap, Tar, Tas, Tau, Taq;
Chris@82 755 Tat = Taj + Tam;
Chris@82 756 Taq = Taf + Tae;
Chris@82 757 Tap = W[2];
Chris@82 758 Tar = Tap * Taq;
Chris@82 759 Tas = W[3];
Chris@82 760 Tau = Tas * Taq;
Chris@82 761 iio[WS(vs, 2) + WS(rs, 6)] = FNMS(Tas, Tat, Tar);
Chris@82 762 rio[WS(vs, 2) + WS(rs, 6)] = FMA(Tap, Tat, Tau);
Chris@82 763 }
Chris@82 764 {
Chris@82 765 E TbZ, TbV, TbX, TbY, Tc0, TbW;
Chris@82 766 TbZ = TbP + TbS;
Chris@82 767 TbW = TbL + TbK;
Chris@82 768 TbV = W[2];
Chris@82 769 TbX = TbV * TbW;
Chris@82 770 TbY = W[3];
Chris@82 771 Tc0 = TbY * TbW;
Chris@82 772 iio[WS(vs, 2) + WS(rs, 7)] = FNMS(TbY, TbZ, TbX);
Chris@82 773 rio[WS(vs, 2) + WS(rs, 7)] = FMA(TbV, TbZ, Tc0);
Chris@82 774 }
Chris@82 775 {
Chris@82 776 E Taw, TaB, Tax, TaC, Tav, Tay;
Chris@82 777 Taw = T9d - T9k;
Chris@82 778 TaB = Taz - TaA;
Chris@82 779 Tav = W[6];
Chris@82 780 Tax = Tav * Taw;
Chris@82 781 TaC = Tav * TaB;
Chris@82 782 Tay = W[7];
Chris@82 783 rio[WS(vs, 4) + WS(rs, 6)] = FMA(Tay, TaB, Tax);
Chris@82 784 iio[WS(vs, 4) + WS(rs, 6)] = FNMS(Tay, Taw, TaC);
Chris@82 785 }
Chris@82 786 {
Chris@82 787 E TbT, TbH, TbN, TbO, TbU, TbM;
Chris@82 788 TbT = TbP - TbS;
Chris@82 789 TbM = TbK - TbL;
Chris@82 790 TbH = W[10];
Chris@82 791 TbN = TbH * TbM;
Chris@82 792 TbO = W[11];
Chris@82 793 TbU = TbO * TbM;
Chris@82 794 iio[WS(vs, 6) + WS(rs, 7)] = FNMS(TbO, TbT, TbN);
Chris@82 795 rio[WS(vs, 6) + WS(rs, 7)] = FMA(TbH, TbT, TbU);
Chris@82 796 }
Chris@82 797 {
Chris@82 798 E T2T, T2P, T2R, T2S, T2U, T2Q;
Chris@82 799 T2T = T2J + T2M;
Chris@82 800 T2Q = T2F + T2E;
Chris@82 801 T2P = W[2];
Chris@82 802 T2R = T2P * T2Q;
Chris@82 803 T2S = W[3];
Chris@82 804 T2U = T2S * T2Q;
Chris@82 805 iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T2S, T2T, T2R);
Chris@82 806 rio[WS(vs, 2) + WS(rs, 1)] = FMA(T2P, T2T, T2U);
Chris@82 807 }
Chris@82 808 {
Chris@82 809 E T5Y, T63, T5Z, T64, T5X, T60;
Chris@82 810 T5Y = T4F - T4M;
Chris@82 811 T63 = T61 - T62;
Chris@82 812 T5X = W[6];
Chris@82 813 T5Z = T5X * T5Y;
Chris@82 814 T64 = T5X * T63;
Chris@82 815 T60 = W[7];
Chris@82 816 rio[WS(vs, 4) + WS(rs, 3)] = FMA(T60, T63, T5Z);
Chris@82 817 iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T60, T5Y, T64);
Chris@82 818 }
Chris@82 819 {
Chris@82 820 E T42, T45, T43, T46, T41, T44;
Chris@82 821 T42 = FMA(KP707106781, T3T, T3S);
Chris@82 822 T45 = FMA(KP707106781, T3Y, T3X);
Chris@82 823 T41 = W[0];
Chris@82 824 T43 = T41 * T42;
Chris@82 825 T46 = T41 * T45;
Chris@82 826 T44 = W[1];
Chris@82 827 rio[WS(vs, 1) + WS(rs, 2)] = FMA(T44, T45, T43);
Chris@82 828 iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T44, T42, T46);
Chris@82 829 }
Chris@82 830 {
Chris@82 831 E T5y, T5B, T5z, T5C, T5x, T5A;
Chris@82 832 T5y = FMA(KP707106781, T5p, T5o);
Chris@82 833 T5B = FMA(KP707106781, T5u, T5t);
Chris@82 834 T5x = W[0];
Chris@82 835 T5z = T5x * T5y;
Chris@82 836 T5C = T5x * T5B;
Chris@82 837 T5A = W[1];
Chris@82 838 rio[WS(vs, 1) + WS(rs, 3)] = FMA(T5A, T5B, T5z);
Chris@82 839 iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T5A, T5y, T5C);
Chris@82 840 }
Chris@82 841 {
Chris@82 842 E T6W, T71, T6X, T72, T6T, T6Y;
Chris@82 843 T6W = FNMS(KP707106781, T6V, T6U);
Chris@82 844 T71 = FNMS(KP707106781, T70, T6Z);
Chris@82 845 T6T = W[8];
Chris@82 846 T6X = T6T * T6W;
Chris@82 847 T72 = T6T * T71;
Chris@82 848 T6Y = W[9];
Chris@82 849 rio[WS(vs, 5) + WS(rs, 4)] = FMA(T6Y, T71, T6X);
Chris@82 850 iio[WS(vs, 5) + WS(rs, 4)] = FNMS(T6Y, T6W, T72);
Chris@82 851 }
Chris@82 852 {
Chris@82 853 E Ta6, Ta9, Ta7, Taa, Ta5, Ta8;
Chris@82 854 Ta6 = FMA(KP707106781, T9X, T9W);
Chris@82 855 Ta9 = FMA(KP707106781, Ta2, Ta1);
Chris@82 856 Ta5 = W[0];
Chris@82 857 Ta7 = Ta5 * Ta6;
Chris@82 858 Taa = Ta5 * Ta9;
Chris@82 859 Ta8 = W[1];
Chris@82 860 rio[WS(vs, 1) + WS(rs, 6)] = FMA(Ta8, Ta9, Ta7);
Chris@82 861 iio[WS(vs, 1) + WS(rs, 6)] = FNMS(Ta8, Ta6, Taa);
Chris@82 862 }
Chris@82 863 {
Chris@82 864 E T7r, T7n, T7p, T7q, T7s, T7o;
Chris@82 865 T7r = T7h + T7k;
Chris@82 866 T7o = T7d + T7c;
Chris@82 867 T7n = W[2];
Chris@82 868 T7p = T7n * T7o;
Chris@82 869 T7q = W[3];
Chris@82 870 T7s = T7q * T7o;
Chris@82 871 iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T7q, T7r, T7p);
Chris@82 872 rio[WS(vs, 2) + WS(rs, 4)] = FMA(T7n, T7r, T7s);
Chris@82 873 }
Chris@82 874 {
Chris@82 875 E T8X, T8T, T8V, T8W, T8Y, T8U;
Chris@82 876 T8X = T8N + T8Q;
Chris@82 877 T8U = T8J + T8I;
Chris@82 878 T8T = W[2];
Chris@82 879 T8V = T8T * T8U;
Chris@82 880 T8W = W[3];
Chris@82 881 T8Y = T8W * T8U;
Chris@82 882 iio[WS(vs, 2) + WS(rs, 5)] = FNMS(T8W, T8X, T8V);
Chris@82 883 rio[WS(vs, 2) + WS(rs, 5)] = FMA(T8T, T8X, T8Y);
Chris@82 884 }
Chris@82 885 {
Chris@82 886 E T2W, T31, T2X, T32, T2V, T2Y;
Chris@82 887 T2W = T1D - T1K;
Chris@82 888 T31 = T2Z - T30;
Chris@82 889 T2V = W[6];
Chris@82 890 T2X = T2V * T2W;
Chris@82 891 T32 = T2V * T31;
Chris@82 892 T2Y = W[7];
Chris@82 893 rio[WS(vs, 4) + WS(rs, 1)] = FMA(T2Y, T31, T2X);
Chris@82 894 iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T2Y, T2W, T32);
Chris@82 895 }
Chris@82 896 {
Chris@82 897 E T5V, T5R, T5T, T5U, T5W, T5S;
Chris@82 898 T5V = T5L + T5O;
Chris@82 899 T5S = T5H + T5G;
Chris@82 900 T5R = W[2];
Chris@82 901 T5T = T5R * T5S;
Chris@82 902 T5U = W[3];
Chris@82 903 T5W = T5U * T5S;
Chris@82 904 iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T5U, T5V, T5T);
Chris@82 905 rio[WS(vs, 2) + WS(rs, 3)] = FMA(T5R, T5V, T5W);
Chris@82 906 }
Chris@82 907 {
Chris@82 908 E T3U, T3Z, T3V, T40, T3R, T3W;
Chris@82 909 T3U = FNMS(KP707106781, T3T, T3S);
Chris@82 910 T3Z = FNMS(KP707106781, T3Y, T3X);
Chris@82 911 T3R = W[8];
Chris@82 912 T3V = T3R * T3U;
Chris@82 913 T40 = T3R * T3Z;
Chris@82 914 T3W = W[9];
Chris@82 915 rio[WS(vs, 5) + WS(rs, 2)] = FMA(T3W, T3Z, T3V);
Chris@82 916 iio[WS(vs, 5) + WS(rs, 2)] = FNMS(T3W, T3U, T40);
Chris@82 917 }
Chris@82 918 {
Chris@82 919 E T5P, T5D, T5J, T5K, T5Q, T5I;
Chris@82 920 T5P = T5L - T5O;
Chris@82 921 T5I = T5G - T5H;
Chris@82 922 T5D = W[10];
Chris@82 923 T5J = T5D * T5I;
Chris@82 924 T5K = W[11];
Chris@82 925 T5Q = T5K * T5I;
Chris@82 926 iio[WS(vs, 6) + WS(rs, 3)] = FNMS(T5K, T5P, T5J);
Chris@82 927 rio[WS(vs, 6) + WS(rs, 3)] = FMA(T5D, T5P, T5Q);
Chris@82 928 }
Chris@82 929 {
Chris@82 930 E T74, T77, T75, T78, T73, T76;
Chris@82 931 T74 = FMA(KP707106781, T6V, T6U);
Chris@82 932 T77 = FMA(KP707106781, T70, T6Z);
Chris@82 933 T73 = W[0];
Chris@82 934 T75 = T73 * T74;
Chris@82 935 T78 = T73 * T77;
Chris@82 936 T76 = W[1];
Chris@82 937 rio[WS(vs, 1) + WS(rs, 4)] = FMA(T76, T77, T75);
Chris@82 938 iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T76, T74, T78);
Chris@82 939 }
Chris@82 940 {
Chris@82 941 E T9Y, Ta3, T9Z, Ta4, T9V, Ta0;
Chris@82 942 T9Y = FNMS(KP707106781, T9X, T9W);
Chris@82 943 Ta3 = FNMS(KP707106781, Ta2, Ta1);
Chris@82 944 T9V = W[8];
Chris@82 945 T9Z = T9V * T9Y;
Chris@82 946 Ta4 = T9V * Ta3;
Chris@82 947 Ta0 = W[9];
Chris@82 948 rio[WS(vs, 5) + WS(rs, 6)] = FMA(Ta0, Ta3, T9Z);
Chris@82 949 iio[WS(vs, 5) + WS(rs, 6)] = FNMS(Ta0, T9Y, Ta4);
Chris@82 950 }
Chris@82 951 {
Chris@82 952 E T7l, T79, T7f, T7g, T7m, T7e;
Chris@82 953 T7l = T7h - T7k;
Chris@82 954 T7e = T7c - T7d;
Chris@82 955 T79 = W[10];
Chris@82 956 T7f = T79 * T7e;
Chris@82 957 T7g = W[11];
Chris@82 958 T7m = T7g * T7e;
Chris@82 959 iio[WS(vs, 6) + WS(rs, 4)] = FNMS(T7g, T7l, T7f);
Chris@82 960 rio[WS(vs, 6) + WS(rs, 4)] = FMA(T79, T7l, T7m);
Chris@82 961 }
Chris@82 962 {
Chris@82 963 E T90, T95, T91, T96, T8Z, T92;
Chris@82 964 T90 = T7H - T7O;
Chris@82 965 T95 = T93 - T94;
Chris@82 966 T8Z = W[6];
Chris@82 967 T91 = T8Z * T90;
Chris@82 968 T96 = T8Z * T95;
Chris@82 969 T92 = W[7];
Chris@82 970 rio[WS(vs, 4) + WS(rs, 5)] = FMA(T92, T95, T91);
Chris@82 971 iio[WS(vs, 4) + WS(rs, 5)] = FNMS(T92, T90, T96);
Chris@82 972 }
Chris@82 973 {
Chris@82 974 E T4j, T47, T4d, T4e, T4k, T4c;
Chris@82 975 T4j = T4f - T4i;
Chris@82 976 T4c = T4a - T4b;
Chris@82 977 T47 = W[10];
Chris@82 978 T4d = T47 * T4c;
Chris@82 979 T4e = W[11];
Chris@82 980 T4k = T4e * T4c;
Chris@82 981 iio[WS(vs, 6) + WS(rs, 2)] = FNMS(T4e, T4j, T4d);
Chris@82 982 rio[WS(vs, 6) + WS(rs, 2)] = FMA(T47, T4j, T4k);
Chris@82 983 }
Chris@82 984 {
Chris@82 985 E T5q, T5v, T5r, T5w, T5n, T5s;
Chris@82 986 T5q = FNMS(KP707106781, T5p, T5o);
Chris@82 987 T5v = FNMS(KP707106781, T5u, T5t);
Chris@82 988 T5n = W[8];
Chris@82 989 T5r = T5n * T5q;
Chris@82 990 T5w = T5n * T5v;
Chris@82 991 T5s = W[9];
Chris@82 992 rio[WS(vs, 5) + WS(rs, 3)] = FMA(T5s, T5v, T5r);
Chris@82 993 iio[WS(vs, 5) + WS(rs, 3)] = FNMS(T5s, T5q, T5w);
Chris@82 994 }
Chris@82 995 {
Chris@82 996 E T4p, T4l, T4n, T4o, T4q, T4m;
Chris@82 997 T4p = T4f + T4i;
Chris@82 998 T4m = T4b + T4a;
Chris@82 999 T4l = W[2];
Chris@82 1000 T4n = T4l * T4m;
Chris@82 1001 T4o = W[3];
Chris@82 1002 T4q = T4o * T4m;
Chris@82 1003 iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T4o, T4p, T4n);
Chris@82 1004 rio[WS(vs, 2) + WS(rs, 2)] = FMA(T4l, T4p, T4q);
Chris@82 1005 }
Chris@82 1006 {
Chris@82 1007 E T4s, T4x, T4t, T4y, T4r, T4u;
Chris@82 1008 T4s = T39 - T3g;
Chris@82 1009 T4x = T4v - T4w;
Chris@82 1010 T4r = W[6];
Chris@82 1011 T4t = T4r * T4s;
Chris@82 1012 T4y = T4r * T4x;
Chris@82 1013 T4u = W[7];
Chris@82 1014 rio[WS(vs, 4) + WS(rs, 2)] = FMA(T4u, T4x, T4t);
Chris@82 1015 iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T4u, T4s, T4y);
Chris@82 1016 }
Chris@82 1017 {
Chris@82 1018 E T7u, T7z, T7v, T7A, T7t, T7w;
Chris@82 1019 T7u = T6b - T6i;
Chris@82 1020 T7z = T7x - T7y;
Chris@82 1021 T7t = W[6];
Chris@82 1022 T7v = T7t * T7u;
Chris@82 1023 T7A = T7t * T7z;
Chris@82 1024 T7w = W[7];
Chris@82 1025 rio[WS(vs, 4) + WS(rs, 4)] = FMA(T7w, T7z, T7v);
Chris@82 1026 iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T7w, T7u, T7A);
Chris@82 1027 }
Chris@82 1028 {
Chris@82 1029 E T8R, T8F, T8L, T8M, T8S, T8K;
Chris@82 1030 T8R = T8N - T8Q;
Chris@82 1031 T8K = T8I - T8J;
Chris@82 1032 T8F = W[10];
Chris@82 1033 T8L = T8F * T8K;
Chris@82 1034 T8M = W[11];
Chris@82 1035 T8S = T8M * T8K;
Chris@82 1036 iio[WS(vs, 6) + WS(rs, 5)] = FNMS(T8M, T8R, T8L);
Chris@82 1037 rio[WS(vs, 6) + WS(rs, 5)] = FMA(T8F, T8R, T8S);
Chris@82 1038 }
Chris@82 1039 {
Chris@82 1040 E T8s, T8x, T8t, T8y, T8p, T8u;
Chris@82 1041 T8s = FNMS(KP707106781, T8r, T8q);
Chris@82 1042 T8x = FNMS(KP707106781, T8w, T8v);
Chris@82 1043 T8p = W[8];
Chris@82 1044 T8t = T8p * T8s;
Chris@82 1045 T8y = T8p * T8x;
Chris@82 1046 T8u = W[9];
Chris@82 1047 rio[WS(vs, 5) + WS(rs, 5)] = FMA(T8u, T8x, T8t);
Chris@82 1048 iio[WS(vs, 5) + WS(rs, 5)] = FNMS(T8u, T8s, T8y);
Chris@82 1049 }
Chris@82 1050 {
Chris@82 1051 E T8A, T8D, T8B, T8E, T8z, T8C;
Chris@82 1052 T8A = FMA(KP707106781, T8r, T8q);
Chris@82 1053 T8D = FMA(KP707106781, T8w, T8v);
Chris@82 1054 T8z = W[0];
Chris@82 1055 T8B = T8z * T8A;
Chris@82 1056 T8E = T8z * T8D;
Chris@82 1057 T8C = W[1];
Chris@82 1058 rio[WS(vs, 1) + WS(rs, 5)] = FMA(T8C, T8D, T8B);
Chris@82 1059 iio[WS(vs, 1) + WS(rs, 5)] = FNMS(T8C, T8A, T8E);
Chris@82 1060 }
Chris@82 1061 {
Chris@82 1062 E TH, TN, TJ, TL, TM, TO, Tf, Tx, Ty, TI, TG, TK, Tw;
Chris@82 1063 TG = TE - TF;
Chris@82 1064 TH = FNMS(KP707106781, TG, TD);
Chris@82 1065 TN = FMA(KP707106781, TG, TD);
Chris@82 1066 TK = FMA(KP707106781, Tv, Tk);
Chris@82 1067 TJ = W[4];
Chris@82 1068 TL = TJ * TK;
Chris@82 1069 TM = W[5];
Chris@82 1070 TO = TM * TK;
Chris@82 1071 Tw = FNMS(KP707106781, Tv, Tk);
Chris@82 1072 Tf = W[12];
Chris@82 1073 Tx = Tf * Tw;
Chris@82 1074 Ty = W[13];
Chris@82 1075 TI = Ty * Tw;
Chris@82 1076 iio[WS(vs, 7)] = FNMS(Ty, TH, Tx);
Chris@82 1077 rio[WS(vs, 7)] = FMA(Tf, TH, TI);
Chris@82 1078 iio[WS(vs, 3)] = FNMS(TM, TN, TL);
Chris@82 1079 rio[WS(vs, 3)] = FMA(TJ, TN, TO);
Chris@82 1080 }
Chris@82 1081 {
Chris@82 1082 E T5f, T5l, T5h, T5j, T5k, T5m, T4N, T55, T56, T5g, T5e, T5i, T54;
Chris@82 1083 T5e = T5c - T5d;
Chris@82 1084 T5f = FNMS(KP707106781, T5e, T5b);
Chris@82 1085 T5l = FMA(KP707106781, T5e, T5b);
Chris@82 1086 T5i = FMA(KP707106781, T53, T4S);
Chris@82 1087 T5h = W[4];
Chris@82 1088 T5j = T5h * T5i;
Chris@82 1089 T5k = W[5];
Chris@82 1090 T5m = T5k * T5i;
Chris@82 1091 T54 = FNMS(KP707106781, T53, T4S);
Chris@82 1092 T4N = W[12];
Chris@82 1093 T55 = T4N * T54;
Chris@82 1094 T56 = W[13];
Chris@82 1095 T5g = T56 * T54;
Chris@82 1096 iio[WS(vs, 7) + WS(rs, 3)] = FNMS(T56, T5f, T55);
Chris@82 1097 rio[WS(vs, 7) + WS(rs, 3)] = FMA(T4N, T5f, T5g);
Chris@82 1098 iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T5k, T5l, T5j);
Chris@82 1099 rio[WS(vs, 3) + WS(rs, 3)] = FMA(T5h, T5l, T5m);
Chris@82 1100 }
Chris@82 1101 {
Chris@82 1102 E T2d, T2j, T2f, T2h, T2i, T2k, T1L, T23, T24, T2e, T2c, T2g, T22;
Chris@82 1103 T2c = T2a - T2b;
Chris@82 1104 T2d = FNMS(KP707106781, T2c, T29);
Chris@82 1105 T2j = FMA(KP707106781, T2c, T29);
Chris@82 1106 T2g = FMA(KP707106781, T21, T1Q);
Chris@82 1107 T2f = W[4];
Chris@82 1108 T2h = T2f * T2g;
Chris@82 1109 T2i = W[5];
Chris@82 1110 T2k = T2i * T2g;
Chris@82 1111 T22 = FNMS(KP707106781, T21, T1Q);
Chris@82 1112 T1L = W[12];
Chris@82 1113 T23 = T1L * T22;
Chris@82 1114 T24 = W[13];
Chris@82 1115 T2e = T24 * T22;
Chris@82 1116 iio[WS(vs, 7) + WS(rs, 1)] = FNMS(T24, T2d, T23);
Chris@82 1117 rio[WS(vs, 7) + WS(rs, 1)] = FMA(T1L, T2d, T2e);
Chris@82 1118 iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T2i, T2j, T2h);
Chris@82 1119 rio[WS(vs, 3) + WS(rs, 1)] = FMA(T2f, T2j, T2k);
Chris@82 1120 }
Chris@82 1121 {
Chris@82 1122 E T3J, T3P, T3L, T3N, T3O, T3Q, T3h, T3z, T3A, T3K, T3I, T3M, T3y;
Chris@82 1123 T3I = T3G - T3H;
Chris@82 1124 T3J = FNMS(KP707106781, T3I, T3F);
Chris@82 1125 T3P = FMA(KP707106781, T3I, T3F);
Chris@82 1126 T3M = FMA(KP707106781, T3x, T3m);
Chris@82 1127 T3L = W[4];
Chris@82 1128 T3N = T3L * T3M;
Chris@82 1129 T3O = W[5];
Chris@82 1130 T3Q = T3O * T3M;
Chris@82 1131 T3y = FNMS(KP707106781, T3x, T3m);
Chris@82 1132 T3h = W[12];
Chris@82 1133 T3z = T3h * T3y;
Chris@82 1134 T3A = W[13];
Chris@82 1135 T3K = T3A * T3y;
Chris@82 1136 iio[WS(vs, 7) + WS(rs, 2)] = FNMS(T3A, T3J, T3z);
Chris@82 1137 rio[WS(vs, 7) + WS(rs, 2)] = FMA(T3h, T3J, T3K);
Chris@82 1138 iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T3O, T3P, T3N);
Chris@82 1139 rio[WS(vs, 3) + WS(rs, 2)] = FMA(T3L, T3P, T3Q);
Chris@82 1140 }
Chris@82 1141 {
Chris@82 1142 E T6L, T6R, T6N, T6P, T6Q, T6S, T6j, T6B, T6C, T6M, T6K, T6O, T6A;
Chris@82 1143 T6K = T6I - T6J;
Chris@82 1144 T6L = FNMS(KP707106781, T6K, T6H);
Chris@82 1145 T6R = FMA(KP707106781, T6K, T6H);
Chris@82 1146 T6O = FMA(KP707106781, T6z, T6o);
Chris@82 1147 T6N = W[4];
Chris@82 1148 T6P = T6N * T6O;
Chris@82 1149 T6Q = W[5];
Chris@82 1150 T6S = T6Q * T6O;
Chris@82 1151 T6A = FNMS(KP707106781, T6z, T6o);
Chris@82 1152 T6j = W[12];
Chris@82 1153 T6B = T6j * T6A;
Chris@82 1154 T6C = W[13];
Chris@82 1155 T6M = T6C * T6A;
Chris@82 1156 iio[WS(vs, 7) + WS(rs, 4)] = FNMS(T6C, T6L, T6B);
Chris@82 1157 rio[WS(vs, 7) + WS(rs, 4)] = FMA(T6j, T6L, T6M);
Chris@82 1158 iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T6Q, T6R, T6P);
Chris@82 1159 rio[WS(vs, 3) + WS(rs, 4)] = FMA(T6N, T6R, T6S);
Chris@82 1160 }
Chris@82 1161 {
Chris@82 1162 E Tbj, Tbp, Tbl, Tbn, Tbo, Tbq, TaR, Tb9, Tba, Tbk, Tbi, Tbm, Tb8;
Chris@82 1163 Tbi = Tbg - Tbh;
Chris@82 1164 Tbj = FNMS(KP707106781, Tbi, Tbf);
Chris@82 1165 Tbp = FMA(KP707106781, Tbi, Tbf);
Chris@82 1166 Tbm = FMA(KP707106781, Tb7, TaW);
Chris@82 1167 Tbl = W[4];
Chris@82 1168 Tbn = Tbl * Tbm;
Chris@82 1169 Tbo = W[5];
Chris@82 1170 Tbq = Tbo * Tbm;
Chris@82 1171 Tb8 = FNMS(KP707106781, Tb7, TaW);
Chris@82 1172 TaR = W[12];
Chris@82 1173 Tb9 = TaR * Tb8;
Chris@82 1174 Tba = W[13];
Chris@82 1175 Tbk = Tba * Tb8;
Chris@82 1176 iio[WS(vs, 7) + WS(rs, 7)] = FNMS(Tba, Tbj, Tb9);
Chris@82 1177 rio[WS(vs, 7) + WS(rs, 7)] = FMA(TaR, Tbj, Tbk);
Chris@82 1178 iio[WS(vs, 3) + WS(rs, 7)] = FNMS(Tbo, Tbp, Tbn);
Chris@82 1179 rio[WS(vs, 3) + WS(rs, 7)] = FMA(Tbl, Tbp, Tbq);
Chris@82 1180 }
Chris@82 1181 {
Chris@82 1182 E T8h, T8n, T8j, T8l, T8m, T8o, T7P, T87, T88, T8i, T8g, T8k, T86;
Chris@82 1183 T8g = T8e - T8f;
Chris@82 1184 T8h = FNMS(KP707106781, T8g, T8d);
Chris@82 1185 T8n = FMA(KP707106781, T8g, T8d);
Chris@82 1186 T8k = FMA(KP707106781, T85, T7U);
Chris@82 1187 T8j = W[4];
Chris@82 1188 T8l = T8j * T8k;
Chris@82 1189 T8m = W[5];
Chris@82 1190 T8o = T8m * T8k;
Chris@82 1191 T86 = FNMS(KP707106781, T85, T7U);
Chris@82 1192 T7P = W[12];
Chris@82 1193 T87 = T7P * T86;
Chris@82 1194 T88 = W[13];
Chris@82 1195 T8i = T88 * T86;
Chris@82 1196 iio[WS(vs, 7) + WS(rs, 5)] = FNMS(T88, T8h, T87);
Chris@82 1197 rio[WS(vs, 7) + WS(rs, 5)] = FMA(T7P, T8h, T8i);
Chris@82 1198 iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T8m, T8n, T8l);
Chris@82 1199 rio[WS(vs, 3) + WS(rs, 5)] = FMA(T8j, T8n, T8o);
Chris@82 1200 }
Chris@82 1201 {
Chris@82 1202 E T9N, T9T, T9P, T9R, T9S, T9U, T9l, T9D, T9E, T9O, T9M, T9Q, T9C;
Chris@82 1203 T9M = T9K - T9L;
Chris@82 1204 T9N = FNMS(KP707106781, T9M, T9J);
Chris@82 1205 T9T = FMA(KP707106781, T9M, T9J);
Chris@82 1206 T9Q = FMA(KP707106781, T9B, T9q);
Chris@82 1207 T9P = W[4];
Chris@82 1208 T9R = T9P * T9Q;
Chris@82 1209 T9S = W[5];
Chris@82 1210 T9U = T9S * T9Q;
Chris@82 1211 T9C = FNMS(KP707106781, T9B, T9q);
Chris@82 1212 T9l = W[12];
Chris@82 1213 T9D = T9l * T9C;
Chris@82 1214 T9E = W[13];
Chris@82 1215 T9O = T9E * T9C;
Chris@82 1216 iio[WS(vs, 7) + WS(rs, 6)] = FNMS(T9E, T9N, T9D);
Chris@82 1217 rio[WS(vs, 7) + WS(rs, 6)] = FMA(T9l, T9N, T9O);
Chris@82 1218 iio[WS(vs, 3) + WS(rs, 6)] = FNMS(T9S, T9T, T9R);
Chris@82 1219 rio[WS(vs, 3) + WS(rs, 6)] = FMA(T9P, T9T, T9U);
Chris@82 1220 }
Chris@82 1221 }
Chris@82 1222 }
Chris@82 1223 }
Chris@82 1224
/*
 * Twiddle-factor instruction list that the descriptor `desc` below points at.
 * It holds two records: a TW_FULL record with arguments (0, 8), followed by a
 * TW_NEXT record with arguments (1, 0).
 * The codelet body above indexes the twiddle array as W[0] through W[13].
 * NOTE(review): presumably TW_FULL requests the complete twiddle set for a
 * size-8 transform and TW_NEXT terminates the list -- confirm against the
 * tw_instr declaration, which is not visible in this file.
 */
Chris@82 1225 static const tw_instr twinstr[] = {
Chris@82 1226 {TW_FULL, 0, 8},
Chris@82 1227 {TW_NEXT, 1, 0}
Chris@82 1228 };
Chris@82 1229
/*
 * Descriptor handed to the planner together with the q1_8 kernel.
 * Fields, in order: 8 (matches the "-n 8" generator option; presumably the
 * radix -- confirm against the ct_desc declaration), the codelet name "q1_8",
 * the twiddle instruction list, &GENUS, and the operation counts
 * {352, 112, 176, 0}, which agree with the "352 additions, 112
 * multiplications, 176 fused multiply/add" figures in the header comment of
 * this FMA variant.
 * NOTE(review): the meaning of the fourth count and of the three trailing
 * zeros cannot be determined from this file -- check ct_desc.
 */
Chris@82 1230 static const ct_desc desc = { 8, "q1_8", twinstr, &GENUS, {352, 112, 176, 0}, 0, 0, 0 };
Chris@82 1231
/*
 * Registration entry point for this codelet: passes the q1_8 kernel and its
 * descriptor `desc` to the planner `p` through X(kdft_difsq_register).
 * This definition sits in the #if branch that is compiled when
 * ARCH_PREFERS_FMA or ISA_EXTENSION_PREFERS_FMA is defined.
 */
Chris@82 1232 void X(codelet_q1_8) (planner *p) {
Chris@82 1233 X(kdft_difsq_register) (p, q1_8, &desc);
Chris@82 1234 }
Chris@82 1235 #else
Chris@82 1236
Chris@82 1237 /* Generated by: ../../../genfft/gen_twidsq.native -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 8 -name q1_8 -include dft/scalar/q.h */
Chris@82 1238
Chris@82 1239 /*
Chris@82 1240 * This function contains 528 FP additions, 256 FP multiplications,
Chris@82 1241 * (or, 416 additions, 144 multiplications, 112 fused multiply/add),
Chris@82 1242 * 142 stack variables, 1 constant, and 256 memory accesses
Chris@82 1243 */
Chris@82 1244 #include "dft/scalar/q.h"
Chris@82 1245
Chris@82 1246 static void q1_8(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
Chris@82 1247 {
Chris@82 1248 DK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@82 1249 {
Chris@82 1250 INT m;
Chris@82 1251 for (m = mb, W = W + (mb * 14); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 14, MAKE_VOLATILE_STRIDE(16, rs), MAKE_VOLATILE_STRIDE(0, vs)) {
Chris@82 1252 E T7, T14, T1g, Tk, TC, TQ, T10, TM, T1w, T2p, T2z, T1H, T1M, T1W, T2j;
Chris@82 1253 E T1V, T7R, T8O, T90, T84, T8m, T8A, T8K, T8w, T9g, Ta9, Taj, T9r, T9w, T9G;
Chris@82 1254 E Ta3, T9F, Te, T17, T1h, Tp, Tu, TE, T11, TD, T1p, T2m, T2y, T1C, T1U;
Chris@82 1255 E T28, T2i, T24, T7Y, T8R, T91, T89, T8e, T8o, T8L, T8n, T99, Ta6, Tai, T9m;
Chris@82 1256 E T9E, T9S, Ta2, T9O, T2H, T3E, T3Q, T2U, T3c, T3q, T3A, T3m, T46, T4Z, T59;
Chris@82 1257 E T4h, T4m, T4w, T4T, T4v, T5h, T6e, T6q, T5u, T5M, T60, T6a, T5W, T6G, T7z;
Chris@82 1258 E T7J, T6R, T6W, T76, T7t, T75, T2O, T3H, T3R, T2Z, T34, T3e, T3B, T3d, T3Z;
Chris@82 1259 E T4W, T58, T4c, T4u, T4I, T4S, T4E, T5o, T6h, T6r, T5z, T5E, T5O, T6b, T5N;
Chris@82 1260 E T6z, T7w, T7I, T6M, T74, T7i, T7s, T7e;
Chris@82 1261 {
Chris@82 1262 E T3, Ty, Tj, TY, T6, Tg, TB, TZ;
Chris@82 1263 {
Chris@82 1264 E T1, T2, Th, Ti;
Chris@82 1265 T1 = rio[0];
Chris@82 1266 T2 = rio[WS(rs, 4)];
Chris@82 1267 T3 = T1 + T2;
Chris@82 1268 Ty = T1 - T2;
Chris@82 1269 Th = iio[0];
Chris@82 1270 Ti = iio[WS(rs, 4)];
Chris@82 1271 Tj = Th - Ti;
Chris@82 1272 TY = Th + Ti;
Chris@82 1273 }
Chris@82 1274 {
Chris@82 1275 E T4, T5, Tz, TA;
Chris@82 1276 T4 = rio[WS(rs, 2)];
Chris@82 1277 T5 = rio[WS(rs, 6)];
Chris@82 1278 T6 = T4 + T5;
Chris@82 1279 Tg = T4 - T5;
Chris@82 1280 Tz = iio[WS(rs, 2)];
Chris@82 1281 TA = iio[WS(rs, 6)];
Chris@82 1282 TB = Tz - TA;
Chris@82 1283 TZ = Tz + TA;
Chris@82 1284 }
Chris@82 1285 T7 = T3 + T6;
Chris@82 1286 T14 = T3 - T6;
Chris@82 1287 T1g = TY + TZ;
Chris@82 1288 Tk = Tg + Tj;
Chris@82 1289 TC = Ty - TB;
Chris@82 1290 TQ = Tj - Tg;
Chris@82 1291 T10 = TY - TZ;
Chris@82 1292 TM = Ty + TB;
Chris@82 1293 }
Chris@82 1294 {
Chris@82 1295 E T1s, T1I, T1L, T2n, T1v, T1D, T1G, T2o;
Chris@82 1296 {
Chris@82 1297 E T1q, T1r, T1J, T1K;
Chris@82 1298 T1q = rio[WS(vs, 1) + WS(rs, 1)];
Chris@82 1299 T1r = rio[WS(vs, 1) + WS(rs, 5)];
Chris@82 1300 T1s = T1q + T1r;
Chris@82 1301 T1I = T1q - T1r;
Chris@82 1302 T1J = iio[WS(vs, 1) + WS(rs, 1)];
Chris@82 1303 T1K = iio[WS(vs, 1) + WS(rs, 5)];
Chris@82 1304 T1L = T1J - T1K;
Chris@82 1305 T2n = T1J + T1K;
Chris@82 1306 }
Chris@82 1307 {
Chris@82 1308 E T1t, T1u, T1E, T1F;
Chris@82 1309 T1t = rio[WS(vs, 1) + WS(rs, 7)];
Chris@82 1310 T1u = rio[WS(vs, 1) + WS(rs, 3)];
Chris@82 1311 T1v = T1t + T1u;
Chris@82 1312 T1D = T1t - T1u;
Chris@82 1313 T1E = iio[WS(vs, 1) + WS(rs, 7)];
Chris@82 1314 T1F = iio[WS(vs, 1) + WS(rs, 3)];
Chris@82 1315 T1G = T1E - T1F;
Chris@82 1316 T2o = T1E + T1F;
Chris@82 1317 }
Chris@82 1318 T1w = T1s + T1v;
Chris@82 1319 T2p = T2n - T2o;
Chris@82 1320 T2z = T2n + T2o;
Chris@82 1321 T1H = T1D - T1G;
Chris@82 1322 T1M = T1I + T1L;
Chris@82 1323 T1W = T1D + T1G;
Chris@82 1324 T2j = T1v - T1s;
Chris@82 1325 T1V = T1L - T1I;
Chris@82 1326 }
Chris@82 1327 {
Chris@82 1328 E T7N, T8i, T83, T8I, T7Q, T80, T8l, T8J;
Chris@82 1329 {
Chris@82 1330 E T7L, T7M, T81, T82;
Chris@82 1331 T7L = rio[WS(vs, 6)];
Chris@82 1332 T7M = rio[WS(vs, 6) + WS(rs, 4)];
Chris@82 1333 T7N = T7L + T7M;
Chris@82 1334 T8i = T7L - T7M;
Chris@82 1335 T81 = iio[WS(vs, 6)];
Chris@82 1336 T82 = iio[WS(vs, 6) + WS(rs, 4)];
Chris@82 1337 T83 = T81 - T82;
Chris@82 1338 T8I = T81 + T82;
Chris@82 1339 }
Chris@82 1340 {
Chris@82 1341 E T7O, T7P, T8j, T8k;
Chris@82 1342 T7O = rio[WS(vs, 6) + WS(rs, 2)];
Chris@82 1343 T7P = rio[WS(vs, 6) + WS(rs, 6)];
Chris@82 1344 T7Q = T7O + T7P;
Chris@82 1345 T80 = T7O - T7P;
Chris@82 1346 T8j = iio[WS(vs, 6) + WS(rs, 2)];
Chris@82 1347 T8k = iio[WS(vs, 6) + WS(rs, 6)];
Chris@82 1348 T8l = T8j - T8k;
Chris@82 1349 T8J = T8j + T8k;
Chris@82 1350 }
Chris@82 1351 T7R = T7N + T7Q;
Chris@82 1352 T8O = T7N - T7Q;
Chris@82 1353 T90 = T8I + T8J;
Chris@82 1354 T84 = T80 + T83;
Chris@82 1355 T8m = T8i - T8l;
Chris@82 1356 T8A = T83 - T80;
Chris@82 1357 T8K = T8I - T8J;
Chris@82 1358 T8w = T8i + T8l;
Chris@82 1359 }
Chris@82 1360 {
Chris@82 1361 E T9c, T9s, T9v, Ta7, T9f, T9n, T9q, Ta8;
Chris@82 1362 {
Chris@82 1363 E T9a, T9b, T9t, T9u;
Chris@82 1364 T9a = rio[WS(vs, 7) + WS(rs, 1)];
Chris@82 1365 T9b = rio[WS(vs, 7) + WS(rs, 5)];
Chris@82 1366 T9c = T9a + T9b;
Chris@82 1367 T9s = T9a - T9b;
Chris@82 1368 T9t = iio[WS(vs, 7) + WS(rs, 1)];
Chris@82 1369 T9u = iio[WS(vs, 7) + WS(rs, 5)];
Chris@82 1370 T9v = T9t - T9u;
Chris@82 1371 Ta7 = T9t + T9u;
Chris@82 1372 }
Chris@82 1373 {
Chris@82 1374 E T9d, T9e, T9o, T9p;
Chris@82 1375 T9d = rio[WS(vs, 7) + WS(rs, 7)];
Chris@82 1376 T9e = rio[WS(vs, 7) + WS(rs, 3)];
Chris@82 1377 T9f = T9d + T9e;
Chris@82 1378 T9n = T9d - T9e;
Chris@82 1379 T9o = iio[WS(vs, 7) + WS(rs, 7)];
Chris@82 1380 T9p = iio[WS(vs, 7) + WS(rs, 3)];
Chris@82 1381 T9q = T9o - T9p;
Chris@82 1382 Ta8 = T9o + T9p;
Chris@82 1383 }
Chris@82 1384 T9g = T9c + T9f;
Chris@82 1385 Ta9 = Ta7 - Ta8;
Chris@82 1386 Taj = Ta7 + Ta8;
Chris@82 1387 T9r = T9n - T9q;
Chris@82 1388 T9w = T9s + T9v;
Chris@82 1389 T9G = T9n + T9q;
Chris@82 1390 Ta3 = T9f - T9c;
Chris@82 1391 T9F = T9v - T9s;
Chris@82 1392 }
Chris@82 1393 {
Chris@82 1394 E Ta, Tq, Tt, T15, Td, Tl, To, T16;
Chris@82 1395 {
Chris@82 1396 E T8, T9, Tr, Ts;
Chris@82 1397 T8 = rio[WS(rs, 1)];
Chris@82 1398 T9 = rio[WS(rs, 5)];
Chris@82 1399 Ta = T8 + T9;
Chris@82 1400 Tq = T8 - T9;
Chris@82 1401 Tr = iio[WS(rs, 1)];
Chris@82 1402 Ts = iio[WS(rs, 5)];
Chris@82 1403 Tt = Tr - Ts;
Chris@82 1404 T15 = Tr + Ts;
Chris@82 1405 }
Chris@82 1406 {
Chris@82 1407 E Tb, Tc, Tm, Tn;
Chris@82 1408 Tb = rio[WS(rs, 7)];
Chris@82 1409 Tc = rio[WS(rs, 3)];
Chris@82 1410 Td = Tb + Tc;
Chris@82 1411 Tl = Tb - Tc;
Chris@82 1412 Tm = iio[WS(rs, 7)];
Chris@82 1413 Tn = iio[WS(rs, 3)];
Chris@82 1414 To = Tm - Tn;
Chris@82 1415 T16 = Tm + Tn;
Chris@82 1416 }
Chris@82 1417 Te = Ta + Td;
Chris@82 1418 T17 = T15 - T16;
Chris@82 1419 T1h = T15 + T16;
Chris@82 1420 Tp = Tl - To;
Chris@82 1421 Tu = Tq + Tt;
Chris@82 1422 TE = Tl + To;
Chris@82 1423 T11 = Td - Ta;
Chris@82 1424 TD = Tt - Tq;
Chris@82 1425 }
Chris@82 1426 {
Chris@82 1427 E T1l, T1Q, T1B, T2g, T1o, T1y, T1T, T2h;
Chris@82 1428 {
Chris@82 1429 E T1j, T1k, T1z, T1A;
Chris@82 1430 T1j = rio[WS(vs, 1)];
Chris@82 1431 T1k = rio[WS(vs, 1) + WS(rs, 4)];
Chris@82 1432 T1l = T1j + T1k;
Chris@82 1433 T1Q = T1j - T1k;
Chris@82 1434 T1z = iio[WS(vs, 1)];
Chris@82 1435 T1A = iio[WS(vs, 1) + WS(rs, 4)];
Chris@82 1436 T1B = T1z - T1A;
Chris@82 1437 T2g = T1z + T1A;
Chris@82 1438 }
Chris@82 1439 {
Chris@82 1440 E T1m, T1n, T1R, T1S;
Chris@82 1441 T1m = rio[WS(vs, 1) + WS(rs, 2)];
Chris@82 1442 T1n = rio[WS(vs, 1) + WS(rs, 6)];
Chris@82 1443 T1o = T1m + T1n;
Chris@82 1444 T1y = T1m - T1n;
Chris@82 1445 T1R = iio[WS(vs, 1) + WS(rs, 2)];
Chris@82 1446 T1S = iio[WS(vs, 1) + WS(rs, 6)];
Chris@82 1447 T1T = T1R - T1S;
Chris@82 1448 T2h = T1R + T1S;
Chris@82 1449 }
Chris@82 1450 T1p = T1l + T1o;
Chris@82 1451 T2m = T1l - T1o;
Chris@82 1452 T2y = T2g + T2h;
Chris@82 1453 T1C = T1y + T1B;
Chris@82 1454 T1U = T1Q - T1T;
Chris@82 1455 T28 = T1B - T1y;
Chris@82 1456 T2i = T2g - T2h;
Chris@82 1457 T24 = T1Q + T1T;
Chris@82 1458 }
Chris@82 1459 {
Chris@82 1460 E T7U, T8a, T8d, T8P, T7X, T85, T88, T8Q;
Chris@82 1461 {
Chris@82 1462 E T7S, T7T, T8b, T8c;
Chris@82 1463 T7S = rio[WS(vs, 6) + WS(rs, 1)];
Chris@82 1464 T7T = rio[WS(vs, 6) + WS(rs, 5)];
Chris@82 1465 T7U = T7S + T7T;
Chris@82 1466 T8a = T7S - T7T;
Chris@82 1467 T8b = iio[WS(vs, 6) + WS(rs, 1)];
Chris@82 1468 T8c = iio[WS(vs, 6) + WS(rs, 5)];
Chris@82 1469 T8d = T8b - T8c;
Chris@82 1470 T8P = T8b + T8c;
Chris@82 1471 }
Chris@82 1472 {
Chris@82 1473 E T7V, T7W, T86, T87;
Chris@82 1474 T7V = rio[WS(vs, 6) + WS(rs, 7)];
Chris@82 1475 T7W = rio[WS(vs, 6) + WS(rs, 3)];
Chris@82 1476 T7X = T7V + T7W;
Chris@82 1477 T85 = T7V - T7W;
Chris@82 1478 T86 = iio[WS(vs, 6) + WS(rs, 7)];
Chris@82 1479 T87 = iio[WS(vs, 6) + WS(rs, 3)];
Chris@82 1480 T88 = T86 - T87;
Chris@82 1481 T8Q = T86 + T87;
Chris@82 1482 }
Chris@82 1483 T7Y = T7U + T7X;
Chris@82 1484 T8R = T8P - T8Q;
Chris@82 1485 T91 = T8P + T8Q;
Chris@82 1486 T89 = T85 - T88;
Chris@82 1487 T8e = T8a + T8d;
Chris@82 1488 T8o = T85 + T88;
Chris@82 1489 T8L = T7X - T7U;
Chris@82 1490 T8n = T8d - T8a;
Chris@82 1491 }
Chris@82 1492 {
Chris@82 1493 E T95, T9A, T9l, Ta0, T98, T9i, T9D, Ta1;
Chris@82 1494 {
Chris@82 1495 E T93, T94, T9j, T9k;
Chris@82 1496 T93 = rio[WS(vs, 7)];
Chris@82 1497 T94 = rio[WS(vs, 7) + WS(rs, 4)];
Chris@82 1498 T95 = T93 + T94;
Chris@82 1499 T9A = T93 - T94;
Chris@82 1500 T9j = iio[WS(vs, 7)];
Chris@82 1501 T9k = iio[WS(vs, 7) + WS(rs, 4)];
Chris@82 1502 T9l = T9j - T9k;
Chris@82 1503 Ta0 = T9j + T9k;
Chris@82 1504 }
Chris@82 1505 {
Chris@82 1506 E T96, T97, T9B, T9C;
Chris@82 1507 T96 = rio[WS(vs, 7) + WS(rs, 2)];
Chris@82 1508 T97 = rio[WS(vs, 7) + WS(rs, 6)];
Chris@82 1509 T98 = T96 + T97;
Chris@82 1510 T9i = T96 - T97;
Chris@82 1511 T9B = iio[WS(vs, 7) + WS(rs, 2)];
Chris@82 1512 T9C = iio[WS(vs, 7) + WS(rs, 6)];
Chris@82 1513 T9D = T9B - T9C;
Chris@82 1514 Ta1 = T9B + T9C;
Chris@82 1515 }
Chris@82 1516 T99 = T95 + T98;
Chris@82 1517 Ta6 = T95 - T98;
Chris@82 1518 Tai = Ta0 + Ta1;
Chris@82 1519 T9m = T9i + T9l;
Chris@82 1520 T9E = T9A - T9D;
Chris@82 1521 T9S = T9l - T9i;
Chris@82 1522 Ta2 = Ta0 - Ta1;
Chris@82 1523 T9O = T9A + T9D;
Chris@82 1524 }
Chris@82 1525 {
Chris@82 1526 E T2D, T38, T2T, T3y, T2G, T2Q, T3b, T3z;
Chris@82 1527 {
Chris@82 1528 E T2B, T2C, T2R, T2S;
Chris@82 1529 T2B = rio[WS(vs, 2)];
Chris@82 1530 T2C = rio[WS(vs, 2) + WS(rs, 4)];
Chris@82 1531 T2D = T2B + T2C;
Chris@82 1532 T38 = T2B - T2C;
Chris@82 1533 T2R = iio[WS(vs, 2)];
Chris@82 1534 T2S = iio[WS(vs, 2) + WS(rs, 4)];
Chris@82 1535 T2T = T2R - T2S;
Chris@82 1536 T3y = T2R + T2S;
Chris@82 1537 }
Chris@82 1538 {
Chris@82 1539 E T2E, T2F, T39, T3a;
Chris@82 1540 T2E = rio[WS(vs, 2) + WS(rs, 2)];
Chris@82 1541 T2F = rio[WS(vs, 2) + WS(rs, 6)];
Chris@82 1542 T2G = T2E + T2F;
Chris@82 1543 T2Q = T2E - T2F;
Chris@82 1544 T39 = iio[WS(vs, 2) + WS(rs, 2)];
Chris@82 1545 T3a = iio[WS(vs, 2) + WS(rs, 6)];
Chris@82 1546 T3b = T39 - T3a;
Chris@82 1547 T3z = T39 + T3a;
Chris@82 1548 }
Chris@82 1549 T2H = T2D + T2G;
Chris@82 1550 T3E = T2D - T2G;
Chris@82 1551 T3Q = T3y + T3z;
Chris@82 1552 T2U = T2Q + T2T;
Chris@82 1553 T3c = T38 - T3b;
Chris@82 1554 T3q = T2T - T2Q;
Chris@82 1555 T3A = T3y - T3z;
Chris@82 1556 T3m = T38 + T3b;
Chris@82 1557 }
Chris@82 1558 {
Chris@82 1559 E T42, T4i, T4l, T4X, T45, T4d, T4g, T4Y;
Chris@82 1560 {
Chris@82 1561 E T40, T41, T4j, T4k;
Chris@82 1562 T40 = rio[WS(vs, 3) + WS(rs, 1)];
Chris@82 1563 T41 = rio[WS(vs, 3) + WS(rs, 5)];
Chris@82 1564 T42 = T40 + T41;
Chris@82 1565 T4i = T40 - T41;
Chris@82 1566 T4j = iio[WS(vs, 3) + WS(rs, 1)];
Chris@82 1567 T4k = iio[WS(vs, 3) + WS(rs, 5)];
Chris@82 1568 T4l = T4j - T4k;
Chris@82 1569 T4X = T4j + T4k;
Chris@82 1570 }
Chris@82 1571 {
Chris@82 1572 E T43, T44, T4e, T4f;
Chris@82 1573 T43 = rio[WS(vs, 3) + WS(rs, 7)];
Chris@82 1574 T44 = rio[WS(vs, 3) + WS(rs, 3)];
Chris@82 1575 T45 = T43 + T44;
Chris@82 1576 T4d = T43 - T44;
Chris@82 1577 T4e = iio[WS(vs, 3) + WS(rs, 7)];
Chris@82 1578 T4f = iio[WS(vs, 3) + WS(rs, 3)];
Chris@82 1579 T4g = T4e - T4f;
Chris@82 1580 T4Y = T4e + T4f;
Chris@82 1581 }
Chris@82 1582 T46 = T42 + T45;
Chris@82 1583 T4Z = T4X - T4Y;
Chris@82 1584 T59 = T4X + T4Y;
Chris@82 1585 T4h = T4d - T4g;
Chris@82 1586 T4m = T4i + T4l;
Chris@82 1587 T4w = T4d + T4g;
Chris@82 1588 T4T = T45 - T42;
Chris@82 1589 T4v = T4l - T4i;
Chris@82 1590 }
Chris@82 1591 {
Chris@82 1592 E T5d, T5I, T5t, T68, T5g, T5q, T5L, T69;
Chris@82 1593 {
Chris@82 1594 E T5b, T5c, T5r, T5s;
Chris@82 1595 T5b = rio[WS(vs, 4)];
Chris@82 1596 T5c = rio[WS(vs, 4) + WS(rs, 4)];
Chris@82 1597 T5d = T5b + T5c;
Chris@82 1598 T5I = T5b - T5c;
Chris@82 1599 T5r = iio[WS(vs, 4)];
Chris@82 1600 T5s = iio[WS(vs, 4) + WS(rs, 4)];
Chris@82 1601 T5t = T5r - T5s;
Chris@82 1602 T68 = T5r + T5s;
Chris@82 1603 }
Chris@82 1604 {
Chris@82 1605 E T5e, T5f, T5J, T5K;
Chris@82 1606 T5e = rio[WS(vs, 4) + WS(rs, 2)];
Chris@82 1607 T5f = rio[WS(vs, 4) + WS(rs, 6)];
Chris@82 1608 T5g = T5e + T5f;
Chris@82 1609 T5q = T5e - T5f;
Chris@82 1610 T5J = iio[WS(vs, 4) + WS(rs, 2)];
Chris@82 1611 T5K = iio[WS(vs, 4) + WS(rs, 6)];
Chris@82 1612 T5L = T5J - T5K;
Chris@82 1613 T69 = T5J + T5K;
Chris@82 1614 }
Chris@82 1615 T5h = T5d + T5g;
Chris@82 1616 T6e = T5d - T5g;
Chris@82 1617 T6q = T68 + T69;
Chris@82 1618 T5u = T5q + T5t;
Chris@82 1619 T5M = T5I - T5L;
Chris@82 1620 T60 = T5t - T5q;
Chris@82 1621 T6a = T68 - T69;
Chris@82 1622 T5W = T5I + T5L;
Chris@82 1623 }
Chris@82 1624 {
Chris@82 1625 E T6C, T6S, T6V, T7x, T6F, T6N, T6Q, T7y;
Chris@82 1626 {
Chris@82 1627 E T6A, T6B, T6T, T6U;
Chris@82 1628 T6A = rio[WS(vs, 5) + WS(rs, 1)];
Chris@82 1629 T6B = rio[WS(vs, 5) + WS(rs, 5)];
Chris@82 1630 T6C = T6A + T6B;
Chris@82 1631 T6S = T6A - T6B;
Chris@82 1632 T6T = iio[WS(vs, 5) + WS(rs, 1)];
Chris@82 1633 T6U = iio[WS(vs, 5) + WS(rs, 5)];
Chris@82 1634 T6V = T6T - T6U;
Chris@82 1635 T7x = T6T + T6U;
Chris@82 1636 }
Chris@82 1637 {
Chris@82 1638 E T6D, T6E, T6O, T6P;
Chris@82 1639 T6D = rio[WS(vs, 5) + WS(rs, 7)];
Chris@82 1640 T6E = rio[WS(vs, 5) + WS(rs, 3)];
Chris@82 1641 T6F = T6D + T6E;
Chris@82 1642 T6N = T6D - T6E;
Chris@82 1643 T6O = iio[WS(vs, 5) + WS(rs, 7)];
Chris@82 1644 T6P = iio[WS(vs, 5) + WS(rs, 3)];
Chris@82 1645 T6Q = T6O - T6P;
Chris@82 1646 T7y = T6O + T6P;
Chris@82 1647 }
Chris@82 1648 T6G = T6C + T6F;
Chris@82 1649 T7z = T7x - T7y;
Chris@82 1650 T7J = T7x + T7y;
Chris@82 1651 T6R = T6N - T6Q;
Chris@82 1652 T6W = T6S + T6V;
Chris@82 1653 T76 = T6N + T6Q;
Chris@82 1654 T7t = T6F - T6C;
Chris@82 1655 T75 = T6V - T6S;
Chris@82 1656 }
Chris@82 1657 {
Chris@82 1658 E T2K, T30, T33, T3F, T2N, T2V, T2Y, T3G;
Chris@82 1659 {
Chris@82 1660 E T2I, T2J, T31, T32;
Chris@82 1661 T2I = rio[WS(vs, 2) + WS(rs, 1)];
Chris@82 1662 T2J = rio[WS(vs, 2) + WS(rs, 5)];
Chris@82 1663 T2K = T2I + T2J;
Chris@82 1664 T30 = T2I - T2J;
Chris@82 1665 T31 = iio[WS(vs, 2) + WS(rs, 1)];
Chris@82 1666 T32 = iio[WS(vs, 2) + WS(rs, 5)];
Chris@82 1667 T33 = T31 - T32;
Chris@82 1668 T3F = T31 + T32;
Chris@82 1669 }
Chris@82 1670 {
Chris@82 1671 E T2L, T2M, T2W, T2X;
Chris@82 1672 T2L = rio[WS(vs, 2) + WS(rs, 7)];
Chris@82 1673 T2M = rio[WS(vs, 2) + WS(rs, 3)];
Chris@82 1674 T2N = T2L + T2M;
Chris@82 1675 T2V = T2L - T2M;
Chris@82 1676 T2W = iio[WS(vs, 2) + WS(rs, 7)];
Chris@82 1677 T2X = iio[WS(vs, 2) + WS(rs, 3)];
Chris@82 1678 T2Y = T2W - T2X;
Chris@82 1679 T3G = T2W + T2X;
Chris@82 1680 }
Chris@82 1681 T2O = T2K + T2N;
Chris@82 1682 T3H = T3F - T3G;
Chris@82 1683 T3R = T3F + T3G;
Chris@82 1684 T2Z = T2V - T2Y;
Chris@82 1685 T34 = T30 + T33;
Chris@82 1686 T3e = T2V + T2Y;
Chris@82 1687 T3B = T2N - T2K;
Chris@82 1688 T3d = T33 - T30;
Chris@82 1689 }
Chris@82 1690 {
Chris@82 1691 E T3V, T4q, T4b, T4Q, T3Y, T48, T4t, T4R;
Chris@82 1692 {
Chris@82 1693 E T3T, T3U, T49, T4a;
Chris@82 1694 T3T = rio[WS(vs, 3)];
Chris@82 1695 T3U = rio[WS(vs, 3) + WS(rs, 4)];
Chris@82 1696 T3V = T3T + T3U;
Chris@82 1697 T4q = T3T - T3U;
Chris@82 1698 T49 = iio[WS(vs, 3)];
Chris@82 1699 T4a = iio[WS(vs, 3) + WS(rs, 4)];
Chris@82 1700 T4b = T49 - T4a;
Chris@82 1701 T4Q = T49 + T4a;
Chris@82 1702 }
Chris@82 1703 {
Chris@82 1704 E T3W, T3X, T4r, T4s;
Chris@82 1705 T3W = rio[WS(vs, 3) + WS(rs, 2)];
Chris@82 1706 T3X = rio[WS(vs, 3) + WS(rs, 6)];
Chris@82 1707 T3Y = T3W + T3X;
Chris@82 1708 T48 = T3W - T3X;
Chris@82 1709 T4r = iio[WS(vs, 3) + WS(rs, 2)];
Chris@82 1710 T4s = iio[WS(vs, 3) + WS(rs, 6)];
Chris@82 1711 T4t = T4r - T4s;
Chris@82 1712 T4R = T4r + T4s;
Chris@82 1713 }
Chris@82 1714 T3Z = T3V + T3Y;
Chris@82 1715 T4W = T3V - T3Y;
Chris@82 1716 T58 = T4Q + T4R;
Chris@82 1717 T4c = T48 + T4b;
Chris@82 1718 T4u = T4q - T4t;
Chris@82 1719 T4I = T4b - T48;
Chris@82 1720 T4S = T4Q - T4R;
Chris@82 1721 T4E = T4q + T4t;
Chris@82 1722 }
Chris@82 1723 {
Chris@82 1724 E T5k, T5A, T5D, T6f, T5n, T5v, T5y, T6g;
Chris@82 1725 {
Chris@82 1726 E T5i, T5j, T5B, T5C;
Chris@82 1727 T5i = rio[WS(vs, 4) + WS(rs, 1)];
Chris@82 1728 T5j = rio[WS(vs, 4) + WS(rs, 5)];
Chris@82 1729 T5k = T5i + T5j;
Chris@82 1730 T5A = T5i - T5j;
Chris@82 1731 T5B = iio[WS(vs, 4) + WS(rs, 1)];
Chris@82 1732 T5C = iio[WS(vs, 4) + WS(rs, 5)];
Chris@82 1733 T5D = T5B - T5C;
Chris@82 1734 T6f = T5B + T5C;
Chris@82 1735 }
Chris@82 1736 {
Chris@82 1737 E T5l, T5m, T5w, T5x;
Chris@82 1738 T5l = rio[WS(vs, 4) + WS(rs, 7)];
Chris@82 1739 T5m = rio[WS(vs, 4) + WS(rs, 3)];
Chris@82 1740 T5n = T5l + T5m;
Chris@82 1741 T5v = T5l - T5m;
Chris@82 1742 T5w = iio[WS(vs, 4) + WS(rs, 7)];
Chris@82 1743 T5x = iio[WS(vs, 4) + WS(rs, 3)];
Chris@82 1744 T5y = T5w - T5x;
Chris@82 1745 T6g = T5w + T5x;
Chris@82 1746 }
Chris@82 1747 T5o = T5k + T5n;
Chris@82 1748 T6h = T6f - T6g;
Chris@82 1749 T6r = T6f + T6g;
Chris@82 1750 T5z = T5v - T5y;
Chris@82 1751 T5E = T5A + T5D;
Chris@82 1752 T5O = T5v + T5y;
Chris@82 1753 T6b = T5n - T5k;
Chris@82 1754 T5N = T5D - T5A;
Chris@82 1755 }
Chris@82 1756 {
Chris@82 1757 E T6v, T70, T6L, T7q, T6y, T6I, T73, T7r;
Chris@82 1758 {
Chris@82 1759 E T6t, T6u, T6J, T6K;
Chris@82 1760 T6t = rio[WS(vs, 5)];
Chris@82 1761 T6u = rio[WS(vs, 5) + WS(rs, 4)];
Chris@82 1762 T6v = T6t + T6u;
Chris@82 1763 T70 = T6t - T6u;
Chris@82 1764 T6J = iio[WS(vs, 5)];
Chris@82 1765 T6K = iio[WS(vs, 5) + WS(rs, 4)];
Chris@82 1766 T6L = T6J - T6K;
Chris@82 1767 T7q = T6J + T6K;
Chris@82 1768 }
Chris@82 1769 {
Chris@82 1770 E T6w, T6x, T71, T72;
Chris@82 1771 T6w = rio[WS(vs, 5) + WS(rs, 2)];
Chris@82 1772 T6x = rio[WS(vs, 5) + WS(rs, 6)];
Chris@82 1773 T6y = T6w + T6x;
Chris@82 1774 T6I = T6w - T6x;
Chris@82 1775 T71 = iio[WS(vs, 5) + WS(rs, 2)];
Chris@82 1776 T72 = iio[WS(vs, 5) + WS(rs, 6)];
Chris@82 1777 T73 = T71 - T72;
Chris@82 1778 T7r = T71 + T72;
Chris@82 1779 }
Chris@82 1780 T6z = T6v + T6y;
Chris@82 1781 T7w = T6v - T6y;
Chris@82 1782 T7I = T7q + T7r;
Chris@82 1783 T6M = T6I + T6L;
Chris@82 1784 T74 = T70 - T73;
Chris@82 1785 T7i = T6L - T6I;
Chris@82 1786 T7s = T7q - T7r;
Chris@82 1787 T7e = T70 + T73;
Chris@82 1788 }
Chris@82 1789 rio[0] = T7 + Te;
Chris@82 1790 iio[0] = T1g + T1h;
Chris@82 1791 rio[WS(rs, 1)] = T1p + T1w;
Chris@82 1792 iio[WS(rs, 1)] = T2y + T2z;
Chris@82 1793 rio[WS(rs, 3)] = T3Z + T46;
Chris@82 1794 rio[WS(rs, 2)] = T2H + T2O;
Chris@82 1795 iio[WS(rs, 2)] = T3Q + T3R;
Chris@82 1796 iio[WS(rs, 3)] = T58 + T59;
Chris@82 1797 rio[WS(rs, 6)] = T7R + T7Y;
Chris@82 1798 iio[WS(rs, 6)] = T90 + T91;
Chris@82 1799 iio[WS(rs, 5)] = T7I + T7J;
Chris@82 1800 rio[WS(rs, 5)] = T6z + T6G;
Chris@82 1801 iio[WS(rs, 4)] = T6q + T6r;
Chris@82 1802 rio[WS(rs, 4)] = T5h + T5o;
Chris@82 1803 rio[WS(rs, 7)] = T99 + T9g;
Chris@82 1804 iio[WS(rs, 7)] = Tai + Taj;
Chris@82 1805 {
Chris@82 1806 E T12, T18, TX, T13;
Chris@82 1807 T12 = T10 - T11;
Chris@82 1808 T18 = T14 - T17;
Chris@82 1809 TX = W[10];
Chris@82 1810 T13 = W[11];
Chris@82 1811 iio[WS(vs, 6)] = FNMS(T13, T18, TX * T12);
Chris@82 1812 rio[WS(vs, 6)] = FMA(T13, T12, TX * T18);
Chris@82 1813 }
Chris@82 1814 {
Chris@82 1815 E Tag, Tak, Taf, Tah;
Chris@82 1816 Tag = T99 - T9g;
Chris@82 1817 Tak = Tai - Taj;
Chris@82 1818 Taf = W[6];
Chris@82 1819 Tah = W[7];
Chris@82 1820 rio[WS(vs, 4) + WS(rs, 7)] = FMA(Taf, Tag, Tah * Tak);
Chris@82 1821 iio[WS(vs, 4) + WS(rs, 7)] = FNMS(Tah, Tag, Taf * Tak);
Chris@82 1822 }
Chris@82 1823 {
Chris@82 1824 E T8M, T8S, T8H, T8N;
Chris@82 1825 T8M = T8K - T8L;
Chris@82 1826 T8S = T8O - T8R;
Chris@82 1827 T8H = W[10];
Chris@82 1828 T8N = W[11];
Chris@82 1829 iio[WS(vs, 6) + WS(rs, 6)] = FNMS(T8N, T8S, T8H * T8M);
Chris@82 1830 rio[WS(vs, 6) + WS(rs, 6)] = FMA(T8N, T8M, T8H * T8S);
Chris@82 1831 }
Chris@82 1832 {
Chris@82 1833 E T2k, T2q, T2f, T2l;
Chris@82 1834 T2k = T2i - T2j;
Chris@82 1835 T2q = T2m - T2p;
Chris@82 1836 T2f = W[10];
Chris@82 1837 T2l = W[11];
Chris@82 1838 iio[WS(vs, 6) + WS(rs, 1)] = FNMS(T2l, T2q, T2f * T2k);
Chris@82 1839 rio[WS(vs, 6) + WS(rs, 1)] = FMA(T2l, T2k, T2f * T2q);
Chris@82 1840 }
Chris@82 1841 {
Chris@82 1842 E Ta4, Taa, T9Z, Ta5;
Chris@82 1843 Ta4 = Ta2 - Ta3;
Chris@82 1844 Taa = Ta6 - Ta9;
Chris@82 1845 T9Z = W[10];
Chris@82 1846 Ta5 = W[11];
Chris@82 1847 iio[WS(vs, 6) + WS(rs, 7)] = FNMS(Ta5, Taa, T9Z * Ta4);
Chris@82 1848 rio[WS(vs, 6) + WS(rs, 7)] = FMA(Ta5, Ta4, T9Z * Taa);
Chris@82 1849 }
Chris@82 1850 {
Chris@82 1851 E T8Y, T92, T8X, T8Z;
Chris@82 1852 T8Y = T7R - T7Y;
Chris@82 1853 T92 = T90 - T91;
Chris@82 1854 T8X = W[6];
Chris@82 1855 T8Z = W[7];
Chris@82 1856 rio[WS(vs, 4) + WS(rs, 6)] = FMA(T8X, T8Y, T8Z * T92);
Chris@82 1857 iio[WS(vs, 4) + WS(rs, 6)] = FNMS(T8Z, T8Y, T8X * T92);
Chris@82 1858 }
Chris@82 1859 {
Chris@82 1860 E T2w, T2A, T2v, T2x;
Chris@82 1861 T2w = T1p - T1w;
Chris@82 1862 T2A = T2y - T2z;
Chris@82 1863 T2v = W[6];
Chris@82 1864 T2x = W[7];
Chris@82 1865 rio[WS(vs, 4) + WS(rs, 1)] = FMA(T2v, T2w, T2x * T2A);
Chris@82 1866 iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T2x, T2w, T2v * T2A);
Chris@82 1867 }
Chris@82 1868 {
Chris@82 1869 E Tac, Tae, Tab, Tad;
Chris@82 1870 Tac = Ta3 + Ta2;
Chris@82 1871 Tae = Ta6 + Ta9;
Chris@82 1872 Tab = W[2];
Chris@82 1873 Tad = W[3];
Chris@82 1874 iio[WS(vs, 2) + WS(rs, 7)] = FNMS(Tad, Tae, Tab * Tac);
Chris@82 1875 rio[WS(vs, 2) + WS(rs, 7)] = FMA(Tad, Tac, Tab * Tae);
Chris@82 1876 }
Chris@82 1877 {
Chris@82 1878 E T8U, T8W, T8T, T8V;
Chris@82 1879 T8U = T8L + T8K;
Chris@82 1880 T8W = T8O + T8R;
Chris@82 1881 T8T = W[2];
Chris@82 1882 T8V = W[3];
Chris@82 1883 iio[WS(vs, 2) + WS(rs, 6)] = FNMS(T8V, T8W, T8T * T8U);
Chris@82 1884 rio[WS(vs, 2) + WS(rs, 6)] = FMA(T8V, T8U, T8T * T8W);
Chris@82 1885 }
Chris@82 1886 {
Chris@82 1887 E T1a, T1c, T19, T1b;
Chris@82 1888 T1a = T11 + T10;
Chris@82 1889 T1c = T14 + T17;
Chris@82 1890 T19 = W[2];
Chris@82 1891 T1b = W[3];
Chris@82 1892 iio[WS(vs, 2)] = FNMS(T1b, T1c, T19 * T1a);
Chris@82 1893 rio[WS(vs, 2)] = FMA(T1b, T1a, T19 * T1c);
Chris@82 1894 }
Chris@82 1895 {
Chris@82 1896 E T1e, T1i, T1d, T1f;
Chris@82 1897 T1e = T7 - Te;
Chris@82 1898 T1i = T1g - T1h;
Chris@82 1899 T1d = W[6];
Chris@82 1900 T1f = W[7];
Chris@82 1901 rio[WS(vs, 4)] = FMA(T1d, T1e, T1f * T1i);
Chris@82 1902 iio[WS(vs, 4)] = FNMS(T1f, T1e, T1d * T1i);
Chris@82 1903 }
Chris@82 1904 {
Chris@82 1905 E T2s, T2u, T2r, T2t;
Chris@82 1906 T2s = T2j + T2i;
Chris@82 1907 T2u = T2m + T2p;
Chris@82 1908 T2r = W[2];
Chris@82 1909 T2t = W[3];
Chris@82 1910 iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T2t, T2u, T2r * T2s);
Chris@82 1911 rio[WS(vs, 2) + WS(rs, 1)] = FMA(T2t, T2s, T2r * T2u);
Chris@82 1912 }
Chris@82 1913 {
Chris@82 1914 E T3C, T3I, T3x, T3D;
Chris@82 1915 T3C = T3A - T3B;
Chris@82 1916 T3I = T3E - T3H;
Chris@82 1917 T3x = W[10];
Chris@82 1918 T3D = W[11];
Chris@82 1919 iio[WS(vs, 6) + WS(rs, 2)] = FNMS(T3D, T3I, T3x * T3C);
Chris@82 1920 rio[WS(vs, 6) + WS(rs, 2)] = FMA(T3D, T3C, T3x * T3I);
Chris@82 1921 }
Chris@82 1922 {
Chris@82 1923 E T4U, T50, T4P, T4V;
Chris@82 1924 T4U = T4S - T4T;
Chris@82 1925 T50 = T4W - T4Z;
Chris@82 1926 T4P = W[10];
Chris@82 1927 T4V = W[11];
Chris@82 1928 iio[WS(vs, 6) + WS(rs, 3)] = FNMS(T4V, T50, T4P * T4U);
Chris@82 1929 rio[WS(vs, 6) + WS(rs, 3)] = FMA(T4V, T4U, T4P * T50);
Chris@82 1930 }
Chris@82 1931 {
Chris@82 1932 E T56, T5a, T55, T57;
Chris@82 1933 T56 = T3Z - T46;
Chris@82 1934 T5a = T58 - T59;
Chris@82 1935 T55 = W[6];
Chris@82 1936 T57 = W[7];
Chris@82 1937 rio[WS(vs, 4) + WS(rs, 3)] = FMA(T55, T56, T57 * T5a);
Chris@82 1938 iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T57, T56, T55 * T5a);
Chris@82 1939 }
Chris@82 1940 {
Chris@82 1941 E T6o, T6s, T6n, T6p;
Chris@82 1942 T6o = T5h - T5o;
Chris@82 1943 T6s = T6q - T6r;
Chris@82 1944 T6n = W[6];
Chris@82 1945 T6p = W[7];
Chris@82 1946 rio[WS(vs, 4) + WS(rs, 4)] = FMA(T6n, T6o, T6p * T6s);
Chris@82 1947 iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T6p, T6o, T6n * T6s);
Chris@82 1948 }
Chris@82 1949 {
Chris@82 1950 E T7u, T7A, T7p, T7v;
Chris@82 1951 T7u = T7s - T7t;
Chris@82 1952 T7A = T7w - T7z;
Chris@82 1953 T7p = W[10];
Chris@82 1954 T7v = W[11];
Chris@82 1955 iio[WS(vs, 6) + WS(rs, 5)] = FNMS(T7v, T7A, T7p * T7u);
Chris@82 1956 rio[WS(vs, 6) + WS(rs, 5)] = FMA(T7v, T7u, T7p * T7A);
Chris@82 1957 }
Chris@82 1958 {
Chris@82 1959 E T6c, T6i, T67, T6d;
Chris@82 1960 T6c = T6a - T6b;
Chris@82 1961 T6i = T6e - T6h;
Chris@82 1962 T67 = W[10];
Chris@82 1963 T6d = W[11];
Chris@82 1964 iio[WS(vs, 6) + WS(rs, 4)] = FNMS(T6d, T6i, T67 * T6c);
Chris@82 1965 rio[WS(vs, 6) + WS(rs, 4)] = FMA(T6d, T6c, T67 * T6i);
Chris@82 1966 }
Chris@82 1967 {
Chris@82 1968 E T7G, T7K, T7F, T7H;
Chris@82 1969 T7G = T6z - T6G;
Chris@82 1970 T7K = T7I - T7J;
Chris@82 1971 T7F = W[6];
Chris@82 1972 T7H = W[7];
Chris@82 1973 rio[WS(vs, 4) + WS(rs, 5)] = FMA(T7F, T7G, T7H * T7K);
Chris@82 1974 iio[WS(vs, 4) + WS(rs, 5)] = FNMS(T7H, T7G, T7F * T7K);
Chris@82 1975 }
Chris@82 1976 {
Chris@82 1977 E T3O, T3S, T3N, T3P;
Chris@82 1978 T3O = T2H - T2O;
Chris@82 1979 T3S = T3Q - T3R;
Chris@82 1980 T3N = W[6];
Chris@82 1981 T3P = W[7];
Chris@82 1982 rio[WS(vs, 4) + WS(rs, 2)] = FMA(T3N, T3O, T3P * T3S);
Chris@82 1983 iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T3P, T3O, T3N * T3S);
Chris@82 1984 }
Chris@82 1985 {
Chris@82 1986 E T3K, T3M, T3J, T3L;
Chris@82 1987 T3K = T3B + T3A;
Chris@82 1988 T3M = T3E + T3H;
Chris@82 1989 T3J = W[2];
Chris@82 1990 T3L = W[3];
Chris@82 1991 iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T3L, T3M, T3J * T3K);
Chris@82 1992 rio[WS(vs, 2) + WS(rs, 2)] = FMA(T3L, T3K, T3J * T3M);
Chris@82 1993 }
Chris@82 1994 {
Chris@82 1995 E T7C, T7E, T7B, T7D;
Chris@82 1996 T7C = T7t + T7s;
Chris@82 1997 T7E = T7w + T7z;
Chris@82 1998 T7B = W[2];
Chris@82 1999 T7D = W[3];
Chris@82 2000 iio[WS(vs, 2) + WS(rs, 5)] = FNMS(T7D, T7E, T7B * T7C);
Chris@82 2001 rio[WS(vs, 2) + WS(rs, 5)] = FMA(T7D, T7C, T7B * T7E);
Chris@82 2002 }
Chris@82 2003 {
Chris@82 2004 E T6k, T6m, T6j, T6l;
Chris@82 2005 T6k = T6b + T6a;
Chris@82 2006 T6m = T6e + T6h;
Chris@82 2007 T6j = W[2];
Chris@82 2008 T6l = W[3];
Chris@82 2009 iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T6l, T6m, T6j * T6k);
Chris@82 2010 rio[WS(vs, 2) + WS(rs, 4)] = FMA(T6l, T6k, T6j * T6m);
Chris@82 2011 }
Chris@82 2012 {
Chris@82 2013 E T52, T54, T51, T53;
Chris@82 2014 T52 = T4T + T4S;
Chris@82 2015 T54 = T4W + T4Z;
Chris@82 2016 T51 = W[2];
Chris@82 2017 T53 = W[3];
Chris@82 2018 iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T53, T54, T51 * T52);
Chris@82 2019 rio[WS(vs, 2) + WS(rs, 3)] = FMA(T53, T52, T51 * T54);
Chris@82 2020 }
Chris@82 2021 {
Chris@82 2022 E T5G, T5S, T5Q, T5U, T5F, T5P;
Chris@82 2023 T5F = KP707106781 * (T5z - T5E);
Chris@82 2024 T5G = T5u - T5F;
Chris@82 2025 T5S = T5u + T5F;
Chris@82 2026 T5P = KP707106781 * (T5N - T5O);
Chris@82 2027 T5Q = T5M - T5P;
Chris@82 2028 T5U = T5M + T5P;
Chris@82 2029 {
Chris@82 2030 E T5p, T5H, T5R, T5T;
Chris@82 2031 T5p = W[12];
Chris@82 2032 T5H = W[13];
Chris@82 2033 iio[WS(vs, 7) + WS(rs, 4)] = FNMS(T5H, T5Q, T5p * T5G);
Chris@82 2034 rio[WS(vs, 7) + WS(rs, 4)] = FMA(T5H, T5G, T5p * T5Q);
Chris@82 2035 T5R = W[4];
Chris@82 2036 T5T = W[5];
Chris@82 2037 iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T5T, T5U, T5R * T5S);
Chris@82 2038 rio[WS(vs, 3) + WS(rs, 4)] = FMA(T5T, T5S, T5R * T5U);
Chris@82 2039 }
Chris@82 2040 }
Chris@82 2041 {
Chris@82 2042 E Tw, TI, TG, TK, Tv, TF;
Chris@82 2043 Tv = KP707106781 * (Tp - Tu);
Chris@82 2044 Tw = Tk - Tv;
Chris@82 2045 TI = Tk + Tv;
Chris@82 2046 TF = KP707106781 * (TD - TE);
Chris@82 2047 TG = TC - TF;
Chris@82 2048 TK = TC + TF;
Chris@82 2049 {
Chris@82 2050 E Tf, Tx, TH, TJ;
Chris@82 2051 Tf = W[12];
Chris@82 2052 Tx = W[13];
Chris@82 2053 iio[WS(vs, 7)] = FNMS(Tx, TG, Tf * Tw);
Chris@82 2054 rio[WS(vs, 7)] = FMA(Tx, Tw, Tf * TG);
Chris@82 2055 TH = W[4];
Chris@82 2056 TJ = W[5];
Chris@82 2057 iio[WS(vs, 3)] = FNMS(TJ, TK, TH * TI);
Chris@82 2058 rio[WS(vs, 3)] = FMA(TJ, TI, TH * TK);
Chris@82 2059 }
Chris@82 2060 }
Chris@82 2061 {
Chris@82 2062 E T9Q, T9W, T9U, T9Y, T9P, T9T;
Chris@82 2063 T9P = KP707106781 * (T9w + T9r);
Chris@82 2064 T9Q = T9O - T9P;
Chris@82 2065 T9W = T9O + T9P;
Chris@82 2066 T9T = KP707106781 * (T9F + T9G);
Chris@82 2067 T9U = T9S - T9T;
Chris@82 2068 T9Y = T9S + T9T;
Chris@82 2069 {
Chris@82 2070 E T9N, T9R, T9V, T9X;
Chris@82 2071 T9N = W[8];
Chris@82 2072 T9R = W[9];
Chris@82 2073 rio[WS(vs, 5) + WS(rs, 7)] = FMA(T9N, T9Q, T9R * T9U);
Chris@82 2074 iio[WS(vs, 5) + WS(rs, 7)] = FNMS(T9R, T9Q, T9N * T9U);
Chris@82 2075 T9V = W[0];
Chris@82 2076 T9X = W[1];
Chris@82 2077 rio[WS(vs, 1) + WS(rs, 7)] = FMA(T9V, T9W, T9X * T9Y);
Chris@82 2078 iio[WS(vs, 1) + WS(rs, 7)] = FNMS(T9X, T9W, T9V * T9Y);
Chris@82 2079 }
Chris@82 2080 }
Chris@82 2081 {
Chris@82 2082 E T36, T3i, T3g, T3k, T35, T3f;
Chris@82 2083 T35 = KP707106781 * (T2Z - T34);
Chris@82 2084 T36 = T2U - T35;
Chris@82 2085 T3i = T2U + T35;
Chris@82 2086 T3f = KP707106781 * (T3d - T3e);
Chris@82 2087 T3g = T3c - T3f;
Chris@82 2088 T3k = T3c + T3f;
Chris@82 2089 {
Chris@82 2090 E T2P, T37, T3h, T3j;
Chris@82 2091 T2P = W[12];
Chris@82 2092 T37 = W[13];
Chris@82 2093 iio[WS(vs, 7) + WS(rs, 2)] = FNMS(T37, T3g, T2P * T36);
Chris@82 2094 rio[WS(vs, 7) + WS(rs, 2)] = FMA(T37, T36, T2P * T3g);
Chris@82 2095 T3h = W[4];
Chris@82 2096 T3j = W[5];
Chris@82 2097 iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T3j, T3k, T3h * T3i);
Chris@82 2098 rio[WS(vs, 3) + WS(rs, 2)] = FMA(T3j, T3i, T3h * T3k);
Chris@82 2099 }
Chris@82 2100 }
Chris@82 2101 {
Chris@82 2102 E T5Y, T64, T62, T66, T5X, T61;
Chris@82 2103 T5X = KP707106781 * (T5E + T5z);
Chris@82 2104 T5Y = T5W - T5X;
Chris@82 2105 T64 = T5W + T5X;
Chris@82 2106 T61 = KP707106781 * (T5N + T5O);
Chris@82 2107 T62 = T60 - T61;
Chris@82 2108 T66 = T60 + T61;
Chris@82 2109 {
Chris@82 2110 E T5V, T5Z, T63, T65;
Chris@82 2111 T5V = W[8];
Chris@82 2112 T5Z = W[9];
Chris@82 2113 rio[WS(vs, 5) + WS(rs, 4)] = FMA(T5V, T5Y, T5Z * T62);
Chris@82 2114 iio[WS(vs, 5) + WS(rs, 4)] = FNMS(T5Z, T5Y, T5V * T62);
Chris@82 2115 T63 = W[0];
Chris@82 2116 T65 = W[1];
Chris@82 2117 rio[WS(vs, 1) + WS(rs, 4)] = FMA(T63, T64, T65 * T66);
Chris@82 2118 iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T65, T64, T63 * T66);
Chris@82 2119 }
Chris@82 2120 }
Chris@82 2121 {
Chris@82 2122 E T7g, T7m, T7k, T7o, T7f, T7j;
Chris@82 2123 T7f = KP707106781 * (T6W + T6R);
Chris@82 2124 T7g = T7e - T7f;
Chris@82 2125 T7m = T7e + T7f;
Chris@82 2126 T7j = KP707106781 * (T75 + T76);
Chris@82 2127 T7k = T7i - T7j;
Chris@82 2128 T7o = T7i + T7j;
Chris@82 2129 {
Chris@82 2130 E T7d, T7h, T7l, T7n;
Chris@82 2131 T7d = W[8];
Chris@82 2132 T7h = W[9];
Chris@82 2133 rio[WS(vs, 5) + WS(rs, 5)] = FMA(T7d, T7g, T7h * T7k);
Chris@82 2134 iio[WS(vs, 5) + WS(rs, 5)] = FNMS(T7h, T7g, T7d * T7k);
Chris@82 2135 T7l = W[0];
Chris@82 2136 T7n = W[1];
Chris@82 2137 rio[WS(vs, 1) + WS(rs, 5)] = FMA(T7l, T7m, T7n * T7o);
Chris@82 2138 iio[WS(vs, 1) + WS(rs, 5)] = FNMS(T7n, T7m, T7l * T7o);
Chris@82 2139 }
Chris@82 2140 }
Chris@82 2141 {
Chris@82 2142 E T8g, T8s, T8q, T8u, T8f, T8p;
Chris@82 2143 T8f = KP707106781 * (T89 - T8e);
Chris@82 2144 T8g = T84 - T8f;
Chris@82 2145 T8s = T84 + T8f;
Chris@82 2146 T8p = KP707106781 * (T8n - T8o);
Chris@82 2147 T8q = T8m - T8p;
Chris@82 2148 T8u = T8m + T8p;
Chris@82 2149 {
Chris@82 2150 E T7Z, T8h, T8r, T8t;
Chris@82 2151 T7Z = W[12];
Chris@82 2152 T8h = W[13];
Chris@82 2153 iio[WS(vs, 7) + WS(rs, 6)] = FNMS(T8h, T8q, T7Z * T8g);
Chris@82 2154 rio[WS(vs, 7) + WS(rs, 6)] = FMA(T8h, T8g, T7Z * T8q);
Chris@82 2155 T8r = W[4];
Chris@82 2156 T8t = W[5];
Chris@82 2157 iio[WS(vs, 3) + WS(rs, 6)] = FNMS(T8t, T8u, T8r * T8s);
Chris@82 2158 rio[WS(vs, 3) + WS(rs, 6)] = FMA(T8t, T8s, T8r * T8u);
Chris@82 2159 }
Chris@82 2160 }
Chris@82 2161 {
Chris@82 2162 E T4G, T4M, T4K, T4O, T4F, T4J;
Chris@82 2163 T4F = KP707106781 * (T4m + T4h);
Chris@82 2164 T4G = T4E - T4F;
Chris@82 2165 T4M = T4E + T4F;
Chris@82 2166 T4J = KP707106781 * (T4v + T4w);
Chris@82 2167 T4K = T4I - T4J;
Chris@82 2168 T4O = T4I + T4J;
Chris@82 2169 {
Chris@82 2170 E T4D, T4H, T4L, T4N;
Chris@82 2171 T4D = W[8];
Chris@82 2172 T4H = W[9];
Chris@82 2173 rio[WS(vs, 5) + WS(rs, 3)] = FMA(T4D, T4G, T4H * T4K);
Chris@82 2174 iio[WS(vs, 5) + WS(rs, 3)] = FNMS(T4H, T4G, T4D * T4K);
Chris@82 2175 T4L = W[0];
Chris@82 2176 T4N = W[1];
Chris@82 2177 rio[WS(vs, 1) + WS(rs, 3)] = FMA(T4L, T4M, T4N * T4O);
Chris@82 2178 iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T4N, T4M, T4L * T4O);
Chris@82 2179 }
Chris@82 2180 }
Chris@82 2181 {
Chris@82 2182 E TO, TU, TS, TW, TN, TR;
Chris@82 2183 TN = KP707106781 * (Tu + Tp);
Chris@82 2184 TO = TM - TN;
Chris@82 2185 TU = TM + TN;
Chris@82 2186 TR = KP707106781 * (TD + TE);
Chris@82 2187 TS = TQ - TR;
Chris@82 2188 TW = TQ + TR;
Chris@82 2189 {
Chris@82 2190 E TL, TP, TT, TV;
Chris@82 2191 TL = W[8];
Chris@82 2192 TP = W[9];
Chris@82 2193 rio[WS(vs, 5)] = FMA(TL, TO, TP * TS);
Chris@82 2194 iio[WS(vs, 5)] = FNMS(TP, TO, TL * TS);
Chris@82 2195 TT = W[0];
Chris@82 2196 TV = W[1];
Chris@82 2197 rio[WS(vs, 1)] = FMA(TT, TU, TV * TW);
Chris@82 2198 iio[WS(vs, 1)] = FNMS(TV, TU, TT * TW);
Chris@82 2199 }
Chris@82 2200 }
Chris@82 2201 {
Chris@82 2202 E T26, T2c, T2a, T2e, T25, T29;
Chris@82 2203 T25 = KP707106781 * (T1M + T1H);
Chris@82 2204 T26 = T24 - T25;
Chris@82 2205 T2c = T24 + T25;
Chris@82 2206 T29 = KP707106781 * (T1V + T1W);
Chris@82 2207 T2a = T28 - T29;
Chris@82 2208 T2e = T28 + T29;
Chris@82 2209 {
Chris@82 2210 E T23, T27, T2b, T2d;
Chris@82 2211 T23 = W[8];
Chris@82 2212 T27 = W[9];
Chris@82 2213 rio[WS(vs, 5) + WS(rs, 1)] = FMA(T23, T26, T27 * T2a);
Chris@82 2214 iio[WS(vs, 5) + WS(rs, 1)] = FNMS(T27, T26, T23 * T2a);
Chris@82 2215 T2b = W[0];
Chris@82 2216 T2d = W[1];
Chris@82 2217 rio[WS(vs, 1) + WS(rs, 1)] = FMA(T2b, T2c, T2d * T2e);
Chris@82 2218 iio[WS(vs, 1) + WS(rs, 1)] = FNMS(T2d, T2c, T2b * T2e);
Chris@82 2219 }
Chris@82 2220 }
Chris@82 2221 {
Chris@82 2222 E T9y, T9K, T9I, T9M, T9x, T9H;
Chris@82 2223 T9x = KP707106781 * (T9r - T9w);
Chris@82 2224 T9y = T9m - T9x;
Chris@82 2225 T9K = T9m + T9x;
Chris@82 2226 T9H = KP707106781 * (T9F - T9G);
Chris@82 2227 T9I = T9E - T9H;
Chris@82 2228 T9M = T9E + T9H;
Chris@82 2229 {
Chris@82 2230 E T9h, T9z, T9J, T9L;
Chris@82 2231 T9h = W[12];
Chris@82 2232 T9z = W[13];
Chris@82 2233 iio[WS(vs, 7) + WS(rs, 7)] = FNMS(T9z, T9I, T9h * T9y);
Chris@82 2234 rio[WS(vs, 7) + WS(rs, 7)] = FMA(T9z, T9y, T9h * T9I);
Chris@82 2235 T9J = W[4];
Chris@82 2236 T9L = W[5];
Chris@82 2237 iio[WS(vs, 3) + WS(rs, 7)] = FNMS(T9L, T9M, T9J * T9K);
Chris@82 2238 rio[WS(vs, 3) + WS(rs, 7)] = FMA(T9L, T9K, T9J * T9M);
Chris@82 2239 }
Chris@82 2240 }
Chris@82 2241 {
Chris@82 2242 E T6Y, T7a, T78, T7c, T6X, T77;
Chris@82 2243 T6X = KP707106781 * (T6R - T6W);
Chris@82 2244 T6Y = T6M - T6X;
Chris@82 2245 T7a = T6M + T6X;
Chris@82 2246 T77 = KP707106781 * (T75 - T76);
Chris@82 2247 T78 = T74 - T77;
Chris@82 2248 T7c = T74 + T77;
Chris@82 2249 {
Chris@82 2250 E T6H, T6Z, T79, T7b;
Chris@82 2251 T6H = W[12];
Chris@82 2252 T6Z = W[13];
Chris@82 2253 iio[WS(vs, 7) + WS(rs, 5)] = FNMS(T6Z, T78, T6H * T6Y);
Chris@82 2254 rio[WS(vs, 7) + WS(rs, 5)] = FMA(T6Z, T6Y, T6H * T78);
Chris@82 2255 T79 = W[4];
Chris@82 2256 T7b = W[5];
Chris@82 2257 iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T7b, T7c, T79 * T7a);
Chris@82 2258 rio[WS(vs, 3) + WS(rs, 5)] = FMA(T7b, T7a, T79 * T7c);
Chris@82 2259 }
Chris@82 2260 }
Chris@82 2261 {
Chris@82 2262 E T1O, T20, T1Y, T22, T1N, T1X;
Chris@82 2263 T1N = KP707106781 * (T1H - T1M);
Chris@82 2264 T1O = T1C - T1N;
Chris@82 2265 T20 = T1C + T1N;
Chris@82 2266 T1X = KP707106781 * (T1V - T1W);
Chris@82 2267 T1Y = T1U - T1X;
Chris@82 2268 T22 = T1U + T1X;
Chris@82 2269 {
Chris@82 2270 E T1x, T1P, T1Z, T21;
Chris@82 2271 T1x = W[12];
Chris@82 2272 T1P = W[13];
Chris@82 2273 iio[WS(vs, 7) + WS(rs, 1)] = FNMS(T1P, T1Y, T1x * T1O);
Chris@82 2274 rio[WS(vs, 7) + WS(rs, 1)] = FMA(T1P, T1O, T1x * T1Y);
Chris@82 2275 T1Z = W[4];
Chris@82 2276 T21 = W[5];
Chris@82 2277 iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T21, T22, T1Z * T20);
Chris@82 2278 rio[WS(vs, 3) + WS(rs, 1)] = FMA(T21, T20, T1Z * T22);
Chris@82 2279 }
Chris@82 2280 }
Chris@82 2281 {
Chris@82 2282 E T4o, T4A, T4y, T4C, T4n, T4x;
Chris@82 2283 T4n = KP707106781 * (T4h - T4m);
Chris@82 2284 T4o = T4c - T4n;
Chris@82 2285 T4A = T4c + T4n;
Chris@82 2286 T4x = KP707106781 * (T4v - T4w);
Chris@82 2287 T4y = T4u - T4x;
Chris@82 2288 T4C = T4u + T4x;
Chris@82 2289 {
Chris@82 2290 E T47, T4p, T4z, T4B;
Chris@82 2291 T47 = W[12];
Chris@82 2292 T4p = W[13];
Chris@82 2293 iio[WS(vs, 7) + WS(rs, 3)] = FNMS(T4p, T4y, T47 * T4o);
Chris@82 2294 rio[WS(vs, 7) + WS(rs, 3)] = FMA(T4p, T4o, T47 * T4y);
Chris@82 2295 T4z = W[4];
Chris@82 2296 T4B = W[5];
Chris@82 2297 iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T4B, T4C, T4z * T4A);
Chris@82 2298 rio[WS(vs, 3) + WS(rs, 3)] = FMA(T4B, T4A, T4z * T4C);
Chris@82 2299 }
Chris@82 2300 }
Chris@82 2301 {
Chris@82 2302 E T3o, T3u, T3s, T3w, T3n, T3r;
Chris@82 2303 T3n = KP707106781 * (T34 + T2Z);
Chris@82 2304 T3o = T3m - T3n;
Chris@82 2305 T3u = T3m + T3n;
Chris@82 2306 T3r = KP707106781 * (T3d + T3e);
Chris@82 2307 T3s = T3q - T3r;
Chris@82 2308 T3w = T3q + T3r;
Chris@82 2309 {
Chris@82 2310 E T3l, T3p, T3t, T3v;
Chris@82 2311 T3l = W[8];
Chris@82 2312 T3p = W[9];
Chris@82 2313 rio[WS(vs, 5) + WS(rs, 2)] = FMA(T3l, T3o, T3p * T3s);
Chris@82 2314 iio[WS(vs, 5) + WS(rs, 2)] = FNMS(T3p, T3o, T3l * T3s);
Chris@82 2315 T3t = W[0];
Chris@82 2316 T3v = W[1];
Chris@82 2317 rio[WS(vs, 1) + WS(rs, 2)] = FMA(T3t, T3u, T3v * T3w);
Chris@82 2318 iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T3v, T3u, T3t * T3w);
Chris@82 2319 }
Chris@82 2320 }
Chris@82 2321 {
Chris@82 2322 E T8y, T8E, T8C, T8G, T8x, T8B;
Chris@82 2323 T8x = KP707106781 * (T8e + T89);
Chris@82 2324 T8y = T8w - T8x;
Chris@82 2325 T8E = T8w + T8x;
Chris@82 2326 T8B = KP707106781 * (T8n + T8o);
Chris@82 2327 T8C = T8A - T8B;
Chris@82 2328 T8G = T8A + T8B;
Chris@82 2329 {
Chris@82 2330 E T8v, T8z, T8D, T8F;
Chris@82 2331 T8v = W[8];
Chris@82 2332 T8z = W[9];
Chris@82 2333 rio[WS(vs, 5) + WS(rs, 6)] = FMA(T8v, T8y, T8z * T8C);
Chris@82 2334 iio[WS(vs, 5) + WS(rs, 6)] = FNMS(T8z, T8y, T8v * T8C);
Chris@82 2335 T8D = W[0];
Chris@82 2336 T8F = W[1];
Chris@82 2337 rio[WS(vs, 1) + WS(rs, 6)] = FMA(T8D, T8E, T8F * T8G);
Chris@82 2338 iio[WS(vs, 1) + WS(rs, 6)] = FNMS(T8F, T8E, T8D * T8G);
Chris@82 2339 }
Chris@82 2340 }
Chris@82 2341 }
Chris@82 2342 }
Chris@82 2343 }
Chris@82 2344
/*
 * Twiddle instruction table for the q1_8 codelet; the `desc` initializer
 * that follows stores a pointer to it.
 * It holds a TW_FULL entry with arguments (0, 8) followed by a TW_NEXT
 * entry with arguments (1, 0).
 * NOTE(review): the 8 presumably matches the transform size (`-n 8` on the
 * generator command line) and TW_NEXT presumably ends the list -- confirm
 * against the tw_instr declaration.
 */
Chris@82 2345 static const tw_instr twinstr[] = {
Chris@82 2346 {TW_FULL, 0, 8},
Chris@82 2347 {TW_NEXT, 1, 0}
Chris@82 2348 };
Chris@82 2349
/*
 * Descriptor passed by address to X(kdft_difsq_register) in the
 * registration function. The positional initializer supplies, in order:
 * the integer 8, the name string "q1_8", the `twinstr` table, the address
 * of GENUS, the brace group {416, 144, 112, 0}, and three trailing zeros.
 * NOTE(review): 8 is presumably the radix and the brace group presumably
 * holds operation counts (adds, muls, fused multiply-adds, other) --
 * confirm against the ct_desc declaration.
 */
Chris@82 2350 static const ct_desc desc = { 8, "q1_8", twinstr, &GENUS, {416, 144, 112, 0}, 0, 0, 0 };
Chris@82 2351
/*
 * Registration entry point for this codelet: forwards the planner `p`,
 * the `q1_8` symbol and the address of `desc` to X(kdft_difsq_register).
 * Returns nothing; any effect on the planner happens inside that call.
 */
Chris@82 2352 void X(codelet_q1_8) (planner *p) {
Chris@82 2353 X(kdft_difsq_register) (p, q1_8, &desc);
Chris@82 2354 }
Chris@82 2355 #endif