annotate src/fftw-3.3.5/dft/simd/common/n2sv_64.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 2cd0e3b3e1fd
children
rev   line source
Chris@42 1 /*
Chris@42 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@42 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@42 4 *
Chris@42 5 * This program is free software; you can redistribute it and/or modify
Chris@42 6 * it under the terms of the GNU General Public License as published by
Chris@42 7 * the Free Software Foundation; either version 2 of the License, or
Chris@42 8 * (at your option) any later version.
Chris@42 9 *
Chris@42 10 * This program is distributed in the hope that it will be useful,
Chris@42 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@42 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@42 13 * GNU General Public License for more details.
Chris@42 14 *
Chris@42 15 * You should have received a copy of the GNU General Public License
Chris@42 16 * along with this program; if not, write to the Free Software
Chris@42 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@42 18 *
Chris@42 19 */
Chris@42 20
Chris@42 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@42 22 /* Generated on Sat Jul 30 16:41:43 EDT 2016 */
Chris@42 23
Chris@42 24 #include "codelet-dft.h"
Chris@42 25
Chris@42 26 #ifdef HAVE_FMA
Chris@42 27
Chris@42 28 /* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name n2sv_64 -with-ostride 1 -include n2s.h -store-multiple 4 */
Chris@42 29
Chris@42 30 /*
Chris@42 31 * This function contains 912 FP additions, 392 FP multiplications,
Chris@42 32 * (or, 520 additions, 0 multiplications, 392 fused multiply/add),
Chris@42 33 * 310 stack variables, 15 constants, and 288 memory accesses
Chris@42 34 */
Chris@42 35 #include "n2s.h"
Chris@42 36
Chris@42 37 static void n2sv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@42 38 {
Chris@42 39 DVK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@42 40 DVK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@42 41 DVK(KP534511135, +0.534511135950791641089685961295362908582039528);
Chris@42 42 DVK(KP303346683, +0.303346683607342391675883946941299872384187453);
Chris@42 43 DVK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@42 44 DVK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@42 45 DVK(KP820678790, +0.820678790828660330972281985331011598767386482);
Chris@42 46 DVK(KP098491403, +0.098491403357164253077197521291327432293052451);
Chris@42 47 DVK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 48 DVK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 49 DVK(KP668178637, +0.668178637919298919997757686523080761552472251);
Chris@42 50 DVK(KP198912367, +0.198912367379658006911597622644676228597850501);
Chris@42 51 DVK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 52 DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 53 DVK(KP414213562, +0.414213562373095048801688724209698078569671875);
Chris@42 54 {
Chris@42 55 INT i;
Chris@42 56 for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(256, is), MAKE_VOLATILE_STRIDE(256, os)) {
Chris@42 57 V TeJ, TeK, TeP, TeQ, TfH, TfI, TfJ, TfK, Tgj, Tgk, Tgv, Tgw, T9a, T99, T9e;
Chris@42 58 V T9b;
Chris@42 59 {
Chris@42 60 V T7B, T37, T5Z, T8F, TbB, TcB, Tf, Td9, T62, T7C, T2i, TdH, Tcb, Tah, T8G;
Chris@42 61 V T3e, Tak, TbC, T65, T3m, TdI, Tu, Tda, T2x, TbD, Tan, T8I, T7G, T8J, T7J;
Chris@42 62 V T64, T3t, Tas, Tce, TK, Tdd, Tav, Tcf, Tdc, T2N, T3G, T6G, T9k, T7O, T9l;
Chris@42 63 V T7R, T6H, T3N, T1L, TdA, Tdx, Teo, Tbs, Tct, T5Q, T6V, T8y, T9z, T5j, T6Y;
Chris@42 64 V Tbb, Tcw, T8n, T9C, Tch, Taz, Tdf, TZ, Tdg, T32, Tci, TaC, T6J, T3Z, T9n;
Chris@42 65 V T7V, T9o, T7Y, T6K, T46, Tdp, T1g, Tej, Tdm, Tcm, Tb1, Tcp, TaK, T6O, T4X;
Chris@42 66 V T9s, T8f, T6R, T4q, T9v, T84, Tdn, T1v, Tek, Tds, Tcn, TaV, Tcq, Tb4, T9t;
Chris@42 67 V T8b, T9w, T8i, T6S, T50, T6P, T4N, T5k, T1V, T1S, TdB, Tbi, T5s, Tbt, Tbg;
Chris@42 68 V T5F, T5R, T5p, T1Y, Tbj, T5n, T8z, T8q;
Chris@42 69 {
Chris@42 70 V Tba, T57, T8l, Tb7, T5M, T8w, T8m, T5P, T8x, T5i;
Chris@42 71 {
Chris@42 72 V T2p, T7F, T7E, Tal, T2w, Tam, T3s, T7H, T7I, T3p, T3d, T3a;
Chris@42 73 {
Chris@42 74 V T8, T35, T3, T5Y, T26, T5X, T6, T36, T29, T9, T2b, T2c, Tb, Tc, T2e;
Chris@42 75 V T2f;
Chris@42 76 {
Chris@42 77 V T1, T2, T24, T25, T4, T5, T27, T28;
Chris@42 78 T1 = LD(&(ri[0]), ivs, &(ri[0]));
Chris@42 79 T2 = LD(&(ri[WS(is, 32)]), ivs, &(ri[0]));
Chris@42 80 T24 = LD(&(ii[0]), ivs, &(ii[0]));
Chris@42 81 T25 = LD(&(ii[WS(is, 32)]), ivs, &(ii[0]));
Chris@42 82 T4 = LD(&(ri[WS(is, 16)]), ivs, &(ri[0]));
Chris@42 83 T5 = LD(&(ri[WS(is, 48)]), ivs, &(ri[0]));
Chris@42 84 T27 = LD(&(ii[WS(is, 16)]), ivs, &(ii[0]));
Chris@42 85 T28 = LD(&(ii[WS(is, 48)]), ivs, &(ii[0]));
Chris@42 86 T8 = LD(&(ri[WS(is, 8)]), ivs, &(ri[0]));
Chris@42 87 T35 = VSUB(T1, T2);
Chris@42 88 T3 = VADD(T1, T2);
Chris@42 89 T5Y = VSUB(T24, T25);
Chris@42 90 T26 = VADD(T24, T25);
Chris@42 91 T5X = VSUB(T4, T5);
Chris@42 92 T6 = VADD(T4, T5);
Chris@42 93 T36 = VSUB(T27, T28);
Chris@42 94 T29 = VADD(T27, T28);
Chris@42 95 T9 = LD(&(ri[WS(is, 40)]), ivs, &(ri[0]));
Chris@42 96 T2b = LD(&(ii[WS(is, 8)]), ivs, &(ii[0]));
Chris@42 97 T2c = LD(&(ii[WS(is, 40)]), ivs, &(ii[0]));
Chris@42 98 Tb = LD(&(ri[WS(is, 56)]), ivs, &(ri[0]));
Chris@42 99 Tc = LD(&(ri[WS(is, 24)]), ivs, &(ri[0]));
Chris@42 100 T2e = LD(&(ii[WS(is, 56)]), ivs, &(ii[0]));
Chris@42 101 T2f = LD(&(ii[WS(is, 24)]), ivs, &(ii[0]));
Chris@42 102 }
Chris@42 103 {
Chris@42 104 V T39, Ta, T38, T2d, T3b, Td, T3c, T2g, Taf, T7;
Chris@42 105 T7B = VADD(T35, T36);
Chris@42 106 T37 = VSUB(T35, T36);
Chris@42 107 T39 = VSUB(T8, T9);
Chris@42 108 Ta = VADD(T8, T9);
Chris@42 109 T38 = VSUB(T2b, T2c);
Chris@42 110 T2d = VADD(T2b, T2c);
Chris@42 111 T3b = VSUB(Tb, Tc);
Chris@42 112 Td = VADD(Tb, Tc);
Chris@42 113 T3c = VSUB(T2e, T2f);
Chris@42 114 T2g = VADD(T2e, T2f);
Chris@42 115 T5Z = VADD(T5X, T5Y);
Chris@42 116 T8F = VSUB(T5Y, T5X);
Chris@42 117 Taf = VSUB(T3, T6);
Chris@42 118 T7 = VADD(T3, T6);
Chris@42 119 {
Chris@42 120 V TbA, T2a, Te, Tbz, T60, T61, T2h, Tag;
Chris@42 121 TbA = VSUB(T26, T29);
Chris@42 122 T2a = VADD(T26, T29);
Chris@42 123 Te = VADD(Ta, Td);
Chris@42 124 Tbz = VSUB(Td, Ta);
Chris@42 125 T3d = VADD(T3b, T3c);
Chris@42 126 T60 = VSUB(T3b, T3c);
Chris@42 127 T61 = VADD(T39, T38);
Chris@42 128 T3a = VSUB(T38, T39);
Chris@42 129 T2h = VADD(T2d, T2g);
Chris@42 130 Tag = VSUB(T2d, T2g);
Chris@42 131 TbB = VADD(Tbz, TbA);
Chris@42 132 TcB = VSUB(TbA, Tbz);
Chris@42 133 Tf = VADD(T7, Te);
Chris@42 134 Td9 = VSUB(T7, Te);
Chris@42 135 T62 = VSUB(T60, T61);
Chris@42 136 T7C = VADD(T61, T60);
Chris@42 137 T2i = VADD(T2a, T2h);
Chris@42 138 TdH = VSUB(T2a, T2h);
Chris@42 139 Tcb = VSUB(Taf, Tag);
Chris@42 140 Tah = VADD(Taf, Tag);
Chris@42 141 }
Chris@42 142 }
Chris@42 143 }
Chris@42 144 {
Chris@42 145 V T3j, Ti, T3h, T2l, T3g, Tl, T2t, T3k, T2o, T3q, Tp, T3o, T2s, T3n, Ts;
Chris@42 146 V T2u, T2m, T2n;
Chris@42 147 {
Chris@42 148 V Tg, Th, T2j, T2k, Tj, Tk;
Chris@42 149 Tg = LD(&(ri[WS(is, 4)]), ivs, &(ri[0]));
Chris@42 150 Th = LD(&(ri[WS(is, 36)]), ivs, &(ri[0]));
Chris@42 151 T2j = LD(&(ii[WS(is, 4)]), ivs, &(ii[0]));
Chris@42 152 T2k = LD(&(ii[WS(is, 36)]), ivs, &(ii[0]));
Chris@42 153 Tj = LD(&(ri[WS(is, 20)]), ivs, &(ri[0]));
Chris@42 154 Tk = LD(&(ri[WS(is, 52)]), ivs, &(ri[0]));
Chris@42 155 T2m = LD(&(ii[WS(is, 20)]), ivs, &(ii[0]));
Chris@42 156 T8G = VADD(T3a, T3d);
Chris@42 157 T3e = VSUB(T3a, T3d);
Chris@42 158 T3j = VSUB(Tg, Th);
Chris@42 159 Ti = VADD(Tg, Th);
Chris@42 160 T3h = VSUB(T2j, T2k);
Chris@42 161 T2l = VADD(T2j, T2k);
Chris@42 162 T3g = VSUB(Tj, Tk);
Chris@42 163 Tl = VADD(Tj, Tk);
Chris@42 164 T2n = LD(&(ii[WS(is, 52)]), ivs, &(ii[0]));
Chris@42 165 }
Chris@42 166 {
Chris@42 167 V Tn, To, T2q, T2r, Tq, Tr;
Chris@42 168 Tn = LD(&(ri[WS(is, 60)]), ivs, &(ri[0]));
Chris@42 169 To = LD(&(ri[WS(is, 28)]), ivs, &(ri[0]));
Chris@42 170 T2q = LD(&(ii[WS(is, 60)]), ivs, &(ii[0]));
Chris@42 171 T2r = LD(&(ii[WS(is, 28)]), ivs, &(ii[0]));
Chris@42 172 Tq = LD(&(ri[WS(is, 12)]), ivs, &(ri[0]));
Chris@42 173 Tr = LD(&(ri[WS(is, 44)]), ivs, &(ri[0]));
Chris@42 174 T2t = LD(&(ii[WS(is, 12)]), ivs, &(ii[0]));
Chris@42 175 T3k = VSUB(T2m, T2n);
Chris@42 176 T2o = VADD(T2m, T2n);
Chris@42 177 T3q = VSUB(Tn, To);
Chris@42 178 Tp = VADD(Tn, To);
Chris@42 179 T3o = VSUB(T2q, T2r);
Chris@42 180 T2s = VADD(T2q, T2r);
Chris@42 181 T3n = VSUB(Tq, Tr);
Chris@42 182 Ts = VADD(Tq, Tr);
Chris@42 183 T2u = LD(&(ii[WS(is, 44)]), ivs, &(ii[0]));
Chris@42 184 }
Chris@42 185 {
Chris@42 186 V Tai, Tm, Taj, T3r;
Chris@42 187 Tai = VSUB(Ti, Tl);
Chris@42 188 Tm = VADD(Ti, Tl);
Chris@42 189 T2p = VADD(T2l, T2o);
Chris@42 190 Taj = VSUB(T2l, T2o);
Chris@42 191 {
Chris@42 192 V T3i, T3l, Tt, T2v;
Chris@42 193 T7F = VSUB(T3h, T3g);
Chris@42 194 T3i = VADD(T3g, T3h);
Chris@42 195 T3l = VSUB(T3j, T3k);
Chris@42 196 T7E = VADD(T3j, T3k);
Chris@42 197 Tt = VADD(Tp, Ts);
Chris@42 198 Tal = VSUB(Tp, Ts);
Chris@42 199 T2v = VADD(T2t, T2u);
Chris@42 200 T3r = VSUB(T2t, T2u);
Chris@42 201 Tak = VADD(Tai, Taj);
Chris@42 202 TbC = VSUB(Taj, Tai);
Chris@42 203 T65 = VFNMS(LDK(KP414213562), T3i, T3l);
Chris@42 204 T3m = VFMA(LDK(KP414213562), T3l, T3i);
Chris@42 205 TdI = VSUB(Tt, Tm);
Chris@42 206 Tu = VADD(Tm, Tt);
Chris@42 207 T2w = VADD(T2s, T2v);
Chris@42 208 Tam = VSUB(T2s, T2v);
Chris@42 209 }
Chris@42 210 T3s = VSUB(T3q, T3r);
Chris@42 211 T7H = VADD(T3q, T3r);
Chris@42 212 T7I = VSUB(T3o, T3n);
Chris@42 213 T3p = VADD(T3n, T3o);
Chris@42 214 }
Chris@42 215 }
Chris@42 216 {
Chris@42 217 V T7M, T7Q, T7N, T3M, T3J, T7P;
Chris@42 218 {
Chris@42 219 V TG, T3H, Ty, T3x, T2B, T3w, TB, T3I, T2E, TH, T2J, T2K, TD, TE, T2G;
Chris@42 220 V T2H;
Chris@42 221 {
Chris@42 222 V Tw, Tx, T2z, T2A, Tz, TA, T2C, T2D;
Chris@42 223 Tw = LD(&(ri[WS(is, 2)]), ivs, &(ri[0]));
Chris@42 224 Tda = VSUB(T2p, T2w);
Chris@42 225 T2x = VADD(T2p, T2w);
Chris@42 226 TbD = VADD(Tal, Tam);
Chris@42 227 Tan = VSUB(Tal, Tam);
Chris@42 228 T8I = VFNMS(LDK(KP414213562), T7E, T7F);
Chris@42 229 T7G = VFMA(LDK(KP414213562), T7F, T7E);
Chris@42 230 T8J = VFMA(LDK(KP414213562), T7H, T7I);
Chris@42 231 T7J = VFNMS(LDK(KP414213562), T7I, T7H);
Chris@42 232 T64 = VFMA(LDK(KP414213562), T3p, T3s);
Chris@42 233 T3t = VFNMS(LDK(KP414213562), T3s, T3p);
Chris@42 234 Tx = LD(&(ri[WS(is, 34)]), ivs, &(ri[0]));
Chris@42 235 T2z = LD(&(ii[WS(is, 2)]), ivs, &(ii[0]));
Chris@42 236 T2A = LD(&(ii[WS(is, 34)]), ivs, &(ii[0]));
Chris@42 237 Tz = LD(&(ri[WS(is, 18)]), ivs, &(ri[0]));
Chris@42 238 TA = LD(&(ri[WS(is, 50)]), ivs, &(ri[0]));
Chris@42 239 T2C = LD(&(ii[WS(is, 18)]), ivs, &(ii[0]));
Chris@42 240 T2D = LD(&(ii[WS(is, 50)]), ivs, &(ii[0]));
Chris@42 241 TG = LD(&(ri[WS(is, 58)]), ivs, &(ri[0]));
Chris@42 242 T3H = VSUB(Tw, Tx);
Chris@42 243 Ty = VADD(Tw, Tx);
Chris@42 244 T3x = VSUB(T2z, T2A);
Chris@42 245 T2B = VADD(T2z, T2A);
Chris@42 246 T3w = VSUB(Tz, TA);
Chris@42 247 TB = VADD(Tz, TA);
Chris@42 248 T3I = VSUB(T2C, T2D);
Chris@42 249 T2E = VADD(T2C, T2D);
Chris@42 250 TH = LD(&(ri[WS(is, 26)]), ivs, &(ri[0]));
Chris@42 251 T2J = LD(&(ii[WS(is, 58)]), ivs, &(ii[0]));
Chris@42 252 T2K = LD(&(ii[WS(is, 26)]), ivs, &(ii[0]));
Chris@42 253 TD = LD(&(ri[WS(is, 10)]), ivs, &(ri[0]));
Chris@42 254 TE = LD(&(ri[WS(is, 42)]), ivs, &(ri[0]));
Chris@42 255 T2G = LD(&(ii[WS(is, 10)]), ivs, &(ii[0]));
Chris@42 256 T2H = LD(&(ii[WS(is, 42)]), ivs, &(ii[0]));
Chris@42 257 }
Chris@42 258 {
Chris@42 259 V Tat, TC, Tar, T2F, T3K, T3E, TJ, Taq, T2M, Tau, T3B, T3L, T3y, T3F;
Chris@42 260 {
Chris@42 261 V TI, T3C, T2L, T3D, TF, T3z, T2I, T3A;
Chris@42 262 Tat = VSUB(Ty, TB);
Chris@42 263 TC = VADD(Ty, TB);
Chris@42 264 TI = VADD(TG, TH);
Chris@42 265 T3C = VSUB(TG, TH);
Chris@42 266 T2L = VADD(T2J, T2K);
Chris@42 267 T3D = VSUB(T2J, T2K);
Chris@42 268 TF = VADD(TD, TE);
Chris@42 269 T3z = VSUB(TD, TE);
Chris@42 270 T2I = VADD(T2G, T2H);
Chris@42 271 T3A = VSUB(T2G, T2H);
Chris@42 272 Tar = VSUB(T2B, T2E);
Chris@42 273 T2F = VADD(T2B, T2E);
Chris@42 274 T3K = VADD(T3C, T3D);
Chris@42 275 T3E = VSUB(T3C, T3D);
Chris@42 276 TJ = VADD(TF, TI);
Chris@42 277 Taq = VSUB(TI, TF);
Chris@42 278 T2M = VADD(T2I, T2L);
Chris@42 279 Tau = VSUB(T2I, T2L);
Chris@42 280 T3B = VADD(T3z, T3A);
Chris@42 281 T3L = VSUB(T3A, T3z);
Chris@42 282 }
Chris@42 283 T7M = VSUB(T3x, T3w);
Chris@42 284 T3y = VADD(T3w, T3x);
Chris@42 285 Tas = VADD(Taq, Tar);
Chris@42 286 Tce = VSUB(Tar, Taq);
Chris@42 287 TK = VADD(TC, TJ);
Chris@42 288 Tdd = VSUB(TC, TJ);
Chris@42 289 Tav = VADD(Tat, Tau);
Chris@42 290 Tcf = VSUB(Tat, Tau);
Chris@42 291 T7Q = VADD(T3B, T3E);
Chris@42 292 T3F = VSUB(T3B, T3E);
Chris@42 293 Tdc = VSUB(T2F, T2M);
Chris@42 294 T2N = VADD(T2F, T2M);
Chris@42 295 T7N = VADD(T3L, T3K);
Chris@42 296 T3M = VSUB(T3K, T3L);
Chris@42 297 T3J = VSUB(T3H, T3I);
Chris@42 298 T7P = VADD(T3H, T3I);
Chris@42 299 T3G = VFNMS(LDK(KP707106781), T3F, T3y);
Chris@42 300 T6G = VFMA(LDK(KP707106781), T3F, T3y);
Chris@42 301 }
Chris@42 302 }
Chris@42 303 {
Chris@42 304 V T1H, T5I, T1z, Tb8, T56, T53, T1C, Tb9, T5L, T1I, T5e, T5f, T1E, T1F, T59;
Chris@42 305 V T5a;
Chris@42 306 {
Chris@42 307 V T1x, T1y, T54, T55, T1A, T1B, T5J, T5K;
Chris@42 308 T1x = LD(&(ri[WS(is, 63)]), ivs, &(ri[WS(is, 1)]));
Chris@42 309 T9k = VFNMS(LDK(KP707106781), T7N, T7M);
Chris@42 310 T7O = VFMA(LDK(KP707106781), T7N, T7M);
Chris@42 311 T9l = VFNMS(LDK(KP707106781), T7Q, T7P);
Chris@42 312 T7R = VFMA(LDK(KP707106781), T7Q, T7P);
Chris@42 313 T6H = VFMA(LDK(KP707106781), T3M, T3J);
Chris@42 314 T3N = VFNMS(LDK(KP707106781), T3M, T3J);
Chris@42 315 T1y = LD(&(ri[WS(is, 31)]), ivs, &(ri[WS(is, 1)]));
Chris@42 316 T54 = LD(&(ii[WS(is, 63)]), ivs, &(ii[WS(is, 1)]));
Chris@42 317 T55 = LD(&(ii[WS(is, 31)]), ivs, &(ii[WS(is, 1)]));
Chris@42 318 T1A = LD(&(ri[WS(is, 15)]), ivs, &(ri[WS(is, 1)]));
Chris@42 319 T1B = LD(&(ri[WS(is, 47)]), ivs, &(ri[WS(is, 1)]));
Chris@42 320 T5J = LD(&(ii[WS(is, 15)]), ivs, &(ii[WS(is, 1)]));
Chris@42 321 T5K = LD(&(ii[WS(is, 47)]), ivs, &(ii[WS(is, 1)]));
Chris@42 322 T1H = LD(&(ri[WS(is, 55)]), ivs, &(ri[WS(is, 1)]));
Chris@42 323 T5I = VSUB(T1x, T1y);
Chris@42 324 T1z = VADD(T1x, T1y);
Chris@42 325 Tb8 = VADD(T54, T55);
Chris@42 326 T56 = VSUB(T54, T55);
Chris@42 327 T53 = VSUB(T1A, T1B);
Chris@42 328 T1C = VADD(T1A, T1B);
Chris@42 329 Tb9 = VADD(T5J, T5K);
Chris@42 330 T5L = VSUB(T5J, T5K);
Chris@42 331 T1I = LD(&(ri[WS(is, 23)]), ivs, &(ri[WS(is, 1)]));
Chris@42 332 T5e = LD(&(ii[WS(is, 55)]), ivs, &(ii[WS(is, 1)]));
Chris@42 333 T5f = LD(&(ii[WS(is, 23)]), ivs, &(ii[WS(is, 1)]));
Chris@42 334 T1E = LD(&(ri[WS(is, 7)]), ivs, &(ri[WS(is, 1)]));
Chris@42 335 T1F = LD(&(ri[WS(is, 39)]), ivs, &(ri[WS(is, 1)]));
Chris@42 336 T59 = LD(&(ii[WS(is, 7)]), ivs, &(ii[WS(is, 1)]));
Chris@42 337 T5a = LD(&(ii[WS(is, 39)]), ivs, &(ii[WS(is, 1)]));
Chris@42 338 }
Chris@42 339 {
Chris@42 340 V Tbo, T1D, Tdv, T5h, T5N, T1K, Tdw, Tbr, T5O, T5c;
Chris@42 341 {
Chris@42 342 V T1J, T5d, Tbq, T5g, T1G, T58, Tbp, T5b;
Chris@42 343 Tbo = VSUB(T1z, T1C);
Chris@42 344 T1D = VADD(T1z, T1C);
Chris@42 345 T1J = VADD(T1H, T1I);
Chris@42 346 T5d = VSUB(T1H, T1I);
Chris@42 347 Tbq = VADD(T5e, T5f);
Chris@42 348 T5g = VSUB(T5e, T5f);
Chris@42 349 T1G = VADD(T1E, T1F);
Chris@42 350 T58 = VSUB(T1E, T1F);
Chris@42 351 Tbp = VADD(T59, T5a);
Chris@42 352 T5b = VSUB(T59, T5a);
Chris@42 353 Tba = VSUB(Tb8, Tb9);
Chris@42 354 Tdv = VADD(Tb8, Tb9);
Chris@42 355 T57 = VADD(T53, T56);
Chris@42 356 T8l = VSUB(T56, T53);
Chris@42 357 T5h = VSUB(T5d, T5g);
Chris@42 358 T5N = VADD(T5d, T5g);
Chris@42 359 Tb7 = VSUB(T1J, T1G);
Chris@42 360 T1K = VADD(T1G, T1J);
Chris@42 361 Tdw = VADD(Tbp, Tbq);
Chris@42 362 Tbr = VSUB(Tbp, Tbq);
Chris@42 363 T5O = VSUB(T5b, T58);
Chris@42 364 T5c = VADD(T58, T5b);
Chris@42 365 }
Chris@42 366 T5M = VSUB(T5I, T5L);
Chris@42 367 T8w = VADD(T5I, T5L);
Chris@42 368 T1L = VADD(T1D, T1K);
Chris@42 369 TdA = VSUB(T1D, T1K);
Chris@42 370 Tdx = VSUB(Tdv, Tdw);
Chris@42 371 Teo = VADD(Tdv, Tdw);
Chris@42 372 Tbs = VADD(Tbo, Tbr);
Chris@42 373 Tct = VSUB(Tbo, Tbr);
Chris@42 374 T8m = VADD(T5O, T5N);
Chris@42 375 T5P = VSUB(T5N, T5O);
Chris@42 376 T8x = VADD(T5c, T5h);
Chris@42 377 T5i = VSUB(T5c, T5h);
Chris@42 378 }
Chris@42 379 }
Chris@42 380 }
Chris@42 381 }
Chris@42 382 {
Chris@42 383 V T4e, T82, T8d, T4T, T4W, T83, T4p, T8e;
Chris@42 384 {
Chris@42 385 V T7T, T3R, T42, T7W, T3Y, T7X, T45, T7U;
Chris@42 386 {
Chris@42 387 V T40, TN, T2Y, T3Q, T2Q, T3P, TQ, T41, T2T, T3V, TX, T2Z, TS, TT, T2V;
Chris@42 388 V T2W;
Chris@42 389 {
Chris@42 390 V T2O, T2P, TO, TP, TL, TM;
Chris@42 391 TL = LD(&(ri[WS(is, 62)]), ivs, &(ri[0]));
Chris@42 392 TM = LD(&(ri[WS(is, 30)]), ivs, &(ri[0]));
Chris@42 393 T5Q = VFNMS(LDK(KP707106781), T5P, T5M);
Chris@42 394 T6V = VFMA(LDK(KP707106781), T5P, T5M);
Chris@42 395 T8y = VFMA(LDK(KP707106781), T8x, T8w);
Chris@42 396 T9z = VFNMS(LDK(KP707106781), T8x, T8w);
Chris@42 397 T5j = VFNMS(LDK(KP707106781), T5i, T57);
Chris@42 398 T6Y = VFMA(LDK(KP707106781), T5i, T57);
Chris@42 399 Tbb = VADD(Tb7, Tba);
Chris@42 400 Tcw = VSUB(Tba, Tb7);
Chris@42 401 T8n = VFMA(LDK(KP707106781), T8m, T8l);
Chris@42 402 T9C = VFNMS(LDK(KP707106781), T8m, T8l);
Chris@42 403 T40 = VSUB(TL, TM);
Chris@42 404 TN = VADD(TL, TM);
Chris@42 405 T2O = LD(&(ii[WS(is, 62)]), ivs, &(ii[0]));
Chris@42 406 T2P = LD(&(ii[WS(is, 30)]), ivs, &(ii[0]));
Chris@42 407 TO = LD(&(ri[WS(is, 14)]), ivs, &(ri[0]));
Chris@42 408 TP = LD(&(ri[WS(is, 46)]), ivs, &(ri[0]));
Chris@42 409 {
Chris@42 410 V T2R, T2S, TV, TW;
Chris@42 411 T2R = LD(&(ii[WS(is, 14)]), ivs, &(ii[0]));
Chris@42 412 T2S = LD(&(ii[WS(is, 46)]), ivs, &(ii[0]));
Chris@42 413 TV = LD(&(ri[WS(is, 54)]), ivs, &(ri[0]));
Chris@42 414 TW = LD(&(ri[WS(is, 22)]), ivs, &(ri[0]));
Chris@42 415 T2Y = LD(&(ii[WS(is, 54)]), ivs, &(ii[0]));
Chris@42 416 T3Q = VSUB(T2O, T2P);
Chris@42 417 T2Q = VADD(T2O, T2P);
Chris@42 418 T3P = VSUB(TO, TP);
Chris@42 419 TQ = VADD(TO, TP);
Chris@42 420 T41 = VSUB(T2R, T2S);
Chris@42 421 T2T = VADD(T2R, T2S);
Chris@42 422 T3V = VSUB(TV, TW);
Chris@42 423 TX = VADD(TV, TW);
Chris@42 424 T2Z = LD(&(ii[WS(is, 22)]), ivs, &(ii[0]));
Chris@42 425 TS = LD(&(ri[WS(is, 6)]), ivs, &(ri[0]));
Chris@42 426 TT = LD(&(ri[WS(is, 38)]), ivs, &(ri[0]));
Chris@42 427 T2V = LD(&(ii[WS(is, 6)]), ivs, &(ii[0]));
Chris@42 428 T2W = LD(&(ii[WS(is, 38)]), ivs, &(ii[0]));
Chris@42 429 }
Chris@42 430 }
Chris@42 431 {
Chris@42 432 V TaA, TR, Tay, T2U, T3W, T30, TU, T3S, T2X, T3T;
Chris@42 433 TaA = VSUB(TN, TQ);
Chris@42 434 TR = VADD(TN, TQ);
Chris@42 435 Tay = VSUB(T2Q, T2T);
Chris@42 436 T2U = VADD(T2Q, T2T);
Chris@42 437 T3W = VSUB(T2Y, T2Z);
Chris@42 438 T30 = VADD(T2Y, T2Z);
Chris@42 439 TU = VADD(TS, TT);
Chris@42 440 T3S = VSUB(TS, TT);
Chris@42 441 T2X = VADD(T2V, T2W);
Chris@42 442 T3T = VSUB(T2V, T2W);
Chris@42 443 {
Chris@42 444 V T3X, T43, Tax, TY, T31, TaB, T3U, T44;
Chris@42 445 T7T = VSUB(T3Q, T3P);
Chris@42 446 T3R = VADD(T3P, T3Q);
Chris@42 447 T3X = VSUB(T3V, T3W);
Chris@42 448 T43 = VADD(T3V, T3W);
Chris@42 449 Tax = VSUB(TX, TU);
Chris@42 450 TY = VADD(TU, TX);
Chris@42 451 T31 = VADD(T2X, T30);
Chris@42 452 TaB = VSUB(T2X, T30);
Chris@42 453 T3U = VADD(T3S, T3T);
Chris@42 454 T44 = VSUB(T3T, T3S);
Chris@42 455 T42 = VSUB(T40, T41);
Chris@42 456 T7W = VADD(T40, T41);
Chris@42 457 Tch = VSUB(Tay, Tax);
Chris@42 458 Taz = VADD(Tax, Tay);
Chris@42 459 Tdf = VSUB(TR, TY);
Chris@42 460 TZ = VADD(TR, TY);
Chris@42 461 Tdg = VSUB(T2U, T31);
Chris@42 462 T32 = VADD(T2U, T31);
Chris@42 463 Tci = VSUB(TaA, TaB);
Chris@42 464 TaC = VADD(TaA, TaB);
Chris@42 465 T3Y = VSUB(T3U, T3X);
Chris@42 466 T7X = VADD(T3U, T3X);
Chris@42 467 T45 = VSUB(T43, T44);
Chris@42 468 T7U = VADD(T44, T43);
Chris@42 469 }
Chris@42 470 }
Chris@42 471 }
Chris@42 472 {
Chris@42 473 V T4P, T14, T4l, TaH, T4d, T4a, T17, TaI, T4S, T4k, T1e, T4m, T19, T1a, T4g;
Chris@42 474 V T4h;
Chris@42 475 {
Chris@42 476 V T4b, T4c, T15, T16, T12, T13;
Chris@42 477 T12 = LD(&(ri[WS(is, 1)]), ivs, &(ri[WS(is, 1)]));
Chris@42 478 T13 = LD(&(ri[WS(is, 33)]), ivs, &(ri[WS(is, 1)]));
Chris@42 479 T4b = LD(&(ii[WS(is, 1)]), ivs, &(ii[WS(is, 1)]));
Chris@42 480 T6J = VFMA(LDK(KP707106781), T3Y, T3R);
Chris@42 481 T3Z = VFNMS(LDK(KP707106781), T3Y, T3R);
Chris@42 482 T9n = VFNMS(LDK(KP707106781), T7U, T7T);
Chris@42 483 T7V = VFMA(LDK(KP707106781), T7U, T7T);
Chris@42 484 T9o = VFNMS(LDK(KP707106781), T7X, T7W);
Chris@42 485 T7Y = VFMA(LDK(KP707106781), T7X, T7W);
Chris@42 486 T6K = VFMA(LDK(KP707106781), T45, T42);
Chris@42 487 T46 = VFNMS(LDK(KP707106781), T45, T42);
Chris@42 488 T4P = VSUB(T12, T13);
Chris@42 489 T14 = VADD(T12, T13);
Chris@42 490 T4c = LD(&(ii[WS(is, 33)]), ivs, &(ii[WS(is, 1)]));
Chris@42 491 T15 = LD(&(ri[WS(is, 17)]), ivs, &(ri[WS(is, 1)]));
Chris@42 492 T16 = LD(&(ri[WS(is, 49)]), ivs, &(ri[WS(is, 1)]));
Chris@42 493 {
Chris@42 494 V T4Q, T4R, T1c, T1d;
Chris@42 495 T4Q = LD(&(ii[WS(is, 17)]), ivs, &(ii[WS(is, 1)]));
Chris@42 496 T4R = LD(&(ii[WS(is, 49)]), ivs, &(ii[WS(is, 1)]));
Chris@42 497 T1c = LD(&(ri[WS(is, 57)]), ivs, &(ri[WS(is, 1)]));
Chris@42 498 T1d = LD(&(ri[WS(is, 25)]), ivs, &(ri[WS(is, 1)]));
Chris@42 499 T4l = LD(&(ii[WS(is, 57)]), ivs, &(ii[WS(is, 1)]));
Chris@42 500 TaH = VADD(T4b, T4c);
Chris@42 501 T4d = VSUB(T4b, T4c);
Chris@42 502 T4a = VSUB(T15, T16);
Chris@42 503 T17 = VADD(T15, T16);
Chris@42 504 TaI = VADD(T4Q, T4R);
Chris@42 505 T4S = VSUB(T4Q, T4R);
Chris@42 506 T4k = VSUB(T1c, T1d);
Chris@42 507 T1e = VADD(T1c, T1d);
Chris@42 508 T4m = LD(&(ii[WS(is, 25)]), ivs, &(ii[WS(is, 1)]));
Chris@42 509 T19 = LD(&(ri[WS(is, 9)]), ivs, &(ri[WS(is, 1)]));
Chris@42 510 T1a = LD(&(ri[WS(is, 41)]), ivs, &(ri[WS(is, 1)]));
Chris@42 511 T4g = LD(&(ii[WS(is, 9)]), ivs, &(ii[WS(is, 1)]));
Chris@42 512 T4h = LD(&(ii[WS(is, 41)]), ivs, &(ii[WS(is, 1)]));
Chris@42 513 }
Chris@42 514 }
Chris@42 515 {
Chris@42 516 V TaX, T18, T4n, TaZ, TaJ, Tdk, T1b, T4f, TaY, T4i;
Chris@42 517 TaX = VSUB(T14, T17);
Chris@42 518 T18 = VADD(T14, T17);
Chris@42 519 T4n = VSUB(T4l, T4m);
Chris@42 520 TaZ = VADD(T4l, T4m);
Chris@42 521 TaJ = VSUB(TaH, TaI);
Chris@42 522 Tdk = VADD(TaH, TaI);
Chris@42 523 T1b = VADD(T19, T1a);
Chris@42 524 T4f = VSUB(T19, T1a);
Chris@42 525 TaY = VADD(T4g, T4h);
Chris@42 526 T4i = VSUB(T4g, T4h);
Chris@42 527 T4e = VADD(T4a, T4d);
Chris@42 528 T82 = VSUB(T4d, T4a);
Chris@42 529 {
Chris@42 530 V T4U, T4o, T1f, TaG, Tdl, Tb0, T4V, T4j;
Chris@42 531 T8d = VADD(T4P, T4S);
Chris@42 532 T4T = VSUB(T4P, T4S);
Chris@42 533 T4U = VADD(T4k, T4n);
Chris@42 534 T4o = VSUB(T4k, T4n);
Chris@42 535 T1f = VADD(T1b, T1e);
Chris@42 536 TaG = VSUB(T1e, T1b);
Chris@42 537 Tdl = VADD(TaY, TaZ);
Chris@42 538 Tb0 = VSUB(TaY, TaZ);
Chris@42 539 T4V = VSUB(T4i, T4f);
Chris@42 540 T4j = VADD(T4f, T4i);
Chris@42 541 Tdp = VSUB(T18, T1f);
Chris@42 542 T1g = VADD(T18, T1f);
Chris@42 543 Tej = VADD(Tdk, Tdl);
Chris@42 544 Tdm = VSUB(Tdk, Tdl);
Chris@42 545 Tcm = VSUB(TaX, Tb0);
Chris@42 546 Tb1 = VADD(TaX, Tb0);
Chris@42 547 T4W = VSUB(T4U, T4V);
Chris@42 548 T83 = VADD(T4V, T4U);
Chris@42 549 T4p = VSUB(T4j, T4o);
Chris@42 550 T8e = VADD(T4j, T4o);
Chris@42 551 Tcp = VSUB(TaJ, TaG);
Chris@42 552 TaK = VADD(TaG, TaJ);
Chris@42 553 }
Chris@42 554 }
Chris@42 555 }
Chris@42 556 }
Chris@42 557 {
Chris@42 558 V T1n, Tdq, T4r, T1q, TaR, T4z, Tb2, TaP, T4M, T4Y, T4w, T1t, TaS, T4u, T8g;
Chris@42 559 V T87;
Chris@42 560 {
Chris@42 561 V T1r, T85, T4L, TaO, TaN, T86, T4G, T1s, T4s, T4t;
Chris@42 562 {
Chris@42 563 V T1h, T1i, T4I, T4J, T1k, T1l, T4D, T4E;
Chris@42 564 T1h = LD(&(ri[WS(is, 5)]), ivs, &(ri[WS(is, 1)]));
Chris@42 565 T6O = VFMA(LDK(KP707106781), T4W, T4T);
Chris@42 566 T4X = VFNMS(LDK(KP707106781), T4W, T4T);
Chris@42 567 T9s = VFNMS(LDK(KP707106781), T8e, T8d);
Chris@42 568 T8f = VFMA(LDK(KP707106781), T8e, T8d);
Chris@42 569 T6R = VFMA(LDK(KP707106781), T4p, T4e);
Chris@42 570 T4q = VFNMS(LDK(KP707106781), T4p, T4e);
Chris@42 571 T9v = VFNMS(LDK(KP707106781), T83, T82);
Chris@42 572 T84 = VFMA(LDK(KP707106781), T83, T82);
Chris@42 573 T1i = LD(&(ri[WS(is, 37)]), ivs, &(ri[WS(is, 1)]));
Chris@42 574 T4I = LD(&(ii[WS(is, 5)]), ivs, &(ii[WS(is, 1)]));
Chris@42 575 T4J = LD(&(ii[WS(is, 37)]), ivs, &(ii[WS(is, 1)]));
Chris@42 576 T1k = LD(&(ri[WS(is, 21)]), ivs, &(ri[WS(is, 1)]));
Chris@42 577 T1l = LD(&(ri[WS(is, 53)]), ivs, &(ri[WS(is, 1)]));
Chris@42 578 T4D = LD(&(ii[WS(is, 21)]), ivs, &(ii[WS(is, 1)]));
Chris@42 579 T4E = LD(&(ii[WS(is, 53)]), ivs, &(ii[WS(is, 1)]));
Chris@42 580 {
Chris@42 581 V T1o, T4C, T1j, TaL, T4K, T4H, T1m, TaM, T4F, T1p, T4x, T4y;
Chris@42 582 T1o = LD(&(ri[WS(is, 61)]), ivs, &(ri[WS(is, 1)]));
Chris@42 583 T4C = VSUB(T1h, T1i);
Chris@42 584 T1j = VADD(T1h, T1i);
Chris@42 585 TaL = VADD(T4I, T4J);
Chris@42 586 T4K = VSUB(T4I, T4J);
Chris@42 587 T4H = VSUB(T1k, T1l);
Chris@42 588 T1m = VADD(T1k, T1l);
Chris@42 589 TaM = VADD(T4D, T4E);
Chris@42 590 T4F = VSUB(T4D, T4E);
Chris@42 591 T1p = LD(&(ri[WS(is, 29)]), ivs, &(ri[WS(is, 1)]));
Chris@42 592 T4x = LD(&(ii[WS(is, 61)]), ivs, &(ii[WS(is, 1)]));
Chris@42 593 T4y = LD(&(ii[WS(is, 29)]), ivs, &(ii[WS(is, 1)]));
Chris@42 594 T1r = LD(&(ri[WS(is, 13)]), ivs, &(ri[WS(is, 1)]));
Chris@42 595 T85 = VSUB(T4K, T4H);
Chris@42 596 T4L = VADD(T4H, T4K);
Chris@42 597 TaO = VSUB(T1j, T1m);
Chris@42 598 T1n = VADD(T1j, T1m);
Chris@42 599 Tdq = VADD(TaL, TaM);
Chris@42 600 TaN = VSUB(TaL, TaM);
Chris@42 601 T86 = VADD(T4C, T4F);
Chris@42 602 T4G = VSUB(T4C, T4F);
Chris@42 603 T4r = VSUB(T1o, T1p);
Chris@42 604 T1q = VADD(T1o, T1p);
Chris@42 605 TaR = VADD(T4x, T4y);
Chris@42 606 T4z = VSUB(T4x, T4y);
Chris@42 607 T1s = LD(&(ri[WS(is, 45)]), ivs, &(ri[WS(is, 1)]));
Chris@42 608 T4s = LD(&(ii[WS(is, 13)]), ivs, &(ii[WS(is, 1)]));
Chris@42 609 T4t = LD(&(ii[WS(is, 45)]), ivs, &(ii[WS(is, 1)]));
Chris@42 610 }
Chris@42 611 }
Chris@42 612 Tb2 = VADD(TaO, TaN);
Chris@42 613 TaP = VSUB(TaN, TaO);
Chris@42 614 T4M = VFNMS(LDK(KP414213562), T4L, T4G);
Chris@42 615 T4Y = VFMA(LDK(KP414213562), T4G, T4L);
Chris@42 616 T4w = VSUB(T1r, T1s);
Chris@42 617 T1t = VADD(T1r, T1s);
Chris@42 618 TaS = VADD(T4s, T4t);
Chris@42 619 T4u = VSUB(T4s, T4t);
Chris@42 620 T8g = VFMA(LDK(KP414213562), T85, T86);
Chris@42 621 T87 = VFNMS(LDK(KP414213562), T86, T85);
Chris@42 622 }
Chris@42 623 {
Chris@42 624 V T1W, T8o, T5E, Tbf, Tbe, T8p, T5z, T1X, T5l, T5m;
Chris@42 625 {
Chris@42 626 V T5B, T5v, T1O, T5C, T1P, T1Q, T5w, T5x;
Chris@42 627 {
Chris@42 628 V T1M, T88, T4A, T1u, TaQ, Tdr, TaT, T89, T4v, T1N, TaU, Tb3;
Chris@42 629 T1M = LD(&(ri[WS(is, 3)]), ivs, &(ri[WS(is, 1)]));
Chris@42 630 T88 = VSUB(T4z, T4w);
Chris@42 631 T4A = VADD(T4w, T4z);
Chris@42 632 T1u = VADD(T1q, T1t);
Chris@42 633 TaQ = VSUB(T1q, T1t);
Chris@42 634 Tdr = VADD(TaR, TaS);
Chris@42 635 TaT = VSUB(TaR, TaS);
Chris@42 636 T89 = VADD(T4r, T4u);
Chris@42 637 T4v = VSUB(T4r, T4u);
Chris@42 638 T1N = LD(&(ri[WS(is, 35)]), ivs, &(ri[WS(is, 1)]));
Chris@42 639 T5B = LD(&(ii[WS(is, 3)]), ivs, &(ii[WS(is, 1)]));
Chris@42 640 Tdn = VSUB(T1u, T1n);
Chris@42 641 T1v = VADD(T1n, T1u);
Chris@42 642 Tek = VADD(Tdq, Tdr);
Chris@42 643 Tds = VSUB(Tdq, Tdr);
Chris@42 644 TaU = VADD(TaQ, TaT);
Chris@42 645 Tb3 = VSUB(TaQ, TaT);
Chris@42 646 {
Chris@42 647 V T8a, T8h, T4Z, T4B;
Chris@42 648 T8a = VFMA(LDK(KP414213562), T89, T88);
Chris@42 649 T8h = VFNMS(LDK(KP414213562), T88, T89);
Chris@42 650 T4Z = VFNMS(LDK(KP414213562), T4v, T4A);
Chris@42 651 T4B = VFMA(LDK(KP414213562), T4A, T4v);
Chris@42 652 T5v = VSUB(T1M, T1N);
Chris@42 653 T1O = VADD(T1M, T1N);
Chris@42 654 Tcn = VSUB(TaU, TaP);
Chris@42 655 TaV = VADD(TaP, TaU);
Chris@42 656 Tcq = VSUB(Tb2, Tb3);
Chris@42 657 Tb4 = VADD(Tb2, Tb3);
Chris@42 658 T9t = VSUB(T8a, T87);
Chris@42 659 T8b = VADD(T87, T8a);
Chris@42 660 T9w = VSUB(T8g, T8h);
Chris@42 661 T8i = VADD(T8g, T8h);
Chris@42 662 T6S = VADD(T4Y, T4Z);
Chris@42 663 T50 = VSUB(T4Y, T4Z);
Chris@42 664 T6P = VADD(T4M, T4B);
Chris@42 665 T4N = VSUB(T4B, T4M);
Chris@42 666 T5C = LD(&(ii[WS(is, 35)]), ivs, &(ii[WS(is, 1)]));
Chris@42 667 }
Chris@42 668 }
Chris@42 669 T1P = LD(&(ri[WS(is, 19)]), ivs, &(ri[WS(is, 1)]));
Chris@42 670 T1Q = LD(&(ri[WS(is, 51)]), ivs, &(ri[WS(is, 1)]));
Chris@42 671 T5w = LD(&(ii[WS(is, 19)]), ivs, &(ii[WS(is, 1)]));
Chris@42 672 T5x = LD(&(ii[WS(is, 51)]), ivs, &(ii[WS(is, 1)]));
Chris@42 673 {
Chris@42 674 V T5q, Tbc, T5D, T5A, T1R, Tbd, T5y, T5r, T1T, T1U;
Chris@42 675 T1T = LD(&(ri[WS(is, 59)]), ivs, &(ri[WS(is, 1)]));
Chris@42 676 T1U = LD(&(ri[WS(is, 27)]), ivs, &(ri[WS(is, 1)]));
Chris@42 677 T5q = LD(&(ii[WS(is, 59)]), ivs, &(ii[WS(is, 1)]));
Chris@42 678 Tbc = VADD(T5B, T5C);
Chris@42 679 T5D = VSUB(T5B, T5C);
Chris@42 680 T5A = VSUB(T1P, T1Q);
Chris@42 681 T1R = VADD(T1P, T1Q);
Chris@42 682 Tbd = VADD(T5w, T5x);
Chris@42 683 T5y = VSUB(T5w, T5x);
Chris@42 684 T5k = VSUB(T1T, T1U);
Chris@42 685 T1V = VADD(T1T, T1U);
Chris@42 686 T5r = LD(&(ii[WS(is, 27)]), ivs, &(ii[WS(is, 1)]));
Chris@42 687 T1W = LD(&(ri[WS(is, 11)]), ivs, &(ri[WS(is, 1)]));
Chris@42 688 T8o = VSUB(T5D, T5A);
Chris@42 689 T5E = VADD(T5A, T5D);
Chris@42 690 Tbf = VSUB(T1O, T1R);
Chris@42 691 T1S = VADD(T1O, T1R);
Chris@42 692 TdB = VADD(Tbc, Tbd);
Chris@42 693 Tbe = VSUB(Tbc, Tbd);
Chris@42 694 T8p = VADD(T5v, T5y);
Chris@42 695 T5z = VSUB(T5v, T5y);
Chris@42 696 Tbi = VADD(T5q, T5r);
Chris@42 697 T5s = VSUB(T5q, T5r);
Chris@42 698 T1X = LD(&(ri[WS(is, 43)]), ivs, &(ri[WS(is, 1)]));
Chris@42 699 T5l = LD(&(ii[WS(is, 11)]), ivs, &(ii[WS(is, 1)]));
Chris@42 700 T5m = LD(&(ii[WS(is, 43)]), ivs, &(ii[WS(is, 1)]));
Chris@42 701 }
Chris@42 702 }
Chris@42 703 Tbt = VADD(Tbf, Tbe);
Chris@42 704 Tbg = VSUB(Tbe, Tbf);
Chris@42 705 T5F = VFNMS(LDK(KP414213562), T5E, T5z);
Chris@42 706 T5R = VFMA(LDK(KP414213562), T5z, T5E);
Chris@42 707 T5p = VSUB(T1W, T1X);
Chris@42 708 T1Y = VADD(T1W, T1X);
Chris@42 709 Tbj = VADD(T5l, T5m);
Chris@42 710 T5n = VSUB(T5l, T5m);
Chris@42 711 T8z = VFMA(LDK(KP414213562), T8o, T8p);
Chris@42 712 T8q = VFNMS(LDK(KP414213562), T8p, T8o);
Chris@42 713 }
Chris@42 714 }
Chris@42 715 }
Chris@42 716 }
Chris@42 717 {
Chris@42 718 V Tbm, Tbv, T9A, T8u, T9D, T8B, T6Z, T5T, T6W, T5G, TeL, TeM, TeN, TeO, TeR;
Chris@42 719 V TeS, TeT, TeU, TeV, TeW, TeX, TeY, TeZ, Tf0, Tf1, Tf2, Tf3, Tf4, Tf5, Tf6;
Chris@42 720 V Tf7, Tf8, Tf9, Tfa, Tfb, Tfc, TbE, Tao, Tfd, Tfe, Td7, Td8, Tff, Tfg, Tfh;
Chris@42 721 V Tfi, Tfj, Tfk, Tfl, Tfm, Tfn, Tfo, Tfp, Tfq, Tfr, Tfs;
Chris@42 722 {
Chris@42 723 V Tel, Tdy, TdD, Tcu, Tcx, Teq, Tei, Ten, Tex, Teh, TeB, Tev, Te9, Tec;
Chris@42 724 {
Chris@42 725 V Tef, Teu, TeE, TeD, T11, TeF, T1w, T21, Tet, T2y, T33, Teg, T20;
Chris@42 726 {
Chris@42 727 V Tv, T8r, T5t, T1Z, Tbh, TdC, Tbk, T8s, T5o, T10, Tep, Tbl, Tbu;
Chris@42 728 Tef = VSUB(Tf, Tu);
Chris@42 729 Tv = VADD(Tf, Tu);
Chris@42 730 T8r = VSUB(T5s, T5p);
Chris@42 731 T5t = VADD(T5p, T5s);
Chris@42 732 T1Z = VADD(T1V, T1Y);
Chris@42 733 Tbh = VSUB(T1V, T1Y);
Chris@42 734 TdC = VADD(Tbi, Tbj);
Chris@42 735 Tbk = VSUB(Tbi, Tbj);
Chris@42 736 T8s = VADD(T5k, T5n);
Chris@42 737 T5o = VSUB(T5k, T5n);
Chris@42 738 T10 = VADD(TK, TZ);
Chris@42 739 Teu = VSUB(TZ, TK);
Chris@42 740 Tel = VSUB(Tej, Tek);
Chris@42 741 TeE = VADD(Tej, Tek);
Chris@42 742 Tdy = VSUB(T1Z, T1S);
Chris@42 743 T20 = VADD(T1S, T1Z);
Chris@42 744 Tep = VADD(TdB, TdC);
Chris@42 745 TdD = VSUB(TdB, TdC);
Chris@42 746 Tbl = VADD(Tbh, Tbk);
Chris@42 747 Tbu = VSUB(Tbh, Tbk);
Chris@42 748 {
Chris@42 749 V T8t, T8A, T5S, T5u;
Chris@42 750 T8t = VFMA(LDK(KP414213562), T8s, T8r);
Chris@42 751 T8A = VFNMS(LDK(KP414213562), T8r, T8s);
Chris@42 752 T5S = VFNMS(LDK(KP414213562), T5o, T5t);
Chris@42 753 T5u = VFMA(LDK(KP414213562), T5t, T5o);
Chris@42 754 TeD = VSUB(Tv, T10);
Chris@42 755 T11 = VADD(Tv, T10);
Chris@42 756 Tcu = VSUB(Tbl, Tbg);
Chris@42 757 Tbm = VADD(Tbg, Tbl);
Chris@42 758 Tcx = VSUB(Tbt, Tbu);
Chris@42 759 Tbv = VADD(Tbt, Tbu);
Chris@42 760 T9A = VSUB(T8t, T8q);
Chris@42 761 T8u = VADD(T8q, T8t);
Chris@42 762 T9D = VSUB(T8z, T8A);
Chris@42 763 T8B = VADD(T8z, T8A);
Chris@42 764 T6Z = VADD(T5R, T5S);
Chris@42 765 T5T = VSUB(T5R, T5S);
Chris@42 766 T6W = VADD(T5F, T5u);
Chris@42 767 T5G = VSUB(T5u, T5F);
Chris@42 768 TeF = VADD(Teo, Tep);
Chris@42 769 Teq = VSUB(Teo, Tep);
Chris@42 770 }
Chris@42 771 }
Chris@42 772 Tei = VSUB(T1g, T1v);
Chris@42 773 T1w = VADD(T1g, T1v);
Chris@42 774 T21 = VADD(T1L, T20);
Chris@42 775 Ten = VSUB(T1L, T20);
Chris@42 776 Tet = VSUB(T2i, T2x);
Chris@42 777 T2y = VADD(T2i, T2x);
Chris@42 778 T33 = VADD(T2N, T32);
Chris@42 779 Teg = VSUB(T2N, T32);
Chris@42 780 {
Chris@42 781 V TeI, TeG, T23, T22, TeH, T34;
Chris@42 782 TeI = VADD(TeE, TeF);
Chris@42 783 TeG = VSUB(TeE, TeF);
Chris@42 784 T23 = VSUB(T21, T1w);
Chris@42 785 T22 = VADD(T1w, T21);
Chris@42 786 TeH = VADD(T2y, T33);
Chris@42 787 T34 = VSUB(T2y, T33);
Chris@42 788 Tex = VSUB(Tef, Teg);
Chris@42 789 Teh = VADD(Tef, Teg);
Chris@42 790 TeJ = VSUB(TeD, TeG);
Chris@42 791 STM4(&(ro[48]), TeJ, ovs, &(ro[0]));
Chris@42 792 TeK = VADD(TeD, TeG);
Chris@42 793 STM4(&(ro[16]), TeK, ovs, &(ro[0]));
Chris@42 794 TeL = VADD(T11, T22);
Chris@42 795 STM4(&(ro[0]), TeL, ovs, &(ro[0]));
Chris@42 796 TeM = VSUB(T11, T22);
Chris@42 797 STM4(&(ro[32]), TeM, ovs, &(ro[0]));
Chris@42 798 TeN = VADD(TeH, TeI);
Chris@42 799 STM4(&(io[0]), TeN, ovs, &(io[0]));
Chris@42 800 TeO = VSUB(TeH, TeI);
Chris@42 801 STM4(&(io[32]), TeO, ovs, &(io[0]));
Chris@42 802 TeP = VSUB(T34, T23);
Chris@42 803 STM4(&(io[48]), TeP, ovs, &(io[0]));
Chris@42 804 TeQ = VADD(T23, T34);
Chris@42 805 STM4(&(io[16]), TeQ, ovs, &(io[0]));
Chris@42 806 TeB = VADD(Teu, Tet);
Chris@42 807 Tev = VSUB(Tet, Teu);
Chris@42 808 }
Chris@42 809 }
Chris@42 810 {
Chris@42 811 V TdV, Tdb, TdJ, Te5, TdE, Tdz, TdZ, Tdo, Te6, Tdi, Teb, Te3, TdW, TdM, Tdt;
Chris@42 812 V TdY;
Chris@42 813 {
Chris@42 814 V TdL, Tde, Tey, Tem, Tez, Ter, Tdh, TdK, Te1, Te2;
Chris@42 815 TdV = VADD(Td9, Tda);
Chris@42 816 Tdb = VSUB(Td9, Tda);
Chris@42 817 TdJ = VSUB(TdH, TdI);
Chris@42 818 Te5 = VADD(TdI, TdH);
Chris@42 819 TdL = VADD(Tdd, Tdc);
Chris@42 820 Tde = VSUB(Tdc, Tdd);
Chris@42 821 Tey = VSUB(Tel, Tei);
Chris@42 822 Tem = VADD(Tei, Tel);
Chris@42 823 Tez = VADD(Ten, Teq);
Chris@42 824 Ter = VSUB(Ten, Teq);
Chris@42 825 Tdh = VADD(Tdf, Tdg);
Chris@42 826 TdK = VSUB(Tdf, Tdg);
Chris@42 827 TdE = VSUB(TdA, TdD);
Chris@42 828 Te1 = VADD(TdA, TdD);
Chris@42 829 Te2 = VADD(Tdy, Tdx);
Chris@42 830 Tdz = VSUB(Tdx, Tdy);
Chris@42 831 TdZ = VADD(Tdn, Tdm);
Chris@42 832 Tdo = VSUB(Tdm, Tdn);
Chris@42 833 {
Chris@42 834 V TeA, TeC, Tew, Tes;
Chris@42 835 TeA = VSUB(Tey, Tez);
Chris@42 836 TeC = VADD(Tey, Tez);
Chris@42 837 Tew = VSUB(Ter, Tem);
Chris@42 838 Tes = VADD(Tem, Ter);
Chris@42 839 Te6 = VADD(Tde, Tdh);
Chris@42 840 Tdi = VSUB(Tde, Tdh);
Chris@42 841 Teb = VFMA(LDK(KP414213562), Te1, Te2);
Chris@42 842 Te3 = VFNMS(LDK(KP414213562), Te2, Te1);
Chris@42 843 TdW = VADD(TdL, TdK);
Chris@42 844 TdM = VSUB(TdK, TdL);
Chris@42 845 TeR = VFMA(LDK(KP707106781), TeA, Tex);
Chris@42 846 STM4(&(ro[24]), TeR, ovs, &(ro[0]));
Chris@42 847 TeS = VFNMS(LDK(KP707106781), TeA, Tex);
Chris@42 848 STM4(&(ro[56]), TeS, ovs, &(ro[0]));
Chris@42 849 TeT = VFMA(LDK(KP707106781), TeC, TeB);
Chris@42 850 STM4(&(io[8]), TeT, ovs, &(io[0]));
Chris@42 851 TeU = VFNMS(LDK(KP707106781), TeC, TeB);
Chris@42 852 STM4(&(io[40]), TeU, ovs, &(io[0]));
Chris@42 853 TeV = VFMA(LDK(KP707106781), Tew, Tev);
Chris@42 854 STM4(&(io[24]), TeV, ovs, &(io[0]));
Chris@42 855 TeW = VFNMS(LDK(KP707106781), Tew, Tev);
Chris@42 856 STM4(&(io[56]), TeW, ovs, &(io[0]));
Chris@42 857 TeX = VFMA(LDK(KP707106781), Tes, Teh);
Chris@42 858 STM4(&(ro[8]), TeX, ovs, &(ro[0]));
Chris@42 859 TeY = VFNMS(LDK(KP707106781), Tes, Teh);
Chris@42 860 STM4(&(ro[40]), TeY, ovs, &(ro[0]));
Chris@42 861 Tdt = VSUB(Tdp, Tds);
Chris@42 862 TdY = VADD(Tdp, Tds);
Chris@42 863 }
Chris@42 864 }
Chris@42 865 {
Chris@42 866 V TdT, Tdj, TdP, TdN, TdR, Tdu, Tea, Te0, TdQ, TdF, TdX, Ted, Te7;
Chris@42 867 TdT = VFNMS(LDK(KP707106781), Tdi, Tdb);
Chris@42 868 Tdj = VFMA(LDK(KP707106781), Tdi, Tdb);
Chris@42 869 TdP = VFMA(LDK(KP707106781), TdM, TdJ);
Chris@42 870 TdN = VFNMS(LDK(KP707106781), TdM, TdJ);
Chris@42 871 TdR = VFNMS(LDK(KP414213562), Tdo, Tdt);
Chris@42 872 Tdu = VFMA(LDK(KP414213562), Tdt, Tdo);
Chris@42 873 Tea = VFNMS(LDK(KP414213562), TdY, TdZ);
Chris@42 874 Te0 = VFMA(LDK(KP414213562), TdZ, TdY);
Chris@42 875 TdQ = VFMA(LDK(KP414213562), Tdz, TdE);
Chris@42 876 TdF = VFNMS(LDK(KP414213562), TdE, Tdz);
Chris@42 877 Te9 = VFNMS(LDK(KP707106781), TdW, TdV);
Chris@42 878 TdX = VFMA(LDK(KP707106781), TdW, TdV);
Chris@42 879 Ted = VFMA(LDK(KP707106781), Te6, Te5);
Chris@42 880 Te7 = VFNMS(LDK(KP707106781), Te6, Te5);
Chris@42 881 {
Chris@42 882 V Tee, Te8, Te4, TdU, TdS, TdO, TdG;
Chris@42 883 Tee = VADD(Tea, Teb);
Chris@42 884 Tec = VSUB(Tea, Teb);
Chris@42 885 Te8 = VSUB(Te3, Te0);
Chris@42 886 Te4 = VADD(Te0, Te3);
Chris@42 887 TdU = VADD(TdR, TdQ);
Chris@42 888 TdS = VSUB(TdQ, TdR);
Chris@42 889 TdO = VADD(Tdu, TdF);
Chris@42 890 TdG = VSUB(Tdu, TdF);
Chris@42 891 TeZ = VFMA(LDK(KP923879532), Tee, Ted);
Chris@42 892 STM4(&(io[4]), TeZ, ovs, &(io[0]));
Chris@42 893 Tf0 = VFNMS(LDK(KP923879532), Tee, Ted);
Chris@42 894 STM4(&(io[36]), Tf0, ovs, &(io[0]));
Chris@42 895 Tf1 = VFMA(LDK(KP923879532), Te4, TdX);
Chris@42 896 STM4(&(ro[4]), Tf1, ovs, &(ro[0]));
Chris@42 897 Tf2 = VFNMS(LDK(KP923879532), Te4, TdX);
Chris@42 898 STM4(&(ro[36]), Tf2, ovs, &(ro[0]));
Chris@42 899 Tf3 = VFMA(LDK(KP923879532), TdU, TdT);
Chris@42 900 STM4(&(ro[60]), Tf3, ovs, &(ro[0]));
Chris@42 901 Tf4 = VFNMS(LDK(KP923879532), TdU, TdT);
Chris@42 902 STM4(&(ro[28]), Tf4, ovs, &(ro[0]));
Chris@42 903 Tf5 = VFMA(LDK(KP923879532), TdS, TdP);
Chris@42 904 STM4(&(io[12]), Tf5, ovs, &(io[0]));
Chris@42 905 Tf6 = VFNMS(LDK(KP923879532), TdS, TdP);
Chris@42 906 STM4(&(io[44]), Tf6, ovs, &(io[0]));
Chris@42 907 Tf7 = VFMA(LDK(KP923879532), TdO, TdN);
Chris@42 908 STM4(&(io[60]), Tf7, ovs, &(io[0]));
Chris@42 909 Tf8 = VFNMS(LDK(KP923879532), TdO, TdN);
Chris@42 910 STM4(&(io[28]), Tf8, ovs, &(io[0]));
Chris@42 911 Tf9 = VFMA(LDK(KP923879532), TdG, Tdj);
Chris@42 912 STM4(&(ro[12]), Tf9, ovs, &(ro[0]));
Chris@42 913 Tfa = VFNMS(LDK(KP923879532), TdG, Tdj);
Chris@42 914 STM4(&(ro[44]), Tfa, ovs, &(ro[0]));
Chris@42 915 Tfb = VFMA(LDK(KP923879532), Te8, Te7);
Chris@42 916 STM4(&(io[20]), Tfb, ovs, &(io[0]));
Chris@42 917 Tfc = VFNMS(LDK(KP923879532), Te8, Te7);
Chris@42 918 STM4(&(io[52]), Tfc, ovs, &(io[0]));
Chris@42 919 }
Chris@42 920 }
Chris@42 921 }
Chris@42 922 {
Chris@42 923 V TcF, TcE, Tcy, Tcv, TcT, Tco, TcP, Tcd, TcZ, TcD, Td0, Tck, Td4, TcX, Tcr;
Chris@42 924 V TcS;
Chris@42 925 {
Chris@42 926 V Tcc, TcC, Tcg, Tcj, TcV, TcW;
Chris@42 927 TbE = VADD(TbC, TbD);
Chris@42 928 Tcc = VSUB(TbC, TbD);
Chris@42 929 TcC = VSUB(Tan, Tak);
Chris@42 930 Tao = VADD(Tak, Tan);
Chris@42 931 TcF = VFNMS(LDK(KP414213562), Tce, Tcf);
Chris@42 932 Tcg = VFMA(LDK(KP414213562), Tcf, Tce);
Chris@42 933 Tcj = VFNMS(LDK(KP414213562), Tci, Tch);
Chris@42 934 TcE = VFMA(LDK(KP414213562), Tch, Tci);
Chris@42 935 Tcy = VFNMS(LDK(KP707106781), Tcx, Tcw);
Chris@42 936 TcV = VFMA(LDK(KP707106781), Tcx, Tcw);
Chris@42 937 TcW = VFMA(LDK(KP707106781), Tcu, Tct);
Chris@42 938 Tcv = VFNMS(LDK(KP707106781), Tcu, Tct);
Chris@42 939 TcT = VFMA(LDK(KP707106781), Tcn, Tcm);
Chris@42 940 Tco = VFNMS(LDK(KP707106781), Tcn, Tcm);
Chris@42 941 Tfd = VFMA(LDK(KP923879532), Tec, Te9);
Chris@42 942 STM4(&(ro[20]), Tfd, ovs, &(ro[0]));
Chris@42 943 Tfe = VFNMS(LDK(KP923879532), Tec, Te9);
Chris@42 944 STM4(&(ro[52]), Tfe, ovs, &(ro[0]));
Chris@42 945 TcP = VFNMS(LDK(KP707106781), Tcc, Tcb);
Chris@42 946 Tcd = VFMA(LDK(KP707106781), Tcc, Tcb);
Chris@42 947 TcZ = VFNMS(LDK(KP707106781), TcC, TcB);
Chris@42 948 TcD = VFMA(LDK(KP707106781), TcC, TcB);
Chris@42 949 Td0 = VADD(Tcg, Tcj);
Chris@42 950 Tck = VSUB(Tcg, Tcj);
Chris@42 951 Td4 = VFMA(LDK(KP198912367), TcV, TcW);
Chris@42 952 TcX = VFNMS(LDK(KP198912367), TcW, TcV);
Chris@42 953 Tcr = VFNMS(LDK(KP707106781), Tcq, Tcp);
Chris@42 954 TcS = VFMA(LDK(KP707106781), Tcq, Tcp);
Chris@42 955 }
Chris@42 956 {
Chris@42 957 V TcJ, Tcl, TcK, Tcs, TcQ, TcG, Td5, TcU, TcL, Tcz;
Chris@42 958 TcJ = VFNMS(LDK(KP923879532), Tck, Tcd);
Chris@42 959 Tcl = VFMA(LDK(KP923879532), Tck, Tcd);
Chris@42 960 TcK = VFNMS(LDK(KP668178637), Tco, Tcr);
Chris@42 961 Tcs = VFMA(LDK(KP668178637), Tcr, Tco);
Chris@42 962 TcQ = VADD(TcF, TcE);
Chris@42 963 TcG = VSUB(TcE, TcF);
Chris@42 964 Td5 = VFNMS(LDK(KP198912367), TcS, TcT);
Chris@42 965 TcU = VFMA(LDK(KP198912367), TcT, TcS);
Chris@42 966 TcL = VFMA(LDK(KP668178637), Tcv, Tcy);
Chris@42 967 Tcz = VFNMS(LDK(KP668178637), Tcy, Tcv);
Chris@42 968 {
Chris@42 969 V Td1, Td3, TcR, TcN, TcH, Td2, TcY, TcM, TcO, TcI, TcA, Td6;
Chris@42 970 Td1 = VFMA(LDK(KP923879532), Td0, TcZ);
Chris@42 971 Td3 = VFNMS(LDK(KP923879532), Td0, TcZ);
Chris@42 972 TcR = VFNMS(LDK(KP923879532), TcQ, TcP);
Chris@42 973 Td7 = VFMA(LDK(KP923879532), TcQ, TcP);
Chris@42 974 TcN = VFMA(LDK(KP923879532), TcG, TcD);
Chris@42 975 TcH = VFNMS(LDK(KP923879532), TcG, TcD);
Chris@42 976 Td2 = VADD(TcU, TcX);
Chris@42 977 TcY = VSUB(TcU, TcX);
Chris@42 978 TcM = VSUB(TcK, TcL);
Chris@42 979 TcO = VADD(TcK, TcL);
Chris@42 980 TcI = VSUB(Tcz, Tcs);
Chris@42 981 TcA = VADD(Tcs, Tcz);
Chris@42 982 Td6 = VSUB(Td4, Td5);
Chris@42 983 Td8 = VADD(Td5, Td4);
Chris@42 984 Tff = VFMA(LDK(KP980785280), TcY, TcR);
Chris@42 985 STM4(&(ro[14]), Tff, ovs, &(ro[0]));
Chris@42 986 Tfg = VFNMS(LDK(KP980785280), TcY, TcR);
Chris@42 987 STM4(&(ro[46]), Tfg, ovs, &(ro[0]));
Chris@42 988 Tfh = VFMA(LDK(KP831469612), TcM, TcJ);
Chris@42 989 STM4(&(ro[22]), Tfh, ovs, &(ro[0]));
Chris@42 990 Tfi = VFNMS(LDK(KP831469612), TcM, TcJ);
Chris@42 991 STM4(&(ro[54]), Tfi, ovs, &(ro[0]));
Chris@42 992 Tfj = VFMA(LDK(KP831469612), TcO, TcN);
Chris@42 993 STM4(&(io[6]), Tfj, ovs, &(io[0]));
Chris@42 994 Tfk = VFNMS(LDK(KP831469612), TcO, TcN);
Chris@42 995 STM4(&(io[38]), Tfk, ovs, &(io[0]));
Chris@42 996 Tfl = VFMA(LDK(KP831469612), TcI, TcH);
Chris@42 997 STM4(&(io[22]), Tfl, ovs, &(io[0]));
Chris@42 998 Tfm = VFNMS(LDK(KP831469612), TcI, TcH);
Chris@42 999 STM4(&(io[54]), Tfm, ovs, &(io[0]));
Chris@42 1000 Tfn = VFMA(LDK(KP831469612), TcA, Tcl);
Chris@42 1001 STM4(&(ro[6]), Tfn, ovs, &(ro[0]));
Chris@42 1002 Tfo = VFNMS(LDK(KP831469612), TcA, Tcl);
Chris@42 1003 STM4(&(ro[38]), Tfo, ovs, &(ro[0]));
Chris@42 1004 Tfp = VFMA(LDK(KP980785280), Td6, Td3);
Chris@42 1005 STM4(&(io[14]), Tfp, ovs, &(io[0]));
Chris@42 1006 Tfq = VFNMS(LDK(KP980785280), Td6, Td3);
Chris@42 1007 STM4(&(io[46]), Tfq, ovs, &(io[0]));
Chris@42 1008 Tfr = VFNMS(LDK(KP980785280), Td2, Td1);
Chris@42 1009 STM4(&(io[30]), Tfr, ovs, &(io[0]));
Chris@42 1010 Tfs = VFMA(LDK(KP980785280), Td2, Td1);
Chris@42 1011 STM4(&(io[62]), Tfs, ovs, &(io[0]));
Chris@42 1012 }
Chris@42 1013 }
Chris@42 1014 }
Chris@42 1015 }
Chris@42 1016 {
Chris@42 1017 V Tft, Tfu, Tfv, Tfw, Tfx, Tfy, Tfz, TfA, TfB, TfC, TfD, TfE, TfF, TfG, T3f;
Chris@42 1018 V T66, T63, T3u, TfL, TfM, TfN, TfO, TfP, TfQ, TfR, TfS, TfT, TfU, TfV, TfW;
Chris@42 1019 V TfX, TfY, TfZ, Tg0, Tc5, Tc8;
Chris@42 1020 {
Chris@42 1021 V TbH, TbG, Tbw, Tbn, TbV, TaW, TbR, Tap, Tc1, TbF, Tc2, TaE, Tc7, TbZ, Tb5;
Chris@42 1022 V TbU;
Chris@42 1023 {
Chris@42 1024 V Taw, TaD, TbX, TbY;
Chris@42 1025 TbH = VFMA(LDK(KP414213562), Tas, Tav);
Chris@42 1026 Taw = VFNMS(LDK(KP414213562), Tav, Tas);
Chris@42 1027 TaD = VFMA(LDK(KP414213562), TaC, Taz);
Chris@42 1028 TbG = VFNMS(LDK(KP414213562), Taz, TaC);
Chris@42 1029 Tbw = VFNMS(LDK(KP707106781), Tbv, Tbs);
Chris@42 1030 TbX = VFMA(LDK(KP707106781), Tbv, Tbs);
Chris@42 1031 TbY = VFMA(LDK(KP707106781), Tbm, Tbb);
Chris@42 1032 Tbn = VFNMS(LDK(KP707106781), Tbm, Tbb);
Chris@42 1033 TbV = VFMA(LDK(KP707106781), TaV, TaK);
Chris@42 1034 TaW = VFNMS(LDK(KP707106781), TaV, TaK);
Chris@42 1035 Tft = VFMA(LDK(KP980785280), Td8, Td7);
Chris@42 1036 STM4(&(ro[62]), Tft, ovs, &(ro[0]));
Chris@42 1037 Tfu = VFNMS(LDK(KP980785280), Td8, Td7);
Chris@42 1038 STM4(&(ro[30]), Tfu, ovs, &(ro[0]));
Chris@42 1039 TbR = VFMA(LDK(KP707106781), Tao, Tah);
Chris@42 1040 Tap = VFNMS(LDK(KP707106781), Tao, Tah);
Chris@42 1041 Tc1 = VFMA(LDK(KP707106781), TbE, TbB);
Chris@42 1042 TbF = VFNMS(LDK(KP707106781), TbE, TbB);
Chris@42 1043 Tc2 = VADD(Taw, TaD);
Chris@42 1044 TaE = VSUB(Taw, TaD);
Chris@42 1045 Tc7 = VFMA(LDK(KP198912367), TbX, TbY);
Chris@42 1046 TbZ = VFNMS(LDK(KP198912367), TbY, TbX);
Chris@42 1047 Tb5 = VFNMS(LDK(KP707106781), Tb4, Tb1);
Chris@42 1048 TbU = VFMA(LDK(KP707106781), Tb4, Tb1);
Chris@42 1049 }
Chris@42 1050 {
Chris@42 1051 V TbP, TaF, TbN, Tb6, TbS, TbI, Tc6, TbW, TbM, Tbx;
Chris@42 1052 TbP = VFNMS(LDK(KP923879532), TaE, Tap);
Chris@42 1053 TaF = VFMA(LDK(KP923879532), TaE, Tap);
Chris@42 1054 TbN = VFNMS(LDK(KP668178637), TaW, Tb5);
Chris@42 1055 Tb6 = VFMA(LDK(KP668178637), Tb5, TaW);
Chris@42 1056 TbS = VADD(TbH, TbG);
Chris@42 1057 TbI = VSUB(TbG, TbH);
Chris@42 1058 Tc6 = VFNMS(LDK(KP198912367), TbU, TbV);
Chris@42 1059 TbW = VFMA(LDK(KP198912367), TbV, TbU);
Chris@42 1060 TbM = VFMA(LDK(KP668178637), Tbn, Tbw);
Chris@42 1061 Tbx = VFNMS(LDK(KP668178637), Tbw, Tbn);
Chris@42 1062 {
Chris@42 1063 V Tc3, Tc9, TbT, TbL, TbJ, Tc4, Tc0, TbQ, TbO, TbK, Tby, Tca;
Chris@42 1064 Tc3 = VFNMS(LDK(KP923879532), Tc2, Tc1);
Chris@42 1065 Tc9 = VFMA(LDK(KP923879532), Tc2, Tc1);
Chris@42 1066 TbT = VFMA(LDK(KP923879532), TbS, TbR);
Chris@42 1067 Tc5 = VFNMS(LDK(KP923879532), TbS, TbR);
Chris@42 1068 TbL = VFMA(LDK(KP923879532), TbI, TbF);
Chris@42 1069 TbJ = VFNMS(LDK(KP923879532), TbI, TbF);
Chris@42 1070 Tc4 = VSUB(TbZ, TbW);
Chris@42 1071 Tc0 = VADD(TbW, TbZ);
Chris@42 1072 TbQ = VADD(TbN, TbM);
Chris@42 1073 TbO = VSUB(TbM, TbN);
Chris@42 1074 TbK = VADD(Tb6, Tbx);
Chris@42 1075 Tby = VSUB(Tb6, Tbx);
Chris@42 1076 Tca = VADD(Tc6, Tc7);
Chris@42 1077 Tc8 = VSUB(Tc6, Tc7);
Chris@42 1078 Tfv = VFMA(LDK(KP980785280), Tc0, TbT);
Chris@42 1079 STM4(&(ro[2]), Tfv, ovs, &(ro[0]));
Chris@42 1080 Tfw = VFNMS(LDK(KP980785280), Tc0, TbT);
Chris@42 1081 STM4(&(ro[34]), Tfw, ovs, &(ro[0]));
Chris@42 1082 Tfx = VFMA(LDK(KP831469612), TbQ, TbP);
Chris@42 1083 STM4(&(ro[58]), Tfx, ovs, &(ro[0]));
Chris@42 1084 Tfy = VFNMS(LDK(KP831469612), TbQ, TbP);
Chris@42 1085 STM4(&(ro[26]), Tfy, ovs, &(ro[0]));
Chris@42 1086 Tfz = VFMA(LDK(KP831469612), TbO, TbL);
Chris@42 1087 STM4(&(io[10]), Tfz, ovs, &(io[0]));
Chris@42 1088 TfA = VFNMS(LDK(KP831469612), TbO, TbL);
Chris@42 1089 STM4(&(io[42]), TfA, ovs, &(io[0]));
Chris@42 1090 TfB = VFMA(LDK(KP831469612), TbK, TbJ);
Chris@42 1091 STM4(&(io[58]), TfB, ovs, &(io[0]));
Chris@42 1092 TfC = VFNMS(LDK(KP831469612), TbK, TbJ);
Chris@42 1093 STM4(&(io[26]), TfC, ovs, &(io[0]));
Chris@42 1094 TfD = VFMA(LDK(KP831469612), Tby, TaF);
Chris@42 1095 STM4(&(ro[10]), TfD, ovs, &(ro[0]));
Chris@42 1096 TfE = VFNMS(LDK(KP831469612), Tby, TaF);
Chris@42 1097 STM4(&(ro[42]), TfE, ovs, &(ro[0]));
Chris@42 1098 TfF = VFMA(LDK(KP980785280), Tca, Tc9);
Chris@42 1099 STM4(&(io[2]), TfF, ovs, &(io[0]));
Chris@42 1100 TfG = VFNMS(LDK(KP980785280), Tca, Tc9);
Chris@42 1101 STM4(&(io[34]), TfG, ovs, &(io[0]));
Chris@42 1102 TfH = VFNMS(LDK(KP980785280), Tc4, Tc3);
Chris@42 1103 STM4(&(io[50]), TfH, ovs, &(io[0]));
Chris@42 1104 TfI = VFMA(LDK(KP980785280), Tc4, Tc3);
Chris@42 1105 STM4(&(io[18]), TfI, ovs, &(io[0]));
Chris@42 1106 }
Chris@42 1107 }
Chris@42 1108 }
Chris@42 1109 {
Chris@42 1110 V T70, T6X, T7h, T6F, T7x, T7m, T7w, T7p, T7s, T6M, T7c, T6U, T7r, T75, T7i;
Chris@42 1111 V T78, T7b, T6N;
Chris@42 1112 {
Chris@42 1113 V T6T, T6Q, T77, T6I, T6L, T76, T73, T74;
Chris@42 1114 {
Chris@42 1115 V T6D, T6E, T7k, T7l, T7n, T7o;
Chris@42 1116 T3f = VFMA(LDK(KP707106781), T3e, T37);
Chris@42 1117 T6D = VFNMS(LDK(KP707106781), T3e, T37);
Chris@42 1118 T6E = VADD(T65, T64);
Chris@42 1119 T66 = VSUB(T64, T65);
Chris@42 1120 T6T = VFNMS(LDK(KP923879532), T6S, T6R);
Chris@42 1121 T7k = VFMA(LDK(KP923879532), T6S, T6R);
Chris@42 1122 T7l = VFMA(LDK(KP923879532), T6P, T6O);
Chris@42 1123 T6Q = VFNMS(LDK(KP923879532), T6P, T6O);
Chris@42 1124 T70 = VFNMS(LDK(KP923879532), T6Z, T6Y);
Chris@42 1125 T7n = VFMA(LDK(KP923879532), T6Z, T6Y);
Chris@42 1126 T7o = VFMA(LDK(KP923879532), T6W, T6V);
Chris@42 1127 T6X = VFNMS(LDK(KP923879532), T6W, T6V);
Chris@42 1128 T77 = VFNMS(LDK(KP198912367), T6G, T6H);
Chris@42 1129 T6I = VFMA(LDK(KP198912367), T6H, T6G);
Chris@42 1130 TfJ = VFMA(LDK(KP980785280), Tc8, Tc5);
Chris@42 1131 STM4(&(ro[18]), TfJ, ovs, &(ro[0]));
Chris@42 1132 TfK = VFNMS(LDK(KP980785280), Tc8, Tc5);
Chris@42 1133 STM4(&(ro[50]), TfK, ovs, &(ro[0]));
Chris@42 1134 T7h = VFMA(LDK(KP923879532), T6E, T6D);
Chris@42 1135 T6F = VFNMS(LDK(KP923879532), T6E, T6D);
Chris@42 1136 T7x = VFNMS(LDK(KP098491403), T7k, T7l);
Chris@42 1137 T7m = VFMA(LDK(KP098491403), T7l, T7k);
Chris@42 1138 T7w = VFMA(LDK(KP098491403), T7n, T7o);
Chris@42 1139 T7p = VFNMS(LDK(KP098491403), T7o, T7n);
Chris@42 1140 T6L = VFNMS(LDK(KP198912367), T6K, T6J);
Chris@42 1141 T76 = VFMA(LDK(KP198912367), T6J, T6K);
Chris@42 1142 }
Chris@42 1143 T63 = VFMA(LDK(KP707106781), T62, T5Z);
Chris@42 1144 T73 = VFNMS(LDK(KP707106781), T62, T5Z);
Chris@42 1145 T74 = VADD(T3m, T3t);
Chris@42 1146 T3u = VSUB(T3m, T3t);
Chris@42 1147 T7s = VADD(T6I, T6L);
Chris@42 1148 T6M = VSUB(T6I, T6L);
Chris@42 1149 T7c = VFNMS(LDK(KP820678790), T6Q, T6T);
Chris@42 1150 T6U = VFMA(LDK(KP820678790), T6T, T6Q);
Chris@42 1151 T7r = VFMA(LDK(KP923879532), T74, T73);
Chris@42 1152 T75 = VFNMS(LDK(KP923879532), T74, T73);
Chris@42 1153 T7i = VADD(T77, T76);
Chris@42 1154 T78 = VSUB(T76, T77);
Chris@42 1155 }
Chris@42 1156 T7b = VFNMS(LDK(KP980785280), T6M, T6F);
Chris@42 1157 T6N = VFMA(LDK(KP980785280), T6M, T6F);
Chris@42 1158 {
Chris@42 1159 V T7u, T7q, T7v, T7t, T7A, T7y, T7j, T7z, T7f, T79, T71, T7d;
Chris@42 1160 T7u = VADD(T7m, T7p);
Chris@42 1161 T7q = VSUB(T7m, T7p);
Chris@42 1162 T7v = VFNMS(LDK(KP980785280), T7s, T7r);
Chris@42 1163 T7t = VFMA(LDK(KP980785280), T7s, T7r);
Chris@42 1164 T7A = VADD(T7x, T7w);
Chris@42 1165 T7y = VSUB(T7w, T7x);
Chris@42 1166 T7j = VFNMS(LDK(KP980785280), T7i, T7h);
Chris@42 1167 T7z = VFMA(LDK(KP980785280), T7i, T7h);
Chris@42 1168 T7f = VFMA(LDK(KP980785280), T78, T75);
Chris@42 1169 T79 = VFNMS(LDK(KP980785280), T78, T75);
Chris@42 1170 T71 = VFNMS(LDK(KP820678790), T70, T6X);
Chris@42 1171 T7d = VFMA(LDK(KP820678790), T6X, T70);
Chris@42 1172 {
Chris@42 1173 V T7g, T7e, T72, T7a;
Chris@42 1174 TfL = VFMA(LDK(KP995184726), T7y, T7v);
Chris@42 1175 STM4(&(io[15]), TfL, ovs, &(io[1]));
Chris@42 1176 TfM = VFNMS(LDK(KP995184726), T7y, T7v);
Chris@42 1177 STM4(&(io[47]), TfM, ovs, &(io[1]));
Chris@42 1178 TfN = VFMA(LDK(KP995184726), T7q, T7j);
Chris@42 1179 STM4(&(ro[15]), TfN, ovs, &(ro[1]));
Chris@42 1180 TfO = VFNMS(LDK(KP995184726), T7q, T7j);
Chris@42 1181 STM4(&(ro[47]), TfO, ovs, &(ro[1]));
Chris@42 1182 T7g = VADD(T7c, T7d);
Chris@42 1183 T7e = VSUB(T7c, T7d);
Chris@42 1184 T72 = VADD(T6U, T71);
Chris@42 1185 T7a = VSUB(T71, T6U);
Chris@42 1186 TfP = VFNMS(LDK(KP995184726), T7u, T7t);
Chris@42 1187 STM4(&(io[31]), TfP, ovs, &(io[1]));
Chris@42 1188 TfQ = VFMA(LDK(KP995184726), T7u, T7t);
Chris@42 1189 STM4(&(io[63]), TfQ, ovs, &(io[1]));
Chris@42 1190 TfR = VFMA(LDK(KP773010453), T7e, T7b);
Chris@42 1191 STM4(&(ro[23]), TfR, ovs, &(ro[1]));
Chris@42 1192 TfS = VFNMS(LDK(KP773010453), T7e, T7b);
Chris@42 1193 STM4(&(ro[55]), TfS, ovs, &(ro[1]));
Chris@42 1194 TfT = VFMA(LDK(KP773010453), T7g, T7f);
Chris@42 1195 STM4(&(io[7]), TfT, ovs, &(io[1]));
Chris@42 1196 TfU = VFNMS(LDK(KP773010453), T7g, T7f);
Chris@42 1197 STM4(&(io[39]), TfU, ovs, &(io[1]));
Chris@42 1198 TfV = VFMA(LDK(KP773010453), T7a, T79);
Chris@42 1199 STM4(&(io[23]), TfV, ovs, &(io[1]));
Chris@42 1200 TfW = VFNMS(LDK(KP773010453), T7a, T79);
Chris@42 1201 STM4(&(io[55]), TfW, ovs, &(io[1]));
Chris@42 1202 TfX = VFMA(LDK(KP773010453), T72, T6N);
Chris@42 1203 STM4(&(ro[7]), TfX, ovs, &(ro[1]));
Chris@42 1204 TfY = VFNMS(LDK(KP773010453), T72, T6N);
Chris@42 1205 STM4(&(ro[39]), TfY, ovs, &(ro[1]));
Chris@42 1206 TfZ = VFNMS(LDK(KP995184726), T7A, T7z);
Chris@42 1207 STM4(&(ro[31]), TfZ, ovs, &(ro[1]));
Chris@42 1208 Tg0 = VFMA(LDK(KP995184726), T7A, T7z);
Chris@42 1209 STM4(&(ro[63]), Tg0, ovs, &(ro[1]));
Chris@42 1210 }
Chris@42 1211 }
Chris@42 1212 }
Chris@42 1213 {
Chris@42 1214 V T7D, T8K, T8H, T7K, Ta8, Ta7, Tae, Tad;
Chris@42 1215 {
Chris@42 1216 V T9x, T9u, T9E, T9B, T9L, T9K, T9V, T9j, Tab, Ta0, Taa, Ta3, Ta6, T9q, T9H;
Chris@42 1217 V T9I;
Chris@42 1218 {
Chris@42 1219 V T9h, T9i, T9Y, T9Z, Ta1, Ta2, T9m, T9p;
Chris@42 1220 T7D = VFMA(LDK(KP707106781), T7C, T7B);
Chris@42 1221 T9h = VFNMS(LDK(KP707106781), T7C, T7B);
Chris@42 1222 T9i = VSUB(T8I, T8J);
Chris@42 1223 T8K = VADD(T8I, T8J);
Chris@42 1224 T9x = VFNMS(LDK(KP923879532), T9w, T9v);
Chris@42 1225 T9Y = VFMA(LDK(KP923879532), T9w, T9v);
Chris@42 1226 T9Z = VFMA(LDK(KP923879532), T9t, T9s);
Chris@42 1227 T9u = VFNMS(LDK(KP923879532), T9t, T9s);
Chris@42 1228 T9E = VFNMS(LDK(KP923879532), T9D, T9C);
Chris@42 1229 Ta1 = VFMA(LDK(KP923879532), T9D, T9C);
Chris@42 1230 Ta2 = VFMA(LDK(KP923879532), T9A, T9z);
Chris@42 1231 T9B = VFNMS(LDK(KP923879532), T9A, T9z);
Chris@42 1232 T9L = VFNMS(LDK(KP668178637), T9k, T9l);
Chris@42 1233 T9m = VFMA(LDK(KP668178637), T9l, T9k);
Chris@42 1234 T9p = VFNMS(LDK(KP668178637), T9o, T9n);
Chris@42 1235 T9K = VFMA(LDK(KP668178637), T9n, T9o);
Chris@42 1236 T9V = VFNMS(LDK(KP923879532), T9i, T9h);
Chris@42 1237 T9j = VFMA(LDK(KP923879532), T9i, T9h);
Chris@42 1238 Tab = VFNMS(LDK(KP303346683), T9Y, T9Z);
Chris@42 1239 Ta0 = VFMA(LDK(KP303346683), T9Z, T9Y);
Chris@42 1240 Taa = VFMA(LDK(KP303346683), Ta1, Ta2);
Chris@42 1241 Ta3 = VFNMS(LDK(KP303346683), Ta2, Ta1);
Chris@42 1242 Ta6 = VADD(T9m, T9p);
Chris@42 1243 T9q = VSUB(T9m, T9p);
Chris@42 1244 T8H = VFMA(LDK(KP707106781), T8G, T8F);
Chris@42 1245 T9H = VFNMS(LDK(KP707106781), T8G, T8F);
Chris@42 1246 T9I = VSUB(T7J, T7G);
Chris@42 1247 T7K = VADD(T7G, T7J);
Chris@42 1248 }
Chris@42 1249 {
Chris@42 1250 V T9P, T9r, T9Q, T9y, Ta5, T9J, T9W, T9M, T9R, T9F;
Chris@42 1251 T9P = VFNMS(LDK(KP831469612), T9q, T9j);
Chris@42 1252 T9r = VFMA(LDK(KP831469612), T9q, T9j);
Chris@42 1253 T9Q = VFNMS(LDK(KP534511135), T9u, T9x);
Chris@42 1254 T9y = VFMA(LDK(KP534511135), T9x, T9u);
Chris@42 1255 Ta5 = VFNMS(LDK(KP923879532), T9I, T9H);
Chris@42 1256 T9J = VFMA(LDK(KP923879532), T9I, T9H);
Chris@42 1257 T9W = VADD(T9L, T9K);
Chris@42 1258 T9M = VSUB(T9K, T9L);
Chris@42 1259 T9R = VFMA(LDK(KP534511135), T9B, T9E);
Chris@42 1260 T9F = VFNMS(LDK(KP534511135), T9E, T9B);
Chris@42 1261 {
Chris@42 1262 V T9T, T9N, T9U, T9S, T9G, T9O;
Chris@42 1263 {
Chris@42 1264 V Ta4, Ta9, Tac, T9X;
Chris@42 1265 Ta8 = VADD(Ta0, Ta3);
Chris@42 1266 Ta4 = VSUB(Ta0, Ta3);
Chris@42 1267 Ta9 = VFNMS(LDK(KP831469612), Ta6, Ta5);
Chris@42 1268 Ta7 = VFMA(LDK(KP831469612), Ta6, Ta5);
Chris@42 1269 Tae = VADD(Tab, Taa);
Chris@42 1270 Tac = VSUB(Taa, Tab);
Chris@42 1271 T9X = VFNMS(LDK(KP831469612), T9W, T9V);
Chris@42 1272 Tad = VFMA(LDK(KP831469612), T9W, T9V);
Chris@42 1273 T9T = VFMA(LDK(KP831469612), T9M, T9J);
Chris@42 1274 T9N = VFNMS(LDK(KP831469612), T9M, T9J);
Chris@42 1275 T9U = VADD(T9Q, T9R);
Chris@42 1276 T9S = VSUB(T9Q, T9R);
Chris@42 1277 T9G = VADD(T9y, T9F);
Chris@42 1278 T9O = VSUB(T9F, T9y);
Chris@42 1279 {
Chris@42 1280 V Tg1, Tg2, Tg3, Tg4;
Chris@42 1281 Tg1 = VFNMS(LDK(KP956940335), Tac, Ta9);
Chris@42 1282 STM4(&(io[45]), Tg1, ovs, &(io[1]));
Chris@42 1283 STN4(&(io[44]), Tf6, Tg1, Tfq, TfM, ovs);
Chris@42 1284 Tg2 = VFMA(LDK(KP956940335), Ta4, T9X);
Chris@42 1285 STM4(&(ro[13]), Tg2, ovs, &(ro[1]));
Chris@42 1286 STN4(&(ro[12]), Tf9, Tg2, Tff, TfN, ovs);
Chris@42 1287 Tg3 = VFNMS(LDK(KP956940335), Ta4, T9X);
Chris@42 1288 STM4(&(ro[45]), Tg3, ovs, &(ro[1]));
Chris@42 1289 STN4(&(ro[44]), Tfa, Tg3, Tfg, TfO, ovs);
Chris@42 1290 Tg4 = VFMA(LDK(KP956940335), Tac, Ta9);
Chris@42 1291 STM4(&(io[13]), Tg4, ovs, &(io[1]));
Chris@42 1292 STN4(&(io[12]), Tf5, Tg4, Tfp, TfL, ovs);
Chris@42 1293 }
Chris@42 1294 }
Chris@42 1295 {
Chris@42 1296 V Tg5, Tg6, Tg7, Tg8;
Chris@42 1297 Tg5 = VFMA(LDK(KP881921264), T9S, T9P);
Chris@42 1298 STM4(&(ro[21]), Tg5, ovs, &(ro[1]));
Chris@42 1299 STN4(&(ro[20]), Tfd, Tg5, Tfh, TfR, ovs);
Chris@42 1300 Tg6 = VFNMS(LDK(KP881921264), T9S, T9P);
Chris@42 1301 STM4(&(ro[53]), Tg6, ovs, &(ro[1]));
Chris@42 1302 STN4(&(ro[52]), Tfe, Tg6, Tfi, TfS, ovs);
Chris@42 1303 Tg7 = VFMA(LDK(KP881921264), T9U, T9T);
Chris@42 1304 STM4(&(io[5]), Tg7, ovs, &(io[1]));
Chris@42 1305 STN4(&(io[4]), TeZ, Tg7, Tfj, TfT, ovs);
Chris@42 1306 Tg8 = VFNMS(LDK(KP881921264), T9U, T9T);
Chris@42 1307 STM4(&(io[37]), Tg8, ovs, &(io[1]));
Chris@42 1308 STN4(&(io[36]), Tf0, Tg8, Tfk, TfU, ovs);
Chris@42 1309 {
Chris@42 1310 V Tg9, Tga, Tgb, Tgc;
Chris@42 1311 Tg9 = VFMA(LDK(KP881921264), T9O, T9N);
Chris@42 1312 STM4(&(io[21]), Tg9, ovs, &(io[1]));
Chris@42 1313 STN4(&(io[20]), Tfb, Tg9, Tfl, TfV, ovs);
Chris@42 1314 Tga = VFNMS(LDK(KP881921264), T9O, T9N);
Chris@42 1315 STM4(&(io[53]), Tga, ovs, &(io[1]));
Chris@42 1316 STN4(&(io[52]), Tfc, Tga, Tfm, TfW, ovs);
Chris@42 1317 Tgb = VFMA(LDK(KP881921264), T9G, T9r);
Chris@42 1318 STM4(&(ro[5]), Tgb, ovs, &(ro[1]));
Chris@42 1319 STN4(&(ro[4]), Tf1, Tgb, Tfn, TfX, ovs);
Chris@42 1320 Tgc = VFNMS(LDK(KP881921264), T9G, T9r);
Chris@42 1321 STM4(&(ro[37]), Tgc, ovs, &(ro[1]));
Chris@42 1322 STN4(&(ro[36]), Tf2, Tgc, Tfo, TfY, ovs);
Chris@42 1323 }
Chris@42 1324 }
Chris@42 1325 }
Chris@42 1326 }
Chris@42 1327 }
Chris@42 1328 {
Chris@42 1329 V Tgh, Tgi, Tgl, Tgm, Tgn, Tgo, Tgp, Tgq, Tgr, Tgs, Tgt, Tgu;
Chris@42 1330 {
Chris@42 1331 V T5U, T6j, T3v, T6y, T6o, T5H, T69, T68, T6z, T6r, T6u, T48, T6f, T52, T6t;
Chris@42 1332 V T67, T6h, T49;
Chris@42 1333 {
Chris@42 1334 V T51, T4O, T6p, T6q, T3O, T47, T6m, T6n;
Chris@42 1335 T51 = VFNMS(LDK(KP923879532), T50, T4X);
Chris@42 1336 T6m = VFMA(LDK(KP923879532), T50, T4X);
Chris@42 1337 T6n = VFMA(LDK(KP923879532), T4N, T4q);
Chris@42 1338 T4O = VFNMS(LDK(KP923879532), T4N, T4q);
Chris@42 1339 T5U = VFNMS(LDK(KP923879532), T5T, T5Q);
Chris@42 1340 T6p = VFMA(LDK(KP923879532), T5T, T5Q);
Chris@42 1341 {
Chris@42 1342 V Tgd, Tge, Tgf, Tgg;
Chris@42 1343 Tgd = VFMA(LDK(KP956940335), Ta8, Ta7);
Chris@42 1344 STM4(&(io[61]), Tgd, ovs, &(io[1]));
Chris@42 1345 STN4(&(io[60]), Tf7, Tgd, Tfs, TfQ, ovs);
Chris@42 1346 Tge = VFNMS(LDK(KP956940335), Ta8, Ta7);
Chris@42 1347 STM4(&(io[29]), Tge, ovs, &(io[1]));
Chris@42 1348 STN4(&(io[28]), Tf8, Tge, Tfr, TfP, ovs);
Chris@42 1349 Tgf = VFMA(LDK(KP956940335), Tae, Tad);
Chris@42 1350 STM4(&(ro[61]), Tgf, ovs, &(ro[1]));
Chris@42 1351 STN4(&(ro[60]), Tf3, Tgf, Tft, Tg0, ovs);
Chris@42 1352 Tgg = VFNMS(LDK(KP956940335), Tae, Tad);
Chris@42 1353 STM4(&(ro[29]), Tgg, ovs, &(ro[1]));
Chris@42 1354 STN4(&(ro[28]), Tf4, Tgg, Tfu, TfZ, ovs);
Chris@42 1355 T6j = VFMA(LDK(KP923879532), T3u, T3f);
Chris@42 1356 T3v = VFNMS(LDK(KP923879532), T3u, T3f);
Chris@42 1357 T6y = VFNMS(LDK(KP303346683), T6m, T6n);
Chris@42 1358 T6o = VFMA(LDK(KP303346683), T6n, T6m);
Chris@42 1359 T6q = VFMA(LDK(KP923879532), T5G, T5j);
Chris@42 1360 T5H = VFNMS(LDK(KP923879532), T5G, T5j);
Chris@42 1361 }
Chris@42 1362 T69 = VFMA(LDK(KP668178637), T3G, T3N);
Chris@42 1363 T3O = VFNMS(LDK(KP668178637), T3N, T3G);
Chris@42 1364 T47 = VFMA(LDK(KP668178637), T46, T3Z);
Chris@42 1365 T68 = VFNMS(LDK(KP668178637), T3Z, T46);
Chris@42 1366 T6z = VFMA(LDK(KP303346683), T6p, T6q);
Chris@42 1367 T6r = VFNMS(LDK(KP303346683), T6q, T6p);
Chris@42 1368 T6u = VADD(T3O, T47);
Chris@42 1369 T48 = VSUB(T3O, T47);
Chris@42 1370 T6f = VFNMS(LDK(KP534511135), T4O, T51);
Chris@42 1371 T52 = VFMA(LDK(KP534511135), T51, T4O);
Chris@42 1372 T6t = VFMA(LDK(KP923879532), T66, T63);
Chris@42 1373 T67 = VFNMS(LDK(KP923879532), T66, T63);
Chris@42 1374 }
Chris@42 1375 T6h = VFNMS(LDK(KP831469612), T48, T3v);
Chris@42 1376 T49 = VFMA(LDK(KP831469612), T48, T3v);
Chris@42 1377 {
Chris@42 1378 V T6w, T6s, T6B, T6v, T6A, T6C, T6k, T6a, T6e, T5V;
Chris@42 1379 T6w = VSUB(T6r, T6o);
Chris@42 1380 T6s = VADD(T6o, T6r);
Chris@42 1381 T6B = VFMA(LDK(KP831469612), T6u, T6t);
Chris@42 1382 T6v = VFNMS(LDK(KP831469612), T6u, T6t);
Chris@42 1383 T6A = VSUB(T6y, T6z);
Chris@42 1384 T6C = VADD(T6y, T6z);
Chris@42 1385 T6k = VADD(T69, T68);
Chris@42 1386 T6a = VSUB(T68, T69);
Chris@42 1387 T6e = VFMA(LDK(KP534511135), T5H, T5U);
Chris@42 1388 T5V = VFNMS(LDK(KP534511135), T5U, T5H);
Chris@42 1389 Tgh = VFMA(LDK(KP956940335), T6C, T6B);
Chris@42 1390 STM4(&(io[3]), Tgh, ovs, &(io[1]));
Chris@42 1391 Tgi = VFNMS(LDK(KP956940335), T6C, T6B);
Chris@42 1392 STM4(&(io[35]), Tgi, ovs, &(io[1]));
Chris@42 1393 {
Chris@42 1394 V T6l, T6x, T6d, T6b;
Chris@42 1395 T6l = VFMA(LDK(KP831469612), T6k, T6j);
Chris@42 1396 T6x = VFNMS(LDK(KP831469612), T6k, T6j);
Chris@42 1397 T6d = VFMA(LDK(KP831469612), T6a, T67);
Chris@42 1398 T6b = VFNMS(LDK(KP831469612), T6a, T67);
Chris@42 1399 {
Chris@42 1400 V T6g, T6i, T5W, T6c;
Chris@42 1401 T6g = VSUB(T6e, T6f);
Chris@42 1402 T6i = VADD(T6f, T6e);
Chris@42 1403 T5W = VSUB(T52, T5V);
Chris@42 1404 T6c = VADD(T52, T5V);
Chris@42 1405 Tgj = VFMA(LDK(KP956940335), T6w, T6v);
Chris@42 1406 STM4(&(io[19]), Tgj, ovs, &(io[1]));
Chris@42 1407 Tgk = VFNMS(LDK(KP956940335), T6w, T6v);
Chris@42 1408 STM4(&(io[51]), Tgk, ovs, &(io[1]));
Chris@42 1409 Tgl = VFMA(LDK(KP956940335), T6s, T6l);
Chris@42 1410 STM4(&(ro[3]), Tgl, ovs, &(ro[1]));
Chris@42 1411 Tgm = VFNMS(LDK(KP956940335), T6s, T6l);
Chris@42 1412 STM4(&(ro[35]), Tgm, ovs, &(ro[1]));
Chris@42 1413 Tgn = VFMA(LDK(KP881921264), T6i, T6h);
Chris@42 1414 STM4(&(ro[59]), Tgn, ovs, &(ro[1]));
Chris@42 1415 Tgo = VFNMS(LDK(KP881921264), T6i, T6h);
Chris@42 1416 STM4(&(ro[27]), Tgo, ovs, &(ro[1]));
Chris@42 1417 Tgp = VFMA(LDK(KP881921264), T6g, T6d);
Chris@42 1418 STM4(&(io[11]), Tgp, ovs, &(io[1]));
Chris@42 1419 Tgq = VFNMS(LDK(KP881921264), T6g, T6d);
Chris@42 1420 STM4(&(io[43]), Tgq, ovs, &(io[1]));
Chris@42 1421 Tgr = VFMA(LDK(KP881921264), T6c, T6b);
Chris@42 1422 STM4(&(io[59]), Tgr, ovs, &(io[1]));
Chris@42 1423 Tgs = VFNMS(LDK(KP881921264), T6c, T6b);
Chris@42 1424 STM4(&(io[27]), Tgs, ovs, &(io[1]));
Chris@42 1425 Tgt = VFMA(LDK(KP881921264), T5W, T49);
Chris@42 1426 STM4(&(ro[11]), Tgt, ovs, &(ro[1]));
Chris@42 1427 Tgu = VFNMS(LDK(KP881921264), T5W, T49);
Chris@42 1428 STM4(&(ro[43]), Tgu, ovs, &(ro[1]));
Chris@42 1429 Tgv = VFNMS(LDK(KP956940335), T6A, T6x);
Chris@42 1430 STM4(&(ro[51]), Tgv, ovs, &(ro[1]));
Chris@42 1431 Tgw = VFMA(LDK(KP956940335), T6A, T6x);
Chris@42 1432 STM4(&(ro[19]), Tgw, ovs, &(ro[1]));
Chris@42 1433 }
Chris@42 1434 }
Chris@42 1435 }
Chris@42 1436 }
Chris@42 1437 {
Chris@42 1438 V T8j, T8c, T8C, T8v, T8N, T8M, T8X, T7L, T9c, T92, T9d, T95, T98, T80;
Chris@42 1439 {
Chris@42 1440 V T90, T91, T93, T94, T7S, T7Z;
Chris@42 1441 T8j = VFNMS(LDK(KP923879532), T8i, T8f);
Chris@42 1442 T90 = VFMA(LDK(KP923879532), T8i, T8f);
Chris@42 1443 T91 = VFMA(LDK(KP923879532), T8b, T84);
Chris@42 1444 T8c = VFNMS(LDK(KP923879532), T8b, T84);
Chris@42 1445 T8C = VFNMS(LDK(KP923879532), T8B, T8y);
Chris@42 1446 T93 = VFMA(LDK(KP923879532), T8B, T8y);
Chris@42 1447 T94 = VFMA(LDK(KP923879532), T8u, T8n);
Chris@42 1448 T8v = VFNMS(LDK(KP923879532), T8u, T8n);
Chris@42 1449 T8N = VFMA(LDK(KP198912367), T7O, T7R);
Chris@42 1450 T7S = VFNMS(LDK(KP198912367), T7R, T7O);
Chris@42 1451 T7Z = VFMA(LDK(KP198912367), T7Y, T7V);
Chris@42 1452 T8M = VFNMS(LDK(KP198912367), T7V, T7Y);
Chris@42 1453 T8X = VFMA(LDK(KP923879532), T7K, T7D);
Chris@42 1454 T7L = VFNMS(LDK(KP923879532), T7K, T7D);
Chris@42 1455 T9c = VFNMS(LDK(KP098491403), T90, T91);
Chris@42 1456 T92 = VFMA(LDK(KP098491403), T91, T90);
Chris@42 1457 T9d = VFMA(LDK(KP098491403), T93, T94);
Chris@42 1458 T95 = VFNMS(LDK(KP098491403), T94, T93);
Chris@42 1459 T98 = VADD(T7S, T7Z);
Chris@42 1460 T80 = VSUB(T7S, T7Z);
Chris@42 1461 }
Chris@42 1462 {
Chris@42 1463 V T8V, T81, T8T, T8k, T97, T8L, T8Y, T8O, T8S, T8D;
Chris@42 1464 T8V = VFNMS(LDK(KP980785280), T80, T7L);
Chris@42 1465 T81 = VFMA(LDK(KP980785280), T80, T7L);
Chris@42 1466 T8T = VFNMS(LDK(KP820678790), T8c, T8j);
Chris@42 1467 T8k = VFMA(LDK(KP820678790), T8j, T8c);
Chris@42 1468 T97 = VFMA(LDK(KP923879532), T8K, T8H);
Chris@42 1469 T8L = VFNMS(LDK(KP923879532), T8K, T8H);
Chris@42 1470 T8Y = VADD(T8N, T8M);
Chris@42 1471 T8O = VSUB(T8M, T8N);
Chris@42 1472 T8S = VFMA(LDK(KP820678790), T8v, T8C);
Chris@42 1473 T8D = VFNMS(LDK(KP820678790), T8C, T8v);
Chris@42 1474 {
Chris@42 1475 V T8R, T8P, T8U, T8W, T8E, T8Q;
Chris@42 1476 {
Chris@42 1477 V T96, T9f, T9g, T8Z;
Chris@42 1478 T9a = VSUB(T95, T92);
Chris@42 1479 T96 = VADD(T92, T95);
Chris@42 1480 T9f = VFMA(LDK(KP980785280), T98, T97);
Chris@42 1481 T99 = VFNMS(LDK(KP980785280), T98, T97);
Chris@42 1482 T9e = VSUB(T9c, T9d);
Chris@42 1483 T9g = VADD(T9c, T9d);
Chris@42 1484 T8Z = VFMA(LDK(KP980785280), T8Y, T8X);
Chris@42 1485 T9b = VFNMS(LDK(KP980785280), T8Y, T8X);
Chris@42 1486 T8R = VFMA(LDK(KP980785280), T8O, T8L);
Chris@42 1487 T8P = VFNMS(LDK(KP980785280), T8O, T8L);
Chris@42 1488 T8U = VSUB(T8S, T8T);
Chris@42 1489 T8W = VADD(T8T, T8S);
Chris@42 1490 T8E = VSUB(T8k, T8D);
Chris@42 1491 T8Q = VADD(T8k, T8D);
Chris@42 1492 {
Chris@42 1493 V Tgx, Tgy, Tgz, TgA;
Chris@42 1494 Tgx = VFNMS(LDK(KP995184726), T9g, T9f);
Chris@42 1495 STM4(&(io[33]), Tgx, ovs, &(io[1]));
Chris@42 1496 STN4(&(io[32]), TeO, Tgx, TfG, Tgi, ovs);
Chris@42 1497 Tgy = VFMA(LDK(KP995184726), T96, T8Z);
Chris@42 1498 STM4(&(ro[1]), Tgy, ovs, &(ro[1]));
Chris@42 1499 STN4(&(ro[0]), TeL, Tgy, Tfv, Tgl, ovs);
Chris@42 1500 Tgz = VFNMS(LDK(KP995184726), T96, T8Z);
Chris@42 1501 STM4(&(ro[33]), Tgz, ovs, &(ro[1]));
Chris@42 1502 STN4(&(ro[32]), TeM, Tgz, Tfw, Tgm, ovs);
Chris@42 1503 TgA = VFMA(LDK(KP995184726), T9g, T9f);
Chris@42 1504 STM4(&(io[1]), TgA, ovs, &(io[1]));
Chris@42 1505 STN4(&(io[0]), TeN, TgA, TfF, Tgh, ovs);
Chris@42 1506 }
Chris@42 1507 }
Chris@42 1508 {
Chris@42 1509 V TgB, TgC, TgD, TgE;
Chris@42 1510 TgB = VFMA(LDK(KP773010453), T8W, T8V);
Chris@42 1511 STM4(&(ro[57]), TgB, ovs, &(ro[1]));
Chris@42 1512 STN4(&(ro[56]), TeS, TgB, Tfx, Tgn, ovs);
Chris@42 1513 TgC = VFNMS(LDK(KP773010453), T8W, T8V);
Chris@42 1514 STM4(&(ro[25]), TgC, ovs, &(ro[1]));
Chris@42 1515 STN4(&(ro[24]), TeR, TgC, Tfy, Tgo, ovs);
Chris@42 1516 TgD = VFMA(LDK(KP773010453), T8U, T8R);
Chris@42 1517 STM4(&(io[9]), TgD, ovs, &(io[1]));
Chris@42 1518 STN4(&(io[8]), TeT, TgD, Tfz, Tgp, ovs);
Chris@42 1519 TgE = VFNMS(LDK(KP773010453), T8U, T8R);
Chris@42 1520 STM4(&(io[41]), TgE, ovs, &(io[1]));
Chris@42 1521 STN4(&(io[40]), TeU, TgE, TfA, Tgq, ovs);
Chris@42 1522 {
Chris@42 1523 V TgF, TgG, TgH, TgI;
Chris@42 1524 TgF = VFMA(LDK(KP773010453), T8Q, T8P);
Chris@42 1525 STM4(&(io[57]), TgF, ovs, &(io[1]));
Chris@42 1526 STN4(&(io[56]), TeW, TgF, TfB, Tgr, ovs);
Chris@42 1527 TgG = VFNMS(LDK(KP773010453), T8Q, T8P);
Chris@42 1528 STM4(&(io[25]), TgG, ovs, &(io[1]));
Chris@42 1529 STN4(&(io[24]), TeV, TgG, TfC, Tgs, ovs);
Chris@42 1530 TgH = VFMA(LDK(KP773010453), T8E, T81);
Chris@42 1531 STM4(&(ro[9]), TgH, ovs, &(ro[1]));
Chris@42 1532 STN4(&(ro[8]), TeX, TgH, TfD, Tgt, ovs);
Chris@42 1533 TgI = VFNMS(LDK(KP773010453), T8E, T81);
Chris@42 1534 STM4(&(ro[41]), TgI, ovs, &(ro[1]));
Chris@42 1535 STN4(&(ro[40]), TeY, TgI, TfE, Tgu, ovs);
Chris@42 1536 }
Chris@42 1537 }
Chris@42 1538 }
Chris@42 1539 }
Chris@42 1540 }
Chris@42 1541 }
Chris@42 1542 }
Chris@42 1543 }
Chris@42 1544 }
Chris@42 1545 }
Chris@42 1546 {
Chris@42 1547 V TgJ, TgK, TgL, TgM;
Chris@42 1548 TgJ = VFMA(LDK(KP995184726), T9a, T99);
Chris@42 1549 STM4(&(io[17]), TgJ, ovs, &(io[1]));
Chris@42 1550 STN4(&(io[16]), TeQ, TgJ, TfI, Tgj, ovs);
Chris@42 1551 TgK = VFNMS(LDK(KP995184726), T9a, T99);
Chris@42 1552 STM4(&(io[49]), TgK, ovs, &(io[1]));
Chris@42 1553 STN4(&(io[48]), TeP, TgK, TfH, Tgk, ovs);
Chris@42 1554 TgL = VFMA(LDK(KP995184726), T9e, T9b);
Chris@42 1555 STM4(&(ro[17]), TgL, ovs, &(ro[1]));
Chris@42 1556 STN4(&(ro[16]), TeK, TgL, TfJ, Tgw, ovs);
Chris@42 1557 TgM = VFNMS(LDK(KP995184726), T9e, T9b);
Chris@42 1558 STM4(&(ro[49]), TgM, ovs, &(ro[1]));
Chris@42 1559 STN4(&(ro[48]), TeJ, TgM, TfK, Tgv, ovs);
Chris@42 1560 }
Chris@42 1561 }
Chris@42 1562 }
Chris@42 1563 VLEAVE();
Chris@42 1564 }
Chris@42 1565
Chris@42 1566 static const kdft_desc desc = { 64, XSIMD_STRING("n2sv_64"), {520, 0, 392, 0}, &GENUS, 0, 1, 0, 0 };
Chris@42 1567
Chris@42 1568 void XSIMD(codelet_n2sv_64) (planner *p) {
Chris@42 1569 X(kdft_register) (p, n2sv_64, &desc);
Chris@42 1570 }
Chris@42 1571
Chris@42 1572 #else /* HAVE_FMA */
Chris@42 1573
Chris@42 1574 /* Generated by: ../../../genfft/gen_notw.native -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name n2sv_64 -with-ostride 1 -include n2s.h -store-multiple 4 */
Chris@42 1575
Chris@42 1576 /*
Chris@42 1577 * This function contains 912 FP additions, 248 FP multiplications,
Chris@42 1578 * (or, 808 additions, 144 multiplications, 104 fused multiply/add),
Chris@42 1579 * 260 stack variables, 15 constants, and 288 memory accesses
Chris@42 1580 */
Chris@42 1581 #include "n2s.h"
Chris@42 1582
Chris@42 1583 static void n2sv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@42 1584 {
Chris@42 1585 DVK(KP773010453, +0.773010453362736960810906609758469800971041293);
Chris@42 1586 DVK(KP634393284, +0.634393284163645498215171613225493370675687095);
Chris@42 1587 DVK(KP098017140, +0.098017140329560601994195563888641845861136673);
Chris@42 1588 DVK(KP995184726, +0.995184726672196886244836953109479921575474869);
Chris@42 1589 DVK(KP881921264, +0.881921264348355029712756863660388349508442621);
Chris@42 1590 DVK(KP471396736, +0.471396736825997648556387625905254377657460319);
Chris@42 1591 DVK(KP290284677, +0.290284677254462367636192375817395274691476278);
Chris@42 1592 DVK(KP956940335, +0.956940335732208864935797886980269969482849206);
Chris@42 1593 DVK(KP831469612, +0.831469612302545237078788377617905756738560812);
Chris@42 1594 DVK(KP555570233, +0.555570233019602224742830813948532874374937191);
Chris@42 1595 DVK(KP195090322, +0.195090322016128267848284868477022240927691618);
Chris@42 1596 DVK(KP980785280, +0.980785280403230449126182236134239036973933731);
Chris@42 1597 DVK(KP923879532, +0.923879532511286756128183189396788286822416626);
Chris@42 1598 DVK(KP382683432, +0.382683432365089771728459984030398866761344562);
Chris@42 1599 DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
Chris@42 1600 {
Chris@42 1601 INT i;
Chris@42 1602 for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(256, is), MAKE_VOLATILE_STRIDE(256, os)) {
Chris@42 1603 V T37, T7B, T8F, T5Z, Tf, Td9, TbB, TcB, T62, T7C, T2i, TdH, Tah, Tcb, T3e;
Chris@42 1604 V T8G, Tu, TdI, Tak, TbD, Tan, TbC, T2x, Tda, T3m, T65, T7G, T8J, T7J, T8I;
Chris@42 1605 V T3t, T64, TK, Tdd, Tas, Tce, Tav, Tcf, T2N, Tdc, T3G, T6G, T7O, T9k, T7R;
Chris@42 1606 V T9l, T3N, T6H, T1L, Tdv, Tbs, Tcw, TdC, Teo, T5j, T6V, T5Q, T6Y, T8y, T9C;
Chris@42 1607 V Tbb, Tct, T8n, T9z, TZ, Tdf, Taz, Tch, TaC, Tci, T32, Tdg, T3Z, T6J, T7V;
Chris@42 1608 V T9n, T7Y, T9o, T46, T6K, T1g, Tdp, Tb1, Tcm, Tdm, Tej, T4q, T6R, T4X, T6O;
Chris@42 1609 V T8f, T9s, TaK, Tcp, T84, T9v, T1v, Tdn, Tb4, Tcq, Tds, Tek, T4N, T6P, T50;
Chris@42 1610 V T6S, T8i, T9w, TaV, Tcn, T8b, T9t, T20, TdD, Tbv, Tcu, Tdy, Tep, T5G, T6Z;
Chris@42 1611 V T5T, T6W, T8B, T9A, Tbm, Tcx, T8u, T9D;
Chris@42 1612 {
Chris@42 1613 V T3, T35, T26, T5Y, T6, T5X, T29, T36, Ta, T39, T2d, T38, Td, T3b, T2g;
Chris@42 1614 V T3c;
Chris@42 1615 {
Chris@42 1616 V T1, T2, T24, T25;
Chris@42 1617 T1 = LD(&(ri[0]), ivs, &(ri[0]));
Chris@42 1618 T2 = LD(&(ri[WS(is, 32)]), ivs, &(ri[0]));
Chris@42 1619 T3 = VADD(T1, T2);
Chris@42 1620 T35 = VSUB(T1, T2);
Chris@42 1621 T24 = LD(&(ii[0]), ivs, &(ii[0]));
Chris@42 1622 T25 = LD(&(ii[WS(is, 32)]), ivs, &(ii[0]));
Chris@42 1623 T26 = VADD(T24, T25);
Chris@42 1624 T5Y = VSUB(T24, T25);
Chris@42 1625 }
Chris@42 1626 {
Chris@42 1627 V T4, T5, T27, T28;
Chris@42 1628 T4 = LD(&(ri[WS(is, 16)]), ivs, &(ri[0]));
Chris@42 1629 T5 = LD(&(ri[WS(is, 48)]), ivs, &(ri[0]));
Chris@42 1630 T6 = VADD(T4, T5);
Chris@42 1631 T5X = VSUB(T4, T5);
Chris@42 1632 T27 = LD(&(ii[WS(is, 16)]), ivs, &(ii[0]));
Chris@42 1633 T28 = LD(&(ii[WS(is, 48)]), ivs, &(ii[0]));
Chris@42 1634 T29 = VADD(T27, T28);
Chris@42 1635 T36 = VSUB(T27, T28);
Chris@42 1636 }
Chris@42 1637 {
Chris@42 1638 V T8, T9, T2b, T2c;
Chris@42 1639 T8 = LD(&(ri[WS(is, 8)]), ivs, &(ri[0]));
Chris@42 1640 T9 = LD(&(ri[WS(is, 40)]), ivs, &(ri[0]));
Chris@42 1641 Ta = VADD(T8, T9);
Chris@42 1642 T39 = VSUB(T8, T9);
Chris@42 1643 T2b = LD(&(ii[WS(is, 8)]), ivs, &(ii[0]));
Chris@42 1644 T2c = LD(&(ii[WS(is, 40)]), ivs, &(ii[0]));
Chris@42 1645 T2d = VADD(T2b, T2c);
Chris@42 1646 T38 = VSUB(T2b, T2c);
Chris@42 1647 }
Chris@42 1648 {
Chris@42 1649 V Tb, Tc, T2e, T2f;
Chris@42 1650 Tb = LD(&(ri[WS(is, 56)]), ivs, &(ri[0]));
Chris@42 1651 Tc = LD(&(ri[WS(is, 24)]), ivs, &(ri[0]));
Chris@42 1652 Td = VADD(Tb, Tc);
Chris@42 1653 T3b = VSUB(Tb, Tc);
Chris@42 1654 T2e = LD(&(ii[WS(is, 56)]), ivs, &(ii[0]));
Chris@42 1655 T2f = LD(&(ii[WS(is, 24)]), ivs, &(ii[0]));
Chris@42 1656 T2g = VADD(T2e, T2f);
Chris@42 1657 T3c = VSUB(T2e, T2f);
Chris@42 1658 }
Chris@42 1659 {
Chris@42 1660 V T7, Te, T2a, T2h;
Chris@42 1661 T37 = VSUB(T35, T36);
Chris@42 1662 T7B = VADD(T35, T36);
Chris@42 1663 T8F = VSUB(T5Y, T5X);
Chris@42 1664 T5Z = VADD(T5X, T5Y);
Chris@42 1665 T7 = VADD(T3, T6);
Chris@42 1666 Te = VADD(Ta, Td);
Chris@42 1667 Tf = VADD(T7, Te);
Chris@42 1668 Td9 = VSUB(T7, Te);
Chris@42 1669 {
Chris@42 1670 V Tbz, TbA, T60, T61;
Chris@42 1671 Tbz = VSUB(T26, T29);
Chris@42 1672 TbA = VSUB(Td, Ta);
Chris@42 1673 TbB = VSUB(Tbz, TbA);
Chris@42 1674 TcB = VADD(TbA, Tbz);
Chris@42 1675 T60 = VSUB(T3b, T3c);
Chris@42 1676 T61 = VADD(T39, T38);
Chris@42 1677 T62 = VMUL(LDK(KP707106781), VSUB(T60, T61));
Chris@42 1678 T7C = VMUL(LDK(KP707106781), VADD(T61, T60));
Chris@42 1679 }
Chris@42 1680 T2a = VADD(T26, T29);
Chris@42 1681 T2h = VADD(T2d, T2g);
Chris@42 1682 T2i = VADD(T2a, T2h);
Chris@42 1683 TdH = VSUB(T2a, T2h);
Chris@42 1684 {
Chris@42 1685 V Taf, Tag, T3a, T3d;
Chris@42 1686 Taf = VSUB(T3, T6);
Chris@42 1687 Tag = VSUB(T2d, T2g);
Chris@42 1688 Tah = VSUB(Taf, Tag);
Chris@42 1689 Tcb = VADD(Taf, Tag);
Chris@42 1690 T3a = VSUB(T38, T39);
Chris@42 1691 T3d = VADD(T3b, T3c);
Chris@42 1692 T3e = VMUL(LDK(KP707106781), VSUB(T3a, T3d));
Chris@42 1693 T8G = VMUL(LDK(KP707106781), VADD(T3a, T3d));
Chris@42 1694 }
Chris@42 1695 }
Chris@42 1696 }
Chris@42 1697 {
Chris@42 1698 V Ti, T3j, T2l, T3h, Tl, T3g, T2o, T3k, Tp, T3q, T2s, T3o, Ts, T3n, T2v;
Chris@42 1699 V T3r;
Chris@42 1700 {
Chris@42 1701 V Tg, Th, T2j, T2k;
Chris@42 1702 Tg = LD(&(ri[WS(is, 4)]), ivs, &(ri[0]));
Chris@42 1703 Th = LD(&(ri[WS(is, 36)]), ivs, &(ri[0]));
Chris@42 1704 Ti = VADD(Tg, Th);
Chris@42 1705 T3j = VSUB(Tg, Th);
Chris@42 1706 T2j = LD(&(ii[WS(is, 4)]), ivs, &(ii[0]));
Chris@42 1707 T2k = LD(&(ii[WS(is, 36)]), ivs, &(ii[0]));
Chris@42 1708 T2l = VADD(T2j, T2k);
Chris@42 1709 T3h = VSUB(T2j, T2k);
Chris@42 1710 }
Chris@42 1711 {
Chris@42 1712 V Tj, Tk, T2m, T2n;
Chris@42 1713 Tj = LD(&(ri[WS(is, 20)]), ivs, &(ri[0]));
Chris@42 1714 Tk = LD(&(ri[WS(is, 52)]), ivs, &(ri[0]));
Chris@42 1715 Tl = VADD(Tj, Tk);
Chris@42 1716 T3g = VSUB(Tj, Tk);
Chris@42 1717 T2m = LD(&(ii[WS(is, 20)]), ivs, &(ii[0]));
Chris@42 1718 T2n = LD(&(ii[WS(is, 52)]), ivs, &(ii[0]));
Chris@42 1719 T2o = VADD(T2m, T2n);
Chris@42 1720 T3k = VSUB(T2m, T2n);
Chris@42 1721 }
Chris@42 1722 {
Chris@42 1723 V Tn, To, T2q, T2r;
Chris@42 1724 Tn = LD(&(ri[WS(is, 60)]), ivs, &(ri[0]));
Chris@42 1725 To = LD(&(ri[WS(is, 28)]), ivs, &(ri[0]));
Chris@42 1726 Tp = VADD(Tn, To);
Chris@42 1727 T3q = VSUB(Tn, To);
Chris@42 1728 T2q = LD(&(ii[WS(is, 60)]), ivs, &(ii[0]));
Chris@42 1729 T2r = LD(&(ii[WS(is, 28)]), ivs, &(ii[0]));
Chris@42 1730 T2s = VADD(T2q, T2r);
Chris@42 1731 T3o = VSUB(T2q, T2r);
Chris@42 1732 }
Chris@42 1733 {
Chris@42 1734 V Tq, Tr, T2t, T2u;
Chris@42 1735 Tq = LD(&(ri[WS(is, 12)]), ivs, &(ri[0]));
Chris@42 1736 Tr = LD(&(ri[WS(is, 44)]), ivs, &(ri[0]));
Chris@42 1737 Ts = VADD(Tq, Tr);
Chris@42 1738 T3n = VSUB(Tq, Tr);
Chris@42 1739 T2t = LD(&(ii[WS(is, 12)]), ivs, &(ii[0]));
Chris@42 1740 T2u = LD(&(ii[WS(is, 44)]), ivs, &(ii[0]));
Chris@42 1741 T2v = VADD(T2t, T2u);
Chris@42 1742 T3r = VSUB(T2t, T2u);
Chris@42 1743 }
Chris@42 1744 {
Chris@42 1745 V Tm, Tt, Tai, Taj;
Chris@42 1746 Tm = VADD(Ti, Tl);
Chris@42 1747 Tt = VADD(Tp, Ts);
Chris@42 1748 Tu = VADD(Tm, Tt);
Chris@42 1749 TdI = VSUB(Tt, Tm);
Chris@42 1750 Tai = VSUB(T2l, T2o);
Chris@42 1751 Taj = VSUB(Ti, Tl);
Chris@42 1752 Tak = VSUB(Tai, Taj);
Chris@42 1753 TbD = VADD(Taj, Tai);
Chris@42 1754 }
Chris@42 1755 {
Chris@42 1756 V Tal, Tam, T2p, T2w;
Chris@42 1757 Tal = VSUB(Tp, Ts);
Chris@42 1758 Tam = VSUB(T2s, T2v);
Chris@42 1759 Tan = VADD(Tal, Tam);
Chris@42 1760 TbC = VSUB(Tal, Tam);
Chris@42 1761 T2p = VADD(T2l, T2o);
Chris@42 1762 T2w = VADD(T2s, T2v);
Chris@42 1763 T2x = VADD(T2p, T2w);
Chris@42 1764 Tda = VSUB(T2p, T2w);
Chris@42 1765 }
Chris@42 1766 {
Chris@42 1767 V T3i, T3l, T7E, T7F;
Chris@42 1768 T3i = VADD(T3g, T3h);
Chris@42 1769 T3l = VSUB(T3j, T3k);
Chris@42 1770 T3m = VFNMS(LDK(KP923879532), T3l, VMUL(LDK(KP382683432), T3i));
Chris@42 1771 T65 = VFMA(LDK(KP923879532), T3i, VMUL(LDK(KP382683432), T3l));
Chris@42 1772 T7E = VSUB(T3h, T3g);
Chris@42 1773 T7F = VADD(T3j, T3k);
Chris@42 1774 T7G = VFNMS(LDK(KP382683432), T7F, VMUL(LDK(KP923879532), T7E));
Chris@42 1775 T8J = VFMA(LDK(KP382683432), T7E, VMUL(LDK(KP923879532), T7F));
Chris@42 1776 }
Chris@42 1777 {
Chris@42 1778 V T7H, T7I, T3p, T3s;
Chris@42 1779 T7H = VSUB(T3o, T3n);
Chris@42 1780 T7I = VADD(T3q, T3r);
Chris@42 1781 T7J = VFMA(LDK(KP923879532), T7H, VMUL(LDK(KP382683432), T7I));
Chris@42 1782 T8I = VFNMS(LDK(KP382683432), T7H, VMUL(LDK(KP923879532), T7I));
Chris@42 1783 T3p = VADD(T3n, T3o);
Chris@42 1784 T3s = VSUB(T3q, T3r);
Chris@42 1785 T3t = VFMA(LDK(KP382683432), T3p, VMUL(LDK(KP923879532), T3s));
Chris@42 1786 T64 = VFNMS(LDK(KP923879532), T3p, VMUL(LDK(KP382683432), T3s));
Chris@42 1787 }
Chris@42 1788 }
Chris@42 1789 {
Chris@42 1790 V Ty, T3H, T2B, T3x, TB, T3w, T2E, T3I, TI, T3L, T2L, T3B, TF, T3K, T2I;
Chris@42 1791 V T3E;
Chris@42 1792 {
Chris@42 1793 V Tw, Tx, T2C, T2D;
Chris@42 1794 Tw = LD(&(ri[WS(is, 2)]), ivs, &(ri[0]));
Chris@42 1795 Tx = LD(&(ri[WS(is, 34)]), ivs, &(ri[0]));
Chris@42 1796 Ty = VADD(Tw, Tx);
Chris@42 1797 T3H = VSUB(Tw, Tx);
Chris@42 1798 {
Chris@42 1799 V T2z, T2A, Tz, TA;
Chris@42 1800 T2z = LD(&(ii[WS(is, 2)]), ivs, &(ii[0]));
Chris@42 1801 T2A = LD(&(ii[WS(is, 34)]), ivs, &(ii[0]));
Chris@42 1802 T2B = VADD(T2z, T2A);
Chris@42 1803 T3x = VSUB(T2z, T2A);
Chris@42 1804 Tz = LD(&(ri[WS(is, 18)]), ivs, &(ri[0]));
Chris@42 1805 TA = LD(&(ri[WS(is, 50)]), ivs, &(ri[0]));
Chris@42 1806 TB = VADD(Tz, TA);
Chris@42 1807 T3w = VSUB(Tz, TA);
Chris@42 1808 }
Chris@42 1809 T2C = LD(&(ii[WS(is, 18)]), ivs, &(ii[0]));
Chris@42 1810 T2D = LD(&(ii[WS(is, 50)]), ivs, &(ii[0]));
Chris@42 1811 T2E = VADD(T2C, T2D);
Chris@42 1812 T3I = VSUB(T2C, T2D);
Chris@42 1813 {
Chris@42 1814 V TG, TH, T3z, T2J, T2K, T3A;
Chris@42 1815 TG = LD(&(ri[WS(is, 58)]), ivs, &(ri[0]));
Chris@42 1816 TH = LD(&(ri[WS(is, 26)]), ivs, &(ri[0]));
Chris@42 1817 T3z = VSUB(TG, TH);
Chris@42 1818 T2J = LD(&(ii[WS(is, 58)]), ivs, &(ii[0]));
Chris@42 1819 T2K = LD(&(ii[WS(is, 26)]), ivs, &(ii[0]));
Chris@42 1820 T3A = VSUB(T2J, T2K);
Chris@42 1821 TI = VADD(TG, TH);
Chris@42 1822 T3L = VADD(T3z, T3A);
Chris@42 1823 T2L = VADD(T2J, T2K);
Chris@42 1824 T3B = VSUB(T3z, T3A);
Chris@42 1825 }
Chris@42 1826 {
Chris@42 1827 V TD, TE, T3C, T2G, T2H, T3D;
Chris@42 1828 TD = LD(&(ri[WS(is, 10)]), ivs, &(ri[0]));
Chris@42 1829 TE = LD(&(ri[WS(is, 42)]), ivs, &(ri[0]));
Chris@42 1830 T3C = VSUB(TD, TE);
Chris@42 1831 T2G = LD(&(ii[WS(is, 10)]), ivs, &(ii[0]));
Chris@42 1832 T2H = LD(&(ii[WS(is, 42)]), ivs, &(ii[0]));
Chris@42 1833 T3D = VSUB(T2G, T2H);
Chris@42 1834 TF = VADD(TD, TE);
Chris@42 1835 T3K = VSUB(T3D, T3C);
Chris@42 1836 T2I = VADD(T2G, T2H);
Chris@42 1837 T3E = VADD(T3C, T3D);
Chris@42 1838 }
Chris@42 1839 }
Chris@42 1840 {
Chris@42 1841 V TC, TJ, Taq, Tar;
Chris@42 1842 TC = VADD(Ty, TB);
Chris@42 1843 TJ = VADD(TF, TI);
Chris@42 1844 TK = VADD(TC, TJ);
Chris@42 1845 Tdd = VSUB(TC, TJ);
Chris@42 1846 Taq = VSUB(T2B, T2E);
Chris@42 1847 Tar = VSUB(TI, TF);
Chris@42 1848 Tas = VSUB(Taq, Tar);
Chris@42 1849 Tce = VADD(Tar, Taq);
Chris@42 1850 }
Chris@42 1851 {
Chris@42 1852 V Tat, Tau, T2F, T2M;
Chris@42 1853 Tat = VSUB(Ty, TB);
Chris@42 1854 Tau = VSUB(T2I, T2L);
Chris@42 1855 Tav = VSUB(Tat, Tau);
Chris@42 1856 Tcf = VADD(Tat, Tau);
Chris@42 1857 T2F = VADD(T2B, T2E);
Chris@42 1858 T2M = VADD(T2I, T2L);
Chris@42 1859 T2N = VADD(T2F, T2M);
Chris@42 1860 Tdc = VSUB(T2F, T2M);
Chris@42 1861 }
Chris@42 1862 {
Chris@42 1863 V T3y, T3F, T7M, T7N;
Chris@42 1864 T3y = VADD(T3w, T3x);
Chris@42 1865 T3F = VMUL(LDK(KP707106781), VSUB(T3B, T3E));
Chris@42 1866 T3G = VSUB(T3y, T3F);
Chris@42 1867 T6G = VADD(T3y, T3F);
Chris@42 1868 T7M = VSUB(T3x, T3w);
Chris@42 1869 T7N = VMUL(LDK(KP707106781), VADD(T3K, T3L));
Chris@42 1870 T7O = VSUB(T7M, T7N);
Chris@42 1871 T9k = VADD(T7M, T7N);
Chris@42 1872 }
Chris@42 1873 {
Chris@42 1874 V T7P, T7Q, T3J, T3M;
Chris@42 1875 T7P = VADD(T3H, T3I);
Chris@42 1876 T7Q = VMUL(LDK(KP707106781), VADD(T3E, T3B));
Chris@42 1877 T7R = VSUB(T7P, T7Q);
Chris@42 1878 T9l = VADD(T7P, T7Q);
Chris@42 1879 T3J = VSUB(T3H, T3I);
Chris@42 1880 T3M = VMUL(LDK(KP707106781), VSUB(T3K, T3L));
Chris@42 1881 T3N = VSUB(T3J, T3M);
Chris@42 1882 T6H = VADD(T3J, T3M);
Chris@42 1883 }
Chris@42 1884 }
Chris@42 1885 {
Chris@42 1886 V T1z, T53, T5L, Tbo, T1C, T5I, T56, Tbp, T1J, Tb9, T5h, T5N, T1G, Tb8, T5c;
Chris@42 1887 V T5O;
Chris@42 1888 {
Chris@42 1889 V T1x, T1y, T54, T55;
Chris@42 1890 T1x = LD(&(ri[WS(is, 63)]), ivs, &(ri[WS(is, 1)]));
Chris@42 1891 T1y = LD(&(ri[WS(is, 31)]), ivs, &(ri[WS(is, 1)]));
Chris@42 1892 T1z = VADD(T1x, T1y);
Chris@42 1893 T53 = VSUB(T1x, T1y);
Chris@42 1894 {
Chris@42 1895 V T5J, T5K, T1A, T1B;
Chris@42 1896 T5J = LD(&(ii[WS(is, 63)]), ivs, &(ii[WS(is, 1)]));
Chris@42 1897 T5K = LD(&(ii[WS(is, 31)]), ivs, &(ii[WS(is, 1)]));
Chris@42 1898 T5L = VSUB(T5J, T5K);
Chris@42 1899 Tbo = VADD(T5J, T5K);
Chris@42 1900 T1A = LD(&(ri[WS(is, 15)]), ivs, &(ri[WS(is, 1)]));
Chris@42 1901 T1B = LD(&(ri[WS(is, 47)]), ivs, &(ri[WS(is, 1)]));
Chris@42 1902 T1C = VADD(T1A, T1B);
Chris@42 1903 T5I = VSUB(T1A, T1B);
Chris@42 1904 }
Chris@42 1905 T54 = LD(&(ii[WS(is, 15)]), ivs, &(ii[WS(is, 1)]));
Chris@42 1906 T55 = LD(&(ii[WS(is, 47)]), ivs, &(ii[WS(is, 1)]));
Chris@42 1907 T56 = VSUB(T54, T55);
Chris@42 1908 Tbp = VADD(T54, T55);
Chris@42 1909 {
Chris@42 1910 V T1H, T1I, T5d, T5e, T5f, T5g;
Chris@42 1911 T1H = LD(&(ri[WS(is, 55)]), ivs, &(ri[WS(is, 1)]));
Chris@42 1912 T1I = LD(&(ri[WS(is, 23)]), ivs, &(ri[WS(is, 1)]));
Chris@42 1913 T5d = VSUB(T1H, T1I);
Chris@42 1914 T5e = LD(&(ii[WS(is, 55)]), ivs, &(ii[WS(is, 1)]));
Chris@42 1915 T5f = LD(&(ii[WS(is, 23)]), ivs, &(ii[WS(is, 1)]));
Chris@42 1916 T5g = VSUB(T5e, T5f);
Chris@42 1917 T1J = VADD(T1H, T1I);
Chris@42 1918 Tb9 = VADD(T5e, T5f);
Chris@42 1919 T5h = VADD(T5d, T5g);
Chris@42 1920 T5N = VSUB(T5d, T5g);
Chris@42 1921 }
Chris@42 1922 {
Chris@42 1923 V T1E, T1F, T5b, T58, T59, T5a;
Chris@42 1924 T1E = LD(&(ri[WS(is, 7)]), ivs, &(ri[WS(is, 1)]));
Chris@42 1925 T1F = LD(&(ri[WS(is, 39)]), ivs, &(ri[WS(is, 1)]));
Chris@42 1926 T5b = VSUB(T1E, T1F);
Chris@42 1927 T58 = LD(&(ii[WS(is, 7)]), ivs, &(ii[WS(is, 1)]));
Chris@42 1928 T59 = LD(&(ii[WS(is, 39)]), ivs, &(ii[WS(is, 1)]));
Chris@42 1929 T5a = VSUB(T58, T59);
Chris@42 1930 T1G = VADD(T1E, T1F);
Chris@42 1931 Tb8 = VADD(T58, T59);
Chris@42 1932 T5c = VSUB(T5a, T5b);
Chris@42 1933 T5O = VADD(T5b, T5a);
Chris@42 1934 }
Chris@42 1935 }
Chris@42 1936 {
Chris@42 1937 V T1D, T1K, Tbq, Tbr;
Chris@42 1938 T1D = VADD(T1z, T1C);
Chris@42 1939 T1K = VADD(T1G, T1J);
Chris@42 1940 T1L = VADD(T1D, T1K);
Chris@42 1941 Tdv = VSUB(T1D, T1K);
Chris@42 1942 Tbq = VSUB(Tbo, Tbp);
Chris@42 1943 Tbr = VSUB(T1J, T1G);
Chris@42 1944 Tbs = VSUB(Tbq, Tbr);
Chris@42 1945 Tcw = VADD(Tbr, Tbq);
Chris@42 1946 }
Chris@42 1947 {
Chris@42 1948 V TdA, TdB, T57, T5i;
Chris@42 1949 TdA = VADD(Tbo, Tbp);
Chris@42 1950 TdB = VADD(Tb8, Tb9);
Chris@42 1951 TdC = VSUB(TdA, TdB);
Chris@42 1952 Teo = VADD(TdA, TdB);
Chris@42 1953 T57 = VSUB(T53, T56);
Chris@42 1954 T5i = VMUL(LDK(KP707106781), VSUB(T5c, T5h));
Chris@42 1955 T5j = VSUB(T57, T5i);
Chris@42 1956 T6V = VADD(T57, T5i);
Chris@42 1957 }
Chris@42 1958 {
Chris@42 1959 V T5M, T5P, T8w, T8x;
Chris@42 1960 T5M = VADD(T5I, T5L);
Chris@42 1961 T5P = VMUL(LDK(KP707106781), VSUB(T5N, T5O));
Chris@42 1962 T5Q = VSUB(T5M, T5P);
Chris@42 1963 T6Y = VADD(T5M, T5P);
Chris@42 1964 T8w = VSUB(T5L, T5I);
Chris@42 1965 T8x = VMUL(LDK(KP707106781), VADD(T5c, T5h));
Chris@42 1966 T8y = VSUB(T8w, T8x);
Chris@42 1967 T9C = VADD(T8w, T8x);
Chris@42 1968 }
Chris@42 1969 {
Chris@42 1970 V Tb7, Tba, T8l, T8m;
Chris@42 1971 Tb7 = VSUB(T1z, T1C);
Chris@42 1972 Tba = VSUB(Tb8, Tb9);
Chris@42 1973 Tbb = VSUB(Tb7, Tba);
Chris@42 1974 Tct = VADD(Tb7, Tba);
Chris@42 1975 T8l = VADD(T53, T56);
Chris@42 1976 T8m = VMUL(LDK(KP707106781), VADD(T5O, T5N));
Chris@42 1977 T8n = VSUB(T8l, T8m);
Chris@42 1978 T9z = VADD(T8l, T8m);
Chris@42 1979 }
Chris@42 1980 }
Chris@42 1981 {
Chris@42 1982 V TN, T40, T2Q, T3Q, TQ, T3P, T2T, T41, TX, T44, T30, T3U, TU, T43, T2X;
Chris@42 1983 V T3X;
Chris@42 1984 {
Chris@42 1985 V TL, TM, T2R, T2S;
Chris@42 1986 TL = LD(&(ri[WS(is, 62)]), ivs, &(ri[0]));
Chris@42 1987 TM = LD(&(ri[WS(is, 30)]), ivs, &(ri[0]));
Chris@42 1988 TN = VADD(TL, TM);
Chris@42 1989 T40 = VSUB(TL, TM);
Chris@42 1990 {
Chris@42 1991 V T2O, T2P, TO, TP;
Chris@42 1992 T2O = LD(&(ii[WS(is, 62)]), ivs, &(ii[0]));
Chris@42 1993 T2P = LD(&(ii[WS(is, 30)]), ivs, &(ii[0]));
Chris@42 1994 T2Q = VADD(T2O, T2P);
Chris@42 1995 T3Q = VSUB(T2O, T2P);
Chris@42 1996 TO = LD(&(ri[WS(is, 14)]), ivs, &(ri[0]));
Chris@42 1997 TP = LD(&(ri[WS(is, 46)]), ivs, &(ri[0]));
Chris@42 1998 TQ = VADD(TO, TP);
Chris@42 1999 T3P = VSUB(TO, TP);
Chris@42 2000 }
Chris@42 2001 T2R = LD(&(ii[WS(is, 14)]), ivs, &(ii[0]));
Chris@42 2002 T2S = LD(&(ii[WS(is, 46)]), ivs, &(ii[0]));
Chris@42 2003 T2T = VADD(T2R, T2S);
Chris@42 2004 T41 = VSUB(T2R, T2S);
Chris@42 2005 {
Chris@42 2006 V TV, TW, T3S, T2Y, T2Z, T3T;
Chris@42 2007 TV = LD(&(ri[WS(is, 54)]), ivs, &(ri[0]));
Chris@42 2008 TW = LD(&(ri[WS(is, 22)]), ivs, &(ri[0]));
Chris@42 2009 T3S = VSUB(TV, TW);
Chris@42 2010 T2Y = LD(&(ii[WS(is, 54)]), ivs, &(ii[0]));
Chris@42 2011 T2Z = LD(&(ii[WS(is, 22)]), ivs, &(ii[0]));
Chris@42 2012 T3T = VSUB(T2Y, T2Z);
Chris@42 2013 TX = VADD(TV, TW);
Chris@42 2014 T44 = VADD(T3S, T3T);
Chris@42 2015 T30 = VADD(T2Y, T2Z);
Chris@42 2016 T3U = VSUB(T3S, T3T);
Chris@42 2017 }
Chris@42 2018 {
Chris@42 2019 V TS, TT, T3V, T2V, T2W, T3W;
Chris@42 2020 TS = LD(&(ri[WS(is, 6)]), ivs, &(ri[0]));
Chris@42 2021 TT = LD(&(ri[WS(is, 38)]), ivs, &(ri[0]));
Chris@42 2022 T3V = VSUB(TS, TT);
Chris@42 2023 T2V = LD(&(ii[WS(is, 6)]), ivs, &(ii[0]));
Chris@42 2024 T2W = LD(&(ii[WS(is, 38)]), ivs, &(ii[0]));
Chris@42 2025 T3W = VSUB(T2V, T2W);
Chris@42 2026 TU = VADD(TS, TT);
Chris@42 2027 T43 = VSUB(T3W, T3V);
Chris@42 2028 T2X = VADD(T2V, T2W);
Chris@42 2029 T3X = VADD(T3V, T3W);
Chris@42 2030 }
Chris@42 2031 }
Chris@42 2032 {
Chris@42 2033 V TR, TY, Tax, Tay;
Chris@42 2034 TR = VADD(TN, TQ);
Chris@42 2035 TY = VADD(TU, TX);
Chris@42 2036 TZ = VADD(TR, TY);
Chris@42 2037 Tdf = VSUB(TR, TY);
Chris@42 2038 Tax = VSUB(T2Q, T2T);
Chris@42 2039 Tay = VSUB(TX, TU);
Chris@42 2040 Taz = VSUB(Tax, Tay);
Chris@42 2041 Tch = VADD(Tay, Tax);
Chris@42 2042 }
Chris@42 2043 {
Chris@42 2044 V TaA, TaB, T2U, T31;
Chris@42 2045 TaA = VSUB(TN, TQ);
Chris@42 2046 TaB = VSUB(T2X, T30);
Chris@42 2047 TaC = VSUB(TaA, TaB);
Chris@42 2048 Tci = VADD(TaA, TaB);
Chris@42 2049 T2U = VADD(T2Q, T2T);
Chris@42 2050 T31 = VADD(T2X, T30);
Chris@42 2051 T32 = VADD(T2U, T31);
Chris@42 2052 Tdg = VSUB(T2U, T31);
Chris@42 2053 }
Chris@42 2054 {
Chris@42 2055 V T3R, T3Y, T7T, T7U;
Chris@42 2056 T3R = VADD(T3P, T3Q);
Chris@42 2057 T3Y = VMUL(LDK(KP707106781), VSUB(T3U, T3X));
Chris@42 2058 T3Z = VSUB(T3R, T3Y);
Chris@42 2059 T6J = VADD(T3R, T3Y);
Chris@42 2060 T7T = VADD(T40, T41);
Chris@42 2061 T7U = VMUL(LDK(KP707106781), VADD(T3X, T3U));
Chris@42 2062 T7V = VSUB(T7T, T7U);
Chris@42 2063 T9n = VADD(T7T, T7U);
Chris@42 2064 }
Chris@42 2065 {
Chris@42 2066 V T7W, T7X, T42, T45;
Chris@42 2067 T7W = VSUB(T3Q, T3P);
Chris@42 2068 T7X = VMUL(LDK(KP707106781), VADD(T43, T44));
Chris@42 2069 T7Y = VSUB(T7W, T7X);
Chris@42 2070 T9o = VADD(T7W, T7X);
Chris@42 2071 T42 = VSUB(T40, T41);
Chris@42 2072 T45 = VMUL(LDK(KP707106781), VSUB(T43, T44));
Chris@42 2073 T46 = VSUB(T42, T45);
Chris@42 2074 T6K = VADD(T42, T45);
Chris@42 2075 }
Chris@42 2076 }
Chris@42 2077 {
Chris@42 2078 V T14, T4P, T4d, TaG, T17, T4a, T4S, TaH, T1e, TaZ, T4j, T4V, T1b, TaY, T4o;
Chris@42 2079 V T4U;
Chris@42 2080 {
Chris@42 2081 V T12, T13, T4Q, T4R;
Chris@42 2082 T12 = LD(&(ri[WS(is, 1)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2083 T13 = LD(&(ri[WS(is, 33)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2084 T14 = VADD(T12, T13);
Chris@42 2085 T4P = VSUB(T12, T13);
Chris@42 2086 {
Chris@42 2087 V T4b, T4c, T15, T16;
Chris@42 2088 T4b = LD(&(ii[WS(is, 1)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2089 T4c = LD(&(ii[WS(is, 33)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2090 T4d = VSUB(T4b, T4c);
Chris@42 2091 TaG = VADD(T4b, T4c);
Chris@42 2092 T15 = LD(&(ri[WS(is, 17)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2093 T16 = LD(&(ri[WS(is, 49)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2094 T17 = VADD(T15, T16);
Chris@42 2095 T4a = VSUB(T15, T16);
Chris@42 2096 }
Chris@42 2097 T4Q = LD(&(ii[WS(is, 17)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2098 T4R = LD(&(ii[WS(is, 49)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2099 T4S = VSUB(T4Q, T4R);
Chris@42 2100 TaH = VADD(T4Q, T4R);
Chris@42 2101 {
Chris@42 2102 V T1c, T1d, T4f, T4g, T4h, T4i;
Chris@42 2103 T1c = LD(&(ri[WS(is, 57)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2104 T1d = LD(&(ri[WS(is, 25)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2105 T4f = VSUB(T1c, T1d);
Chris@42 2106 T4g = LD(&(ii[WS(is, 57)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2107 T4h = LD(&(ii[WS(is, 25)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2108 T4i = VSUB(T4g, T4h);
Chris@42 2109 T1e = VADD(T1c, T1d);
Chris@42 2110 TaZ = VADD(T4g, T4h);
Chris@42 2111 T4j = VSUB(T4f, T4i);
Chris@42 2112 T4V = VADD(T4f, T4i);
Chris@42 2113 }
Chris@42 2114 {
Chris@42 2115 V T19, T1a, T4k, T4l, T4m, T4n;
Chris@42 2116 T19 = LD(&(ri[WS(is, 9)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2117 T1a = LD(&(ri[WS(is, 41)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2118 T4k = VSUB(T19, T1a);
Chris@42 2119 T4l = LD(&(ii[WS(is, 9)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2120 T4m = LD(&(ii[WS(is, 41)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2121 T4n = VSUB(T4l, T4m);
Chris@42 2122 T1b = VADD(T19, T1a);
Chris@42 2123 TaY = VADD(T4l, T4m);
Chris@42 2124 T4o = VADD(T4k, T4n);
Chris@42 2125 T4U = VSUB(T4n, T4k);
Chris@42 2126 }
Chris@42 2127 }
Chris@42 2128 {
Chris@42 2129 V T18, T1f, TaX, Tb0;
Chris@42 2130 T18 = VADD(T14, T17);
Chris@42 2131 T1f = VADD(T1b, T1e);
Chris@42 2132 T1g = VADD(T18, T1f);
Chris@42 2133 Tdp = VSUB(T18, T1f);
Chris@42 2134 TaX = VSUB(T14, T17);
Chris@42 2135 Tb0 = VSUB(TaY, TaZ);
Chris@42 2136 Tb1 = VSUB(TaX, Tb0);
Chris@42 2137 Tcm = VADD(TaX, Tb0);
Chris@42 2138 }
Chris@42 2139 {
Chris@42 2140 V Tdk, Tdl, T4e, T4p;
Chris@42 2141 Tdk = VADD(TaG, TaH);
Chris@42 2142 Tdl = VADD(TaY, TaZ);
Chris@42 2143 Tdm = VSUB(Tdk, Tdl);
Chris@42 2144 Tej = VADD(Tdk, Tdl);
Chris@42 2145 T4e = VADD(T4a, T4d);
Chris@42 2146 T4p = VMUL(LDK(KP707106781), VSUB(T4j, T4o));
Chris@42 2147 T4q = VSUB(T4e, T4p);
Chris@42 2148 T6R = VADD(T4e, T4p);
Chris@42 2149 }
Chris@42 2150 {
Chris@42 2151 V T4T, T4W, T8d, T8e;
Chris@42 2152 T4T = VSUB(T4P, T4S);
Chris@42 2153 T4W = VMUL(LDK(KP707106781), VSUB(T4U, T4V));
Chris@42 2154 T4X = VSUB(T4T, T4W);
Chris@42 2155 T6O = VADD(T4T, T4W);
Chris@42 2156 T8d = VADD(T4P, T4S);
Chris@42 2157 T8e = VMUL(LDK(KP707106781), VADD(T4o, T4j));
Chris@42 2158 T8f = VSUB(T8d, T8e);
Chris@42 2159 T9s = VADD(T8d, T8e);
Chris@42 2160 }
Chris@42 2161 {
Chris@42 2162 V TaI, TaJ, T82, T83;
Chris@42 2163 TaI = VSUB(TaG, TaH);
Chris@42 2164 TaJ = VSUB(T1e, T1b);
Chris@42 2165 TaK = VSUB(TaI, TaJ);
Chris@42 2166 Tcp = VADD(TaJ, TaI);
Chris@42 2167 T82 = VSUB(T4d, T4a);
Chris@42 2168 T83 = VMUL(LDK(KP707106781), VADD(T4U, T4V));
Chris@42 2169 T84 = VSUB(T82, T83);
Chris@42 2170 T9v = VADD(T82, T83);
Chris@42 2171 }
Chris@42 2172 }
Chris@42 2173 {
Chris@42 2174 V T1j, TaR, T1m, TaS, T4G, T4L, TaT, TaQ, T89, T88, T1q, TaM, T1t, TaN, T4v;
Chris@42 2175 V T4A, TaO, TaL, T86, T85;
Chris@42 2176 {
Chris@42 2177 V T4H, T4F, T4C, T4K;
Chris@42 2178 {
Chris@42 2179 V T1h, T1i, T4D, T4E;
Chris@42 2180 T1h = LD(&(ri[WS(is, 5)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2181 T1i = LD(&(ri[WS(is, 37)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2182 T1j = VADD(T1h, T1i);
Chris@42 2183 T4H = VSUB(T1h, T1i);
Chris@42 2184 T4D = LD(&(ii[WS(is, 5)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2185 T4E = LD(&(ii[WS(is, 37)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2186 T4F = VSUB(T4D, T4E);
Chris@42 2187 TaR = VADD(T4D, T4E);
Chris@42 2188 }
Chris@42 2189 {
Chris@42 2190 V T1k, T1l, T4I, T4J;
Chris@42 2191 T1k = LD(&(ri[WS(is, 21)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2192 T1l = LD(&(ri[WS(is, 53)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2193 T1m = VADD(T1k, T1l);
Chris@42 2194 T4C = VSUB(T1k, T1l);
Chris@42 2195 T4I = LD(&(ii[WS(is, 21)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2196 T4J = LD(&(ii[WS(is, 53)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2197 T4K = VSUB(T4I, T4J);
Chris@42 2198 TaS = VADD(T4I, T4J);
Chris@42 2199 }
Chris@42 2200 T4G = VADD(T4C, T4F);
Chris@42 2201 T4L = VSUB(T4H, T4K);
Chris@42 2202 TaT = VSUB(TaR, TaS);
Chris@42 2203 TaQ = VSUB(T1j, T1m);
Chris@42 2204 T89 = VADD(T4H, T4K);
Chris@42 2205 T88 = VSUB(T4F, T4C);
Chris@42 2206 }
Chris@42 2207 {
Chris@42 2208 V T4r, T4z, T4w, T4u;
Chris@42 2209 {
Chris@42 2210 V T1o, T1p, T4x, T4y;
Chris@42 2211 T1o = LD(&(ri[WS(is, 61)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2212 T1p = LD(&(ri[WS(is, 29)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2213 T1q = VADD(T1o, T1p);
Chris@42 2214 T4r = VSUB(T1o, T1p);
Chris@42 2215 T4x = LD(&(ii[WS(is, 61)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2216 T4y = LD(&(ii[WS(is, 29)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2217 T4z = VSUB(T4x, T4y);
Chris@42 2218 TaM = VADD(T4x, T4y);
Chris@42 2219 }
Chris@42 2220 {
Chris@42 2221 V T1r, T1s, T4s, T4t;
Chris@42 2222 T1r = LD(&(ri[WS(is, 13)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2223 T1s = LD(&(ri[WS(is, 45)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2224 T1t = VADD(T1r, T1s);
Chris@42 2225 T4w = VSUB(T1r, T1s);
Chris@42 2226 T4s = LD(&(ii[WS(is, 13)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2227 T4t = LD(&(ii[WS(is, 45)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2228 T4u = VSUB(T4s, T4t);
Chris@42 2229 TaN = VADD(T4s, T4t);
Chris@42 2230 }
Chris@42 2231 T4v = VSUB(T4r, T4u);
Chris@42 2232 T4A = VADD(T4w, T4z);
Chris@42 2233 TaO = VSUB(TaM, TaN);
Chris@42 2234 TaL = VSUB(T1q, T1t);
Chris@42 2235 T86 = VSUB(T4z, T4w);
Chris@42 2236 T85 = VADD(T4r, T4u);
Chris@42 2237 }
Chris@42 2238 {
Chris@42 2239 V T1n, T1u, Tb2, Tb3;
Chris@42 2240 T1n = VADD(T1j, T1m);
Chris@42 2241 T1u = VADD(T1q, T1t);
Chris@42 2242 T1v = VADD(T1n, T1u);
Chris@42 2243 Tdn = VSUB(T1u, T1n);
Chris@42 2244 Tb2 = VSUB(TaT, TaQ);
Chris@42 2245 Tb3 = VADD(TaL, TaO);
Chris@42 2246 Tb4 = VMUL(LDK(KP707106781), VSUB(Tb2, Tb3));
Chris@42 2247 Tcq = VMUL(LDK(KP707106781), VADD(Tb2, Tb3));
Chris@42 2248 }
Chris@42 2249 {
Chris@42 2250 V Tdq, Tdr, T4B, T4M;
Chris@42 2251 Tdq = VADD(TaR, TaS);
Chris@42 2252 Tdr = VADD(TaM, TaN);
Chris@42 2253 Tds = VSUB(Tdq, Tdr);
Chris@42 2254 Tek = VADD(Tdq, Tdr);
Chris@42 2255 T4B = VFNMS(LDK(KP923879532), T4A, VMUL(LDK(KP382683432), T4v));
Chris@42 2256 T4M = VFMA(LDK(KP923879532), T4G, VMUL(LDK(KP382683432), T4L));
Chris@42 2257 T4N = VSUB(T4B, T4M);
Chris@42 2258 T6P = VADD(T4M, T4B);
Chris@42 2259 }
Chris@42 2260 {
Chris@42 2261 V T4Y, T4Z, T8g, T8h;
Chris@42 2262 T4Y = VFNMS(LDK(KP923879532), T4L, VMUL(LDK(KP382683432), T4G));
Chris@42 2263 T4Z = VFMA(LDK(KP382683432), T4A, VMUL(LDK(KP923879532), T4v));
Chris@42 2264 T50 = VSUB(T4Y, T4Z);
Chris@42 2265 T6S = VADD(T4Y, T4Z);
Chris@42 2266 T8g = VFNMS(LDK(KP382683432), T89, VMUL(LDK(KP923879532), T88));
Chris@42 2267 T8h = VFMA(LDK(KP923879532), T86, VMUL(LDK(KP382683432), T85));
Chris@42 2268 T8i = VSUB(T8g, T8h);
Chris@42 2269 T9w = VADD(T8g, T8h);
Chris@42 2270 }
Chris@42 2271 {
Chris@42 2272 V TaP, TaU, T87, T8a;
Chris@42 2273 TaP = VSUB(TaL, TaO);
Chris@42 2274 TaU = VADD(TaQ, TaT);
Chris@42 2275 TaV = VMUL(LDK(KP707106781), VSUB(TaP, TaU));
Chris@42 2276 Tcn = VMUL(LDK(KP707106781), VADD(TaU, TaP));
Chris@42 2277 T87 = VFNMS(LDK(KP382683432), T86, VMUL(LDK(KP923879532), T85));
Chris@42 2278 T8a = VFMA(LDK(KP382683432), T88, VMUL(LDK(KP923879532), T89));
Chris@42 2279 T8b = VSUB(T87, T8a);
Chris@42 2280 T9t = VADD(T8a, T87);
Chris@42 2281 }
Chris@42 2282 }
Chris@42 2283 {
Chris@42 2284 V T1O, Tbc, T1R, Tbd, T5o, T5t, Tbf, Tbe, T8p, T8o, T1V, Tbi, T1Y, Tbj, T5z;
Chris@42 2285 V T5E, Tbk, Tbh, T8s, T8r;
Chris@42 2286 {
Chris@42 2287 V T5p, T5n, T5k, T5s;
Chris@42 2288 {
Chris@42 2289 V T1M, T1N, T5l, T5m;
Chris@42 2290 T1M = LD(&(ri[WS(is, 3)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2291 T1N = LD(&(ri[WS(is, 35)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2292 T1O = VADD(T1M, T1N);
Chris@42 2293 T5p = VSUB(T1M, T1N);
Chris@42 2294 T5l = LD(&(ii[WS(is, 3)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2295 T5m = LD(&(ii[WS(is, 35)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2296 T5n = VSUB(T5l, T5m);
Chris@42 2297 Tbc = VADD(T5l, T5m);
Chris@42 2298 }
Chris@42 2299 {
Chris@42 2300 V T1P, T1Q, T5q, T5r;
Chris@42 2301 T1P = LD(&(ri[WS(is, 19)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2302 T1Q = LD(&(ri[WS(is, 51)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2303 T1R = VADD(T1P, T1Q);
Chris@42 2304 T5k = VSUB(T1P, T1Q);
Chris@42 2305 T5q = LD(&(ii[WS(is, 19)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2306 T5r = LD(&(ii[WS(is, 51)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2307 T5s = VSUB(T5q, T5r);
Chris@42 2308 Tbd = VADD(T5q, T5r);
Chris@42 2309 }
Chris@42 2310 T5o = VADD(T5k, T5n);
Chris@42 2311 T5t = VSUB(T5p, T5s);
Chris@42 2312 Tbf = VSUB(T1O, T1R);
Chris@42 2313 Tbe = VSUB(Tbc, Tbd);
Chris@42 2314 T8p = VADD(T5p, T5s);
Chris@42 2315 T8o = VSUB(T5n, T5k);
Chris@42 2316 }
Chris@42 2317 {
Chris@42 2318 V T5A, T5y, T5v, T5D;
Chris@42 2319 {
Chris@42 2320 V T1T, T1U, T5w, T5x;
Chris@42 2321 T1T = LD(&(ri[WS(is, 59)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2322 T1U = LD(&(ri[WS(is, 27)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2323 T1V = VADD(T1T, T1U);
Chris@42 2324 T5A = VSUB(T1T, T1U);
Chris@42 2325 T5w = LD(&(ii[WS(is, 59)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2326 T5x = LD(&(ii[WS(is, 27)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2327 T5y = VSUB(T5w, T5x);
Chris@42 2328 Tbi = VADD(T5w, T5x);
Chris@42 2329 }
Chris@42 2330 {
Chris@42 2331 V T1W, T1X, T5B, T5C;
Chris@42 2332 T1W = LD(&(ri[WS(is, 11)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2333 T1X = LD(&(ri[WS(is, 43)]), ivs, &(ri[WS(is, 1)]));
Chris@42 2334 T1Y = VADD(T1W, T1X);
Chris@42 2335 T5v = VSUB(T1W, T1X);
Chris@42 2336 T5B = LD(&(ii[WS(is, 11)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2337 T5C = LD(&(ii[WS(is, 43)]), ivs, &(ii[WS(is, 1)]));
Chris@42 2338 T5D = VSUB(T5B, T5C);
Chris@42 2339 Tbj = VADD(T5B, T5C);
Chris@42 2340 }
Chris@42 2341 T5z = VADD(T5v, T5y);
Chris@42 2342 T5E = VSUB(T5A, T5D);
Chris@42 2343 Tbk = VSUB(Tbi, Tbj);
Chris@42 2344 Tbh = VSUB(T1V, T1Y);
Chris@42 2345 T8s = VADD(T5A, T5D);
Chris@42 2346 T8r = VSUB(T5y, T5v);
Chris@42 2347 }
Chris@42 2348 {
Chris@42 2349 V T1S, T1Z, Tbt, Tbu;
Chris@42 2350 T1S = VADD(T1O, T1R);
Chris@42 2351 T1Z = VADD(T1V, T1Y);
Chris@42 2352 T20 = VADD(T1S, T1Z);
Chris@42 2353 TdD = VSUB(T1Z, T1S);
Chris@42 2354 Tbt = VSUB(Tbh, Tbk);
Chris@42 2355 Tbu = VADD(Tbf, Tbe);
Chris@42 2356 Tbv = VMUL(LDK(KP707106781), VSUB(Tbt, Tbu));
Chris@42 2357 Tcu = VMUL(LDK(KP707106781), VADD(Tbu, Tbt));
Chris@42 2358 }
Chris@42 2359 {
Chris@42 2360 V Tdw, Tdx, T5u, T5F;
Chris@42 2361 Tdw = VADD(Tbc, Tbd);
Chris@42 2362 Tdx = VADD(Tbi, Tbj);
Chris@42 2363 Tdy = VSUB(Tdw, Tdx);
Chris@42 2364 Tep = VADD(Tdw, Tdx);
Chris@42 2365 T5u = VFNMS(LDK(KP923879532), T5t, VMUL(LDK(KP382683432), T5o));
Chris@42 2366 T5F = VFMA(LDK(KP382683432), T5z, VMUL(LDK(KP923879532), T5E));
Chris@42 2367 T5G = VSUB(T5u, T5F);
Chris@42 2368 T6Z = VADD(T5u, T5F);
Chris@42 2369 }
Chris@42 2370 {
Chris@42 2371 V T5R, T5S, T8z, T8A;
Chris@42 2372 T5R = VFNMS(LDK(KP923879532), T5z, VMUL(LDK(KP382683432), T5E));
Chris@42 2373 T5S = VFMA(LDK(KP923879532), T5o, VMUL(LDK(KP382683432), T5t));
Chris@42 2374 T5T = VSUB(T5R, T5S);
Chris@42 2375 T6W = VADD(T5S, T5R);
Chris@42 2376 T8z = VFNMS(LDK(KP382683432), T8r, VMUL(LDK(KP923879532), T8s));
Chris@42 2377 T8A = VFMA(LDK(KP382683432), T8o, VMUL(LDK(KP923879532), T8p));
Chris@42 2378 T8B = VSUB(T8z, T8A);
Chris@42 2379 T9A = VADD(T8A, T8z);
Chris@42 2380 }
Chris@42 2381 {
Chris@42 2382 V Tbg, Tbl, T8q, T8t;
Chris@42 2383 Tbg = VSUB(Tbe, Tbf);
Chris@42 2384 Tbl = VADD(Tbh, Tbk);
Chris@42 2385 Tbm = VMUL(LDK(KP707106781), VSUB(Tbg, Tbl));
Chris@42 2386 Tcx = VMUL(LDK(KP707106781), VADD(Tbg, Tbl));
Chris@42 2387 T8q = VFNMS(LDK(KP382683432), T8p, VMUL(LDK(KP923879532), T8o));
Chris@42 2388 T8t = VFMA(LDK(KP923879532), T8r, VMUL(LDK(KP382683432), T8s));
Chris@42 2389 T8u = VSUB(T8q, T8t);
Chris@42 2390 T9D = VADD(T8q, T8t);
Chris@42 2391 }
Chris@42 2392 }
Chris@42 2393 {
Chris@42 2394 V TeJ, TeK, TeL, TeM, TeN, TeO, TeP, TeQ, TeR, TeS, TeT, TeU, TeV, TeW, TeX;
Chris@42 2395 V TeY, TeZ, Tf0, Tf1, Tf2, Tf3, Tf4, Tf5, Tf6, Tf7, Tf8, Tf9, Tfa, Tfb, Tfc;
Chris@42 2396 V Tfd, Tfe, Tff, Tfg, Tfh, Tfi, Tfj, Tfk, Tfl, Tfm, Tfn, Tfo, Tfp, Tfq, Tfr;
Chris@42 2397 V Tfs, Tft, Tfu;
Chris@42 2398 {
Chris@42 2399 V T11, TeD, TeG, TeI, T22, T23, T34, TeH;
Chris@42 2400 {
Chris@42 2401 V Tv, T10, TeE, TeF;
Chris@42 2402 Tv = VADD(Tf, Tu);
Chris@42 2403 T10 = VADD(TK, TZ);
Chris@42 2404 T11 = VADD(Tv, T10);
Chris@42 2405 TeD = VSUB(Tv, T10);
Chris@42 2406 TeE = VADD(Tej, Tek);
Chris@42 2407 TeF = VADD(Teo, Tep);
Chris@42 2408 TeG = VSUB(TeE, TeF);
Chris@42 2409 TeI = VADD(TeE, TeF);
Chris@42 2410 }
Chris@42 2411 {
Chris@42 2412 V T1w, T21, T2y, T33;
Chris@42 2413 T1w = VADD(T1g, T1v);
Chris@42 2414 T21 = VADD(T1L, T20);
Chris@42 2415 T22 = VADD(T1w, T21);
Chris@42 2416 T23 = VSUB(T21, T1w);
Chris@42 2417 T2y = VADD(T2i, T2x);
Chris@42 2418 T33 = VADD(T2N, T32);
Chris@42 2419 T34 = VSUB(T2y, T33);
Chris@42 2420 TeH = VADD(T2y, T33);
Chris@42 2421 }
Chris@42 2422 TeJ = VSUB(T11, T22);
Chris@42 2423 STM4(&(ro[32]), TeJ, ovs, &(ro[0]));
Chris@42 2424 TeK = VSUB(TeH, TeI);
Chris@42 2425 STM4(&(io[32]), TeK, ovs, &(io[0]));
Chris@42 2426 TeL = VADD(T11, T22);
Chris@42 2427 STM4(&(ro[0]), TeL, ovs, &(ro[0]));
Chris@42 2428 TeM = VADD(TeH, TeI);
Chris@42 2429 STM4(&(io[0]), TeM, ovs, &(io[0]));
Chris@42 2430 TeN = VADD(T23, T34);
Chris@42 2431 STM4(&(io[16]), TeN, ovs, &(io[0]));
Chris@42 2432 TeO = VADD(TeD, TeG);
Chris@42 2433 STM4(&(ro[16]), TeO, ovs, &(ro[0]));
Chris@42 2434 TeP = VSUB(T34, T23);
Chris@42 2435 STM4(&(io[48]), TeP, ovs, &(io[0]));
Chris@42 2436 TeQ = VSUB(TeD, TeG);
Chris@42 2437 STM4(&(ro[48]), TeQ, ovs, &(ro[0]));
Chris@42 2438 }
Chris@42 2439 {
Chris@42 2440 V Teh, Tex, Tev, TeB, Tem, Tey, Ter, Tez;
Chris@42 2441 {
Chris@42 2442 V Tef, Teg, Tet, Teu;
Chris@42 2443 Tef = VSUB(Tf, Tu);
Chris@42 2444 Teg = VSUB(T2N, T32);
Chris@42 2445 Teh = VADD(Tef, Teg);
Chris@42 2446 Tex = VSUB(Tef, Teg);
Chris@42 2447 Tet = VSUB(T2i, T2x);
Chris@42 2448 Teu = VSUB(TZ, TK);
Chris@42 2449 Tev = VSUB(Tet, Teu);
Chris@42 2450 TeB = VADD(Teu, Tet);
Chris@42 2451 }
Chris@42 2452 {
Chris@42 2453 V Tei, Tel, Ten, Teq;
Chris@42 2454 Tei = VSUB(T1g, T1v);
Chris@42 2455 Tel = VSUB(Tej, Tek);
Chris@42 2456 Tem = VADD(Tei, Tel);
Chris@42 2457 Tey = VSUB(Tel, Tei);
Chris@42 2458 Ten = VSUB(T1L, T20);
Chris@42 2459 Teq = VSUB(Teo, Tep);
Chris@42 2460 Ter = VSUB(Ten, Teq);
Chris@42 2461 Tez = VADD(Ten, Teq);
Chris@42 2462 }
Chris@42 2463 {
Chris@42 2464 V Tes, TeC, Tew, TeA;
Chris@42 2465 Tes = VMUL(LDK(KP707106781), VADD(Tem, Ter));
Chris@42 2466 TeR = VSUB(Teh, Tes);
Chris@42 2467 STM4(&(ro[40]), TeR, ovs, &(ro[0]));
Chris@42 2468 TeS = VADD(Teh, Tes);
Chris@42 2469 STM4(&(ro[8]), TeS, ovs, &(ro[0]));
Chris@42 2470 TeC = VMUL(LDK(KP707106781), VADD(Tey, Tez));
Chris@42 2471 TeT = VSUB(TeB, TeC);
Chris@42 2472 STM4(&(io[40]), TeT, ovs, &(io[0]));
Chris@42 2473 TeU = VADD(TeB, TeC);
Chris@42 2474 STM4(&(io[8]), TeU, ovs, &(io[0]));
Chris@42 2475 Tew = VMUL(LDK(KP707106781), VSUB(Ter, Tem));
Chris@42 2476 TeV = VSUB(Tev, Tew);
Chris@42 2477 STM4(&(io[56]), TeV, ovs, &(io[0]));
Chris@42 2478 TeW = VADD(Tev, Tew);
Chris@42 2479 STM4(&(io[24]), TeW, ovs, &(io[0]));
Chris@42 2480 TeA = VMUL(LDK(KP707106781), VSUB(Tey, Tez));
Chris@42 2481 TeX = VSUB(Tex, TeA);
Chris@42 2482 STM4(&(ro[56]), TeX, ovs, &(ro[0]));
Chris@42 2483 TeY = VADD(Tex, TeA);
Chris@42 2484 STM4(&(ro[24]), TeY, ovs, &(ro[0]));
Chris@42 2485 }
Chris@42 2486 }
Chris@42 2487 {
Chris@42 2488 V Tdb, TdV, Te5, TdJ, Tdi, Te6, Te3, Teb, TdM, TdW, Tdu, TdQ, Te0, Tea, TdF;
Chris@42 2489 V TdR;
Chris@42 2490 {
Chris@42 2491 V Tde, Tdh, Tdo, Tdt;
Chris@42 2492 Tdb = VSUB(Td9, Tda);
Chris@42 2493 TdV = VADD(Td9, Tda);
Chris@42 2494 Te5 = VADD(TdI, TdH);
Chris@42 2495 TdJ = VSUB(TdH, TdI);
Chris@42 2496 Tde = VSUB(Tdc, Tdd);
Chris@42 2497 Tdh = VADD(Tdf, Tdg);
Chris@42 2498 Tdi = VMUL(LDK(KP707106781), VSUB(Tde, Tdh));
Chris@42 2499 Te6 = VMUL(LDK(KP707106781), VADD(Tde, Tdh));
Chris@42 2500 {
Chris@42 2501 V Te1, Te2, TdK, TdL;
Chris@42 2502 Te1 = VADD(Tdv, Tdy);
Chris@42 2503 Te2 = VADD(TdD, TdC);
Chris@42 2504 Te3 = VFNMS(LDK(KP382683432), Te2, VMUL(LDK(KP923879532), Te1));
Chris@42 2505 Teb = VFMA(LDK(KP923879532), Te2, VMUL(LDK(KP382683432), Te1));
Chris@42 2506 TdK = VSUB(Tdf, Tdg);
Chris@42 2507 TdL = VADD(Tdd, Tdc);
Chris@42 2508 TdM = VMUL(LDK(KP707106781), VSUB(TdK, TdL));
Chris@42 2509 TdW = VMUL(LDK(KP707106781), VADD(TdL, TdK));
Chris@42 2510 }
Chris@42 2511 Tdo = VSUB(Tdm, Tdn);
Chris@42 2512 Tdt = VSUB(Tdp, Tds);
Chris@42 2513 Tdu = VFMA(LDK(KP923879532), Tdo, VMUL(LDK(KP382683432), Tdt));
Chris@42 2514 TdQ = VFNMS(LDK(KP923879532), Tdt, VMUL(LDK(KP382683432), Tdo));
Chris@42 2515 {
Chris@42 2516 V TdY, TdZ, Tdz, TdE;
Chris@42 2517 TdY = VADD(Tdn, Tdm);
Chris@42 2518 TdZ = VADD(Tdp, Tds);
Chris@42 2519 Te0 = VFMA(LDK(KP382683432), TdY, VMUL(LDK(KP923879532), TdZ));
Chris@42 2520 Tea = VFNMS(LDK(KP382683432), TdZ, VMUL(LDK(KP923879532), TdY));
Chris@42 2521 Tdz = VSUB(Tdv, Tdy);
Chris@42 2522 TdE = VSUB(TdC, TdD);
Chris@42 2523 TdF = VFNMS(LDK(KP923879532), TdE, VMUL(LDK(KP382683432), Tdz));
Chris@42 2524 TdR = VFMA(LDK(KP382683432), TdE, VMUL(LDK(KP923879532), Tdz));
Chris@42 2525 }
Chris@42 2526 }
Chris@42 2527 {
Chris@42 2528 V Tdj, TdG, TdT, TdU;
Chris@42 2529 Tdj = VADD(Tdb, Tdi);
Chris@42 2530 TdG = VADD(Tdu, TdF);
Chris@42 2531 TeZ = VSUB(Tdj, TdG);
Chris@42 2532 STM4(&(ro[44]), TeZ, ovs, &(ro[0]));
Chris@42 2533 Tf0 = VADD(Tdj, TdG);
Chris@42 2534 STM4(&(ro[12]), Tf0, ovs, &(ro[0]));
Chris@42 2535 TdT = VADD(TdJ, TdM);
Chris@42 2536 TdU = VADD(TdQ, TdR);
Chris@42 2537 Tf1 = VSUB(TdT, TdU);
Chris@42 2538 STM4(&(io[44]), Tf1, ovs, &(io[0]));
Chris@42 2539 Tf2 = VADD(TdT, TdU);
Chris@42 2540 STM4(&(io[12]), Tf2, ovs, &(io[0]));
Chris@42 2541 }
Chris@42 2542 {
Chris@42 2543 V TdN, TdO, TdP, TdS;
Chris@42 2544 TdN = VSUB(TdJ, TdM);
Chris@42 2545 TdO = VSUB(TdF, Tdu);
Chris@42 2546 Tf3 = VSUB(TdN, TdO);
Chris@42 2547 STM4(&(io[60]), Tf3, ovs, &(io[0]));
Chris@42 2548 Tf4 = VADD(TdN, TdO);
Chris@42 2549 STM4(&(io[28]), Tf4, ovs, &(io[0]));
Chris@42 2550 TdP = VSUB(Tdb, Tdi);
Chris@42 2551 TdS = VSUB(TdQ, TdR);
Chris@42 2552 Tf5 = VSUB(TdP, TdS);
Chris@42 2553 STM4(&(ro[60]), Tf5, ovs, &(ro[0]));
Chris@42 2554 Tf6 = VADD(TdP, TdS);
Chris@42 2555 STM4(&(ro[28]), Tf6, ovs, &(ro[0]));
Chris@42 2556 }
Chris@42 2557 {
Chris@42 2558 V TdX, Te4, Ted, Tee;
Chris@42 2559 TdX = VADD(TdV, TdW);
Chris@42 2560 Te4 = VADD(Te0, Te3);
Chris@42 2561 Tf7 = VSUB(TdX, Te4);
Chris@42 2562 STM4(&(ro[36]), Tf7, ovs, &(ro[0]));
Chris@42 2563 Tf8 = VADD(TdX, Te4);
Chris@42 2564 STM4(&(ro[4]), Tf8, ovs, &(ro[0]));
Chris@42 2565 Ted = VADD(Te5, Te6);
Chris@42 2566 Tee = VADD(Tea, Teb);
Chris@42 2567 Tf9 = VSUB(Ted, Tee);
Chris@42 2568 STM4(&(io[36]), Tf9, ovs, &(io[0]));
Chris@42 2569 Tfa = VADD(Ted, Tee);
Chris@42 2570 STM4(&(io[4]), Tfa, ovs, &(io[0]));
Chris@42 2571 }
Chris@42 2572 {
Chris@42 2573 V Te7, Te8, Te9, Tec;
Chris@42 2574 Te7 = VSUB(Te5, Te6);
Chris@42 2575 Te8 = VSUB(Te3, Te0);
Chris@42 2576 Tfb = VSUB(Te7, Te8);
Chris@42 2577 STM4(&(io[52]), Tfb, ovs, &(io[0]));
Chris@42 2578 Tfc = VADD(Te7, Te8);
Chris@42 2579 STM4(&(io[20]), Tfc, ovs, &(io[0]));
Chris@42 2580 Te9 = VSUB(TdV, TdW);
Chris@42 2581 Tec = VSUB(Tea, Teb);
Chris@42 2582 Tfd = VSUB(Te9, Tec);
Chris@42 2583 STM4(&(ro[52]), Tfd, ovs, &(ro[0]));
Chris@42 2584 Tfe = VADD(Te9, Tec);
Chris@42 2585 STM4(&(ro[20]), Tfe, ovs, &(ro[0]));
Chris@42 2586 }
Chris@42 2587 }
Chris@42 2588 {
Chris@42 2589 V Tcd, TcP, TcD, TcZ, Tck, Td0, TcX, Td5, Tcs, TcK, TcG, TcQ, TcU, Td4, Tcz;
Chris@42 2590 V TcL, Tcc, TcC;
Chris@42 2591 Tcc = VMUL(LDK(KP707106781), VADD(TbD, TbC));
Chris@42 2592 Tcd = VSUB(Tcb, Tcc);
Chris@42 2593 TcP = VADD(Tcb, Tcc);
Chris@42 2594 TcC = VMUL(LDK(KP707106781), VADD(Tak, Tan));
Chris@42 2595 TcD = VSUB(TcB, TcC);
Chris@42 2596 TcZ = VADD(TcB, TcC);
Chris@42 2597 {
Chris@42 2598 V Tcg, Tcj, TcV, TcW;
Chris@42 2599 Tcg = VFNMS(LDK(KP382683432), Tcf, VMUL(LDK(KP923879532), Tce));
Chris@42 2600 Tcj = VFMA(LDK(KP923879532), Tch, VMUL(LDK(KP382683432), Tci));
Chris@42 2601 Tck = VSUB(Tcg, Tcj);
Chris@42 2602 Td0 = VADD(Tcg, Tcj);
Chris@42 2603 TcV = VADD(Tct, Tcu);
Chris@42 2604 TcW = VADD(Tcw, Tcx);
Chris@42 2605 TcX = VFNMS(LDK(KP195090322), TcW, VMUL(LDK(KP980785280), TcV));
Chris@42 2606 Td5 = VFMA(LDK(KP195090322), TcV, VMUL(LDK(KP980785280), TcW));
Chris@42 2607 }
Chris@42 2608 {
Chris@42 2609 V Tco, Tcr, TcE, TcF;
Chris@42 2610 Tco = VSUB(Tcm, Tcn);
Chris@42 2611 Tcr = VSUB(Tcp, Tcq);
Chris@42 2612 Tcs = VFMA(LDK(KP555570233), Tco, VMUL(LDK(KP831469612), Tcr));
Chris@42 2613 TcK = VFNMS(LDK(KP831469612), Tco, VMUL(LDK(KP555570233), Tcr));
Chris@42 2614 TcE = VFNMS(LDK(KP382683432), Tch, VMUL(LDK(KP923879532), Tci));
Chris@42 2615 TcF = VFMA(LDK(KP382683432), Tce, VMUL(LDK(KP923879532), Tcf));
Chris@42 2616 TcG = VSUB(TcE, TcF);
Chris@42 2617 TcQ = VADD(TcF, TcE);
Chris@42 2618 }
Chris@42 2619 {
Chris@42 2620 V TcS, TcT, Tcv, Tcy;
Chris@42 2621 TcS = VADD(Tcm, Tcn);
Chris@42 2622 TcT = VADD(Tcp, Tcq);
Chris@42 2623 TcU = VFMA(LDK(KP980785280), TcS, VMUL(LDK(KP195090322), TcT));
Chris@42 2624 Td4 = VFNMS(LDK(KP195090322), TcS, VMUL(LDK(KP980785280), TcT));
Chris@42 2625 Tcv = VSUB(Tct, Tcu);
Chris@42 2626 Tcy = VSUB(Tcw, Tcx);
Chris@42 2627 Tcz = VFNMS(LDK(KP831469612), Tcy, VMUL(LDK(KP555570233), Tcv));
Chris@42 2628 TcL = VFMA(LDK(KP831469612), Tcv, VMUL(LDK(KP555570233), Tcy));
Chris@42 2629 }
Chris@42 2630 {
Chris@42 2631 V Tcl, TcA, TcN, TcO;
Chris@42 2632 Tcl = VADD(Tcd, Tck);
Chris@42 2633 TcA = VADD(Tcs, Tcz);
Chris@42 2634 Tff = VSUB(Tcl, TcA);
Chris@42 2635 STM4(&(ro[42]), Tff, ovs, &(ro[0]));
Chris@42 2636 Tfg = VADD(Tcl, TcA);
Chris@42 2637 STM4(&(ro[10]), Tfg, ovs, &(ro[0]));
Chris@42 2638 TcN = VADD(TcD, TcG);
Chris@42 2639 TcO = VADD(TcK, TcL);
Chris@42 2640 Tfh = VSUB(TcN, TcO);
Chris@42 2641 STM4(&(io[42]), Tfh, ovs, &(io[0]));
Chris@42 2642 Tfi = VADD(TcN, TcO);
Chris@42 2643 STM4(&(io[10]), Tfi, ovs, &(io[0]));
Chris@42 2644 }
Chris@42 2645 {
Chris@42 2646 V TcH, TcI, TcJ, TcM;
Chris@42 2647 TcH = VSUB(TcD, TcG);
Chris@42 2648 TcI = VSUB(Tcz, Tcs);
Chris@42 2649 Tfj = VSUB(TcH, TcI);
Chris@42 2650 STM4(&(io[58]), Tfj, ovs, &(io[0]));
Chris@42 2651 Tfk = VADD(TcH, TcI);
Chris@42 2652 STM4(&(io[26]), Tfk, ovs, &(io[0]));
Chris@42 2653 TcJ = VSUB(Tcd, Tck);
Chris@42 2654 TcM = VSUB(TcK, TcL);
Chris@42 2655 Tfl = VSUB(TcJ, TcM);
Chris@42 2656 STM4(&(ro[58]), Tfl, ovs, &(ro[0]));
Chris@42 2657 Tfm = VADD(TcJ, TcM);
Chris@42 2658 STM4(&(ro[26]), Tfm, ovs, &(ro[0]));
Chris@42 2659 }
Chris@42 2660 {
Chris@42 2661 V TcR, TcY, Td7, Td8;
Chris@42 2662 TcR = VADD(TcP, TcQ);
Chris@42 2663 TcY = VADD(TcU, TcX);
Chris@42 2664 Tfn = VSUB(TcR, TcY);
Chris@42 2665 STM4(&(ro[34]), Tfn, ovs, &(ro[0]));
Chris@42 2666 Tfo = VADD(TcR, TcY);
Chris@42 2667 STM4(&(ro[2]), Tfo, ovs, &(ro[0]));
Chris@42 2668 Td7 = VADD(TcZ, Td0);
Chris@42 2669 Td8 = VADD(Td4, Td5);
Chris@42 2670 Tfp = VSUB(Td7, Td8);
Chris@42 2671 STM4(&(io[34]), Tfp, ovs, &(io[0]));
Chris@42 2672 Tfq = VADD(Td7, Td8);
Chris@42 2673 STM4(&(io[2]), Tfq, ovs, &(io[0]));
Chris@42 2674 }
Chris@42 2675 {
Chris@42 2676 V Td1, Td2, Td3, Td6;
Chris@42 2677 Td1 = VSUB(TcZ, Td0);
Chris@42 2678 Td2 = VSUB(TcX, TcU);
Chris@42 2679 Tfr = VSUB(Td1, Td2);
Chris@42 2680 STM4(&(io[50]), Tfr, ovs, &(io[0]));
Chris@42 2681 Tfs = VADD(Td1, Td2);
Chris@42 2682 STM4(&(io[18]), Tfs, ovs, &(io[0]));
Chris@42 2683 Td3 = VSUB(TcP, TcQ);
Chris@42 2684 Td6 = VSUB(Td4, Td5);
Chris@42 2685 Tft = VSUB(Td3, Td6);
Chris@42 2686 STM4(&(ro[50]), Tft, ovs, &(ro[0]));
Chris@42 2687 Tfu = VADD(Td3, Td6);
Chris@42 2688 STM4(&(ro[18]), Tfu, ovs, &(ro[0]));
Chris@42 2689 }
Chris@42 2690 }
Chris@42 2691 {
Chris@42 2692 V Tfv, Tfw, Tfx, Tfy, Tfz, TfA, TfB, TfC, TfD, TfE, TfF, TfG, TfH, TfI, TfJ;
Chris@42 2693 V TfK, TfL, TfM, TfN, TfO, TfP, TfQ, TfR, TfS, TfT, TfU, TfV, TfW, TfX, TfY;
Chris@42 2694 V TfZ, Tg0;
Chris@42 2695 {
Chris@42 2696 V Tap, TbR, TbF, Tc1, TaE, Tc2, TbZ, Tc7, Tb6, TbM, TbI, TbS, TbW, Tc6, Tbx;
Chris@42 2697 V TbN, Tao, TbE;
Chris@42 2698 Tao = VMUL(LDK(KP707106781), VSUB(Tak, Tan));
Chris@42 2699 Tap = VSUB(Tah, Tao);
Chris@42 2700 TbR = VADD(Tah, Tao);
Chris@42 2701 TbE = VMUL(LDK(KP707106781), VSUB(TbC, TbD));
Chris@42 2702 TbF = VSUB(TbB, TbE);
Chris@42 2703 Tc1 = VADD(TbB, TbE);
Chris@42 2704 {
Chris@42 2705 V Taw, TaD, TbX, TbY;
Chris@42 2706 Taw = VFNMS(LDK(KP923879532), Tav, VMUL(LDK(KP382683432), Tas));
Chris@42 2707 TaD = VFMA(LDK(KP382683432), Taz, VMUL(LDK(KP923879532), TaC));
Chris@42 2708 TaE = VSUB(Taw, TaD);
Chris@42 2709 Tc2 = VADD(Taw, TaD);
Chris@42 2710 TbX = VADD(Tbb, Tbm);
Chris@42 2711 TbY = VADD(Tbs, Tbv);
Chris@42 2712 TbZ = VFNMS(LDK(KP555570233), TbY, VMUL(LDK(KP831469612), TbX));
Chris@42 2713 Tc7 = VFMA(LDK(KP831469612), TbY, VMUL(LDK(KP555570233), TbX));
Chris@42 2714 }
Chris@42 2715 {
Chris@42 2716 V TaW, Tb5, TbG, TbH;
Chris@42 2717 TaW = VSUB(TaK, TaV);
Chris@42 2718 Tb5 = VSUB(Tb1, Tb4);
Chris@42 2719 Tb6 = VFMA(LDK(KP980785280), TaW, VMUL(LDK(KP195090322), Tb5));
Chris@42 2720 TbM = VFNMS(LDK(KP980785280), Tb5, VMUL(LDK(KP195090322), TaW));
Chris@42 2721 TbG = VFNMS(LDK(KP923879532), Taz, VMUL(LDK(KP382683432), TaC));
Chris@42 2722 TbH = VFMA(LDK(KP923879532), Tas, VMUL(LDK(KP382683432), Tav));
Chris@42 2723 TbI = VSUB(TbG, TbH);
Chris@42 2724 TbS = VADD(TbH, TbG);
Chris@42 2725 }
Chris@42 2726 {
Chris@42 2727 V TbU, TbV, Tbn, Tbw;
Chris@42 2728 TbU = VADD(TaK, TaV);
Chris@42 2729 TbV = VADD(Tb1, Tb4);
Chris@42 2730 TbW = VFMA(LDK(KP555570233), TbU, VMUL(LDK(KP831469612), TbV));
Chris@42 2731 Tc6 = VFNMS(LDK(KP555570233), TbV, VMUL(LDK(KP831469612), TbU));
Chris@42 2732 Tbn = VSUB(Tbb, Tbm);
Chris@42 2733 Tbw = VSUB(Tbs, Tbv);
Chris@42 2734 Tbx = VFNMS(LDK(KP980785280), Tbw, VMUL(LDK(KP195090322), Tbn));
Chris@42 2735 TbN = VFMA(LDK(KP195090322), Tbw, VMUL(LDK(KP980785280), Tbn));
Chris@42 2736 }
Chris@42 2737 {
Chris@42 2738 V TaF, Tby, TbP, TbQ;
Chris@42 2739 TaF = VADD(Tap, TaE);
Chris@42 2740 Tby = VADD(Tb6, Tbx);
Chris@42 2741 Tfv = VSUB(TaF, Tby);
Chris@42 2742 STM4(&(ro[46]), Tfv, ovs, &(ro[0]));
Chris@42 2743 Tfw = VADD(TaF, Tby);
Chris@42 2744 STM4(&(ro[14]), Tfw, ovs, &(ro[0]));
Chris@42 2745 TbP = VADD(TbF, TbI);
Chris@42 2746 TbQ = VADD(TbM, TbN);
Chris@42 2747 Tfx = VSUB(TbP, TbQ);
Chris@42 2748 STM4(&(io[46]), Tfx, ovs, &(io[0]));
Chris@42 2749 Tfy = VADD(TbP, TbQ);
Chris@42 2750 STM4(&(io[14]), Tfy, ovs, &(io[0]));
Chris@42 2751 }
Chris@42 2752 {
Chris@42 2753 V TbJ, TbK, TbL, TbO;
Chris@42 2754 TbJ = VSUB(TbF, TbI);
Chris@42 2755 TbK = VSUB(Tbx, Tb6);
Chris@42 2756 Tfz = VSUB(TbJ, TbK);
Chris@42 2757 STM4(&(io[62]), Tfz, ovs, &(io[0]));
Chris@42 2758 TfA = VADD(TbJ, TbK);
Chris@42 2759 STM4(&(io[30]), TfA, ovs, &(io[0]));
Chris@42 2760 TbL = VSUB(Tap, TaE);
Chris@42 2761 TbO = VSUB(TbM, TbN);
Chris@42 2762 TfB = VSUB(TbL, TbO);
Chris@42 2763 STM4(&(ro[62]), TfB, ovs, &(ro[0]));
Chris@42 2764 TfC = VADD(TbL, TbO);
Chris@42 2765 STM4(&(ro[30]), TfC, ovs, &(ro[0]));
Chris@42 2766 }
Chris@42 2767 {
Chris@42 2768 V TbT, Tc0, Tc9, Tca;
Chris@42 2769 TbT = VADD(TbR, TbS);
Chris@42 2770 Tc0 = VADD(TbW, TbZ);
Chris@42 2771 TfD = VSUB(TbT, Tc0);
Chris@42 2772 STM4(&(ro[38]), TfD, ovs, &(ro[0]));
Chris@42 2773 TfE = VADD(TbT, Tc0);
Chris@42 2774 STM4(&(ro[6]), TfE, ovs, &(ro[0]));
Chris@42 2775 Tc9 = VADD(Tc1, Tc2);
Chris@42 2776 Tca = VADD(Tc6, Tc7);
Chris@42 2777 TfF = VSUB(Tc9, Tca);
Chris@42 2778 STM4(&(io[38]), TfF, ovs, &(io[0]));
Chris@42 2779 TfG = VADD(Tc9, Tca);
Chris@42 2780 STM4(&(io[6]), TfG, ovs, &(io[0]));
Chris@42 2781 }
Chris@42 2782 {
Chris@42 2783 V Tc3, Tc4, Tc5, Tc8;
Chris@42 2784 Tc3 = VSUB(Tc1, Tc2);
Chris@42 2785 Tc4 = VSUB(TbZ, TbW);
Chris@42 2786 TfH = VSUB(Tc3, Tc4);
Chris@42 2787 STM4(&(io[54]), TfH, ovs, &(io[0]));
Chris@42 2788 TfI = VADD(Tc3, Tc4);
Chris@42 2789 STM4(&(io[22]), TfI, ovs, &(io[0]));
Chris@42 2790 Tc5 = VSUB(TbR, TbS);
Chris@42 2791 Tc8 = VSUB(Tc6, Tc7);
Chris@42 2792 TfJ = VSUB(Tc5, Tc8);
Chris@42 2793 STM4(&(ro[54]), TfJ, ovs, &(ro[0]));
Chris@42 2794 TfK = VADD(Tc5, Tc8);
Chris@42 2795 STM4(&(ro[22]), TfK, ovs, &(ro[0]));
Chris@42 2796 }
Chris@42 2797 }
Chris@42 2798 {
Chris@42 2799 V T6F, T7h, T7m, T7w, T7p, T7x, T6M, T7s, T6U, T7c, T75, T7r, T78, T7i, T71;
Chris@42 2800 V T7d;
Chris@42 2801 {
Chris@42 2802 V T6D, T6E, T7k, T7l;
Chris@42 2803 T6D = VADD(T37, T3e);
Chris@42 2804 T6E = VADD(T65, T64);
Chris@42 2805 T6F = VSUB(T6D, T6E);
Chris@42 2806 T7h = VADD(T6D, T6E);
Chris@42 2807 T7k = VADD(T6O, T6P);
Chris@42 2808 T7l = VADD(T6R, T6S);
Chris@42 2809 T7m = VFMA(LDK(KP956940335), T7k, VMUL(LDK(KP290284677), T7l));
Chris@42 2810 T7w = VFNMS(LDK(KP290284677), T7k, VMUL(LDK(KP956940335), T7l));
Chris@42 2811 }
Chris@42 2812 {
Chris@42 2813 V T7n, T7o, T6I, T6L;
Chris@42 2814 T7n = VADD(T6V, T6W);
Chris@42 2815 T7o = VADD(T6Y, T6Z);
Chris@42 2816 T7p = VFNMS(LDK(KP290284677), T7o, VMUL(LDK(KP956940335), T7n));
Chris@42 2817 T7x = VFMA(LDK(KP290284677), T7n, VMUL(LDK(KP956940335), T7o));
Chris@42 2818 T6I = VFNMS(LDK(KP555570233), T6H, VMUL(LDK(KP831469612), T6G));
Chris@42 2819 T6L = VFMA(LDK(KP831469612), T6J, VMUL(LDK(KP555570233), T6K));
Chris@42 2820 T6M = VSUB(T6I, T6L);
Chris@42 2821 T7s = VADD(T6I, T6L);
Chris@42 2822 }
Chris@42 2823 {
Chris@42 2824 V T6Q, T6T, T73, T74;
Chris@42 2825 T6Q = VSUB(T6O, T6P);
Chris@42 2826 T6T = VSUB(T6R, T6S);
Chris@42 2827 T6U = VFMA(LDK(KP471396736), T6Q, VMUL(LDK(KP881921264), T6T));
Chris@42 2828 T7c = VFNMS(LDK(KP881921264), T6Q, VMUL(LDK(KP471396736), T6T));
Chris@42 2829 T73 = VADD(T5Z, T62);
Chris@42 2830 T74 = VADD(T3m, T3t);
Chris@42 2831 T75 = VSUB(T73, T74);
Chris@42 2832 T7r = VADD(T73, T74);
Chris@42 2833 }
Chris@42 2834 {
Chris@42 2835 V T76, T77, T6X, T70;
Chris@42 2836 T76 = VFNMS(LDK(KP555570233), T6J, VMUL(LDK(KP831469612), T6K));
Chris@42 2837 T77 = VFMA(LDK(KP555570233), T6G, VMUL(LDK(KP831469612), T6H));
Chris@42 2838 T78 = VSUB(T76, T77);
Chris@42 2839 T7i = VADD(T77, T76);
Chris@42 2840 T6X = VSUB(T6V, T6W);
Chris@42 2841 T70 = VSUB(T6Y, T6Z);
Chris@42 2842 T71 = VFNMS(LDK(KP881921264), T70, VMUL(LDK(KP471396736), T6X));
Chris@42 2843 T7d = VFMA(LDK(KP881921264), T6X, VMUL(LDK(KP471396736), T70));
Chris@42 2844 }
Chris@42 2845 {
Chris@42 2846 V T6N, T72, T7f, T7g;
Chris@42 2847 T6N = VADD(T6F, T6M);
Chris@42 2848 T72 = VADD(T6U, T71);
Chris@42 2849 TfL = VSUB(T6N, T72);
Chris@42 2850 STM4(&(ro[43]), TfL, ovs, &(ro[1]));
Chris@42 2851 TfM = VADD(T6N, T72);
Chris@42 2852 STM4(&(ro[11]), TfM, ovs, &(ro[1]));
Chris@42 2853 T7f = VADD(T75, T78);
Chris@42 2854 T7g = VADD(T7c, T7d);
Chris@42 2855 TfN = VSUB(T7f, T7g);
Chris@42 2856 STM4(&(io[43]), TfN, ovs, &(io[1]));
Chris@42 2857 TfO = VADD(T7f, T7g);
Chris@42 2858 STM4(&(io[11]), TfO, ovs, &(io[1]));
Chris@42 2859 }
Chris@42 2860 {
Chris@42 2861 V T79, T7a, T7b, T7e;
Chris@42 2862 T79 = VSUB(T75, T78);
Chris@42 2863 T7a = VSUB(T71, T6U);
Chris@42 2864 TfP = VSUB(T79, T7a);
Chris@42 2865 STM4(&(io[59]), TfP, ovs, &(io[1]));
Chris@42 2866 TfQ = VADD(T79, T7a);
Chris@42 2867 STM4(&(io[27]), TfQ, ovs, &(io[1]));
Chris@42 2868 T7b = VSUB(T6F, T6M);
Chris@42 2869 T7e = VSUB(T7c, T7d);
Chris@42 2870 TfR = VSUB(T7b, T7e);
Chris@42 2871 STM4(&(ro[59]), TfR, ovs, &(ro[1]));
Chris@42 2872 TfS = VADD(T7b, T7e);
Chris@42 2873 STM4(&(ro[27]), TfS, ovs, &(ro[1]));
Chris@42 2874 }
Chris@42 2875 {
Chris@42 2876 V T7j, T7q, T7z, T7A;
Chris@42 2877 T7j = VADD(T7h, T7i);
Chris@42 2878 T7q = VADD(T7m, T7p);
Chris@42 2879 TfT = VSUB(T7j, T7q);
Chris@42 2880 STM4(&(ro[35]), TfT, ovs, &(ro[1]));
Chris@42 2881 TfU = VADD(T7j, T7q);
Chris@42 2882 STM4(&(ro[3]), TfU, ovs, &(ro[1]));
Chris@42 2883 T7z = VADD(T7r, T7s);
Chris@42 2884 T7A = VADD(T7w, T7x);
Chris@42 2885 TfV = VSUB(T7z, T7A);
Chris@42 2886 STM4(&(io[35]), TfV, ovs, &(io[1]));
Chris@42 2887 TfW = VADD(T7z, T7A);
Chris@42 2888 STM4(&(io[3]), TfW, ovs, &(io[1]));
Chris@42 2889 }
Chris@42 2890 {
Chris@42 2891 V T7t, T7u, T7v, T7y;
Chris@42 2892 T7t = VSUB(T7r, T7s);
Chris@42 2893 T7u = VSUB(T7p, T7m);
Chris@42 2894 TfX = VSUB(T7t, T7u);
Chris@42 2895 STM4(&(io[51]), TfX, ovs, &(io[1]));
Chris@42 2896 TfY = VADD(T7t, T7u);
Chris@42 2897 STM4(&(io[19]), TfY, ovs, &(io[1]));
Chris@42 2898 T7v = VSUB(T7h, T7i);
Chris@42 2899 T7y = VSUB(T7w, T7x);
Chris@42 2900 TfZ = VSUB(T7v, T7y);
Chris@42 2901 STM4(&(ro[51]), TfZ, ovs, &(ro[1]));
Chris@42 2902 Tg0 = VADD(T7v, T7y);
Chris@42 2903 STM4(&(ro[19]), Tg0, ovs, &(ro[1]));
Chris@42 2904 }
Chris@42 2905 }
Chris@42 2906 {
Chris@42 2907 V T9j, T9V, Ta0, Taa, Ta3, Tab, T9q, Ta6, T9y, T9Q, T9J, Ta5, T9M, T9W, T9F;
Chris@42 2908 V T9R;
Chris@42 2909 {
Chris@42 2910 V T9h, T9i, T9Y, T9Z;
Chris@42 2911 T9h = VADD(T7B, T7C);
Chris@42 2912 T9i = VADD(T8J, T8I);
Chris@42 2913 T9j = VSUB(T9h, T9i);
Chris@42 2914 T9V = VADD(T9h, T9i);
Chris@42 2915 T9Y = VADD(T9s, T9t);
Chris@42 2916 T9Z = VADD(T9v, T9w);
Chris@42 2917 Ta0 = VFMA(LDK(KP995184726), T9Y, VMUL(LDK(KP098017140), T9Z));
Chris@42 2918 Taa = VFNMS(LDK(KP098017140), T9Y, VMUL(LDK(KP995184726), T9Z));
Chris@42 2919 }
Chris@42 2920 {
Chris@42 2921 V Ta1, Ta2, T9m, T9p;
Chris@42 2922 Ta1 = VADD(T9z, T9A);
Chris@42 2923 Ta2 = VADD(T9C, T9D);
Chris@42 2924 Ta3 = VFNMS(LDK(KP098017140), Ta2, VMUL(LDK(KP995184726), Ta1));
Chris@42 2925 Tab = VFMA(LDK(KP098017140), Ta1, VMUL(LDK(KP995184726), Ta2));
Chris@42 2926 T9m = VFNMS(LDK(KP195090322), T9l, VMUL(LDK(KP980785280), T9k));
Chris@42 2927 T9p = VFMA(LDK(KP195090322), T9n, VMUL(LDK(KP980785280), T9o));
Chris@42 2928 T9q = VSUB(T9m, T9p);
Chris@42 2929 Ta6 = VADD(T9m, T9p);
Chris@42 2930 }
Chris@42 2931 {
Chris@42 2932 V T9u, T9x, T9H, T9I;
Chris@42 2933 T9u = VSUB(T9s, T9t);
Chris@42 2934 T9x = VSUB(T9v, T9w);
Chris@42 2935 T9y = VFMA(LDK(KP634393284), T9u, VMUL(LDK(KP773010453), T9x));
Chris@42 2936 T9Q = VFNMS(LDK(KP773010453), T9u, VMUL(LDK(KP634393284), T9x));
Chris@42 2937 T9H = VADD(T8F, T8G);
Chris@42 2938 T9I = VADD(T7G, T7J);
Chris@42 2939 T9J = VSUB(T9H, T9I);
Chris@42 2940 Ta5 = VADD(T9H, T9I);
Chris@42 2941 }
Chris@42 2942 {
Chris@42 2943 V T9K, T9L, T9B, T9E;
Chris@42 2944 T9K = VFNMS(LDK(KP195090322), T9o, VMUL(LDK(KP980785280), T9n));
Chris@42 2945 T9L = VFMA(LDK(KP980785280), T9l, VMUL(LDK(KP195090322), T9k));
Chris@42 2946 T9M = VSUB(T9K, T9L);
Chris@42 2947 T9W = VADD(T9L, T9K);
Chris@42 2948 T9B = VSUB(T9z, T9A);
Chris@42 2949 T9E = VSUB(T9C, T9D);
Chris@42 2950 T9F = VFNMS(LDK(KP773010453), T9E, VMUL(LDK(KP634393284), T9B));
Chris@42 2951 T9R = VFMA(LDK(KP773010453), T9B, VMUL(LDK(KP634393284), T9E));
Chris@42 2952 }
Chris@42 2953 {
Chris@42 2954 V T9r, T9G, Tg1, Tg2;
Chris@42 2955 T9r = VADD(T9j, T9q);
Chris@42 2956 T9G = VADD(T9y, T9F);
Chris@42 2957 Tg1 = VSUB(T9r, T9G);
Chris@42 2958 STM4(&(ro[41]), Tg1, ovs, &(ro[1]));
Chris@42 2959 STN4(&(ro[40]), TeR, Tg1, Tff, TfL, ovs);
Chris@42 2960 Tg2 = VADD(T9r, T9G);
Chris@42 2961 STM4(&(ro[9]), Tg2, ovs, &(ro[1]));
Chris@42 2962 STN4(&(ro[8]), TeS, Tg2, Tfg, TfM, ovs);
Chris@42 2963 }
Chris@42 2964 {
Chris@42 2965 V T9T, T9U, Tg3, Tg4;
Chris@42 2966 T9T = VADD(T9J, T9M);
Chris@42 2967 T9U = VADD(T9Q, T9R);
Chris@42 2968 Tg3 = VSUB(T9T, T9U);
Chris@42 2969 STM4(&(io[41]), Tg3, ovs, &(io[1]));
Chris@42 2970 STN4(&(io[40]), TeT, Tg3, Tfh, TfN, ovs);
Chris@42 2971 Tg4 = VADD(T9T, T9U);
Chris@42 2972 STM4(&(io[9]), Tg4, ovs, &(io[1]));
Chris@42 2973 STN4(&(io[8]), TeU, Tg4, Tfi, TfO, ovs);
Chris@42 2974 }
Chris@42 2975 {
Chris@42 2976 V T9N, T9O, Tg5, Tg6;
Chris@42 2977 T9N = VSUB(T9J, T9M);
Chris@42 2978 T9O = VSUB(T9F, T9y);
Chris@42 2979 Tg5 = VSUB(T9N, T9O);
Chris@42 2980 STM4(&(io[57]), Tg5, ovs, &(io[1]));
Chris@42 2981 STN4(&(io[56]), TeV, Tg5, Tfj, TfP, ovs);
Chris@42 2982 Tg6 = VADD(T9N, T9O);
Chris@42 2983 STM4(&(io[25]), Tg6, ovs, &(io[1]));
Chris@42 2984 STN4(&(io[24]), TeW, Tg6, Tfk, TfQ, ovs);
Chris@42 2985 }
Chris@42 2986 {
Chris@42 2987 V T9P, T9S, Tg7, Tg8;
Chris@42 2988 T9P = VSUB(T9j, T9q);
Chris@42 2989 T9S = VSUB(T9Q, T9R);
Chris@42 2990 Tg7 = VSUB(T9P, T9S);
Chris@42 2991 STM4(&(ro[57]), Tg7, ovs, &(ro[1]));
Chris@42 2992 STN4(&(ro[56]), TeX, Tg7, Tfl, TfR, ovs);
Chris@42 2993 Tg8 = VADD(T9P, T9S);
Chris@42 2994 STM4(&(ro[25]), Tg8, ovs, &(ro[1]));
Chris@42 2995 STN4(&(ro[24]), TeY, Tg8, Tfm, TfS, ovs);
Chris@42 2996 }
Chris@42 2997 {
Chris@42 2998 V T9X, Ta4, Tg9, Tga;
Chris@42 2999 T9X = VADD(T9V, T9W);
Chris@42 3000 Ta4 = VADD(Ta0, Ta3);
Chris@42 3001 Tg9 = VSUB(T9X, Ta4);
Chris@42 3002 STM4(&(ro[33]), Tg9, ovs, &(ro[1]));
Chris@42 3003 STN4(&(ro[32]), TeJ, Tg9, Tfn, TfT, ovs);
Chris@42 3004 Tga = VADD(T9X, Ta4);
Chris@42 3005 STM4(&(ro[1]), Tga, ovs, &(ro[1]));
Chris@42 3006 STN4(&(ro[0]), TeL, Tga, Tfo, TfU, ovs);
Chris@42 3007 }
Chris@42 3008 {
Chris@42 3009 V Tad, Tae, Tgb, Tgc;
Chris@42 3010 Tad = VADD(Ta5, Ta6);
Chris@42 3011 Tae = VADD(Taa, Tab);
Chris@42 3012 Tgb = VSUB(Tad, Tae);
Chris@42 3013 STM4(&(io[33]), Tgb, ovs, &(io[1]));
Chris@42 3014 STN4(&(io[32]), TeK, Tgb, Tfp, TfV, ovs);
Chris@42 3015 Tgc = VADD(Tad, Tae);
Chris@42 3016 STM4(&(io[1]), Tgc, ovs, &(io[1]));
Chris@42 3017 STN4(&(io[0]), TeM, Tgc, Tfq, TfW, ovs);
Chris@42 3018 }
Chris@42 3019 {
Chris@42 3020 V Ta7, Ta8, Tgd, Tge;
Chris@42 3021 Ta7 = VSUB(Ta5, Ta6);
Chris@42 3022 Ta8 = VSUB(Ta3, Ta0);
Chris@42 3023 Tgd = VSUB(Ta7, Ta8);
Chris@42 3024 STM4(&(io[49]), Tgd, ovs, &(io[1]));
Chris@42 3025 STN4(&(io[48]), TeP, Tgd, Tfr, TfX, ovs);
Chris@42 3026 Tge = VADD(Ta7, Ta8);
Chris@42 3027 STM4(&(io[17]), Tge, ovs, &(io[1]));
Chris@42 3028 STN4(&(io[16]), TeN, Tge, Tfs, TfY, ovs);
Chris@42 3029 }
Chris@42 3030 {
Chris@42 3031 V Ta9, Tac, Tgf, Tgg;
Chris@42 3032 Ta9 = VSUB(T9V, T9W);
Chris@42 3033 Tac = VSUB(Taa, Tab);
Chris@42 3034 Tgf = VSUB(Ta9, Tac);
Chris@42 3035 STM4(&(ro[49]), Tgf, ovs, &(ro[1]));
Chris@42 3036 STN4(&(ro[48]), TeQ, Tgf, Tft, TfZ, ovs);
Chris@42 3037 Tgg = VADD(Ta9, Tac);
Chris@42 3038 STM4(&(ro[17]), Tgg, ovs, &(ro[1]));
Chris@42 3039 STN4(&(ro[16]), TeO, Tgg, Tfu, Tg0, ovs);
Chris@42 3040 }
Chris@42 3041 }
Chris@42 3042 {
Chris@42 3043 V Tgh, Tgi, Tgj, Tgk, Tgl, Tgm, Tgn, Tgo, Tgp, Tgq, Tgr, Tgs, Tgt, Tgu, Tgv;
Chris@42 3044 V Tgw;
Chris@42 3045 {
Chris@42 3046 V T3v, T6j, T6o, T6y, T6r, T6z, T48, T6u, T52, T6e, T67, T6t, T6a, T6k, T5V;
Chris@42 3047 V T6f;
Chris@42 3048 {
Chris@42 3049 V T3f, T3u, T6m, T6n;
Chris@42 3050 T3f = VSUB(T37, T3e);
Chris@42 3051 T3u = VSUB(T3m, T3t);
Chris@42 3052 T3v = VSUB(T3f, T3u);
Chris@42 3053 T6j = VADD(T3f, T3u);
Chris@42 3054 T6m = VADD(T4q, T4N);
Chris@42 3055 T6n = VADD(T4X, T50);
Chris@42 3056 T6o = VFMA(LDK(KP634393284), T6m, VMUL(LDK(KP773010453), T6n));
Chris@42 3057 T6y = VFNMS(LDK(KP634393284), T6n, VMUL(LDK(KP773010453), T6m));
Chris@42 3058 }
Chris@42 3059 {
Chris@42 3060 V T6p, T6q, T3O, T47;
Chris@42 3061 T6p = VADD(T5j, T5G);
Chris@42 3062 T6q = VADD(T5Q, T5T);
Chris@42 3063 T6r = VFNMS(LDK(KP634393284), T6q, VMUL(LDK(KP773010453), T6p));
Chris@42 3064 T6z = VFMA(LDK(KP773010453), T6q, VMUL(LDK(KP634393284), T6p));
Chris@42 3065 T3O = VFNMS(LDK(KP980785280), T3N, VMUL(LDK(KP195090322), T3G));
Chris@42 3066 T47 = VFMA(LDK(KP195090322), T3Z, VMUL(LDK(KP980785280), T46));
Chris@42 3067 T48 = VSUB(T3O, T47);
Chris@42 3068 T6u = VADD(T3O, T47);
Chris@42 3069 }
Chris@42 3070 {
Chris@42 3071 V T4O, T51, T63, T66;
Chris@42 3072 T4O = VSUB(T4q, T4N);
Chris@42 3073 T51 = VSUB(T4X, T50);
Chris@42 3074 T52 = VFMA(LDK(KP995184726), T4O, VMUL(LDK(KP098017140), T51));
Chris@42 3075 T6e = VFNMS(LDK(KP995184726), T51, VMUL(LDK(KP098017140), T4O));
Chris@42 3076 T63 = VSUB(T5Z, T62);
Chris@42 3077 T66 = VSUB(T64, T65);
Chris@42 3078 T67 = VSUB(T63, T66);
Chris@42 3079 T6t = VADD(T63, T66);
Chris@42 3080 }
Chris@42 3081 {
Chris@42 3082 V T68, T69, T5H, T5U;
Chris@42 3083 T68 = VFNMS(LDK(KP980785280), T3Z, VMUL(LDK(KP195090322), T46));
Chris@42 3084 T69 = VFMA(LDK(KP980785280), T3G, VMUL(LDK(KP195090322), T3N));
Chris@42 3085 T6a = VSUB(T68, T69);
Chris@42 3086 T6k = VADD(T69, T68);
Chris@42 3087 T5H = VSUB(T5j, T5G);
Chris@42 3088 T5U = VSUB(T5Q, T5T);
Chris@42 3089 T5V = VFNMS(LDK(KP995184726), T5U, VMUL(LDK(KP098017140), T5H));
Chris@42 3090 T6f = VFMA(LDK(KP098017140), T5U, VMUL(LDK(KP995184726), T5H));
Chris@42 3091 }
Chris@42 3092 {
Chris@42 3093 V T49, T5W, T6h, T6i;
Chris@42 3094 T49 = VADD(T3v, T48);
Chris@42 3095 T5W = VADD(T52, T5V);
Chris@42 3096 Tgh = VSUB(T49, T5W);
Chris@42 3097 STM4(&(ro[47]), Tgh, ovs, &(ro[1]));
Chris@42 3098 Tgi = VADD(T49, T5W);
Chris@42 3099 STM4(&(ro[15]), Tgi, ovs, &(ro[1]));
Chris@42 3100 T6h = VADD(T67, T6a);
Chris@42 3101 T6i = VADD(T6e, T6f);
Chris@42 3102 Tgj = VSUB(T6h, T6i);
Chris@42 3103 STM4(&(io[47]), Tgj, ovs, &(io[1]));
Chris@42 3104 Tgk = VADD(T6h, T6i);
Chris@42 3105 STM4(&(io[15]), Tgk, ovs, &(io[1]));
Chris@42 3106 }
Chris@42 3107 {
Chris@42 3108 V T6b, T6c, T6d, T6g;
Chris@42 3109 T6b = VSUB(T67, T6a);
Chris@42 3110 T6c = VSUB(T5V, T52);
Chris@42 3111 Tgl = VSUB(T6b, T6c);
Chris@42 3112 STM4(&(io[63]), Tgl, ovs, &(io[1]));
Chris@42 3113 Tgm = VADD(T6b, T6c);
Chris@42 3114 STM4(&(io[31]), Tgm, ovs, &(io[1]));
Chris@42 3115 T6d = VSUB(T3v, T48);
Chris@42 3116 T6g = VSUB(T6e, T6f);
Chris@42 3117 Tgn = VSUB(T6d, T6g);
Chris@42 3118 STM4(&(ro[63]), Tgn, ovs, &(ro[1]));
Chris@42 3119 Tgo = VADD(T6d, T6g);
Chris@42 3120 STM4(&(ro[31]), Tgo, ovs, &(ro[1]));
Chris@42 3121 }
Chris@42 3122 {
Chris@42 3123 V T6l, T6s, T6B, T6C;
Chris@42 3124 T6l = VADD(T6j, T6k);
Chris@42 3125 T6s = VADD(T6o, T6r);
Chris@42 3126 Tgp = VSUB(T6l, T6s);
Chris@42 3127 STM4(&(ro[39]), Tgp, ovs, &(ro[1]));
Chris@42 3128 Tgq = VADD(T6l, T6s);
Chris@42 3129 STM4(&(ro[7]), Tgq, ovs, &(ro[1]));
Chris@42 3130 T6B = VADD(T6t, T6u);
Chris@42 3131 T6C = VADD(T6y, T6z);
Chris@42 3132 Tgr = VSUB(T6B, T6C);
Chris@42 3133 STM4(&(io[39]), Tgr, ovs, &(io[1]));
Chris@42 3134 Tgs = VADD(T6B, T6C);
Chris@42 3135 STM4(&(io[7]), Tgs, ovs, &(io[1]));
Chris@42 3136 }
Chris@42 3137 {
Chris@42 3138 V T6v, T6w, T6x, T6A;
Chris@42 3139 T6v = VSUB(T6t, T6u);
Chris@42 3140 T6w = VSUB(T6r, T6o);
Chris@42 3141 Tgt = VSUB(T6v, T6w);
Chris@42 3142 STM4(&(io[55]), Tgt, ovs, &(io[1]));
Chris@42 3143 Tgu = VADD(T6v, T6w);
Chris@42 3144 STM4(&(io[23]), Tgu, ovs, &(io[1]));
Chris@42 3145 T6x = VSUB(T6j, T6k);
Chris@42 3146 T6A = VSUB(T6y, T6z);
Chris@42 3147 Tgv = VSUB(T6x, T6A);
Chris@42 3148 STM4(&(ro[55]), Tgv, ovs, &(ro[1]));
Chris@42 3149 Tgw = VADD(T6x, T6A);
Chris@42 3150 STM4(&(ro[23]), Tgw, ovs, &(ro[1]));
Chris@42 3151 }
Chris@42 3152 }
Chris@42 3153 {
Chris@42 3154 V T7L, T8X, T92, T9c, T95, T9d, T80, T98, T8k, T8S, T8L, T97, T8O, T8Y, T8D;
Chris@42 3155 V T8T;
Chris@42 3156 {
Chris@42 3157 V T7D, T7K, T90, T91;
Chris@42 3158 T7D = VSUB(T7B, T7C);
Chris@42 3159 T7K = VSUB(T7G, T7J);
Chris@42 3160 T7L = VSUB(T7D, T7K);
Chris@42 3161 T8X = VADD(T7D, T7K);
Chris@42 3162 T90 = VADD(T84, T8b);
Chris@42 3163 T91 = VADD(T8f, T8i);
Chris@42 3164 T92 = VFMA(LDK(KP471396736), T90, VMUL(LDK(KP881921264), T91));
Chris@42 3165 T9c = VFNMS(LDK(KP471396736), T91, VMUL(LDK(KP881921264), T90));
Chris@42 3166 }
Chris@42 3167 {
Chris@42 3168 V T93, T94, T7S, T7Z;
Chris@42 3169 T93 = VADD(T8n, T8u);
Chris@42 3170 T94 = VADD(T8y, T8B);
Chris@42 3171 T95 = VFNMS(LDK(KP471396736), T94, VMUL(LDK(KP881921264), T93));
Chris@42 3172 T9d = VFMA(LDK(KP881921264), T94, VMUL(LDK(KP471396736), T93));
Chris@42 3173 T7S = VFNMS(LDK(KP831469612), T7R, VMUL(LDK(KP555570233), T7O));
Chris@42 3174 T7Z = VFMA(LDK(KP831469612), T7V, VMUL(LDK(KP555570233), T7Y));
Chris@42 3175 T80 = VSUB(T7S, T7Z);
Chris@42 3176 T98 = VADD(T7S, T7Z);
Chris@42 3177 }
Chris@42 3178 {
Chris@42 3179 V T8c, T8j, T8H, T8K;
Chris@42 3180 T8c = VSUB(T84, T8b);
Chris@42 3181 T8j = VSUB(T8f, T8i);
Chris@42 3182 T8k = VFMA(LDK(KP956940335), T8c, VMUL(LDK(KP290284677), T8j));
Chris@42 3183 T8S = VFNMS(LDK(KP956940335), T8j, VMUL(LDK(KP290284677), T8c));
Chris@42 3184 T8H = VSUB(T8F, T8G);
Chris@42 3185 T8K = VSUB(T8I, T8J);
Chris@42 3186 T8L = VSUB(T8H, T8K);
Chris@42 3187 T97 = VADD(T8H, T8K);
Chris@42 3188 }
Chris@42 3189 {
Chris@42 3190 V T8M, T8N, T8v, T8C;
Chris@42 3191 T8M = VFNMS(LDK(KP831469612), T7Y, VMUL(LDK(KP555570233), T7V));
Chris@42 3192 T8N = VFMA(LDK(KP555570233), T7R, VMUL(LDK(KP831469612), T7O));
Chris@42 3193 T8O = VSUB(T8M, T8N);
Chris@42 3194 T8Y = VADD(T8N, T8M);
Chris@42 3195 T8v = VSUB(T8n, T8u);
Chris@42 3196 T8C = VSUB(T8y, T8B);
Chris@42 3197 T8D = VFNMS(LDK(KP956940335), T8C, VMUL(LDK(KP290284677), T8v));
Chris@42 3198 T8T = VFMA(LDK(KP290284677), T8C, VMUL(LDK(KP956940335), T8v));
Chris@42 3199 }
Chris@42 3200 {
Chris@42 3201 V T81, T8E, Tgx, Tgy;
Chris@42 3202 T81 = VADD(T7L, T80);
Chris@42 3203 T8E = VADD(T8k, T8D);
Chris@42 3204 Tgx = VSUB(T81, T8E);
Chris@42 3205 STM4(&(ro[45]), Tgx, ovs, &(ro[1]));
Chris@42 3206 STN4(&(ro[44]), TeZ, Tgx, Tfv, Tgh, ovs);
Chris@42 3207 Tgy = VADD(T81, T8E);
Chris@42 3208 STM4(&(ro[13]), Tgy, ovs, &(ro[1]));
Chris@42 3209 STN4(&(ro[12]), Tf0, Tgy, Tfw, Tgi, ovs);
Chris@42 3210 }
Chris@42 3211 {
Chris@42 3212 V T8V, T8W, Tgz, TgA;
Chris@42 3213 T8V = VADD(T8L, T8O);
Chris@42 3214 T8W = VADD(T8S, T8T);
Chris@42 3215 Tgz = VSUB(T8V, T8W);
Chris@42 3216 STM4(&(io[45]), Tgz, ovs, &(io[1]));
Chris@42 3217 STN4(&(io[44]), Tf1, Tgz, Tfx, Tgj, ovs);
Chris@42 3218 TgA = VADD(T8V, T8W);
Chris@42 3219 STM4(&(io[13]), TgA, ovs, &(io[1]));
Chris@42 3220 STN4(&(io[12]), Tf2, TgA, Tfy, Tgk, ovs);
Chris@42 3221 }
Chris@42 3222 {
Chris@42 3223 V T8P, T8Q, TgB, TgC;
Chris@42 3224 T8P = VSUB(T8L, T8O);
Chris@42 3225 T8Q = VSUB(T8D, T8k);
Chris@42 3226 TgB = VSUB(T8P, T8Q);
Chris@42 3227 STM4(&(io[61]), TgB, ovs, &(io[1]));
Chris@42 3228 STN4(&(io[60]), Tf3, TgB, Tfz, Tgl, ovs);
Chris@42 3229 TgC = VADD(T8P, T8Q);
Chris@42 3230 STM4(&(io[29]), TgC, ovs, &(io[1]));
Chris@42 3231 STN4(&(io[28]), Tf4, TgC, TfA, Tgm, ovs);
Chris@42 3232 }
Chris@42 3233 {
Chris@42 3234 V T8R, T8U, TgD, TgE;
Chris@42 3235 T8R = VSUB(T7L, T80);
Chris@42 3236 T8U = VSUB(T8S, T8T);
Chris@42 3237 TgD = VSUB(T8R, T8U);
Chris@42 3238 STM4(&(ro[61]), TgD, ovs, &(ro[1]));
Chris@42 3239 STN4(&(ro[60]), Tf5, TgD, TfB, Tgn, ovs);
Chris@42 3240 TgE = VADD(T8R, T8U);
Chris@42 3241 STM4(&(ro[29]), TgE, ovs, &(ro[1]));
Chris@42 3242 STN4(&(ro[28]), Tf6, TgE, TfC, Tgo, ovs);
Chris@42 3243 }
Chris@42 3244 {
Chris@42 3245 V T8Z, T96, TgF, TgG;
Chris@42 3246 T8Z = VADD(T8X, T8Y);
Chris@42 3247 T96 = VADD(T92, T95);
Chris@42 3248 TgF = VSUB(T8Z, T96);
Chris@42 3249 STM4(&(ro[37]), TgF, ovs, &(ro[1]));
Chris@42 3250 STN4(&(ro[36]), Tf7, TgF, TfD, Tgp, ovs);
Chris@42 3251 TgG = VADD(T8Z, T96);
Chris@42 3252 STM4(&(ro[5]), TgG, ovs, &(ro[1]));
Chris@42 3253 STN4(&(ro[4]), Tf8, TgG, TfE, Tgq, ovs);
Chris@42 3254 }
Chris@42 3255 {
Chris@42 3256 V T9f, T9g, TgH, TgI;
Chris@42 3257 T9f = VADD(T97, T98);
Chris@42 3258 T9g = VADD(T9c, T9d);
Chris@42 3259 TgH = VSUB(T9f, T9g);
Chris@42 3260 STM4(&(io[37]), TgH, ovs, &(io[1]));
Chris@42 3261 STN4(&(io[36]), Tf9, TgH, TfF, Tgr, ovs);
Chris@42 3262 TgI = VADD(T9f, T9g);
Chris@42 3263 STM4(&(io[5]), TgI, ovs, &(io[1]));
Chris@42 3264 STN4(&(io[4]), Tfa, TgI, TfG, Tgs, ovs);
Chris@42 3265 }
Chris@42 3266 {
Chris@42 3267 V T99, T9a, TgJ, TgK;
Chris@42 3268 T99 = VSUB(T97, T98);
Chris@42 3269 T9a = VSUB(T95, T92);
Chris@42 3270 TgJ = VSUB(T99, T9a);
Chris@42 3271 STM4(&(io[53]), TgJ, ovs, &(io[1]));
Chris@42 3272 STN4(&(io[52]), Tfb, TgJ, TfH, Tgt, ovs);
Chris@42 3273 TgK = VADD(T99, T9a);
Chris@42 3274 STM4(&(io[21]), TgK, ovs, &(io[1]));
Chris@42 3275 STN4(&(io[20]), Tfc, TgK, TfI, Tgu, ovs);
Chris@42 3276 }
Chris@42 3277 {
Chris@42 3278 V T9b, T9e, TgL, TgM;
Chris@42 3279 T9b = VSUB(T8X, T8Y);
Chris@42 3280 T9e = VSUB(T9c, T9d);
Chris@42 3281 TgL = VSUB(T9b, T9e);
Chris@42 3282 STM4(&(ro[53]), TgL, ovs, &(ro[1]));
Chris@42 3283 STN4(&(ro[52]), Tfd, TgL, TfJ, Tgv, ovs);
Chris@42 3284 TgM = VADD(T9b, T9e);
Chris@42 3285 STM4(&(ro[21]), TgM, ovs, &(ro[1]));
Chris@42 3286 STN4(&(ro[20]), Tfe, TgM, TfK, Tgw, ovs);
Chris@42 3287 }
Chris@42 3288 }
Chris@42 3289 }
Chris@42 3290 }
Chris@42 3291 }
Chris@42 3292 }
Chris@42 3293 }
Chris@42 3294 VLEAVE();
Chris@42 3295 }
Chris@42 3296
/* Planner registration descriptor for this codelet: a 64-point DFT named
   "n2sv_64".  The inner brace is the genfft-reported operation-count record
   {808, 144, 104, 0} — NOTE(review): presumably {adds, muls, fmas, other}
   per the opcnt layout in codelet-dft.h; confirm field order there.  The
   trailing &GENUS and the 0, 1, 0, 0 values are the SIMD genus pointer and
   stride/flag parameters whose semantics are defined by kdft_desc in the
   FFTW core headers (not visible in this file). */
Chris@42 3297 static const kdft_desc desc = { 64, XSIMD_STRING("n2sv_64"), {808, 144, 104, 0}, &GENUS, 0, 1, 0, 0 };
Chris@42 3298
/* Registers the n2sv_64 codelet with the given planner so it becomes a
   candidate when 64-point DFT plans are created.  XSIMD() and X() are
   FFTW name-mangling macros that expand to SIMD- and precision-specific
   symbol names; they are defined by the build headers included via
   codelet-dft.h, not in this file. */
Chris@42 3299 void XSIMD(codelet_n2sv_64) (planner *p) {
Chris@42 3300 X(kdft_register) (p, n2sv_64, &desc);
Chris@42 3301 }
Chris@42 3302
Chris@42 3303 #endif /* HAVE_FMA */