To check out this repository, please hg clone the following URL, or open it using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t1_64.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (101 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:15 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -n 64 -name t1_64 -include dft/scalar/t.h */
29

    
30
/*
31
 * This function contains 1038 FP additions, 644 FP multiplications,
32
 * (or, 520 additions, 126 multiplications, 518 fused multiply/add),
33
 * 190 stack variables, 15 constants, and 256 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36

    
37
static void t1_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
40
     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
41
     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
42
     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
43
     DK(KP098491403, +0.098491403357164253077197521291327432293052451);
44
     DK(KP820678790, +0.820678790828660330972281985331011598767386482);
45
     DK(KP303346683, +0.303346683607342391675883946941299872384187453);
46
     DK(KP534511135, +0.534511135950791641089685961295362908582039528);
47
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
48
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
49
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);
50
     DK(KP668178637, +0.668178637919298919997757686523080761552472251);
51
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
52
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
53
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
54
     {
55
          INT m;
56
          for (m = mb, W = W + (mb * 126); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 126, MAKE_VOLATILE_STRIDE(128, rs)) {
57
               E Tm, TeM, TjR, Tkl, T7e, TcA, TiV, Tjm, T1G, TeW, TeZ, Ths, T7Q, TcJ, T7X;
58
               E TcI, T29, Tf8, Tf5, Thv, T87, TcN, T8u, TcQ, T5K, Tg9, TfU, ThS, Taq, Tdm;
59
               E Tbj, Tdx, TN, Tjl, TeP, TiP, T7l, TcB, T7s, TcC, T1f, TeR, TeU, Thr, T7B;
60
               E TcG, T7I, TcF, T32, Tfj, Tfg, ThB, T8G, TcU, T93, TcX, T3X, TfI, Tft, ThH;
61
               E T9h, Td3, Taa, Tde, T2A, Tf6, Tfb, Thw, T8m, TcR, T8x, TcO, T3t, Tfh, Tfm;
62
               E ThC, T8V, TcY, T96, TcV, T4o, Tfu, TfL, ThI, T9w, Tdf, Tad, Td4, T6b, TfV;
63
               E Tgc, ThT, TaF, Tdy, Tbm, Tdn, T4Q, ThN, TfA, TfN, Ta1, Tdh, Taf, Td8, T5h;
64
               E ThO, TfF, TfO, T9M, Tdi, Tag, Tdb, T6D, ThY, Tg1, Tge, Tba, TdA, Tbo, Tdr;
65
               E T74, ThZ, Tg6, Tgf, TaV, TdB, Tbp, Tdu;
66
               {
67
                    E T1, TiT, T7, TiS, Te, T7a, Tk, T7c;
68
                    T1 = ri[0];
69
                    TiT = ii[0];
70
                    {
71
                         E T3, T6, T4, TiR, T2, T5;
72
                         T3 = ri[WS(rs, 32)];
73
                         T6 = ii[WS(rs, 32)];
74
                         T2 = W[62];
75
                         T4 = T2 * T3;
76
                         TiR = T2 * T6;
77
                         T5 = W[63];
78
                         T7 = FMA(T5, T6, T4);
79
                         TiS = FNMS(T5, T3, TiR);
80
                    }
81
                    {
82
                         E Ta, Td, Tb, T79, T9, Tc;
83
                         Ta = ri[WS(rs, 16)];
84
                         Td = ii[WS(rs, 16)];
85
                         T9 = W[30];
86
                         Tb = T9 * Ta;
87
                         T79 = T9 * Td;
88
                         Tc = W[31];
89
                         Te = FMA(Tc, Td, Tb);
90
                         T7a = FNMS(Tc, Ta, T79);
91
                    }
92
                    {
93
                         E Tg, Tj, Th, T7b, Tf, Ti;
94
                         Tg = ri[WS(rs, 48)];
95
                         Tj = ii[WS(rs, 48)];
96
                         Tf = W[94];
97
                         Th = Tf * Tg;
98
                         T7b = Tf * Tj;
99
                         Ti = W[95];
100
                         Tk = FMA(Ti, Tj, Th);
101
                         T7c = FNMS(Ti, Tg, T7b);
102
                    }
103
                    {
104
                         E T8, Tl, TjP, TjQ;
105
                         T8 = T1 + T7;
106
                         Tl = Te + Tk;
107
                         Tm = T8 + Tl;
108
                         TeM = T8 - Tl;
109
                         TjP = TiT - TiS;
110
                         TjQ = Te - Tk;
111
                         TjR = TjP - TjQ;
112
                         Tkl = TjQ + TjP;
113
                    }
114
                    {
115
                         E T78, T7d, TiQ, TiU;
116
                         T78 = T1 - T7;
117
                         T7d = T7a - T7c;
118
                         T7e = T78 - T7d;
119
                         TcA = T78 + T7d;
120
                         TiQ = T7a + T7c;
121
                         TiU = TiS + TiT;
122
                         TiV = TiQ + TiU;
123
                         Tjm = TiU - TiQ;
124
                    }
125
               }
126
               {
127
                    E T1l, T7L, T1E, T7V, T1r, T7N, T1y, T7T;
128
                    {
129
                         E T1h, T1k, T1i, T7K, T1g, T1j;
130
                         T1h = ri[WS(rs, 60)];
131
                         T1k = ii[WS(rs, 60)];
132
                         T1g = W[118];
133
                         T1i = T1g * T1h;
134
                         T7K = T1g * T1k;
135
                         T1j = W[119];
136
                         T1l = FMA(T1j, T1k, T1i);
137
                         T7L = FNMS(T1j, T1h, T7K);
138
                    }
139
                    {
140
                         E T1A, T1D, T1B, T7U, T1z, T1C;
141
                         T1A = ri[WS(rs, 44)];
142
                         T1D = ii[WS(rs, 44)];
143
                         T1z = W[86];
144
                         T1B = T1z * T1A;
145
                         T7U = T1z * T1D;
146
                         T1C = W[87];
147
                         T1E = FMA(T1C, T1D, T1B);
148
                         T7V = FNMS(T1C, T1A, T7U);
149
                    }
150
                    {
151
                         E T1n, T1q, T1o, T7M, T1m, T1p;
152
                         T1n = ri[WS(rs, 28)];
153
                         T1q = ii[WS(rs, 28)];
154
                         T1m = W[54];
155
                         T1o = T1m * T1n;
156
                         T7M = T1m * T1q;
157
                         T1p = W[55];
158
                         T1r = FMA(T1p, T1q, T1o);
159
                         T7N = FNMS(T1p, T1n, T7M);
160
                    }
161
                    {
162
                         E T1u, T1x, T1v, T7S, T1t, T1w;
163
                         T1u = ri[WS(rs, 12)];
164
                         T1x = ii[WS(rs, 12)];
165
                         T1t = W[22];
166
                         T1v = T1t * T1u;
167
                         T7S = T1t * T1x;
168
                         T1w = W[23];
169
                         T1y = FMA(T1w, T1x, T1v);
170
                         T7T = FNMS(T1w, T1u, T7S);
171
                    }
172
                    {
173
                         E T1s, T1F, TeX, TeY;
174
                         T1s = T1l + T1r;
175
                         T1F = T1y + T1E;
176
                         T1G = T1s + T1F;
177
                         TeW = T1s - T1F;
178
                         TeX = T7L + T7N;
179
                         TeY = T7T + T7V;
180
                         TeZ = TeX - TeY;
181
                         Ths = TeX + TeY;
182
                    }
183
                    {
184
                         E T7O, T7P, T7R, T7W;
185
                         T7O = T7L - T7N;
186
                         T7P = T1y - T1E;
187
                         T7Q = T7O + T7P;
188
                         TcJ = T7O - T7P;
189
                         T7R = T1l - T1r;
190
                         T7W = T7T - T7V;
191
                         T7X = T7R - T7W;
192
                         TcI = T7R + T7W;
193
                    }
194
               }
195
               {
196
                    E T1O, T82, T27, T8s, T1U, T84, T21, T8q;
197
                    {
198
                         E T1K, T1N, T1L, T81, T1J, T1M;
199
                         T1K = ri[WS(rs, 2)];
200
                         T1N = ii[WS(rs, 2)];
201
                         T1J = W[2];
202
                         T1L = T1J * T1K;
203
                         T81 = T1J * T1N;
204
                         T1M = W[3];
205
                         T1O = FMA(T1M, T1N, T1L);
206
                         T82 = FNMS(T1M, T1K, T81);
207
                    }
208
                    {
209
                         E T23, T26, T24, T8r, T22, T25;
210
                         T23 = ri[WS(rs, 50)];
211
                         T26 = ii[WS(rs, 50)];
212
                         T22 = W[98];
213
                         T24 = T22 * T23;
214
                         T8r = T22 * T26;
215
                         T25 = W[99];
216
                         T27 = FMA(T25, T26, T24);
217
                         T8s = FNMS(T25, T23, T8r);
218
                    }
219
                    {
220
                         E T1Q, T1T, T1R, T83, T1P, T1S;
221
                         T1Q = ri[WS(rs, 34)];
222
                         T1T = ii[WS(rs, 34)];
223
                         T1P = W[66];
224
                         T1R = T1P * T1Q;
225
                         T83 = T1P * T1T;
226
                         T1S = W[67];
227
                         T1U = FMA(T1S, T1T, T1R);
228
                         T84 = FNMS(T1S, T1Q, T83);
229
                    }
230
                    {
231
                         E T1X, T20, T1Y, T8p, T1W, T1Z;
232
                         T1X = ri[WS(rs, 18)];
233
                         T20 = ii[WS(rs, 18)];
234
                         T1W = W[34];
235
                         T1Y = T1W * T1X;
236
                         T8p = T1W * T20;
237
                         T1Z = W[35];
238
                         T21 = FMA(T1Z, T20, T1Y);
239
                         T8q = FNMS(T1Z, T1X, T8p);
240
                    }
241
                    {
242
                         E T1V, T28, Tf3, Tf4;
243
                         T1V = T1O + T1U;
244
                         T28 = T21 + T27;
245
                         T29 = T1V + T28;
246
                         Tf8 = T1V - T28;
247
                         Tf3 = T82 + T84;
248
                         Tf4 = T8q + T8s;
249
                         Tf5 = Tf3 - Tf4;
250
                         Thv = Tf3 + Tf4;
251
                    }
252
                    {
253
                         E T85, T86, T8o, T8t;
254
                         T85 = T82 - T84;
255
                         T86 = T21 - T27;
256
                         T87 = T85 + T86;
257
                         TcN = T85 - T86;
258
                         T8o = T1O - T1U;
259
                         T8t = T8q - T8s;
260
                         T8u = T8o - T8t;
261
                         TcQ = T8o + T8t;
262
                    }
263
               }
264
               {
265
                    E T5p, Tal, T5I, Tbh, T5v, Tan, T5C, Tbf;
266
                    {
267
                         E T5l, T5o, T5m, Tak, T5k, T5n;
268
                         T5l = ri[WS(rs, 63)];
269
                         T5o = ii[WS(rs, 63)];
270
                         T5k = W[124];
271
                         T5m = T5k * T5l;
272
                         Tak = T5k * T5o;
273
                         T5n = W[125];
274
                         T5p = FMA(T5n, T5o, T5m);
275
                         Tal = FNMS(T5n, T5l, Tak);
276
                    }
277
                    {
278
                         E T5E, T5H, T5F, Tbg, T5D, T5G;
279
                         T5E = ri[WS(rs, 47)];
280
                         T5H = ii[WS(rs, 47)];
281
                         T5D = W[92];
282
                         T5F = T5D * T5E;
283
                         Tbg = T5D * T5H;
284
                         T5G = W[93];
285
                         T5I = FMA(T5G, T5H, T5F);
286
                         Tbh = FNMS(T5G, T5E, Tbg);
287
                    }
288
                    {
289
                         E T5r, T5u, T5s, Tam, T5q, T5t;
290
                         T5r = ri[WS(rs, 31)];
291
                         T5u = ii[WS(rs, 31)];
292
                         T5q = W[60];
293
                         T5s = T5q * T5r;
294
                         Tam = T5q * T5u;
295
                         T5t = W[61];
296
                         T5v = FMA(T5t, T5u, T5s);
297
                         Tan = FNMS(T5t, T5r, Tam);
298
                    }
299
                    {
300
                         E T5y, T5B, T5z, Tbe, T5x, T5A;
301
                         T5y = ri[WS(rs, 15)];
302
                         T5B = ii[WS(rs, 15)];
303
                         T5x = W[28];
304
                         T5z = T5x * T5y;
305
                         Tbe = T5x * T5B;
306
                         T5A = W[29];
307
                         T5C = FMA(T5A, T5B, T5z);
308
                         Tbf = FNMS(T5A, T5y, Tbe);
309
                    }
310
                    {
311
                         E T5w, T5J, TfS, TfT;
312
                         T5w = T5p + T5v;
313
                         T5J = T5C + T5I;
314
                         T5K = T5w + T5J;
315
                         Tg9 = T5w - T5J;
316
                         TfS = Tal + Tan;
317
                         TfT = Tbf + Tbh;
318
                         TfU = TfS - TfT;
319
                         ThS = TfS + TfT;
320
                    }
321
                    {
322
                         E Tao, Tap, Tbd, Tbi;
323
                         Tao = Tal - Tan;
324
                         Tap = T5C - T5I;
325
                         Taq = Tao + Tap;
326
                         Tdm = Tao - Tap;
327
                         Tbd = T5p - T5v;
328
                         Tbi = Tbf - Tbh;
329
                         Tbj = Tbd - Tbi;
330
                         Tdx = Tbd + Tbi;
331
                    }
332
               }
333
               {
334
                    E Ts, T7g, TL, T7q, Ty, T7i, TF, T7o;
335
                    {
336
                         E To, Tr, Tp, T7f, Tn, Tq;
337
                         To = ri[WS(rs, 8)];
338
                         Tr = ii[WS(rs, 8)];
339
                         Tn = W[14];
340
                         Tp = Tn * To;
341
                         T7f = Tn * Tr;
342
                         Tq = W[15];
343
                         Ts = FMA(Tq, Tr, Tp);
344
                         T7g = FNMS(Tq, To, T7f);
345
                    }
346
                    {
347
                         E TH, TK, TI, T7p, TG, TJ;
348
                         TH = ri[WS(rs, 24)];
349
                         TK = ii[WS(rs, 24)];
350
                         TG = W[46];
351
                         TI = TG * TH;
352
                         T7p = TG * TK;
353
                         TJ = W[47];
354
                         TL = FMA(TJ, TK, TI);
355
                         T7q = FNMS(TJ, TH, T7p);
356
                    }
357
                    {
358
                         E Tu, Tx, Tv, T7h, Tt, Tw;
359
                         Tu = ri[WS(rs, 40)];
360
                         Tx = ii[WS(rs, 40)];
361
                         Tt = W[78];
362
                         Tv = Tt * Tu;
363
                         T7h = Tt * Tx;
364
                         Tw = W[79];
365
                         Ty = FMA(Tw, Tx, Tv);
366
                         T7i = FNMS(Tw, Tu, T7h);
367
                    }
368
                    {
369
                         E TB, TE, TC, T7n, TA, TD;
370
                         TB = ri[WS(rs, 56)];
371
                         TE = ii[WS(rs, 56)];
372
                         TA = W[110];
373
                         TC = TA * TB;
374
                         T7n = TA * TE;
375
                         TD = W[111];
376
                         TF = FMA(TD, TE, TC);
377
                         T7o = FNMS(TD, TB, T7n);
378
                    }
379
                    {
380
                         E Tz, TM, TeN, TeO;
381
                         Tz = Ts + Ty;
382
                         TM = TF + TL;
383
                         TN = Tz + TM;
384
                         Tjl = TM - Tz;
385
                         TeN = T7g + T7i;
386
                         TeO = T7o + T7q;
387
                         TeP = TeN - TeO;
388
                         TiP = TeN + TeO;
389
                    }
390
                    {
391
                         E T7j, T7k, T7m, T7r;
392
                         T7j = T7g - T7i;
393
                         T7k = Ts - Ty;
394
                         T7l = T7j - T7k;
395
                         TcB = T7k + T7j;
396
                         T7m = TF - TL;
397
                         T7r = T7o - T7q;
398
                         T7s = T7m + T7r;
399
                         TcC = T7m - T7r;
400
                    }
401
               }
402
               {
403
                    E TU, T7w, T1d, T7G, T10, T7y, T17, T7E;
404
                    {
405
                         E TQ, TT, TR, T7v, TP, TS;
406
                         TQ = ri[WS(rs, 4)];
407
                         TT = ii[WS(rs, 4)];
408
                         TP = W[6];
409
                         TR = TP * TQ;
410
                         T7v = TP * TT;
411
                         TS = W[7];
412
                         TU = FMA(TS, TT, TR);
413
                         T7w = FNMS(TS, TQ, T7v);
414
                    }
415
                    {
416
                         E T19, T1c, T1a, T7F, T18, T1b;
417
                         T19 = ri[WS(rs, 52)];
418
                         T1c = ii[WS(rs, 52)];
419
                         T18 = W[102];
420
                         T1a = T18 * T19;
421
                         T7F = T18 * T1c;
422
                         T1b = W[103];
423
                         T1d = FMA(T1b, T1c, T1a);
424
                         T7G = FNMS(T1b, T19, T7F);
425
                    }
426
                    {
427
                         E TW, TZ, TX, T7x, TV, TY;
428
                         TW = ri[WS(rs, 36)];
429
                         TZ = ii[WS(rs, 36)];
430
                         TV = W[70];
431
                         TX = TV * TW;
432
                         T7x = TV * TZ;
433
                         TY = W[71];
434
                         T10 = FMA(TY, TZ, TX);
435
                         T7y = FNMS(TY, TW, T7x);
436
                    }
437
                    {
438
                         E T13, T16, T14, T7D, T12, T15;
439
                         T13 = ri[WS(rs, 20)];
440
                         T16 = ii[WS(rs, 20)];
441
                         T12 = W[38];
442
                         T14 = T12 * T13;
443
                         T7D = T12 * T16;
444
                         T15 = W[39];
445
                         T17 = FMA(T15, T16, T14);
446
                         T7E = FNMS(T15, T13, T7D);
447
                    }
448
                    {
449
                         E T11, T1e, TeS, TeT;
450
                         T11 = TU + T10;
451
                         T1e = T17 + T1d;
452
                         T1f = T11 + T1e;
453
                         TeR = T11 - T1e;
454
                         TeS = T7w + T7y;
455
                         TeT = T7E + T7G;
456
                         TeU = TeS - TeT;
457
                         Thr = TeS + TeT;
458
                    }
459
                    {
460
                         E T7z, T7A, T7C, T7H;
461
                         T7z = T7w - T7y;
462
                         T7A = T17 - T1d;
463
                         T7B = T7z + T7A;
464
                         TcG = T7z - T7A;
465
                         T7C = TU - T10;
466
                         T7H = T7E - T7G;
467
                         T7I = T7C - T7H;
468
                         TcF = T7C + T7H;
469
                    }
470
               }
471
               {
472
                    E T2H, T8B, T30, T91, T2N, T8D, T2U, T8Z;
473
                    {
474
                         E T2D, T2G, T2E, T8A, T2C, T2F;
475
                         T2D = ri[WS(rs, 62)];
476
                         T2G = ii[WS(rs, 62)];
477
                         T2C = W[122];
478
                         T2E = T2C * T2D;
479
                         T8A = T2C * T2G;
480
                         T2F = W[123];
481
                         T2H = FMA(T2F, T2G, T2E);
482
                         T8B = FNMS(T2F, T2D, T8A);
483
                    }
484
                    {
485
                         E T2W, T2Z, T2X, T90, T2V, T2Y;
486
                         T2W = ri[WS(rs, 46)];
487
                         T2Z = ii[WS(rs, 46)];
488
                         T2V = W[90];
489
                         T2X = T2V * T2W;
490
                         T90 = T2V * T2Z;
491
                         T2Y = W[91];
492
                         T30 = FMA(T2Y, T2Z, T2X);
493
                         T91 = FNMS(T2Y, T2W, T90);
494
                    }
495
                    {
496
                         E T2J, T2M, T2K, T8C, T2I, T2L;
497
                         T2J = ri[WS(rs, 30)];
498
                         T2M = ii[WS(rs, 30)];
499
                         T2I = W[58];
500
                         T2K = T2I * T2J;
501
                         T8C = T2I * T2M;
502
                         T2L = W[59];
503
                         T2N = FMA(T2L, T2M, T2K);
504
                         T8D = FNMS(T2L, T2J, T8C);
505
                    }
506
                    {
507
                         E T2Q, T2T, T2R, T8Y, T2P, T2S;
508
                         T2Q = ri[WS(rs, 14)];
509
                         T2T = ii[WS(rs, 14)];
510
                         T2P = W[26];
511
                         T2R = T2P * T2Q;
512
                         T8Y = T2P * T2T;
513
                         T2S = W[27];
514
                         T2U = FMA(T2S, T2T, T2R);
515
                         T8Z = FNMS(T2S, T2Q, T8Y);
516
                    }
517
                    {
518
                         E T2O, T31, Tfe, Tff;
519
                         T2O = T2H + T2N;
520
                         T31 = T2U + T30;
521
                         T32 = T2O + T31;
522
                         Tfj = T2O - T31;
523
                         Tfe = T8B + T8D;
524
                         Tff = T8Z + T91;
525
                         Tfg = Tfe - Tff;
526
                         ThB = Tfe + Tff;
527
                    }
528
                    {
529
                         E T8E, T8F, T8X, T92;
530
                         T8E = T8B - T8D;
531
                         T8F = T2U - T30;
532
                         T8G = T8E + T8F;
533
                         TcU = T8E - T8F;
534
                         T8X = T2H - T2N;
535
                         T92 = T8Z - T91;
536
                         T93 = T8X - T92;
537
                         TcX = T8X + T92;
538
                    }
539
               }
540
               {
541
                    E T3C, T9c, T3V, Ta8, T3I, T9e, T3P, Ta6;
542
                    {
543
                         E T3y, T3B, T3z, T9b, T3x, T3A;
544
                         T3y = ri[WS(rs, 1)];
545
                         T3B = ii[WS(rs, 1)];
546
                         T3x = W[0];
547
                         T3z = T3x * T3y;
548
                         T9b = T3x * T3B;
549
                         T3A = W[1];
550
                         T3C = FMA(T3A, T3B, T3z);
551
                         T9c = FNMS(T3A, T3y, T9b);
552
                    }
553
                    {
554
                         E T3R, T3U, T3S, Ta7, T3Q, T3T;
555
                         T3R = ri[WS(rs, 49)];
556
                         T3U = ii[WS(rs, 49)];
557
                         T3Q = W[96];
558
                         T3S = T3Q * T3R;
559
                         Ta7 = T3Q * T3U;
560
                         T3T = W[97];
561
                         T3V = FMA(T3T, T3U, T3S);
562
                         Ta8 = FNMS(T3T, T3R, Ta7);
563
                    }
564
                    {
565
                         E T3E, T3H, T3F, T9d, T3D, T3G;
566
                         T3E = ri[WS(rs, 33)];
567
                         T3H = ii[WS(rs, 33)];
568
                         T3D = W[64];
569
                         T3F = T3D * T3E;
570
                         T9d = T3D * T3H;
571
                         T3G = W[65];
572
                         T3I = FMA(T3G, T3H, T3F);
573
                         T9e = FNMS(T3G, T3E, T9d);
574
                    }
575
                    {
576
                         E T3L, T3O, T3M, Ta5, T3K, T3N;
577
                         T3L = ri[WS(rs, 17)];
578
                         T3O = ii[WS(rs, 17)];
579
                         T3K = W[32];
580
                         T3M = T3K * T3L;
581
                         Ta5 = T3K * T3O;
582
                         T3N = W[33];
583
                         T3P = FMA(T3N, T3O, T3M);
584
                         Ta6 = FNMS(T3N, T3L, Ta5);
585
                    }
586
                    {
587
                         E T3J, T3W, Tfr, Tfs;
588
                         T3J = T3C + T3I;
589
                         T3W = T3P + T3V;
590
                         T3X = T3J + T3W;
591
                         TfI = T3J - T3W;
592
                         Tfr = T9c + T9e;
593
                         Tfs = Ta6 + Ta8;
594
                         Tft = Tfr - Tfs;
595
                         ThH = Tfr + Tfs;
596
                    }
597
                    {
598
                         E T9f, T9g, Ta4, Ta9;
599
                         T9f = T9c - T9e;
600
                         T9g = T3P - T3V;
601
                         T9h = T9f + T9g;
602
                         Td3 = T9f - T9g;
603
                         Ta4 = T3C - T3I;
604
                         Ta9 = Ta6 - Ta8;
605
                         Taa = Ta4 - Ta9;
606
                         Tde = Ta4 + Ta9;
607
                    }
608
               }
609
               {
610
                    E T2f, T8a, T2y, T8j, T2l, T8c, T2s, T8h;
611
                    {
612
                         E T2b, T2e, T2c, T89, T2a, T2d;
613
                         T2b = ri[WS(rs, 10)];
614
                         T2e = ii[WS(rs, 10)];
615
                         T2a = W[18];
616
                         T2c = T2a * T2b;
617
                         T89 = T2a * T2e;
618
                         T2d = W[19];
619
                         T2f = FMA(T2d, T2e, T2c);
620
                         T8a = FNMS(T2d, T2b, T89);
621
                    }
622
                    {
623
                         E T2u, T2x, T2v, T8i, T2t, T2w;
624
                         T2u = ri[WS(rs, 26)];
625
                         T2x = ii[WS(rs, 26)];
626
                         T2t = W[50];
627
                         T2v = T2t * T2u;
628
                         T8i = T2t * T2x;
629
                         T2w = W[51];
630
                         T2y = FMA(T2w, T2x, T2v);
631
                         T8j = FNMS(T2w, T2u, T8i);
632
                    }
633
                    {
634
                         E T2h, T2k, T2i, T8b, T2g, T2j;
635
                         T2h = ri[WS(rs, 42)];
636
                         T2k = ii[WS(rs, 42)];
637
                         T2g = W[82];
638
                         T2i = T2g * T2h;
639
                         T8b = T2g * T2k;
640
                         T2j = W[83];
641
                         T2l = FMA(T2j, T2k, T2i);
642
                         T8c = FNMS(T2j, T2h, T8b);
643
                    }
644
                    {
645
                         E T2o, T2r, T2p, T8g, T2n, T2q;
646
                         T2o = ri[WS(rs, 58)];
647
                         T2r = ii[WS(rs, 58)];
648
                         T2n = W[114];
649
                         T2p = T2n * T2o;
650
                         T8g = T2n * T2r;
651
                         T2q = W[115];
652
                         T2s = FMA(T2q, T2r, T2p);
653
                         T8h = FNMS(T2q, T2o, T8g);
654
                    }
655
                    {
656
                         E T2m, T2z, Tf9, Tfa;
657
                         T2m = T2f + T2l;
658
                         T2z = T2s + T2y;
659
                         T2A = T2m + T2z;
660
                         Tf6 = T2z - T2m;
661
                         Tf9 = T8a + T8c;
662
                         Tfa = T8h + T8j;
663
                         Tfb = Tf9 - Tfa;
664
                         Thw = Tf9 + Tfa;
665
                         {
666
                              E T8e, T8w, T8l, T8v;
667
                              {
668
                                   E T88, T8d, T8f, T8k;
669
                                   T88 = T2f - T2l;
670
                                   T8d = T8a - T8c;
671
                                   T8e = T88 + T8d;
672
                                   T8w = T8d - T88;
673
                                   T8f = T2s - T2y;
674
                                   T8k = T8h - T8j;
675
                                   T8l = T8f - T8k;
676
                                   T8v = T8f + T8k;
677
                              }
678
                              T8m = T8e - T8l;
679
                              TcR = T8e + T8l;
680
                              T8x = T8v - T8w;
681
                              TcO = T8w + T8v;
682
                         }
683
                    }
684
               }
685
               {
686
                    E T38, T8J, T3r, T8S, T3e, T8L, T3l, T8Q;
687
                    {
688
                         E T34, T37, T35, T8I, T33, T36;
689
                         T34 = ri[WS(rs, 6)];
690
                         T37 = ii[WS(rs, 6)];
691
                         T33 = W[10];
692
                         T35 = T33 * T34;
693
                         T8I = T33 * T37;
694
                         T36 = W[11];
695
                         T38 = FMA(T36, T37, T35);
696
                         T8J = FNMS(T36, T34, T8I);
697
                    }
698
                    {
699
                         E T3n, T3q, T3o, T8R, T3m, T3p;
700
                         T3n = ri[WS(rs, 22)];
701
                         T3q = ii[WS(rs, 22)];
702
                         T3m = W[42];
703
                         T3o = T3m * T3n;
704
                         T8R = T3m * T3q;
705
                         T3p = W[43];
706
                         T3r = FMA(T3p, T3q, T3o);
707
                         T8S = FNMS(T3p, T3n, T8R);
708
                    }
709
                    {
710
                         E T3a, T3d, T3b, T8K, T39, T3c;
711
                         T3a = ri[WS(rs, 38)];
712
                         T3d = ii[WS(rs, 38)];
713
                         T39 = W[74];
714
                         T3b = T39 * T3a;
715
                         T8K = T39 * T3d;
716
                         T3c = W[75];
717
                         T3e = FMA(T3c, T3d, T3b);
718
                         T8L = FNMS(T3c, T3a, T8K);
719
                    }
720
                    {
721
                         E T3h, T3k, T3i, T8P, T3g, T3j;
722
                         T3h = ri[WS(rs, 54)];
723
                         T3k = ii[WS(rs, 54)];
724
                         T3g = W[106];
725
                         T3i = T3g * T3h;
726
                         T8P = T3g * T3k;
727
                         T3j = W[107];
728
                         T3l = FMA(T3j, T3k, T3i);
729
                         T8Q = FNMS(T3j, T3h, T8P);
730
                    }
731
                    {
732
                         E T3f, T3s, Tfk, Tfl;
733
                         T3f = T38 + T3e;
734
                         T3s = T3l + T3r;
735
                         T3t = T3f + T3s;
736
                         Tfh = T3s - T3f;
737
                         Tfk = T8J + T8L;
738
                         Tfl = T8Q + T8S;
739
                         Tfm = Tfk - Tfl;
740
                         ThC = Tfk + Tfl;
741
                         {
742
                              E T8N, T95, T8U, T94;
743
                              {
744
                                   E T8H, T8M, T8O, T8T;
745
                                   T8H = T38 - T3e;
746
                                   T8M = T8J - T8L;
747
                                   T8N = T8H + T8M;
748
                                   T95 = T8M - T8H;
749
                                   T8O = T3l - T3r;
750
                                   T8T = T8Q - T8S;
751
                                   T8U = T8O - T8T;
752
                                   T94 = T8O + T8T;
753
                              }
754
                              T8V = T8N - T8U;
755
                              TcY = T8N + T8U;
756
                              T96 = T94 - T95;
757
                              TcV = T95 + T94;
758
                         }
759
                    }
760
               }
761
               {
762
                    E T43, T9k, T4m, T9t, T49, T9m, T4g, T9r;
763
                    {
764
                         E T3Z, T42, T40, T9j, T3Y, T41;
765
                         T3Z = ri[WS(rs, 9)];
766
                         T42 = ii[WS(rs, 9)];
767
                         T3Y = W[16];
768
                         T40 = T3Y * T3Z;
769
                         T9j = T3Y * T42;
770
                         T41 = W[17];
771
                         T43 = FMA(T41, T42, T40);
772
                         T9k = FNMS(T41, T3Z, T9j);
773
                    }
774
                    {
775
                         E T4i, T4l, T4j, T9s, T4h, T4k;
776
                         T4i = ri[WS(rs, 25)];
777
                         T4l = ii[WS(rs, 25)];
778
                         T4h = W[48];
779
                         T4j = T4h * T4i;
780
                         T9s = T4h * T4l;
781
                         T4k = W[49];
782
                         T4m = FMA(T4k, T4l, T4j);
783
                         T9t = FNMS(T4k, T4i, T9s);
784
                    }
785
                    {
786
                         E T45, T48, T46, T9l, T44, T47;
787
                         T45 = ri[WS(rs, 41)];
788
                         T48 = ii[WS(rs, 41)];
789
                         T44 = W[80];
790
                         T46 = T44 * T45;
791
                         T9l = T44 * T48;
792
                         T47 = W[81];
793
                         T49 = FMA(T47, T48, T46);
794
                         T9m = FNMS(T47, T45, T9l);
795
                    }
796
                    {
797
                         E T4c, T4f, T4d, T9q, T4b, T4e;
798
                         T4c = ri[WS(rs, 57)];
799
                         T4f = ii[WS(rs, 57)];
800
                         T4b = W[112];
801
                         T4d = T4b * T4c;
802
                         T9q = T4b * T4f;
803
                         T4e = W[113];
804
                         T4g = FMA(T4e, T4f, T4d);
805
                         T9r = FNMS(T4e, T4c, T9q);
806
                    }
807
                    {
808
                         E T4a, T4n, TfJ, TfK;
809
                         T4a = T43 + T49;
810
                         T4n = T4g + T4m;
811
                         T4o = T4a + T4n;
812
                         Tfu = T4n - T4a;
813
                         TfJ = T9k + T9m;
814
                         TfK = T9r + T9t;
815
                         TfL = TfJ - TfK;
816
                         ThI = TfJ + TfK;
817
                         {
818
                              E T9o, Tac, T9v, Tab;
819
                              {
820
                                   E T9i, T9n, T9p, T9u;
821
                                   T9i = T43 - T49;
822
                                   T9n = T9k - T9m;
823
                                   T9o = T9i + T9n;
824
                                   Tac = T9n - T9i;
825
                                   T9p = T4g - T4m;
826
                                   T9u = T9r - T9t;
827
                                   T9v = T9p - T9u;
828
                                   Tab = T9p + T9u;
829
                              }
830
                              T9w = T9o - T9v;
831
                              Tdf = T9o + T9v;
832
                              Tad = Tab - Tac;
833
                              Td4 = Tac + Tab;
834
                         }
835
                    }
836
               }
837
               {
838
                    E T5Q, Tat, T69, TaC, T5W, Tav, T63, TaA;
839
                    {
840
                         E T5M, T5P, T5N, Tas, T5L, T5O;
841
                         T5M = ri[WS(rs, 7)];
842
                         T5P = ii[WS(rs, 7)];
843
                         T5L = W[12];
844
                         T5N = T5L * T5M;
845
                         Tas = T5L * T5P;
846
                         T5O = W[13];
847
                         T5Q = FMA(T5O, T5P, T5N);
848
                         Tat = FNMS(T5O, T5M, Tas);
849
                    }
850
                    {
851
                         E T65, T68, T66, TaB, T64, T67;
852
                         T65 = ri[WS(rs, 23)];
853
                         T68 = ii[WS(rs, 23)];
854
                         T64 = W[44];
855
                         T66 = T64 * T65;
856
                         TaB = T64 * T68;
857
                         T67 = W[45];
858
                         T69 = FMA(T67, T68, T66);
859
                         TaC = FNMS(T67, T65, TaB);
860
                    }
861
                    {
862
                         E T5S, T5V, T5T, Tau, T5R, T5U;
863
                         T5S = ri[WS(rs, 39)];
864
                         T5V = ii[WS(rs, 39)];
865
                         T5R = W[76];
866
                         T5T = T5R * T5S;
867
                         Tau = T5R * T5V;
868
                         T5U = W[77];
869
                         T5W = FMA(T5U, T5V, T5T);
870
                         Tav = FNMS(T5U, T5S, Tau);
871
                    }
872
                    {
873
                         E T5Z, T62, T60, Taz, T5Y, T61;
874
                         T5Z = ri[WS(rs, 55)];
875
                         T62 = ii[WS(rs, 55)];
876
                         T5Y = W[108];
877
                         T60 = T5Y * T5Z;
878
                         Taz = T5Y * T62;
879
                         T61 = W[109];
880
                         T63 = FMA(T61, T62, T60);
881
                         TaA = FNMS(T61, T5Z, Taz);
882
                    }
883
                    {
884
                         E T5X, T6a, Tga, Tgb;
885
                         T5X = T5Q + T5W;
886
                         T6a = T63 + T69;
887
                         T6b = T5X + T6a;
888
                         TfV = T6a - T5X;
889
                         Tga = Tat + Tav;
890
                         Tgb = TaA + TaC;
891
                         Tgc = Tga - Tgb;
892
                         ThT = Tga + Tgb;
893
                         {
894
                              E Tax, Tbl, TaE, Tbk;
895
                              {
896
                                   E Tar, Taw, Tay, TaD;
897
                                   Tar = T5Q - T5W;
898
                                   Taw = Tat - Tav;
899
                                   Tax = Tar + Taw;
900
                                   Tbl = Taw - Tar;
901
                                   Tay = T63 - T69;
902
                                   TaD = TaA - TaC;
903
                                   TaE = Tay - TaD;
904
                                   Tbk = Tay + TaD;
905
                              }
906
                              TaF = Tax - TaE;
907
                              Tdy = Tax + TaE;
908
                              Tbm = Tbk - Tbl;
909
                              Tdn = Tbl + Tbk;
910
                         }
911
                    }
912
               }
913
               {
914
                    E T4v, T9V, T4O, T9R, T4B, T9X, T4I, T9P;
915
                    {
916
                         E T4r, T4u, T4s, T9U, T4q, T4t;
917
                         T4r = ri[WS(rs, 5)];
918
                         T4u = ii[WS(rs, 5)];
919
                         T4q = W[8];
920
                         T4s = T4q * T4r;
921
                         T9U = T4q * T4u;
922
                         T4t = W[9];
923
                         T4v = FMA(T4t, T4u, T4s);
924
                         T9V = FNMS(T4t, T4r, T9U);
925
                    }
926
                    {
927
                         E T4K, T4N, T4L, T9Q, T4J, T4M;
928
                         T4K = ri[WS(rs, 53)];
929
                         T4N = ii[WS(rs, 53)];
930
                         T4J = W[104];
931
                         T4L = T4J * T4K;
932
                         T9Q = T4J * T4N;
933
                         T4M = W[105];
934
                         T4O = FMA(T4M, T4N, T4L);
935
                         T9R = FNMS(T4M, T4K, T9Q);
936
                    }
937
                    {
938
                         E T4x, T4A, T4y, T9W, T4w, T4z;
939
                         T4x = ri[WS(rs, 37)];
940
                         T4A = ii[WS(rs, 37)];
941
                         T4w = W[72];
942
                         T4y = T4w * T4x;
943
                         T9W = T4w * T4A;
944
                         T4z = W[73];
945
                         T4B = FMA(T4z, T4A, T4y);
946
                         T9X = FNMS(T4z, T4x, T9W);
947
                    }
948
                    {
949
                         E T4E, T4H, T4F, T9O, T4D, T4G;
950
                         T4E = ri[WS(rs, 21)];
951
                         T4H = ii[WS(rs, 21)];
952
                         T4D = W[40];
953
                         T4F = T4D * T4E;
954
                         T9O = T4D * T4H;
955
                         T4G = W[41];
956
                         T4I = FMA(T4G, T4H, T4F);
957
                         T9P = FNMS(T4G, T4E, T9O);
958
                    }
959
                    {
960
                         E T4C, T4P, Tfz, Tfw, Tfx, Tfy;
961
                         T4C = T4v + T4B;
962
                         T4P = T4I + T4O;
963
                         Tfz = T4C - T4P;
964
                         Tfw = T9V + T9X;
965
                         Tfx = T9P + T9R;
966
                         Tfy = Tfw - Tfx;
967
                         T4Q = T4C + T4P;
968
                         ThN = Tfw + Tfx;
969
                         TfA = Tfy - Tfz;
970
                         TfN = Tfz + Tfy;
971
                    }
972
                    {
973
                         E T9T, Td7, Ta0, Td6;
974
                         {
975
                              E T9N, T9S, T9Y, T9Z;
976
                              T9N = T4v - T4B;
977
                              T9S = T9P - T9R;
978
                              T9T = T9N - T9S;
979
                              Td7 = T9N + T9S;
980
                              T9Y = T9V - T9X;
981
                              T9Z = T4I - T4O;
982
                              Ta0 = T9Y + T9Z;
983
                              Td6 = T9Y - T9Z;
984
                         }
985
                         Ta1 = FNMS(KP414213562, Ta0, T9T);
986
                         Tdh = FMA(KP414213562, Td6, Td7);
987
                         Taf = FMA(KP414213562, T9T, Ta0);
988
                         Td8 = FNMS(KP414213562, Td7, Td6);
989
                    }
990
               }
991
               {
992
                    E T4W, T9G, T5f, T9C, T52, T9I, T59, T9A;
993
                    {
994
                         E T4S, T4V, T4T, T9F, T4R, T4U;
995
                         T4S = ri[WS(rs, 61)];
996
                         T4V = ii[WS(rs, 61)];
997
                         T4R = W[120];
998
                         T4T = T4R * T4S;
999
                         T9F = T4R * T4V;
1000
                         T4U = W[121];
1001
                         T4W = FMA(T4U, T4V, T4T);
1002
                         T9G = FNMS(T4U, T4S, T9F);
1003
                    }
1004
                    {
1005
                         E T5b, T5e, T5c, T9B, T5a, T5d;
1006
                         T5b = ri[WS(rs, 45)];
1007
                         T5e = ii[WS(rs, 45)];
1008
                         T5a = W[88];
1009
                         T5c = T5a * T5b;
1010
                         T9B = T5a * T5e;
1011
                         T5d = W[89];
1012
                         T5f = FMA(T5d, T5e, T5c);
1013
                         T9C = FNMS(T5d, T5b, T9B);
1014
                    }
1015
                    {
1016
                         E T4Y, T51, T4Z, T9H, T4X, T50;
1017
                         T4Y = ri[WS(rs, 29)];
1018
                         T51 = ii[WS(rs, 29)];
1019
                         T4X = W[56];
1020
                         T4Z = T4X * T4Y;
1021
                         T9H = T4X * T51;
1022
                         T50 = W[57];
1023
                         T52 = FMA(T50, T51, T4Z);
1024
                         T9I = FNMS(T50, T4Y, T9H);
1025
                    }
1026
                    {
1027
                         E T55, T58, T56, T9z, T54, T57;
1028
                         T55 = ri[WS(rs, 13)];
1029
                         T58 = ii[WS(rs, 13)];
1030
                         T54 = W[24];
1031
                         T56 = T54 * T55;
1032
                         T9z = T54 * T58;
1033
                         T57 = W[25];
1034
                         T59 = FMA(T57, T58, T56);
1035
                         T9A = FNMS(T57, T55, T9z);
1036
                    }
1037
                    {
1038
                         E T53, T5g, TfB, TfC, TfD, TfE;
1039
                         T53 = T4W + T52;
1040
                         T5g = T59 + T5f;
1041
                         TfB = T53 - T5g;
1042
                         TfC = T9G + T9I;
1043
                         TfD = T9A + T9C;
1044
                         TfE = TfC - TfD;
1045
                         T5h = T53 + T5g;
1046
                         ThO = TfC + TfD;
1047
                         TfF = TfB + TfE;
1048
                         TfO = TfB - TfE;
1049
                    }
1050
                    {
1051
                         E T9E, Tda, T9L, Td9;
1052
                         {
1053
                              E T9y, T9D, T9J, T9K;
1054
                              T9y = T4W - T52;
1055
                              T9D = T9A - T9C;
1056
                              T9E = T9y - T9D;
1057
                              Tda = T9y + T9D;
1058
                              T9J = T9G - T9I;
1059
                              T9K = T59 - T5f;
1060
                              T9L = T9J + T9K;
1061
                              Td9 = T9J - T9K;
1062
                         }
1063
                         T9M = FMA(KP414213562, T9L, T9E);
1064
                         Tdi = FNMS(KP414213562, Td9, Tda);
1065
                         Tag = FNMS(KP414213562, T9E, T9L);
1066
                         Tdb = FMA(KP414213562, Tda, Td9);
1067
                    }
1068
               }
1069
               {
1070
                    E T6i, Tb4, T6B, Tb0, T6o, Tb6, T6v, TaY;
1071
                    {
1072
                         E T6e, T6h, T6f, Tb3, T6d, T6g;
1073
                         T6e = ri[WS(rs, 3)];
1074
                         T6h = ii[WS(rs, 3)];
1075
                         T6d = W[4];
1076
                         T6f = T6d * T6e;
1077
                         Tb3 = T6d * T6h;
1078
                         T6g = W[5];
1079
                         T6i = FMA(T6g, T6h, T6f);
1080
                         Tb4 = FNMS(T6g, T6e, Tb3);
1081
                    }
1082
                    {
1083
                         E T6x, T6A, T6y, TaZ, T6w, T6z;
1084
                         T6x = ri[WS(rs, 51)];
1085
                         T6A = ii[WS(rs, 51)];
1086
                         T6w = W[100];
1087
                         T6y = T6w * T6x;
1088
                         TaZ = T6w * T6A;
1089
                         T6z = W[101];
1090
                         T6B = FMA(T6z, T6A, T6y);
1091
                         Tb0 = FNMS(T6z, T6x, TaZ);
1092
                    }
1093
                    {
1094
                         E T6k, T6n, T6l, Tb5, T6j, T6m;
1095
                         T6k = ri[WS(rs, 35)];
1096
                         T6n = ii[WS(rs, 35)];
1097
                         T6j = W[68];
1098
                         T6l = T6j * T6k;
1099
                         Tb5 = T6j * T6n;
1100
                         T6m = W[69];
1101
                         T6o = FMA(T6m, T6n, T6l);
1102
                         Tb6 = FNMS(T6m, T6k, Tb5);
1103
                    }
1104
                    {
1105
                         E T6r, T6u, T6s, TaX, T6q, T6t;
1106
                         T6r = ri[WS(rs, 19)];
1107
                         T6u = ii[WS(rs, 19)];
1108
                         T6q = W[36];
1109
                         T6s = T6q * T6r;
1110
                         TaX = T6q * T6u;
1111
                         T6t = W[37];
1112
                         T6v = FMA(T6t, T6u, T6s);
1113
                         TaY = FNMS(T6t, T6r, TaX);
1114
                    }
1115
                    {
1116
                         E T6p, T6C, Tg0, TfX, TfY, TfZ;
1117
                         T6p = T6i + T6o;
1118
                         T6C = T6v + T6B;
1119
                         Tg0 = T6p - T6C;
1120
                         TfX = Tb4 + Tb6;
1121
                         TfY = TaY + Tb0;
1122
                         TfZ = TfX - TfY;
1123
                         T6D = T6p + T6C;
1124
                         ThY = TfX + TfY;
1125
                         Tg1 = TfZ - Tg0;
1126
                         Tge = Tg0 + TfZ;
1127
                    }
1128
                    {
1129
                         E Tb2, Tdq, Tb9, Tdp;
1130
                         {
1131
                              E TaW, Tb1, Tb7, Tb8;
1132
                              TaW = T6i - T6o;
1133
                              Tb1 = TaY - Tb0;
1134
                              Tb2 = TaW - Tb1;
1135
                              Tdq = TaW + Tb1;
1136
                              Tb7 = Tb4 - Tb6;
1137
                              Tb8 = T6v - T6B;
1138
                              Tb9 = Tb7 + Tb8;
1139
                              Tdp = Tb7 - Tb8;
1140
                         }
1141
                         Tba = FNMS(KP414213562, Tb9, Tb2);
1142
                         TdA = FMA(KP414213562, Tdp, Tdq);
1143
                         Tbo = FMA(KP414213562, Tb2, Tb9);
1144
                         Tdr = FNMS(KP414213562, Tdq, Tdp);
1145
                    }
1146
               }
1147
               {
1148
                    E T6J, TaP, T72, TaL, T6P, TaR, T6W, TaJ;
1149
                    {
1150
                         E T6F, T6I, T6G, TaO, T6E, T6H;
1151
                         T6F = ri[WS(rs, 59)];
1152
                         T6I = ii[WS(rs, 59)];
1153
                         T6E = W[116];
1154
                         T6G = T6E * T6F;
1155
                         TaO = T6E * T6I;
1156
                         T6H = W[117];
1157
                         T6J = FMA(T6H, T6I, T6G);
1158
                         TaP = FNMS(T6H, T6F, TaO);
1159
                    }
1160
                    {
1161
                         E T6Y, T71, T6Z, TaK, T6X, T70;
1162
                         T6Y = ri[WS(rs, 43)];
1163
                         T71 = ii[WS(rs, 43)];
1164
                         T6X = W[84];
1165
                         T6Z = T6X * T6Y;
1166
                         TaK = T6X * T71;
1167
                         T70 = W[85];
1168
                         T72 = FMA(T70, T71, T6Z);
1169
                         TaL = FNMS(T70, T6Y, TaK);
1170
                    }
1171
                    {
1172
                         E T6L, T6O, T6M, TaQ, T6K, T6N;
1173
                         T6L = ri[WS(rs, 27)];
1174
                         T6O = ii[WS(rs, 27)];
1175
                         T6K = W[52];
1176
                         T6M = T6K * T6L;
1177
                         TaQ = T6K * T6O;
1178
                         T6N = W[53];
1179
                         T6P = FMA(T6N, T6O, T6M);
1180
                         TaR = FNMS(T6N, T6L, TaQ);
1181
                    }
1182
                    {
1183
                         E T6S, T6V, T6T, TaI, T6R, T6U;
1184
                         T6S = ri[WS(rs, 11)];
1185
                         T6V = ii[WS(rs, 11)];
1186
                         T6R = W[20];
1187
                         T6T = T6R * T6S;
1188
                         TaI = T6R * T6V;
1189
                         T6U = W[21];
1190
                         T6W = FMA(T6U, T6V, T6T);
1191
                         TaJ = FNMS(T6U, T6S, TaI);
1192
                    }
1193
                    {
1194
                         E T6Q, T73, Tg2, Tg3, Tg4, Tg5;
1195
                         T6Q = T6J + T6P;
1196
                         T73 = T6W + T72;
1197
                         Tg2 = T6Q - T73;
1198
                         Tg3 = TaP + TaR;
1199
                         Tg4 = TaJ + TaL;
1200
                         Tg5 = Tg3 - Tg4;
1201
                         T74 = T6Q + T73;
1202
                         ThZ = Tg3 + Tg4;
1203
                         Tg6 = Tg2 + Tg5;
1204
                         Tgf = Tg2 - Tg5;
1205
                    }
1206
                    {
1207
                         E TaN, Tdt, TaU, Tds;
1208
                         {
1209
                              E TaH, TaM, TaS, TaT;
1210
                              TaH = T6J - T6P;
1211
                              TaM = TaJ - TaL;
1212
                              TaN = TaH - TaM;
1213
                              Tdt = TaH + TaM;
1214
                              TaS = TaP - TaR;
1215
                              TaT = T6W - T72;
1216
                              TaU = TaS + TaT;
1217
                              Tds = TaS - TaT;
1218
                         }
1219
                         TaV = FMA(KP414213562, TaU, TaN);
1220
                         TdB = FNMS(KP414213562, Tds, Tdt);
1221
                         Tbp = FNMS(KP414213562, TaN, TaU);
1222
                         Tdu = FMA(KP414213562, Tdt, Tds);
1223
                    }
1224
               }
1225
               {
1226
                    E T1I, Tio, T3v, Tj1, TiX, Tj2, Tir, TiN, T76, TiK, TiC, TiG, T5j, TiJ, Tix;
1227
                    E TiF;
1228
                    {
1229
                         E TO, T1H, Tip, Tiq;
1230
                         TO = Tm + TN;
1231
                         T1H = T1f + T1G;
1232
                         T1I = TO + T1H;
1233
                         Tio = TO - T1H;
1234
                         {
1235
                              E T2B, T3u, TiO, TiW;
1236
                              T2B = T29 + T2A;
1237
                              T3u = T32 + T3t;
1238
                              T3v = T2B + T3u;
1239
                              Tj1 = T3u - T2B;
1240
                              TiO = Thr + Ths;
1241
                              TiW = TiP + TiV;
1242
                              TiX = TiO + TiW;
1243
                              Tj2 = TiW - TiO;
1244
                         }
1245
                         Tip = Thv + Thw;
1246
                         Tiq = ThB + ThC;
1247
                         Tir = Tip - Tiq;
1248
                         TiN = Tip + Tiq;
1249
                         {
1250
                              E T6c, T75, Tiy, Tiz, TiA, TiB;
1251
                              T6c = T5K + T6b;
1252
                              T75 = T6D + T74;
1253
                              Tiy = T6c - T75;
1254
                              Tiz = ThS + ThT;
1255
                              TiA = ThY + ThZ;
1256
                              TiB = Tiz - TiA;
1257
                              T76 = T6c + T75;
1258
                              TiK = Tiz + TiA;
1259
                              TiC = Tiy - TiB;
1260
                              TiG = Tiy + TiB;
1261
                         }
1262
                         {
1263
                              E T4p, T5i, Tit, Tiu, Tiv, Tiw;
1264
                              T4p = T3X + T4o;
1265
                              T5i = T4Q + T5h;
1266
                              Tit = T4p - T5i;
1267
                              Tiu = ThH + ThI;
1268
                              Tiv = ThN + ThO;
1269
                              Tiw = Tiu - Tiv;
1270
                              T5j = T4p + T5i;
1271
                              TiJ = Tiu + Tiv;
1272
                              Tix = Tit + Tiw;
1273
                              TiF = Tiw - Tit;
1274
                         }
1275
                    }
1276
                    {
1277
                         E T3w, T77, TiM, TiY;
1278
                         T3w = T1I + T3v;
1279
                         T77 = T5j + T76;
1280
                         ri[WS(rs, 32)] = T3w - T77;
1281
                         ri[0] = T3w + T77;
1282
                         TiM = TiJ + TiK;
1283
                         TiY = TiN + TiX;
1284
                         ii[0] = TiM + TiY;
1285
                         ii[WS(rs, 32)] = TiY - TiM;
1286
                    }
1287
                    {
1288
                         E Tis, TiD, Tj3, Tj4;
1289
                         Tis = Tio + Tir;
1290
                         TiD = Tix + TiC;
1291
                         ri[WS(rs, 40)] = FNMS(KP707106781, TiD, Tis);
1292
                         ri[WS(rs, 8)] = FMA(KP707106781, TiD, Tis);
1293
                         Tj3 = Tj1 + Tj2;
1294
                         Tj4 = TiF + TiG;
1295
                         ii[WS(rs, 8)] = FMA(KP707106781, Tj4, Tj3);
1296
                         ii[WS(rs, 40)] = FNMS(KP707106781, Tj4, Tj3);
1297
                    }
1298
                    {
1299
                         E TiE, TiH, Tj5, Tj6;
1300
                         TiE = Tio - Tir;
1301
                         TiH = TiF - TiG;
1302
                         ri[WS(rs, 56)] = FNMS(KP707106781, TiH, TiE);
1303
                         ri[WS(rs, 24)] = FMA(KP707106781, TiH, TiE);
1304
                         Tj5 = Tj2 - Tj1;
1305
                         Tj6 = TiC - Tix;
1306
                         ii[WS(rs, 24)] = FMA(KP707106781, Tj6, Tj5);
1307
                         ii[WS(rs, 56)] = FNMS(KP707106781, Tj6, Tj5);
1308
                    }
1309
                    {
1310
                         E TiI, TiL, TiZ, Tj0;
1311
                         TiI = T1I - T3v;
1312
                         TiL = TiJ - TiK;
1313
                         ri[WS(rs, 48)] = TiI - TiL;
1314
                         ri[WS(rs, 16)] = TiI + TiL;
1315
                         TiZ = T76 - T5j;
1316
                         Tj0 = TiX - TiN;
1317
                         ii[WS(rs, 16)] = TiZ + Tj0;
1318
                         ii[WS(rs, 48)] = Tj0 - TiZ;
1319
                    }
1320
               }
1321
               {
1322
                    E Thu, Ti8, Tj9, Tjf, ThF, Tjg, Tib, Tja, ThR, Til, Ti5, Tif, Ti2, Tim, Ti6;
1323
                    E Tii;
1324
                    {
1325
                         E Thq, Tht, Tj7, Tj8;
1326
                         Thq = Tm - TN;
1327
                         Tht = Thr - Ths;
1328
                         Thu = Thq - Tht;
1329
                         Ti8 = Thq + Tht;
1330
                         Tj7 = T1G - T1f;
1331
                         Tj8 = TiV - TiP;
1332
                         Tj9 = Tj7 + Tj8;
1333
                         Tjf = Tj8 - Tj7;
1334
                    }
1335
                    {
1336
                         E Thz, Ti9, ThE, Tia;
1337
                         {
1338
                              E Thx, Thy, ThA, ThD;
1339
                              Thx = Thv - Thw;
1340
                              Thy = T29 - T2A;
1341
                              Thz = Thx - Thy;
1342
                              Ti9 = Thy + Thx;
1343
                              ThA = T32 - T3t;
1344
                              ThD = ThB - ThC;
1345
                              ThE = ThA + ThD;
1346
                              Tia = ThA - ThD;
1347
                         }
1348
                         ThF = Thz - ThE;
1349
                         Tjg = Tia - Ti9;
1350
                         Tib = Ti9 + Tia;
1351
                         Tja = Thz + ThE;
1352
                    }
1353
                    {
1354
                         E ThL, Tie, ThQ, Tid;
1355
                         {
1356
                              E ThJ, ThK, ThM, ThP;
1357
                              ThJ = ThH - ThI;
1358
                              ThK = T5h - T4Q;
1359
                              ThL = ThJ - ThK;
1360
                              Tie = ThJ + ThK;
1361
                              ThM = T3X - T4o;
1362
                              ThP = ThN - ThO;
1363
                              ThQ = ThM - ThP;
1364
                              Tid = ThM + ThP;
1365
                         }
1366
                         ThR = FMA(KP414213562, ThQ, ThL);
1367
                         Til = FNMS(KP414213562, Tid, Tie);
1368
                         Ti5 = FNMS(KP414213562, ThL, ThQ);
1369
                         Tif = FMA(KP414213562, Tie, Tid);
1370
                    }
1371
                    {
1372
                         E ThW, Tih, Ti1, Tig;
1373
                         {
1374
                              E ThU, ThV, ThX, Ti0;
1375
                              ThU = ThS - ThT;
1376
                              ThV = T74 - T6D;
1377
                              ThW = ThU - ThV;
1378
                              Tih = ThU + ThV;
1379
                              ThX = T5K - T6b;
1380
                              Ti0 = ThY - ThZ;
1381
                              Ti1 = ThX - Ti0;
1382
                              Tig = ThX + Ti0;
1383
                         }
1384
                         Ti2 = FNMS(KP414213562, Ti1, ThW);
1385
                         Tim = FMA(KP414213562, Tig, Tih);
1386
                         Ti6 = FMA(KP414213562, ThW, Ti1);
1387
                         Tii = FNMS(KP414213562, Tih, Tig);
1388
                    }
1389
                    {
1390
                         E ThG, Ti3, Tjh, Tji;
1391
                         ThG = FMA(KP707106781, ThF, Thu);
1392
                         Ti3 = ThR - Ti2;
1393
                         ri[WS(rs, 44)] = FNMS(KP923879532, Ti3, ThG);
1394
                         ri[WS(rs, 12)] = FMA(KP923879532, Ti3, ThG);
1395
                         Tjh = FMA(KP707106781, Tjg, Tjf);
1396
                         Tji = Ti6 - Ti5;
1397
                         ii[WS(rs, 12)] = FMA(KP923879532, Tji, Tjh);
1398
                         ii[WS(rs, 44)] = FNMS(KP923879532, Tji, Tjh);
1399
                    }
1400
                    {
1401
                         E Ti4, Ti7, Tjj, Tjk;
1402
                         Ti4 = FNMS(KP707106781, ThF, Thu);
1403
                         Ti7 = Ti5 + Ti6;
1404
                         ri[WS(rs, 28)] = FNMS(KP923879532, Ti7, Ti4);
1405
                         ri[WS(rs, 60)] = FMA(KP923879532, Ti7, Ti4);
1406
                         Tjj = FNMS(KP707106781, Tjg, Tjf);
1407
                         Tjk = ThR + Ti2;
1408
                         ii[WS(rs, 28)] = FNMS(KP923879532, Tjk, Tjj);
1409
                         ii[WS(rs, 60)] = FMA(KP923879532, Tjk, Tjj);
1410
                    }
1411
                    {
1412
                         E Tic, Tij, Tjb, Tjc;
1413
                         Tic = FMA(KP707106781, Tib, Ti8);
1414
                         Tij = Tif + Tii;
1415
                         ri[WS(rs, 36)] = FNMS(KP923879532, Tij, Tic);
1416
                         ri[WS(rs, 4)] = FMA(KP923879532, Tij, Tic);
1417
                         Tjb = FMA(KP707106781, Tja, Tj9);
1418
                         Tjc = Til + Tim;
1419
                         ii[WS(rs, 4)] = FMA(KP923879532, Tjc, Tjb);
1420
                         ii[WS(rs, 36)] = FNMS(KP923879532, Tjc, Tjb);
1421
                    }
1422
                    {
1423
                         E Tik, Tin, Tjd, Tje;
1424
                         Tik = FNMS(KP707106781, Tib, Ti8);
1425
                         Tin = Til - Tim;
1426
                         ri[WS(rs, 52)] = FNMS(KP923879532, Tin, Tik);
1427
                         ri[WS(rs, 20)] = FMA(KP923879532, Tin, Tik);
1428
                         Tjd = FNMS(KP707106781, Tja, Tj9);
1429
                         Tje = Tii - Tif;
1430
                         ii[WS(rs, 20)] = FMA(KP923879532, Tje, Tjd);
1431
                         ii[WS(rs, 52)] = FNMS(KP923879532, Tje, Tjd);
1432
                    }
1433
               }
1434
               {
1435
                    E Tf2, TjJ, Tgo, TjD, TgI, Tjv, Tha, Tjp, Tfp, Tjw, Tgr, Tjq, Th4, Tho, Th8;
1436
                    E Thk, TfR, TgB, Tgl, Tgv, TgP, TjK, Thd, TjE, TgX, Thn, Th7, Thh, Tgi, TgC;
1437
                    E Tgm, Tgy;
1438
                    {
1439
                         E TeQ, TjB, Tf1, TjC, TeV, Tf0;
1440
                         TeQ = TeM + TeP;
1441
                         TjB = Tjm - Tjl;
1442
                         TeV = TeR + TeU;
1443
                         Tf0 = TeW - TeZ;
1444
                         Tf1 = TeV + Tf0;
1445
                         TjC = Tf0 - TeV;
1446
                         Tf2 = FNMS(KP707106781, Tf1, TeQ);
1447
                         TjJ = FNMS(KP707106781, TjC, TjB);
1448
                         Tgo = FMA(KP707106781, Tf1, TeQ);
1449
                         TjD = FMA(KP707106781, TjC, TjB);
1450
                    }
1451
                    {
1452
                         E TgE, Tjn, TgH, Tjo, TgF, TgG;
1453
                         TgE = TeM - TeP;
1454
                         Tjn = Tjl + Tjm;
1455
                         TgF = TeU - TeR;
1456
                         TgG = TeW + TeZ;
1457
                         TgH = TgF - TgG;
1458
                         Tjo = TgF + TgG;
1459
                         TgI = FMA(KP707106781, TgH, TgE);
1460
                         Tjv = FNMS(KP707106781, Tjo, Tjn);
1461
                         Tha = FNMS(KP707106781, TgH, TgE);
1462
                         Tjp = FMA(KP707106781, Tjo, Tjn);
1463
                    }
1464
                    {
1465
                         E Tfd, Tgp, Tfo, Tgq;
1466
                         {
1467
                              E Tf7, Tfc, Tfi, Tfn;
1468
                              Tf7 = Tf5 + Tf6;
1469
                              Tfc = Tf8 + Tfb;
1470
                              Tfd = FNMS(KP414213562, Tfc, Tf7);
1471
                              Tgp = FMA(KP414213562, Tf7, Tfc);
1472
                              Tfi = Tfg + Tfh;
1473
                              Tfn = Tfj + Tfm;
1474
                              Tfo = FMA(KP414213562, Tfn, Tfi);
1475
                              Tgq = FNMS(KP414213562, Tfi, Tfn);
1476
                         }
1477
                         Tfp = Tfd - Tfo;
1478
                         Tjw = Tgq - Tgp;
1479
                         Tgr = Tgp + Tgq;
1480
                         Tjq = Tfd + Tfo;
1481
                    }
1482
                    {
1483
                         E Th0, Thj, Th3, Thi;
1484
                         {
1485
                              E TgY, TgZ, Th1, Th2;
1486
                              TgY = Tg9 - Tgc;
1487
                              TgZ = Tg6 - Tg1;
1488
                              Th0 = FNMS(KP707106781, TgZ, TgY);
1489
                              Thj = FMA(KP707106781, TgZ, TgY);
1490
                              Th1 = TfU - TfV;
1491
                              Th2 = Tge - Tgf;
1492
                              Th3 = FNMS(KP707106781, Th2, Th1);
1493
                              Thi = FMA(KP707106781, Th2, Th1);
1494
                         }
1495
                         Th4 = FNMS(KP668178637, Th3, Th0);
1496
                         Tho = FMA(KP198912367, Thi, Thj);
1497
                         Th8 = FMA(KP668178637, Th0, Th3);
1498
                         Thk = FNMS(KP198912367, Thj, Thi);
1499
                    }
1500
                    {
1501
                         E TfH, Tgu, TfQ, Tgt;
1502
                         {
1503
                              E Tfv, TfG, TfM, TfP;
1504
                              Tfv = Tft + Tfu;
1505
                              TfG = TfA + TfF;
1506
                              TfH = FNMS(KP707106781, TfG, Tfv);
1507
                              Tgu = FMA(KP707106781, TfG, Tfv);
1508
                              TfM = TfI + TfL;
1509
                              TfP = TfN + TfO;
1510
                              TfQ = FNMS(KP707106781, TfP, TfM);
1511
                              Tgt = FMA(KP707106781, TfP, TfM);
1512
                         }
1513
                         TfR = FMA(KP668178637, TfQ, TfH);
1514
                         TgB = FNMS(KP198912367, Tgt, Tgu);
1515
                         Tgl = FNMS(KP668178637, TfH, TfQ);
1516
                         Tgv = FMA(KP198912367, Tgu, Tgt);
1517
                    }
1518
                    {
1519
                         E TgL, Thb, TgO, Thc;
1520
                         {
1521
                              E TgJ, TgK, TgM, TgN;
1522
                              TgJ = Tf5 - Tf6;
1523
                              TgK = Tf8 - Tfb;
1524
                              TgL = FMA(KP414213562, TgK, TgJ);
1525
                              Thb = FNMS(KP414213562, TgJ, TgK);
1526
                              TgM = Tfg - Tfh;
1527
                              TgN = Tfj - Tfm;
1528
                              TgO = FNMS(KP414213562, TgN, TgM);
1529
                              Thc = FMA(KP414213562, TgM, TgN);
1530
                         }
1531
                         TgP = TgL - TgO;
1532
                         TjK = TgL + TgO;
1533
                         Thd = Thb + Thc;
1534
                         TjE = Thc - Thb;
1535
                    }
1536
                    {
1537
                         E TgT, Thg, TgW, Thf;
1538
                         {
1539
                              E TgR, TgS, TgU, TgV;
1540
                              TgR = TfI - TfL;
1541
                              TgS = TfF - TfA;
1542
                              TgT = FNMS(KP707106781, TgS, TgR);
1543
                              Thg = FMA(KP707106781, TgS, TgR);
1544
                              TgU = Tft - Tfu;
1545
                              TgV = TfN - TfO;
1546
                              TgW = FNMS(KP707106781, TgV, TgU);
1547
                              Thf = FMA(KP707106781, TgV, TgU);
1548
                         }
1549
                         TgX = FMA(KP668178637, TgW, TgT);
1550
                         Thn = FNMS(KP198912367, Thf, Thg);
1551
                         Th7 = FNMS(KP668178637, TgT, TgW);
1552
                         Thh = FMA(KP198912367, Thg, Thf);
1553
                    }
1554
                    {
1555
                         E Tg8, Tgx, Tgh, Tgw;
1556
                         {
1557
                              E TfW, Tg7, Tgd, Tgg;
1558
                              TfW = TfU + TfV;
1559
                              Tg7 = Tg1 + Tg6;
1560
                              Tg8 = FNMS(KP707106781, Tg7, TfW);
1561
                              Tgx = FMA(KP707106781, Tg7, TfW);
1562
                              Tgd = Tg9 + Tgc;
1563
                              Tgg = Tge + Tgf;
1564
                              Tgh = FNMS(KP707106781, Tgg, Tgd);
1565
                              Tgw = FMA(KP707106781, Tgg, Tgd);
1566
                         }
1567
                         Tgi = FNMS(KP668178637, Tgh, Tg8);
1568
                         TgC = FMA(KP198912367, Tgw, Tgx);
1569
                         Tgm = FMA(KP668178637, Tg8, Tgh);
1570
                         Tgy = FNMS(KP198912367, Tgx, Tgw);
1571
                    }
1572
                    {
1573
                         E Tfq, Tgj, Tjx, Tjy;
1574
                         Tfq = FMA(KP923879532, Tfp, Tf2);
1575
                         Tgj = TfR - Tgi;
1576
                         ri[WS(rs, 42)] = FNMS(KP831469612, Tgj, Tfq);
1577
                         ri[WS(rs, 10)] = FMA(KP831469612, Tgj, Tfq);
1578
                         Tjx = FMA(KP923879532, Tjw, Tjv);
1579
                         Tjy = Tgm - Tgl;
1580
                         ii[WS(rs, 10)] = FMA(KP831469612, Tjy, Tjx);
1581
                         ii[WS(rs, 42)] = FNMS(KP831469612, Tjy, Tjx);
1582
                    }
1583
                    {
1584
                         E Tgk, Tgn, Tjz, TjA;
1585
                         Tgk = FNMS(KP923879532, Tfp, Tf2);
1586
                         Tgn = Tgl + Tgm;
1587
                         ri[WS(rs, 26)] = FNMS(KP831469612, Tgn, Tgk);
1588
                         ri[WS(rs, 58)] = FMA(KP831469612, Tgn, Tgk);
1589
                         Tjz = FNMS(KP923879532, Tjw, Tjv);
1590
                         TjA = TfR + Tgi;
1591
                         ii[WS(rs, 26)] = FNMS(KP831469612, TjA, Tjz);
1592
                         ii[WS(rs, 58)] = FMA(KP831469612, TjA, Tjz);
1593
                    }
1594
                    {
1595
                         E Tgs, Tgz, Tjr, Tjs;
1596
                         Tgs = FMA(KP923879532, Tgr, Tgo);
1597
                         Tgz = Tgv + Tgy;
1598
                         ri[WS(rs, 34)] = FNMS(KP980785280, Tgz, Tgs);
1599
                         ri[WS(rs, 2)] = FMA(KP980785280, Tgz, Tgs);
1600
                         Tjr = FMA(KP923879532, Tjq, Tjp);
1601
                         Tjs = TgB + TgC;
1602
                         ii[WS(rs, 2)] = FMA(KP980785280, Tjs, Tjr);
1603
                         ii[WS(rs, 34)] = FNMS(KP980785280, Tjs, Tjr);
1604
                    }
1605
                    {
1606
                         E TgA, TgD, Tjt, Tju;
1607
                         TgA = FNMS(KP923879532, Tgr, Tgo);
1608
                         TgD = TgB - TgC;
1609
                         ri[WS(rs, 50)] = FNMS(KP980785280, TgD, TgA);
1610
                         ri[WS(rs, 18)] = FMA(KP980785280, TgD, TgA);
1611
                         Tjt = FNMS(KP923879532, Tjq, Tjp);
1612
                         Tju = Tgy - Tgv;
1613
                         ii[WS(rs, 18)] = FMA(KP980785280, Tju, Tjt);
1614
                         ii[WS(rs, 50)] = FNMS(KP980785280, Tju, Tjt);
1615
                    }
1616
                    {
1617
                         E TgQ, Th5, TjF, TjG;
1618
                         TgQ = FMA(KP923879532, TgP, TgI);
1619
                         Th5 = TgX + Th4;
1620
                         ri[WS(rs, 38)] = FNMS(KP831469612, Th5, TgQ);
1621
                         ri[WS(rs, 6)] = FMA(KP831469612, Th5, TgQ);
1622
                         TjF = FMA(KP923879532, TjE, TjD);
1623
                         TjG = Th7 + Th8;
1624
                         ii[WS(rs, 6)] = FMA(KP831469612, TjG, TjF);
1625
                         ii[WS(rs, 38)] = FNMS(KP831469612, TjG, TjF);
1626
                    }
1627
                    {
1628
                         E Th6, Th9, TjH, TjI;
1629
                         Th6 = FNMS(KP923879532, TgP, TgI);
1630
                         Th9 = Th7 - Th8;
1631
                         ri[WS(rs, 54)] = FNMS(KP831469612, Th9, Th6);
1632
                         ri[WS(rs, 22)] = FMA(KP831469612, Th9, Th6);
1633
                         TjH = FNMS(KP923879532, TjE, TjD);
1634
                         TjI = Th4 - TgX;
1635
                         ii[WS(rs, 22)] = FMA(KP831469612, TjI, TjH);
1636
                         ii[WS(rs, 54)] = FNMS(KP831469612, TjI, TjH);
1637
                    }
1638
                    {
1639
                         E The, Thl, TjL, TjM;
1640
                         The = FNMS(KP923879532, Thd, Tha);
1641
                         Thl = Thh - Thk;
1642
                         ri[WS(rs, 46)] = FNMS(KP980785280, Thl, The);
1643
                         ri[WS(rs, 14)] = FMA(KP980785280, Thl, The);
1644
                         TjL = FNMS(KP923879532, TjK, TjJ);
1645
                         TjM = Tho - Thn;
1646
                         ii[WS(rs, 14)] = FMA(KP980785280, TjM, TjL);
1647
                         ii[WS(rs, 46)] = FNMS(KP980785280, TjM, TjL);
1648
                    }
1649
                    {
1650
                         E Thm, Thp, TjN, TjO;
1651
                         Thm = FMA(KP923879532, Thd, Tha);
1652
                         Thp = Thn + Tho;
1653
                         ri[WS(rs, 30)] = FNMS(KP980785280, Thp, Thm);
1654
                         ri[WS(rs, 62)] = FMA(KP980785280, Thp, Thm);
1655
                         TjN = FMA(KP923879532, TjK, TjJ);
1656
                         TjO = Thh + Thk;
1657
                         ii[WS(rs, 30)] = FNMS(KP980785280, TjO, TjN);
1658
                         ii[WS(rs, 62)] = FMA(KP980785280, TjO, TjN);
1659
                    }
1660
               }
1661
               {
1662
                    E T99, Tkw, TbB, Tkq, Taj, TbL, Tbv, TbF, Tce, Tcy, Tci, Tcu, Tc7, Tcx, Tch;
1663
                    E Tcr, TbZ, TkK, Tcn, TkE, Tbs, TbM, Tbw, TbI, T80, TkD, TkJ, Tby, TbS, Tkp;
1664
                    E Tkv, Tck;
1665
                    {
1666
                         E T8z, Tbz, T98, TbA;
1667
                         {
1668
                              E T8n, T8y, T8W, T97;
1669
                              T8n = FNMS(KP707106781, T8m, T87);
1670
                              T8y = FNMS(KP707106781, T8x, T8u);
1671
                              T8z = FNMS(KP668178637, T8y, T8n);
1672
                              Tbz = FMA(KP668178637, T8n, T8y);
1673
                              T8W = FNMS(KP707106781, T8V, T8G);
1674
                              T97 = FNMS(KP707106781, T96, T93);
1675
                              T98 = FMA(KP668178637, T97, T8W);
1676
                              TbA = FNMS(KP668178637, T8W, T97);
1677
                         }
1678
                         T99 = T8z - T98;
1679
                         Tkw = TbA - Tbz;
1680
                         TbB = Tbz + TbA;
1681
                         Tkq = T8z + T98;
1682
                    }
1683
                    {
1684
                         E Ta3, TbE, Tai, TbD;
1685
                         {
1686
                              E T9x, Ta2, Tae, Tah;
1687
                              T9x = FNMS(KP707106781, T9w, T9h);
1688
                              Ta2 = T9M - Ta1;
1689
                              Ta3 = FNMS(KP923879532, Ta2, T9x);
1690
                              TbE = FMA(KP923879532, Ta2, T9x);
1691
                              Tae = FNMS(KP707106781, Tad, Taa);
1692
                              Tah = Taf - Tag;
1693
                              Tai = FNMS(KP923879532, Tah, Tae);
1694
                              TbD = FMA(KP923879532, Tah, Tae);
1695
                         }
1696
                         Taj = FMA(KP534511135, Tai, Ta3);
1697
                         TbL = FNMS(KP303346683, TbD, TbE);
1698
                         Tbv = FNMS(KP534511135, Ta3, Tai);
1699
                         TbF = FMA(KP303346683, TbE, TbD);
1700
                    }
1701
                    {
1702
                         E Tca, Tct, Tcd, Tcs;
1703
                         {
1704
                              E Tc8, Tc9, Tcb, Tcc;
1705
                              Tc8 = FMA(KP707106781, Tbm, Tbj);
1706
                              Tc9 = Tba + TaV;
1707
                              Tca = FNMS(KP923879532, Tc9, Tc8);
1708
                              Tct = FMA(KP923879532, Tc9, Tc8);
1709
                              Tcb = FMA(KP707106781, TaF, Taq);
1710
                              Tcc = Tbo + Tbp;
1711
                              Tcd = FNMS(KP923879532, Tcc, Tcb);
1712
                              Tcs = FMA(KP923879532, Tcc, Tcb);
1713
                         }
1714
                         Tce = FNMS(KP820678790, Tcd, Tca);
1715
                         Tcy = FMA(KP098491403, Tcs, Tct);
1716
                         Tci = FMA(KP820678790, Tca, Tcd);
1717
                         Tcu = FNMS(KP098491403, Tct, Tcs);
1718
                    }
1719
                    {
1720
                         E Tc3, Tcq, Tc6, Tcp;
1721
                         {
1722
                              E Tc1, Tc2, Tc4, Tc5;
1723
                              Tc1 = FMA(KP707106781, Tad, Taa);
1724
                              Tc2 = Ta1 + T9M;
1725
                              Tc3 = FNMS(KP923879532, Tc2, Tc1);
1726
                              Tcq = FMA(KP923879532, Tc2, Tc1);
1727
                              Tc4 = FMA(KP707106781, T9w, T9h);
1728
                              Tc5 = Taf + Tag;
1729
                              Tc6 = FNMS(KP923879532, Tc5, Tc4);
1730
                              Tcp = FMA(KP923879532, Tc5, Tc4);
1731
                         }
1732
                         Tc7 = FMA(KP820678790, Tc6, Tc3);
1733
                         Tcx = FNMS(KP098491403, Tcp, Tcq);
1734
                         Tch = FNMS(KP820678790, Tc3, Tc6);
1735
                         Tcr = FMA(KP098491403, Tcq, Tcp);
1736
                    }
1737
                    {
1738
                         E TbV, Tcl, TbY, Tcm;
1739
                         {
1740
                              E TbT, TbU, TbW, TbX;
1741
                              TbT = FMA(KP707106781, T8m, T87);
1742
                              TbU = FMA(KP707106781, T8x, T8u);
1743
                              TbV = FMA(KP198912367, TbU, TbT);
1744
                              Tcl = FNMS(KP198912367, TbT, TbU);
1745
                              TbW = FMA(KP707106781, T8V, T8G);
1746
                              TbX = FMA(KP707106781, T96, T93);
1747
                              TbY = FNMS(KP198912367, TbX, TbW);
1748
                              Tcm = FMA(KP198912367, TbW, TbX);
1749
                         }
1750
                         TbZ = TbV - TbY;
1751
                         TkK = TbV + TbY;
1752
                         Tcn = Tcl + Tcm;
1753
                         TkE = Tcm - Tcl;
1754
                    }
1755
                    {
1756
                         E Tbc, TbH, Tbr, TbG;
1757
                         {
1758
                              E TaG, Tbb, Tbn, Tbq;
1759
                              TaG = FNMS(KP707106781, TaF, Taq);
1760
                              Tbb = TaV - Tba;
1761
                              Tbc = FNMS(KP923879532, Tbb, TaG);
1762
                              TbH = FMA(KP923879532, Tbb, TaG);
1763
                              Tbn = FNMS(KP707106781, Tbm, Tbj);
1764
                              Tbq = Tbo - Tbp;
1765
                              Tbr = FNMS(KP923879532, Tbq, Tbn);
1766
                              TbG = FMA(KP923879532, Tbq, Tbn);
1767
                         }
1768
                         Tbs = FNMS(KP534511135, Tbr, Tbc);
1769
                         TbM = FMA(KP303346683, TbG, TbH);
1770
                         Tbw = FMA(KP534511135, Tbc, Tbr);
1771
                         TbI = FNMS(KP303346683, TbH, TbG);
1772
                    }
1773
                    {
1774
                         E T7u, TbO, Tkn, TkB, T7Z, TkC, TbR, Tko, T7t, Tkm;
1775
                         T7t = T7l - T7s;
1776
                         T7u = FMA(KP707106781, T7t, T7e);
1777
                         TbO = FNMS(KP707106781, T7t, T7e);
1778
                         Tkm = TcC - TcB;
1779
                         Tkn = FMA(KP707106781, Tkm, Tkl);
1780
                         TkB = FNMS(KP707106781, Tkm, Tkl);
1781
                         {
1782
                              E T7J, T7Y, TbP, TbQ;
1783
                              T7J = FMA(KP414213562, T7I, T7B);
1784
                              T7Y = FNMS(KP414213562, T7X, T7Q);
1785
                              T7Z = T7J - T7Y;
1786
                              TkC = T7J + T7Y;
1787
                              TbP = FNMS(KP414213562, T7B, T7I);
1788
                              TbQ = FMA(KP414213562, T7Q, T7X);
1789
                              TbR = TbP + TbQ;
1790
                              Tko = TbQ - TbP;
1791
                         }
1792
                         T80 = FNMS(KP923879532, T7Z, T7u);
1793
                         TkD = FNMS(KP923879532, TkC, TkB);
1794
                         TkJ = FMA(KP923879532, TkC, TkB);
1795
                         Tby = FMA(KP923879532, T7Z, T7u);
1796
                         TbS = FNMS(KP923879532, TbR, TbO);
1797
                         Tkp = FMA(KP923879532, Tko, Tkn);
1798
                         Tkv = FNMS(KP923879532, Tko, Tkn);
1799
                         Tck = FMA(KP923879532, TbR, TbO);
1800
                    }
1801
                    {
1802
                         E T9a, Tbt, Tkx, Tky;
1803
                         T9a = FMA(KP831469612, T99, T80);
1804
                         Tbt = Taj - Tbs;
1805
                         ri[WS(rs, 43)] = FNMS(KP881921264, Tbt, T9a);
1806
                         ri[WS(rs, 11)] = FMA(KP881921264, Tbt, T9a);
1807
                         Tkx = FMA(KP831469612, Tkw, Tkv);
1808
                         Tky = Tbw - Tbv;
1809
                         ii[WS(rs, 11)] = FMA(KP881921264, Tky, Tkx);
1810
                         ii[WS(rs, 43)] = FNMS(KP881921264, Tky, Tkx);
1811
                    }
1812
                    {
1813
                         E Tbu, Tbx, Tkz, TkA;
1814
                         Tbu = FNMS(KP831469612, T99, T80);
1815
                         Tbx = Tbv + Tbw;
1816
                         ri[WS(rs, 27)] = FNMS(KP881921264, Tbx, Tbu);
1817
                         ri[WS(rs, 59)] = FMA(KP881921264, Tbx, Tbu);
1818
                         Tkz = FNMS(KP831469612, Tkw, Tkv);
1819
                         TkA = Taj + Tbs;
1820
                         ii[WS(rs, 27)] = FNMS(KP881921264, TkA, Tkz);
1821
                         ii[WS(rs, 59)] = FMA(KP881921264, TkA, Tkz);
1822
                    }
1823
                    {
1824
                         E TbC, TbJ, Tkr, Tks;
1825
                         TbC = FMA(KP831469612, TbB, Tby);
1826
                         TbJ = TbF + TbI;
1827
                         ri[WS(rs, 35)] = FNMS(KP956940335, TbJ, TbC);
1828
                         ri[WS(rs, 3)] = FMA(KP956940335, TbJ, TbC);
1829
                         Tkr = FMA(KP831469612, Tkq, Tkp);
1830
                         Tks = TbL + TbM;
1831
                         ii[WS(rs, 3)] = FMA(KP956940335, Tks, Tkr);
1832
                         ii[WS(rs, 35)] = FNMS(KP956940335, Tks, Tkr);
1833
                    }
1834
                    {
1835
                         E TbK, TbN, Tkt, Tku;
1836
                         TbK = FNMS(KP831469612, TbB, Tby);
1837
                         TbN = TbL - TbM;
1838
                         ri[WS(rs, 51)] = FNMS(KP956940335, TbN, TbK);
1839
                         ri[WS(rs, 19)] = FMA(KP956940335, TbN, TbK);
1840
                         Tkt = FNMS(KP831469612, Tkq, Tkp);
1841
                         Tku = TbI - TbF;
1842
                         ii[WS(rs, 19)] = FMA(KP956940335, Tku, Tkt);
1843
                         ii[WS(rs, 51)] = FNMS(KP956940335, Tku, Tkt);
1844
                    }
1845
                    {
1846
                         E Tc0, Tcf, TkF, TkG;
1847
                         Tc0 = FMA(KP980785280, TbZ, TbS);
1848
                         Tcf = Tc7 + Tce;
1849
                         ri[WS(rs, 39)] = FNMS(KP773010453, Tcf, Tc0);
1850
                         ri[WS(rs, 7)] = FMA(KP773010453, Tcf, Tc0);
1851
                         TkF = FMA(KP980785280, TkE, TkD);
1852
                         TkG = Tch + Tci;
1853
                         ii[WS(rs, 7)] = FMA(KP773010453, TkG, TkF);
1854
                         ii[WS(rs, 39)] = FNMS(KP773010453, TkG, TkF);
1855
                    }
1856
                    {
1857
                         E Tcg, Tcj, TkH, TkI;
1858
                         Tcg = FNMS(KP980785280, TbZ, TbS);
1859
                         Tcj = Tch - Tci;
1860
                         ri[WS(rs, 55)] = FNMS(KP773010453, Tcj, Tcg);
1861
                         ri[WS(rs, 23)] = FMA(KP773010453, Tcj, Tcg);
1862
                         TkH = FNMS(KP980785280, TkE, TkD);
1863
                         TkI = Tce - Tc7;
1864
                         ii[WS(rs, 23)] = FMA(KP773010453, TkI, TkH);
1865
                         ii[WS(rs, 55)] = FNMS(KP773010453, TkI, TkH);
1866
                    }
1867
                    {
1868
                         E Tco, Tcv, TkL, TkM;
1869
                         Tco = FNMS(KP980785280, Tcn, Tck);
1870
                         Tcv = Tcr - Tcu;
1871
                         ri[WS(rs, 47)] = FNMS(KP995184726, Tcv, Tco);
1872
                         ri[WS(rs, 15)] = FMA(KP995184726, Tcv, Tco);
1873
                         TkL = FNMS(KP980785280, TkK, TkJ);
1874
                         TkM = Tcy - Tcx;
1875
                         ii[WS(rs, 15)] = FMA(KP995184726, TkM, TkL);
1876
                         ii[WS(rs, 47)] = FNMS(KP995184726, TkM, TkL);
1877
                    }
1878
                    {
1879
                         E Tcw, Tcz, TkN, TkO;
1880
                         Tcw = FMA(KP980785280, Tcn, Tck);
1881
                         Tcz = Tcx + Tcy;
1882
                         ri[WS(rs, 31)] = FNMS(KP995184726, Tcz, Tcw);
1883
                         ri[WS(rs, 63)] = FMA(KP995184726, Tcz, Tcw);
1884
                         TkN = FMA(KP980785280, TkK, TkJ);
1885
                         TkO = Tcr + Tcu;
1886
                         ii[WS(rs, 31)] = FNMS(KP995184726, TkO, TkN);
1887
                         ii[WS(rs, 63)] = FMA(KP995184726, TkO, TkN);
1888
                    }
1889
               }
1890
               {
1891
                    E Td1, Tk2, TdN, TjW, Tdl, TdX, TdH, TdR, Teq, TeK, Teu, TeG, Tej, TeJ, Tet;
1892
                    E TeD, Teb, Tkg, Tez, Tka, TdE, TdY, TdI, TdU, TcM, Tk9, Tkf, TdK, Te4, TjV;
1893
                    E Tk1, Tew;
1894
                    {
1895
                         E TcT, TdL, Td0, TdM;
1896
                         {
1897
                              E TcP, TcS, TcW, TcZ;
1898
                              TcP = FMA(KP707106781, TcO, TcN);
1899
                              TcS = FMA(KP707106781, TcR, TcQ);
1900
                              TcT = FNMS(KP198912367, TcS, TcP);
1901
                              TdL = FMA(KP198912367, TcP, TcS);
1902
                              TcW = FMA(KP707106781, TcV, TcU);
1903
                              TcZ = FMA(KP707106781, TcY, TcX);
1904
                              Td0 = FMA(KP198912367, TcZ, TcW);
1905
                              TdM = FNMS(KP198912367, TcW, TcZ);
1906
                         }
1907
                         Td1 = TcT - Td0;
1908
                         Tk2 = TdM - TdL;
1909
                         TdN = TdL + TdM;
1910
                         TjW = TcT + Td0;
1911
                    }
1912
                    {
1913
                         E Tdd, TdQ, Tdk, TdP;
1914
                         {
1915
                              E Td5, Tdc, Tdg, Tdj;
1916
                              Td5 = FMA(KP707106781, Td4, Td3);
1917
                              Tdc = Td8 + Tdb;
1918
                              Tdd = FNMS(KP923879532, Tdc, Td5);
1919
                              TdQ = FMA(KP923879532, Tdc, Td5);
1920
                              Tdg = FMA(KP707106781, Tdf, Tde);
1921
                              Tdj = Tdh + Tdi;
1922
                              Tdk = FNMS(KP923879532, Tdj, Tdg);
1923
                              TdP = FMA(KP923879532, Tdj, Tdg);
1924
                         }
1925
                         Tdl = FMA(KP820678790, Tdk, Tdd);
1926
                         TdX = FNMS(KP098491403, TdP, TdQ);
1927
                         TdH = FNMS(KP820678790, Tdd, Tdk);
1928
                         TdR = FMA(KP098491403, TdQ, TdP);
1929
                    }
1930
                    {
1931
                         E Tem, TeF, Tep, TeE;
1932
                         {
1933
                              E Tek, Tel, Ten, Teo;
1934
                              Tek = FNMS(KP707106781, Tdy, Tdx);
1935
                              Tel = Tdu - Tdr;
1936
                              Tem = FNMS(KP923879532, Tel, Tek);
1937
                              TeF = FMA(KP923879532, Tel, Tek);
1938
                              Ten = FNMS(KP707106781, Tdn, Tdm);
1939
                              Teo = TdA - TdB;
1940
                              Tep = FNMS(KP923879532, Teo, Ten);
1941
                              TeE = FMA(KP923879532, Teo, Ten);
1942
                         }
1943
                         Teq = FNMS(KP534511135, Tep, Tem);
1944
                         TeK = FMA(KP303346683, TeE, TeF);
1945
                         Teu = FMA(KP534511135, Tem, Tep);
1946
                         TeG = FNMS(KP303346683, TeF, TeE);
1947
                    }
1948
                    {
1949
                         E Tef, TeC, Tei, TeB;
1950
                         {
1951
                              E Ted, Tee, Teg, Teh;
1952
                              Ted = FNMS(KP707106781, Tdf, Tde);
1953
                              Tee = Tdb - Td8;
1954
                              Tef = FNMS(KP923879532, Tee, Ted);
1955
                              TeC = FMA(KP923879532, Tee, Ted);
1956
                              Teg = FNMS(KP707106781, Td4, Td3);
1957
                              Teh = Tdh - Tdi;
1958
                              Tei = FNMS(KP923879532, Teh, Teg);
1959
                              TeB = FMA(KP923879532, Teh, Teg);
1960
                         }
1961
                         Tej = FMA(KP534511135, Tei, Tef);
1962
                         TeJ = FNMS(KP303346683, TeB, TeC);
1963
                         Tet = FNMS(KP534511135, Tef, Tei);
1964
                         TeD = FMA(KP303346683, TeC, TeB);
1965
                    }
1966
                    {
1967
                         E Te7, Tex, Tea, Tey;
1968
                         {
1969
                              E Te5, Te6, Te8, Te9;
1970
                              Te5 = FNMS(KP707106781, TcO, TcN);
1971
                              Te6 = FNMS(KP707106781, TcR, TcQ);
1972
                              Te7 = FMA(KP668178637, Te6, Te5);
1973
                              Tex = FNMS(KP668178637, Te5, Te6);
1974
                              Te8 = FNMS(KP707106781, TcV, TcU);
1975
                              Te9 = FNMS(KP707106781, TcY, TcX);
1976
                              Tea = FNMS(KP668178637, Te9, Te8);
1977
                              Tey = FMA(KP668178637, Te8, Te9);
1978
                         }
1979
                         Teb = Te7 - Tea;
1980
                         Tkg = Te7 + Tea;
1981
                         Tez = Tex + Tey;
1982
                         Tka = Tey - Tex;
1983
                    }
1984
                    {
1985
                         E Tdw, TdT, TdD, TdS;
1986
                         {
1987
                              E Tdo, Tdv, Tdz, TdC;
1988
                              Tdo = FMA(KP707106781, Tdn, Tdm);
1989
                              Tdv = Tdr + Tdu;
1990
                              Tdw = FNMS(KP923879532, Tdv, Tdo);
1991
                              TdT = FMA(KP923879532, Tdv, Tdo);
1992
                              Tdz = FMA(KP707106781, Tdy, Tdx);
1993
                              TdC = TdA + TdB;
1994
                              TdD = FNMS(KP923879532, TdC, Tdz);
1995
                              TdS = FMA(KP923879532, TdC, Tdz);
1996
                         }
1997
                         TdE = FNMS(KP820678790, TdD, Tdw);
1998
                         TdY = FMA(KP098491403, TdS, TdT);
1999
                         TdI = FMA(KP820678790, Tdw, TdD);
2000
                         TdU = FNMS(KP098491403, TdT, TdS);
2001
                    }
2002
                    {
2003
                         E TcE, Te0, TjT, Tk7, TcL, Tk8, Te3, TjU, TcD, TjS;
2004
                         TcD = TcB + TcC;
2005
                         TcE = FMA(KP707106781, TcD, TcA);
2006
                         Te0 = FNMS(KP707106781, TcD, TcA);
2007
                         TjS = T7l + T7s;
2008
                         TjT = FMA(KP707106781, TjS, TjR);
2009
                         Tk7 = FNMS(KP707106781, TjS, TjR);
2010
                         {
2011
                              E TcH, TcK, Te1, Te2;
2012
                              TcH = FMA(KP414213562, TcG, TcF);
2013
                              TcK = FNMS(KP414213562, TcJ, TcI);
2014
                              TcL = TcH + TcK;
2015
                              Tk8 = TcK - TcH;
2016
                              Te1 = FNMS(KP414213562, TcF, TcG);
2017
                              Te2 = FMA(KP414213562, TcI, TcJ);
2018
                              Te3 = Te1 - Te2;
2019
                              TjU = Te1 + Te2;
2020
                         }
2021
                         TcM = FNMS(KP923879532, TcL, TcE);
2022
                         Tk9 = FMA(KP923879532, Tk8, Tk7);
2023
                         Tkf = FNMS(KP923879532, Tk8, Tk7);
2024
                         TdK = FMA(KP923879532, TcL, TcE);
2025
                         Te4 = FMA(KP923879532, Te3, Te0);
2026
                         TjV = FMA(KP923879532, TjU, TjT);
2027
                         Tk1 = FNMS(KP923879532, TjU, TjT);
2028
                         Tew = FNMS(KP923879532, Te3, Te0);
2029
                    }
2030
                    {
2031
                         E Td2, TdF, Tk3, Tk4;
2032
                         Td2 = FMA(KP980785280, Td1, TcM);
2033
                         TdF = Tdl - TdE;
2034
                         ri[WS(rs, 41)] = FNMS(KP773010453, TdF, Td2);
2035
                         ri[WS(rs, 9)] = FMA(KP773010453, TdF, Td2);
2036
                         Tk3 = FMA(KP980785280, Tk2, Tk1);
2037
                         Tk4 = TdI - TdH;
2038
                         ii[WS(rs, 9)] = FMA(KP773010453, Tk4, Tk3);
2039
                         ii[WS(rs, 41)] = FNMS(KP773010453, Tk4, Tk3);
2040
                    }
2041
                    {
2042
                         E TdG, TdJ, Tk5, Tk6;
2043
                         TdG = FNMS(KP980785280, Td1, TcM);
2044
                         TdJ = TdH + TdI;
2045
                         ri[WS(rs, 25)] = FNMS(KP773010453, TdJ, TdG);
2046
                         ri[WS(rs, 57)] = FMA(KP773010453, TdJ, TdG);
2047
                         Tk5 = FNMS(KP980785280, Tk2, Tk1);
2048
                         Tk6 = Tdl + TdE;
2049
                         ii[WS(rs, 25)] = FNMS(KP773010453, Tk6, Tk5);
2050
                         ii[WS(rs, 57)] = FMA(KP773010453, Tk6, Tk5);
2051
                    }
2052
                    {
2053
                         E TdO, TdV, TjX, TjY;
2054
                         TdO = FMA(KP980785280, TdN, TdK);
2055
                         TdV = TdR + TdU;
2056
                         ri[WS(rs, 33)] = FNMS(KP995184726, TdV, TdO);
2057
                         ri[WS(rs, 1)] = FMA(KP995184726, TdV, TdO);
2058
                         TjX = FMA(KP980785280, TjW, TjV);
2059
                         TjY = TdX + TdY;
2060
                         ii[WS(rs, 1)] = FMA(KP995184726, TjY, TjX);
2061
                         ii[WS(rs, 33)] = FNMS(KP995184726, TjY, TjX);
2062
                    }
2063
                    {
2064
                         E TdW, TdZ, TjZ, Tk0;
2065
                         TdW = FNMS(KP980785280, TdN, TdK);
2066
                         TdZ = TdX - TdY;
2067
                         ri[WS(rs, 49)] = FNMS(KP995184726, TdZ, TdW);
2068
                         ri[WS(rs, 17)] = FMA(KP995184726, TdZ, TdW);
2069
                         TjZ = FNMS(KP980785280, TjW, TjV);
2070
                         Tk0 = TdU - TdR;
2071
                         ii[WS(rs, 17)] = FMA(KP995184726, Tk0, TjZ);
2072
                         ii[WS(rs, 49)] = FNMS(KP995184726, Tk0, TjZ);
2073
                    }
2074
                    {
2075
                         E Tec, Ter, Tkb, Tkc;
2076
                         Tec = FMA(KP831469612, Teb, Te4);
2077
                         Ter = Tej + Teq;
2078
                         ri[WS(rs, 37)] = FNMS(KP881921264, Ter, Tec);
2079
                         ri[WS(rs, 5)] = FMA(KP881921264, Ter, Tec);
2080
                         Tkb = FMA(KP831469612, Tka, Tk9);
2081
                         Tkc = Tet + Teu;
2082
                         ii[WS(rs, 5)] = FMA(KP881921264, Tkc, Tkb);
2083
                         ii[WS(rs, 37)] = FNMS(KP881921264, Tkc, Tkb);
2084
                    }
2085
                    {
2086
                         E Tes, Tev, Tkd, Tke;
2087
                         Tes = FNMS(KP831469612, Teb, Te4);
2088
                         Tev = Tet - Teu;
2089
                         ri[WS(rs, 53)] = FNMS(KP881921264, Tev, Tes);
2090
                         ri[WS(rs, 21)] = FMA(KP881921264, Tev, Tes);
2091
                         Tkd = FNMS(KP831469612, Tka, Tk9);
2092
                         Tke = Teq - Tej;
2093
                         ii[WS(rs, 21)] = FMA(KP881921264, Tke, Tkd);
2094
                         ii[WS(rs, 53)] = FNMS(KP881921264, Tke, Tkd);
2095
                    }
2096
                    {
2097
                         E TeA, TeH, Tkh, Tki;
2098
                         TeA = FNMS(KP831469612, Tez, Tew);
2099
                         TeH = TeD - TeG;
2100
                         ri[WS(rs, 45)] = FNMS(KP956940335, TeH, TeA);
2101
                         ri[WS(rs, 13)] = FMA(KP956940335, TeH, TeA);
2102
                         Tkh = FNMS(KP831469612, Tkg, Tkf);
2103
                         Tki = TeK - TeJ;
2104
                         ii[WS(rs, 13)] = FMA(KP956940335, Tki, Tkh);
2105
                         ii[WS(rs, 45)] = FNMS(KP956940335, Tki, Tkh);
2106
                    }
2107
                    {
2108
                         E TeI, TeL, Tkj, Tkk;
2109
                         TeI = FMA(KP831469612, Tez, Tew);
2110
                         TeL = TeJ + TeK;
2111
                         ri[WS(rs, 29)] = FNMS(KP956940335, TeL, TeI);
2112
                         ri[WS(rs, 61)] = FMA(KP956940335, TeL, TeI);
2113
                         Tkj = FMA(KP831469612, Tkg, Tkf);
2114
                         Tkk = TeD + TeG;
2115
                         ii[WS(rs, 29)] = FNMS(KP956940335, Tkk, Tkj);
2116
                         ii[WS(rs, 61)] = FMA(KP956940335, Tkk, Tkj);
2117
                    }
2118
               }
2119
          }
2120
     }
2121
}
2122

    
2123
static const tw_instr twinstr[] = {
2124
     {TW_FULL, 0, 64},
2125
     {TW_NEXT, 1, 0}
2126
};
2127

    
2128
/*
 * Descriptor for the FMA variant of t1_64; its address is passed to the
 * registration call in X(codelet_t1_64).  It carries the value 64 (the
 * generator was invoked with -n 64), the codelet name, the twiddle table,
 * &GENUS, and the operation counts {520, 126, 518, 0}.  The first three
 * counts match the "520 additions, 126 multiplications, 518 fused
 * multiply/add" figures in the generator's header comment for this variant.
 * NOTE(review): the meaning of the fourth count and of the three trailing
 * zeros depends on the ct_desc layout, which is declared elsewhere.
 */
static const ct_desc desc = { 64, "t1_64", twinstr, &GENUS, {520, 126, 518, 0}, 0, 0, 0 };
2129

    
2130
/*
 * Registration entry point for this codelet: hands the t1_64 kernel and its
 * descriptor to the planner `p` through kdft_dit_register.
 * X() is a macro defined outside this file (presumably a symbol-name
 * prefixer -- confirm in the project headers).
 */
void X(codelet_t1_64) (planner *p) {
     X(kdft_dit_register) (p, t1_64, &desc);
}
2133
#else
2134

    
2135
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -n 64 -name t1_64 -include dft/scalar/t.h */
2136

    
2137
/*
 * This function contains 1038 FP additions, 500 FP multiplications,
 * (or, 808 additions, 270 multiplications, 230 fused multiply/add),
 * 176 stack variables, 15 constants, and 256 memory accesses
 */
2142
#include "dft/scalar/t.h"
2143

    
2144
static void t1_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
2145
{
2146
     DK(KP471396736, +0.471396736825997648556387625905254377657460319);
2147
     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
2148
     DK(KP290284677, +0.290284677254462367636192375817395274691476278);
2149
     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
2150
     DK(KP634393284, +0.634393284163645498215171613225493370675687095);
2151
     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
2152
     DK(KP098017140, +0.098017140329560601994195563888641845861136673);
2153
     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
2154
     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
2155
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
2156
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
2157
     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
2158
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
2159
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
2160
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
2161
     {
2162
          INT m;
2163
          for (m = mb, W = W + (mb * 126); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 126, MAKE_VOLATILE_STRIDE(128, rs)) {
2164
               E Tj, TcL, ThT, Tin, T6b, Taz, TgT, Thn, TG, Thm, TcO, TgO, T6m, ThQ, TaC;
2165
               E Tim, T14, Tfq, T6y, T9O, TaG, Tc0, TcU, TeE, T1r, Tfr, T6J, T9P, TaJ, Tc1;
2166
               E TcZ, TeF, T1Q, T2d, Tfx, Tfu, Tfv, Tfw, T6Q, TaM, Tdb, TeJ, T71, TaQ, T7a;
2167
               E TaN, Td6, TeI, T77, TaP, T2B, T2Y, Tfz, TfA, TfB, TfC, T7h, TaW, Tdm, TeM;
2168
               E T7s, TaU, T7B, TaX, Tdh, TeL, T7y, TaT, T5j, TfR, Tec, Tf0, TfY, Tgy, T8D;
2169
               E Tbl, T8O, Tbx, T9l, Tbm, TdV, TeX, T9i, Tbw, T3M, TfL, TdL, TeQ, TfI, Tgt;
2170
               E T7K, Tb2, T7V, Tbe, T8s, Tb3, Tdu, TeT, T8p, Tbd, T4x, TfJ, TdE, TdM, TfO;
2171
               E Tgu, T87, T8v, T8i, T8u, Tba, Tbg, Tdz, TdN, Tb7, Tbh, T64, TfZ, Te5, Ted;
2172
               E TfU, Tgz, T90, T9o, T9b, T9n, Tbt, Tbz, Te0, Tee, Tbq, TbA;
2173
               {
2174
                    E T1, TgR, T6, TgQ, Tc, T68, Th, T69;
2175
                    T1 = ri[0];
2176
                    TgR = ii[0];
2177
                    {
2178
                         E T3, T5, T2, T4;
2179
                         T3 = ri[WS(rs, 32)];
2180
                         T5 = ii[WS(rs, 32)];
2181
                         T2 = W[62];
2182
                         T4 = W[63];
2183
                         T6 = FMA(T2, T3, T4 * T5);
2184
                         TgQ = FNMS(T4, T3, T2 * T5);
2185
                    }
2186
                    {
2187
                         E T9, Tb, T8, Ta;
2188
                         T9 = ri[WS(rs, 16)];
2189
                         Tb = ii[WS(rs, 16)];
2190
                         T8 = W[30];
2191
                         Ta = W[31];
2192
                         Tc = FMA(T8, T9, Ta * Tb);
2193
                         T68 = FNMS(Ta, T9, T8 * Tb);
2194
                    }
2195
                    {
2196
                         E Te, Tg, Td, Tf;
2197
                         Te = ri[WS(rs, 48)];
2198
                         Tg = ii[WS(rs, 48)];
2199
                         Td = W[94];
2200
                         Tf = W[95];
2201
                         Th = FMA(Td, Te, Tf * Tg);
2202
                         T69 = FNMS(Tf, Te, Td * Tg);
2203
                    }
2204
                    {
2205
                         E T7, Ti, ThR, ThS;
2206
                         T7 = T1 + T6;
2207
                         Ti = Tc + Th;
2208
                         Tj = T7 + Ti;
2209
                         TcL = T7 - Ti;
2210
                         ThR = TgR - TgQ;
2211
                         ThS = Tc - Th;
2212
                         ThT = ThR - ThS;
2213
                         Tin = ThS + ThR;
2214
                    }
2215
                    {
2216
                         E T67, T6a, TgP, TgS;
2217
                         T67 = T1 - T6;
2218
                         T6a = T68 - T69;
2219
                         T6b = T67 - T6a;
2220
                         Taz = T67 + T6a;
2221
                         TgP = T68 + T69;
2222
                         TgS = TgQ + TgR;
2223
                         TgT = TgP + TgS;
2224
                         Thn = TgS - TgP;
2225
                    }
2226
               }
2227
               {
2228
                    E To, T6c, Tt, T6d, T6e, T6f, Tz, T6i, TE, T6j, T6h, T6k;
2229
                    {
2230
                         E Tl, Tn, Tk, Tm;
2231
                         Tl = ri[WS(rs, 8)];
2232
                         Tn = ii[WS(rs, 8)];
2233
                         Tk = W[14];
2234
                         Tm = W[15];
2235
                         To = FMA(Tk, Tl, Tm * Tn);
2236
                         T6c = FNMS(Tm, Tl, Tk * Tn);
2237
                    }
2238
                    {
2239
                         E Tq, Ts, Tp, Tr;
2240
                         Tq = ri[WS(rs, 40)];
2241
                         Ts = ii[WS(rs, 40)];
2242
                         Tp = W[78];
2243
                         Tr = W[79];
2244
                         Tt = FMA(Tp, Tq, Tr * Ts);
2245
                         T6d = FNMS(Tr, Tq, Tp * Ts);
2246
                    }
2247
                    T6e = T6c - T6d;
2248
                    T6f = To - Tt;
2249
                    {
2250
                         E Tw, Ty, Tv, Tx;
2251
                         Tw = ri[WS(rs, 56)];
2252
                         Ty = ii[WS(rs, 56)];
2253
                         Tv = W[110];
2254
                         Tx = W[111];
2255
                         Tz = FMA(Tv, Tw, Tx * Ty);
2256
                         T6i = FNMS(Tx, Tw, Tv * Ty);
2257
                    }
2258
                    {
2259
                         E TB, TD, TA, TC;
2260
                         TB = ri[WS(rs, 24)];
2261
                         TD = ii[WS(rs, 24)];
2262
                         TA = W[46];
2263
                         TC = W[47];
2264
                         TE = FMA(TA, TB, TC * TD);
2265
                         T6j = FNMS(TC, TB, TA * TD);
2266
                    }
2267
                    T6h = Tz - TE;
2268
                    T6k = T6i - T6j;
2269
                    {
2270
                         E Tu, TF, TcM, TcN;
2271
                         Tu = To + Tt;
2272
                         TF = Tz + TE;
2273
                         TG = Tu + TF;
2274
                         Thm = TF - Tu;
2275
                         TcM = T6c + T6d;
2276
                         TcN = T6i + T6j;
2277
                         TcO = TcM - TcN;
2278
                         TgO = TcM + TcN;
2279
                    }
2280
                    {
2281
                         E T6g, T6l, TaA, TaB;
2282
                         T6g = T6e - T6f;
2283
                         T6l = T6h + T6k;
2284
                         T6m = KP707106781 * (T6g - T6l);
2285
                         ThQ = KP707106781 * (T6g + T6l);
2286
                         TaA = T6f + T6e;
2287
                         TaB = T6h - T6k;
2288
                         TaC = KP707106781 * (TaA + TaB);
2289
                         Tim = KP707106781 * (TaB - TaA);
2290
                    }
2291
               }
2292
               {
2293
                    E TS, TcQ, T6q, T6t, T13, TcR, T6r, T6w, T6s, T6x;
2294
                    {
2295
                         E TM, T6o, TR, T6p;
2296
                         {
2297
                              E TJ, TL, TI, TK;
2298
                              TJ = ri[WS(rs, 4)];
2299
                              TL = ii[WS(rs, 4)];
2300
                              TI = W[6];
2301
                              TK = W[7];
2302
                              TM = FMA(TI, TJ, TK * TL);
2303
                              T6o = FNMS(TK, TJ, TI * TL);
2304
                         }
2305
                         {
2306
                              E TO, TQ, TN, TP;
2307
                              TO = ri[WS(rs, 36)];
2308
                              TQ = ii[WS(rs, 36)];
2309
                              TN = W[70];
2310
                              TP = W[71];
2311
                              TR = FMA(TN, TO, TP * TQ);
2312
                              T6p = FNMS(TP, TO, TN * TQ);
2313
                         }
2314
                         TS = TM + TR;
2315
                         TcQ = T6o + T6p;
2316
                         T6q = T6o - T6p;
2317
                         T6t = TM - TR;
2318
                    }
2319
                    {
2320
                         E TX, T6u, T12, T6v;
2321
                         {
2322
                              E TU, TW, TT, TV;
2323
                              TU = ri[WS(rs, 20)];
2324
                              TW = ii[WS(rs, 20)];
2325
                              TT = W[38];
2326
                              TV = W[39];
2327
                              TX = FMA(TT, TU, TV * TW);
2328
                              T6u = FNMS(TV, TU, TT * TW);
2329
                         }
2330
                         {
2331
                              E TZ, T11, TY, T10;
2332
                              TZ = ri[WS(rs, 52)];
2333
                              T11 = ii[WS(rs, 52)];
2334
                              TY = W[102];
2335
                              T10 = W[103];
2336
                              T12 = FMA(TY, TZ, T10 * T11);
2337
                              T6v = FNMS(T10, TZ, TY * T11);
2338
                         }
2339
                         T13 = TX + T12;
2340
                         TcR = T6u + T6v;
2341
                         T6r = TX - T12;
2342
                         T6w = T6u - T6v;
2343
                    }
2344
                    T14 = TS + T13;
2345
                    Tfq = TcQ + TcR;
2346
                    T6s = T6q + T6r;
2347
                    T6x = T6t - T6w;
2348
                    T6y = FNMS(KP923879532, T6x, KP382683432 * T6s);
2349
                    T9O = FMA(KP923879532, T6s, KP382683432 * T6x);
2350
                    {
2351
                         E TaE, TaF, TcS, TcT;
2352
                         TaE = T6q - T6r;
2353
                         TaF = T6t + T6w;
2354
                         TaG = FNMS(KP382683432, TaF, KP923879532 * TaE);
2355
                         Tc0 = FMA(KP382683432, TaE, KP923879532 * TaF);
2356
                         TcS = TcQ - TcR;
2357
                         TcT = TS - T13;
2358
                         TcU = TcS - TcT;
2359
                         TeE = TcT + TcS;
2360
                    }
2361
               }
2362
               {
2363
                    E T1f, TcW, T6B, T6E, T1q, TcX, T6C, T6H, T6D, T6I;
2364
                    {
2365
                         E T19, T6z, T1e, T6A;
2366
                         {
2367
                              E T16, T18, T15, T17;
2368
                              T16 = ri[WS(rs, 60)];
2369
                              T18 = ii[WS(rs, 60)];
2370
                              T15 = W[118];
2371
                              T17 = W[119];
2372
                              T19 = FMA(T15, T16, T17 * T18);
2373
                              T6z = FNMS(T17, T16, T15 * T18);
2374
                         }
2375
                         {
2376
                              E T1b, T1d, T1a, T1c;
2377
                              T1b = ri[WS(rs, 28)];
2378
                              T1d = ii[WS(rs, 28)];
2379
                              T1a = W[54];
2380
                              T1c = W[55];
2381
                              T1e = FMA(T1a, T1b, T1c * T1d);
2382
                              T6A = FNMS(T1c, T1b, T1a * T1d);
2383
                         }
2384
                         T1f = T19 + T1e;
2385
                         TcW = T6z + T6A;
2386
                         T6B = T6z - T6A;
2387
                         T6E = T19 - T1e;
2388
                    }
2389
                    {
2390
                         E T1k, T6F, T1p, T6G;
2391
                         {
2392
                              E T1h, T1j, T1g, T1i;
2393
                              T1h = ri[WS(rs, 12)];
2394
                              T1j = ii[WS(rs, 12)];
2395
                              T1g = W[22];
2396
                              T1i = W[23];
2397
                              T1k = FMA(T1g, T1h, T1i * T1j);
2398
                              T6F = FNMS(T1i, T1h, T1g * T1j);
2399
                         }
2400
                         {
2401
                              E T1m, T1o, T1l, T1n;
2402
                              T1m = ri[WS(rs, 44)];
2403
                              T1o = ii[WS(rs, 44)];
2404
                              T1l = W[86];
2405
                              T1n = W[87];
2406
                              T1p = FMA(T1l, T1m, T1n * T1o);
2407
                              T6G = FNMS(T1n, T1m, T1l * T1o);
2408
                         }
2409
                         T1q = T1k + T1p;
2410
                         TcX = T6F + T6G;
2411
                         T6C = T1k - T1p;
2412
                         T6H = T6F - T6G;
2413
                    }
2414
                    T1r = T1f + T1q;
2415
                    Tfr = TcW + TcX;
2416
                    T6D = T6B + T6C;
2417
                    T6I = T6E - T6H;
2418
                    T6J = FMA(KP382683432, T6D, KP923879532 * T6I);
2419
                    T9P = FNMS(KP923879532, T6D, KP382683432 * T6I);
2420
                    {
2421
                         E TaH, TaI, TcV, TcY;
2422
                         TaH = T6B - T6C;
2423
                         TaI = T6E + T6H;
2424
                         TaJ = FMA(KP923879532, TaH, KP382683432 * TaI);
2425
                         Tc1 = FNMS(KP382683432, TaH, KP923879532 * TaI);
2426
                         TcV = T1f - T1q;
2427
                         TcY = TcW - TcX;
2428
                         TcZ = TcV + TcY;
2429
                         TeF = TcV - TcY;
2430
                    }
2431
               }
2432
               {
2433
                    E T1y, T6M, T1D, T6N, T1E, Td2, T1J, T74, T1O, T75, T1P, Td3, T21, Td8, T6W;
2434
                    E T6Z, T2c, Td9, T6R, T6U;
2435
                    {
2436
                         E T1v, T1x, T1u, T1w;
2437
                         T1v = ri[WS(rs, 2)];
2438
                         T1x = ii[WS(rs, 2)];
2439
                         T1u = W[2];
2440
                         T1w = W[3];
2441
                         T1y = FMA(T1u, T1v, T1w * T1x);
2442
                         T6M = FNMS(T1w, T1v, T1u * T1x);
2443
                    }
2444
                    {
2445
                         E T1A, T1C, T1z, T1B;
2446
                         T1A = ri[WS(rs, 34)];
2447
                         T1C = ii[WS(rs, 34)];
2448
                         T1z = W[66];
2449
                         T1B = W[67];
2450
                         T1D = FMA(T1z, T1A, T1B * T1C);
2451
                         T6N = FNMS(T1B, T1A, T1z * T1C);
2452
                    }
2453
                    T1E = T1y + T1D;
2454
                    Td2 = T6M + T6N;
2455
                    {
2456
                         E T1G, T1I, T1F, T1H;
2457
                         T1G = ri[WS(rs, 18)];
2458
                         T1I = ii[WS(rs, 18)];
2459
                         T1F = W[34];
2460
                         T1H = W[35];
2461
                         T1J = FMA(T1F, T1G, T1H * T1I);
2462
                         T74 = FNMS(T1H, T1G, T1F * T1I);
2463
                    }
2464
                    {
2465
                         E T1L, T1N, T1K, T1M;
2466
                         T1L = ri[WS(rs, 50)];
2467
                         T1N = ii[WS(rs, 50)];
2468
                         T1K = W[98];
2469
                         T1M = W[99];
2470
                         T1O = FMA(T1K, T1L, T1M * T1N);
2471
                         T75 = FNMS(T1M, T1L, T1K * T1N);
2472
                    }
2473
                    T1P = T1J + T1O;
2474
                    Td3 = T74 + T75;
2475
                    {
2476
                         E T1V, T6X, T20, T6Y;
2477
                         {
2478
                              E T1S, T1U, T1R, T1T;
2479
                              T1S = ri[WS(rs, 10)];
2480
                              T1U = ii[WS(rs, 10)];
2481
                              T1R = W[18];
2482
                              T1T = W[19];
2483
                              T1V = FMA(T1R, T1S, T1T * T1U);
2484
                              T6X = FNMS(T1T, T1S, T1R * T1U);
2485
                         }
2486
                         {
2487
                              E T1X, T1Z, T1W, T1Y;
2488
                              T1X = ri[WS(rs, 42)];
2489
                              T1Z = ii[WS(rs, 42)];
2490
                              T1W = W[82];
2491
                              T1Y = W[83];
2492
                              T20 = FMA(T1W, T1X, T1Y * T1Z);
2493
                              T6Y = FNMS(T1Y, T1X, T1W * T1Z);
2494
                         }
2495
                         T21 = T1V + T20;
2496
                         Td8 = T6X + T6Y;
2497
                         T6W = T1V - T20;
2498
                         T6Z = T6X - T6Y;
2499
                    }
2500
                    {
2501
                         E T26, T6S, T2b, T6T;
2502
                         {
2503
                              E T23, T25, T22, T24;
2504
                              T23 = ri[WS(rs, 58)];
2505
                              T25 = ii[WS(rs, 58)];
2506
                              T22 = W[114];
2507
                              T24 = W[115];
2508
                              T26 = FMA(T22, T23, T24 * T25);
2509
                              T6S = FNMS(T24, T23, T22 * T25);
2510
                         }
2511
                         {
2512
                              E T28, T2a, T27, T29;
2513
                              T28 = ri[WS(rs, 26)];
2514
                              T2a = ii[WS(rs, 26)];
2515
                              T27 = W[50];
2516
                              T29 = W[51];
2517
                              T2b = FMA(T27, T28, T29 * T2a);
2518
                              T6T = FNMS(T29, T28, T27 * T2a);
2519
                         }
2520
                         T2c = T26 + T2b;
2521
                         Td9 = T6S + T6T;
2522
                         T6R = T26 - T2b;
2523
                         T6U = T6S - T6T;
2524
                    }
2525
                    T1Q = T1E + T1P;
2526
                    T2d = T21 + T2c;
2527
                    Tfx = T1Q - T2d;
2528
                    Tfu = Td2 + Td3;
2529
                    Tfv = Td8 + Td9;
2530
                    Tfw = Tfu - Tfv;
2531
                    {
2532
                         E T6O, T6P, Td7, Tda;
2533
                         T6O = T6M - T6N;
2534
                         T6P = T1J - T1O;
2535
                         T6Q = T6O + T6P;
2536
                         TaM = T6O - T6P;
2537
                         Td7 = T1E - T1P;
2538
                         Tda = Td8 - Td9;
2539
                         Tdb = Td7 - Tda;
2540
                         TeJ = Td7 + Tda;
2541
                    }
2542
                    {
2543
                         E T6V, T70, T78, T79;
2544
                         T6V = T6R - T6U;
2545
                         T70 = T6W + T6Z;
2546
                         T71 = KP707106781 * (T6V - T70);
2547
                         TaQ = KP707106781 * (T70 + T6V);
2548
                         T78 = T6Z - T6W;
2549
                         T79 = T6R + T6U;
2550
                         T7a = KP707106781 * (T78 - T79);
2551
                         TaN = KP707106781 * (T78 + T79);
2552
                    }
2553
                    {
2554
                         E Td4, Td5, T73, T76;
2555
                         Td4 = Td2 - Td3;
2556
                         Td5 = T2c - T21;
2557
                         Td6 = Td4 - Td5;
2558
                         TeI = Td4 + Td5;
2559
                         T73 = T1y - T1D;
2560
                         T76 = T74 - T75;
2561
                         T77 = T73 - T76;
2562
                         TaP = T73 + T76;
2563
                    }
2564
               }
2565
               {
2566
                    E T2j, T7d, T2o, T7e, T2p, Tdd, T2u, T7v, T2z, T7w, T2A, Tde, T2M, Tdj, T7n;
2567
                    E T7q, T2X, Tdk, T7i, T7l;
2568
                    {
2569
                         E T2g, T2i, T2f, T2h;
2570
                         T2g = ri[WS(rs, 62)];
2571
                         T2i = ii[WS(rs, 62)];
2572
                         T2f = W[122];
2573
                         T2h = W[123];
2574
                         T2j = FMA(T2f, T2g, T2h * T2i);
2575
                         T7d = FNMS(T2h, T2g, T2f * T2i);
2576
                    }
2577
                    {
2578
                         E T2l, T2n, T2k, T2m;
2579
                         T2l = ri[WS(rs, 30)];
2580
                         T2n = ii[WS(rs, 30)];
2581
                         T2k = W[58];
2582
                         T2m = W[59];
2583
                         T2o = FMA(T2k, T2l, T2m * T2n);
2584
                         T7e = FNMS(T2m, T2l, T2k * T2n);
2585
                    }
2586
                    T2p = T2j + T2o;
2587
                    Tdd = T7d + T7e;
2588
                    {
2589
                         E T2r, T2t, T2q, T2s;
2590
                         T2r = ri[WS(rs, 14)];
2591
                         T2t = ii[WS(rs, 14)];
2592
                         T2q = W[26];
2593
                         T2s = W[27];
2594
                         T2u = FMA(T2q, T2r, T2s * T2t);
2595
                         T7v = FNMS(T2s, T2r, T2q * T2t);
2596
                    }
2597
                    {
2598
                         E T2w, T2y, T2v, T2x;
2599
                         T2w = ri[WS(rs, 46)];
2600
                         T2y = ii[WS(rs, 46)];
2601
                         T2v = W[90];
2602
                         T2x = W[91];
2603
                         T2z = FMA(T2v, T2w, T2x * T2y);
2604
                         T7w = FNMS(T2x, T2w, T2v * T2y);
2605
                    }
2606
                    T2A = T2u + T2z;
2607
                    Tde = T7v + T7w;
2608
                    {
2609
                         E T2G, T7o, T2L, T7p;
2610
                         {
2611
                              E T2D, T2F, T2C, T2E;
2612
                              T2D = ri[WS(rs, 6)];
2613
                              T2F = ii[WS(rs, 6)];
2614
                              T2C = W[10];
2615
                              T2E = W[11];
2616
                              T2G = FMA(T2C, T2D, T2E * T2F);
2617
                              T7o = FNMS(T2E, T2D, T2C * T2F);
2618
                         }
2619
                         {
2620
                              E T2I, T2K, T2H, T2J;
2621
                              T2I = ri[WS(rs, 38)];
2622
                              T2K = ii[WS(rs, 38)];
2623
                              T2H = W[74];
2624
                              T2J = W[75];
2625
                              T2L = FMA(T2H, T2I, T2J * T2K);
2626
                              T7p = FNMS(T2J, T2I, T2H * T2K);
2627
                         }
2628
                         T2M = T2G + T2L;
2629
                         Tdj = T7o + T7p;
2630
                         T7n = T2G - T2L;
2631
                         T7q = T7o - T7p;
2632
                    }
2633
                    {
2634
                         E T2R, T7j, T2W, T7k;
2635
                         {
2636
                              E T2O, T2Q, T2N, T2P;
2637
                              T2O = ri[WS(rs, 54)];
2638
                              T2Q = ii[WS(rs, 54)];
2639
                              T2N = W[106];
2640
                              T2P = W[107];
2641
                              T2R = FMA(T2N, T2O, T2P * T2Q);
2642
                              T7j = FNMS(T2P, T2O, T2N * T2Q);
2643
                         }
2644
                         {
2645
                              E T2T, T2V, T2S, T2U;
2646
                              T2T = ri[WS(rs, 22)];
2647
                              T2V = ii[WS(rs, 22)];
2648
                              T2S = W[42];
2649
                              T2U = W[43];
2650
                              T2W = FMA(T2S, T2T, T2U * T2V);
2651
                              T7k = FNMS(T2U, T2T, T2S * T2V);
2652
                         }
2653
                         T2X = T2R + T2W;
2654
                         Tdk = T7j + T7k;
2655
                         T7i = T2R - T2W;
2656
                         T7l = T7j - T7k;
2657
                    }
2658
                    T2B = T2p + T2A;
2659
                    T2Y = T2M + T2X;
2660
                    Tfz = T2B - T2Y;
2661
                    TfA = Tdd + Tde;
2662
                    TfB = Tdj + Tdk;
2663
                    TfC = TfA - TfB;
2664
                    {
2665
                         E T7f, T7g, Tdi, Tdl;
2666
                         T7f = T7d - T7e;
2667
                         T7g = T2u - T2z;
2668
                         T7h = T7f + T7g;
2669
                         TaW = T7f - T7g;
2670
                         Tdi = T2p - T2A;
2671
                         Tdl = Tdj - Tdk;
2672
                         Tdm = Tdi - Tdl;
2673
                         TeM = Tdi + Tdl;
2674
                    }
2675
                    {
2676
                         E T7m, T7r, T7z, T7A;
2677
                         T7m = T7i - T7l;
2678
                         T7r = T7n + T7q;
2679
                         T7s = KP707106781 * (T7m - T7r);
2680
                         TaU = KP707106781 * (T7r + T7m);
2681
                         T7z = T7q - T7n;
2682
                         T7A = T7i + T7l;
2683
                         T7B = KP707106781 * (T7z - T7A);
2684
                         TaX = KP707106781 * (T7z + T7A);
2685
                    }
2686
                    {
2687
                         E Tdf, Tdg, T7u, T7x;
2688
                         Tdf = Tdd - Tde;
2689
                         Tdg = T2X - T2M;
2690
                         Tdh = Tdf - Tdg;
2691
                         TeL = Tdf + Tdg;
2692
                         T7u = T2j - T2o;
2693
                         T7x = T7v - T7w;
2694
                         T7y = T7u - T7x;
2695
                         TaT = T7u + T7x;
2696
                    }
2697
               }
2698
               {
2699
                    E T4D, T9e, T4I, T9f, T4J, Te8, T4O, T8A, T4T, T8B, T4U, Te9, T56, TdS, T8G;
2700
                    E T8H, T5h, TdT, T8J, T8M;
2701
                    {
2702
                         E T4A, T4C, T4z, T4B;
2703
                         T4A = ri[WS(rs, 63)];
2704
                         T4C = ii[WS(rs, 63)];
2705
                         T4z = W[124];
2706
                         T4B = W[125];
2707
                         T4D = FMA(T4z, T4A, T4B * T4C);
2708
                         T9e = FNMS(T4B, T4A, T4z * T4C);
2709
                    }
2710
                    {
2711
                         E T4F, T4H, T4E, T4G;
2712
                         T4F = ri[WS(rs, 31)];
2713
                         T4H = ii[WS(rs, 31)];
2714
                         T4E = W[60];
2715
                         T4G = W[61];
2716
                         T4I = FMA(T4E, T4F, T4G * T4H);
2717
                         T9f = FNMS(T4G, T4F, T4E * T4H);
2718
                    }
2719
                    T4J = T4D + T4I;
2720
                    Te8 = T9e + T9f;
2721
                    {
2722
                         E T4L, T4N, T4K, T4M;
2723
                         T4L = ri[WS(rs, 15)];
2724
                         T4N = ii[WS(rs, 15)];
2725
                         T4K = W[28];
2726
                         T4M = W[29];
2727
                         T4O = FMA(T4K, T4L, T4M * T4N);
2728
                         T8A = FNMS(T4M, T4L, T4K * T4N);
2729
                    }
2730
                    {
2731
                         E T4Q, T4S, T4P, T4R;
2732
                         T4Q = ri[WS(rs, 47)];
2733
                         T4S = ii[WS(rs, 47)];
2734
                         T4P = W[92];
2735
                         T4R = W[93];
2736
                         T4T = FMA(T4P, T4Q, T4R * T4S);
2737
                         T8B = FNMS(T4R, T4Q, T4P * T4S);
2738
                    }
2739
                    T4U = T4O + T4T;
2740
                    Te9 = T8A + T8B;
2741
                    {
2742
                         E T50, T8E, T55, T8F;
2743
                         {
2744
                              E T4X, T4Z, T4W, T4Y;
2745
                              T4X = ri[WS(rs, 7)];
2746
                              T4Z = ii[WS(rs, 7)];
2747
                              T4W = W[12];
2748
                              T4Y = W[13];
2749
                              T50 = FMA(T4W, T4X, T4Y * T4Z);
2750
                              T8E = FNMS(T4Y, T4X, T4W * T4Z);
2751
                         }
2752
                         {
2753
                              E T52, T54, T51, T53;
2754
                              T52 = ri[WS(rs, 39)];
2755
                              T54 = ii[WS(rs, 39)];
2756
                              T51 = W[76];
2757
                              T53 = W[77];
2758
                              T55 = FMA(T51, T52, T53 * T54);
2759
                              T8F = FNMS(T53, T52, T51 * T54);
2760
                         }
2761
                         T56 = T50 + T55;
2762
                         TdS = T8E + T8F;
2763
                         T8G = T8E - T8F;
2764
                         T8H = T50 - T55;
2765
                    }
2766
                    {
2767
                         E T5b, T8K, T5g, T8L;
2768
                         {
2769
                              E T58, T5a, T57, T59;
2770
                              T58 = ri[WS(rs, 55)];
2771
                              T5a = ii[WS(rs, 55)];
2772
                              T57 = W[108];
2773
                              T59 = W[109];
2774
                              T5b = FMA(T57, T58, T59 * T5a);
2775
                              T8K = FNMS(T59, T58, T57 * T5a);
2776
                         }
2777
                         {
2778
                              E T5d, T5f, T5c, T5e;
2779
                              T5d = ri[WS(rs, 23)];
2780
                              T5f = ii[WS(rs, 23)];
2781
                              T5c = W[44];
2782
                              T5e = W[45];
2783
                              T5g = FMA(T5c, T5d, T5e * T5f);
2784
                              T8L = FNMS(T5e, T5d, T5c * T5f);
2785
                         }
2786
                         T5h = T5b + T5g;
2787
                         TdT = T8K + T8L;
2788
                         T8J = T5b - T5g;
2789
                         T8M = T8K - T8L;
2790
                    }
2791
                    {
2792
                         E T4V, T5i, Tea, Teb;
2793
                         T4V = T4J + T4U;
2794
                         T5i = T56 + T5h;
2795
                         T5j = T4V + T5i;
2796
                         TfR = T4V - T5i;
2797
                         Tea = Te8 - Te9;
2798
                         Teb = T5h - T56;
2799
                         Tec = Tea - Teb;
2800
                         Tf0 = Tea + Teb;
2801
                    }
2802
                    {
2803
                         E TfW, TfX, T8z, T8C;
2804
                         TfW = Te8 + Te9;
2805
                         TfX = TdS + TdT;
2806
                         TfY = TfW - TfX;
2807
                         Tgy = TfW + TfX;
2808
                         T8z = T4D - T4I;
2809
                         T8C = T8A - T8B;
2810
                         T8D = T8z - T8C;
2811
                         Tbl = T8z + T8C;
2812
                    }
2813
                    {
2814
                         E T8I, T8N, T9j, T9k;
2815
                         T8I = T8G - T8H;
2816
                         T8N = T8J + T8M;
2817
                         T8O = KP707106781 * (T8I - T8N);
2818
                         Tbx = KP707106781 * (T8I + T8N);
2819
                         T9j = T8J - T8M;
2820
                         T9k = T8H + T8G;
2821
                         T9l = KP707106781 * (T9j - T9k);
2822
                         Tbm = KP707106781 * (T9k + T9j);
2823
                    }
2824
                    {
2825
                         E TdR, TdU, T9g, T9h;
2826
                         TdR = T4J - T4U;
2827
                         TdU = TdS - TdT;
2828
                         TdV = TdR - TdU;
2829
                         TeX = TdR + TdU;
2830
                         T9g = T9e - T9f;
2831
                         T9h = T4O - T4T;
2832
                         T9i = T9g + T9h;
2833
                         Tbw = T9g - T9h;
2834
                    }
2835
               }
2836
               {
2837
                    E T36, T7G, T3b, T7H, T3c, Tdq, T3h, T8m, T3m, T8n, T3n, Tdr, T3z, TdI, T7Q;
2838
                    E T7T, T3K, TdJ, T7L, T7O;
2839
                    {
2840
                         E T33, T35, T32, T34;
2841
                         T33 = ri[WS(rs, 1)];
2842
                         T35 = ii[WS(rs, 1)];
2843
                         T32 = W[0];
2844
                         T34 = W[1];
2845
                         T36 = FMA(T32, T33, T34 * T35);
2846
                         T7G = FNMS(T34, T33, T32 * T35);
2847
                    }
2848
                    {
2849
                         E T38, T3a, T37, T39;
2850
                         T38 = ri[WS(rs, 33)];
2851
                         T3a = ii[WS(rs, 33)];
2852
                         T37 = W[64];
2853
                         T39 = W[65];
2854
                         T3b = FMA(T37, T38, T39 * T3a);
2855
                         T7H = FNMS(T39, T38, T37 * T3a);
2856
                    }
2857
                    T3c = T36 + T3b;
2858
                    Tdq = T7G + T7H;
2859
                    {
2860
                         E T3e, T3g, T3d, T3f;
2861
                         T3e = ri[WS(rs, 17)];
2862
                         T3g = ii[WS(rs, 17)];
2863
                         T3d = W[32];
2864
                         T3f = W[33];
2865
                         T3h = FMA(T3d, T3e, T3f * T3g);
2866
                         T8m = FNMS(T3f, T3e, T3d * T3g);
2867
                    }
2868
                    {
2869
                         E T3j, T3l, T3i, T3k;
2870
                         T3j = ri[WS(rs, 49)];
2871
                         T3l = ii[WS(rs, 49)];
2872
                         T3i = W[96];
2873
                         T3k = W[97];
2874
                         T3m = FMA(T3i, T3j, T3k * T3l);
2875
                         T8n = FNMS(T3k, T3j, T3i * T3l);
2876
                    }
2877
                    T3n = T3h + T3m;
2878
                    Tdr = T8m + T8n;
2879
                    {
2880
                         E T3t, T7R, T3y, T7S;
2881
                         {
2882
                              E T3q, T3s, T3p, T3r;
2883
                              T3q = ri[WS(rs, 9)];
2884
                              T3s = ii[WS(rs, 9)];
2885
                              T3p = W[16];
2886
                              T3r = W[17];
2887
                              T3t = FMA(T3p, T3q, T3r * T3s);
2888
                              T7R = FNMS(T3r, T3q, T3p * T3s);
2889
                         }
2890
                         {
2891
                              E T3v, T3x, T3u, T3w;
2892
                              T3v = ri[WS(rs, 41)];
2893
                              T3x = ii[WS(rs, 41)];
2894
                              T3u = W[80];
2895
                              T3w = W[81];
2896
                              T3y = FMA(T3u, T3v, T3w * T3x);
2897
                              T7S = FNMS(T3w, T3v, T3u * T3x);
2898
                         }
2899
                         T3z = T3t + T3y;
2900
                         TdI = T7R + T7S;
2901
                         T7Q = T3t - T3y;
2902
                         T7T = T7R - T7S;
2903
                    }
2904
                    {
2905
                         E T3E, T7M, T3J, T7N;
2906
                         {
2907
                              E T3B, T3D, T3A, T3C;
2908
                              T3B = ri[WS(rs, 57)];
2909
                              T3D = ii[WS(rs, 57)];
2910
                              T3A = W[112];
2911
                              T3C = W[113];
2912
                              T3E = FMA(T3A, T3B, T3C * T3D);
2913
                              T7M = FNMS(T3C, T3B, T3A * T3D);
2914
                         }
2915
                         {
2916
                              E T3G, T3I, T3F, T3H;
2917
                              T3G = ri[WS(rs, 25)];
2918
                              T3I = ii[WS(rs, 25)];
2919
                              T3F = W[48];
2920
                              T3H = W[49];
2921
                              T3J = FMA(T3F, T3G, T3H * T3I);
2922
                              T7N = FNMS(T3H, T3G, T3F * T3I);
2923
                         }
2924
                         T3K = T3E + T3J;
2925
                         TdJ = T7M + T7N;
2926
                         T7L = T3E - T3J;
2927
                         T7O = T7M - T7N;
2928
                    }
2929
                    {
2930
                         E T3o, T3L, TdH, TdK;
2931
                         T3o = T3c + T3n;
2932
                         T3L = T3z + T3K;
2933
                         T3M = T3o + T3L;
2934
                         TfL = T3o - T3L;
2935
                         TdH = T3c - T3n;
2936
                         TdK = TdI - TdJ;
2937
                         TdL = TdH - TdK;
2938
                         TeQ = TdH + TdK;
2939
                    }
2940
                    {
2941
                         E TfG, TfH, T7I, T7J;
2942
                         TfG = Tdq + Tdr;
2943
                         TfH = TdI + TdJ;
2944
                         TfI = TfG - TfH;
2945
                         Tgt = TfG + TfH;
2946
                         T7I = T7G - T7H;
2947
                         T7J = T3h - T3m;
2948
                         T7K = T7I + T7J;
2949
                         Tb2 = T7I - T7J;
2950
                    }
2951
                    {
2952
                         E T7P, T7U, T8q, T8r;
2953
                         T7P = T7L - T7O;
2954
                         T7U = T7Q + T7T;
2955
                         T7V = KP707106781 * (T7P - T7U);
2956
                         Tbe = KP707106781 * (T7U + T7P);
2957
                         T8q = T7T - T7Q;
2958
                         T8r = T7L + T7O;
2959
                         T8s = KP707106781 * (T8q - T8r);
2960
                         Tb3 = KP707106781 * (T8q + T8r);
2961
                    }
2962
                    {
2963
                         E Tds, Tdt, T8l, T8o;
2964
                         Tds = Tdq - Tdr;
2965
                         Tdt = T3K - T3z;
2966
                         Tdu = Tds - Tdt;
2967
                         TeT = Tds + Tdt;
2968
                         T8l = T36 - T3b;
2969
                         T8o = T8m - T8n;
2970
                         T8p = T8l - T8o;
2971
                         Tbd = T8l + T8o;
2972
                    }
2973
               }
2974
               {
2975
                    E T3X, TdB, T8a, T8d, T4v, Tdx, T80, T85, T48, TdC, T8b, T8g, T4k, Tdw, T7X;
2976
                    E T84;
2977
                    {
2978
                         E T3R, T88, T3W, T89;
2979
                         {
2980
                              E T3O, T3Q, T3N, T3P;
2981
                              T3O = ri[WS(rs, 5)];
2982
                              T3Q = ii[WS(rs, 5)];
2983
                              T3N = W[8];
2984
                              T3P = W[9];
2985
                              T3R = FMA(T3N, T3O, T3P * T3Q);
2986
                              T88 = FNMS(T3P, T3O, T3N * T3Q);
2987
                         }
2988
                         {
2989
                              E T3T, T3V, T3S, T3U;
2990
                              T3T = ri[WS(rs, 37)];
2991
                              T3V = ii[WS(rs, 37)];
2992
                              T3S = W[72];
2993
                              T3U = W[73];
2994
                              T3W = FMA(T3S, T3T, T3U * T3V);
2995
                              T89 = FNMS(T3U, T3T, T3S * T3V);
2996
                         }
2997
                         T3X = T3R + T3W;
2998
                         TdB = T88 + T89;
2999
                         T8a = T88 - T89;
3000
                         T8d = T3R - T3W;
3001
                    }
3002
                    {
3003
                         E T4p, T7Y, T4u, T7Z;
3004
                         {
3005
                              E T4m, T4o, T4l, T4n;
3006
                              T4m = ri[WS(rs, 13)];
3007
                              T4o = ii[WS(rs, 13)];
3008
                              T4l = W[24];
3009
                              T4n = W[25];
3010
                              T4p = FMA(T4l, T4m, T4n * T4o);
3011
                              T7Y = FNMS(T4n, T4m, T4l * T4o);
3012
                         }
3013
                         {
3014
                              E T4r, T4t, T4q, T4s;
3015
                              T4r = ri[WS(rs, 45)];
3016
                              T4t = ii[WS(rs, 45)];
3017
                              T4q = W[88];
3018
                              T4s = W[89];
3019
                              T4u = FMA(T4q, T4r, T4s * T4t);
3020
                              T7Z = FNMS(T4s, T4r, T4q * T4t);
3021
                         }
3022
                         T4v = T4p + T4u;
3023
                         Tdx = T7Y + T7Z;
3024
                         T80 = T7Y - T7Z;
3025
                         T85 = T4p - T4u;
3026
                    }
3027
                    {
3028
                         E T42, T8e, T47, T8f;
3029
                         {
3030
                              E T3Z, T41, T3Y, T40;
3031
                              T3Z = ri[WS(rs, 21)];
3032
                              T41 = ii[WS(rs, 21)];
3033
                              T3Y = W[40];
3034
                              T40 = W[41];
3035
                              T42 = FMA(T3Y, T3Z, T40 * T41);
3036
                              T8e = FNMS(T40, T3Z, T3Y * T41);
3037
                         }
3038
                         {
3039
                              E T44, T46, T43, T45;
3040
                              T44 = ri[WS(rs, 53)];
3041
                              T46 = ii[WS(rs, 53)];
3042
                              T43 = W[104];
3043
                              T45 = W[105];
3044
                              T47 = FMA(T43, T44, T45 * T46);
3045
                              T8f = FNMS(T45, T44, T43 * T46);
3046
                         }
3047
                         T48 = T42 + T47;
3048
                         TdC = T8e + T8f;
3049
                         T8b = T42 - T47;
3050
                         T8g = T8e - T8f;
3051
                    }
3052
                    {
3053
                         E T4e, T82, T4j, T83;
3054
                         {
3055
                              E T4b, T4d, T4a, T4c;
3056
                              T4b = ri[WS(rs, 61)];
3057
                              T4d = ii[WS(rs, 61)];
3058
                              T4a = W[120];
3059
                              T4c = W[121];
3060
                              T4e = FMA(T4a, T4b, T4c * T4d);
3061
                              T82 = FNMS(T4c, T4b, T4a * T4d);
3062
                         }
3063
                         {
3064
                              E T4g, T4i, T4f, T4h;
3065
                              T4g = ri[WS(rs, 29)];
3066
                              T4i = ii[WS(rs, 29)];
3067
                              T4f = W[56];
3068
                              T4h = W[57];
3069
                              T4j = FMA(T4f, T4g, T4h * T4i);
3070
                              T83 = FNMS(T4h, T4g, T4f * T4i);
3071
                         }
3072
                         T4k = T4e + T4j;
3073
                         Tdw = T82 + T83;
3074
                         T7X = T4e - T4j;
3075
                         T84 = T82 - T83;
3076
                    }
3077
                    {
3078
                         E T49, T4w, TdA, TdD;
3079
                         T49 = T3X + T48;
3080
                         T4w = T4k + T4v;
3081
                         T4x = T49 + T4w;
3082
                         TfJ = T4w - T49;
3083
                         TdA = T3X - T48;
3084
                         TdD = TdB - TdC;
3085
                         TdE = TdA + TdD;
3086
                         TdM = TdD - TdA;
3087
                    }
3088
                    {
3089
                         E TfM, TfN, T81, T86;
3090
                         TfM = TdB + TdC;
3091
                         TfN = Tdw + Tdx;
3092
                         TfO = TfM - TfN;
3093
                         Tgu = TfM + TfN;
3094
                         T81 = T7X - T80;
3095
                         T86 = T84 + T85;
3096
                         T87 = FNMS(KP923879532, T86, KP382683432 * T81);
3097
                         T8v = FMA(KP382683432, T86, KP923879532 * T81);
3098
                    }
3099
                    {
3100
                         E T8c, T8h, Tb8, Tb9;
3101
                         T8c = T8a + T8b;
3102
                         T8h = T8d - T8g;
3103
                         T8i = FMA(KP923879532, T8c, KP382683432 * T8h);
3104
                         T8u = FNMS(KP923879532, T8h, KP382683432 * T8c);
3105
                         Tb8 = T8a - T8b;
3106
                         Tb9 = T8d + T8g;
3107
                         Tba = FMA(KP382683432, Tb8, KP923879532 * Tb9);
3108
                         Tbg = FNMS(KP382683432, Tb9, KP923879532 * Tb8);
3109
                    }
3110
                    {
3111
                         E Tdv, Tdy, Tb5, Tb6;
3112
                         Tdv = T4k - T4v;
3113
                         Tdy = Tdw - Tdx;
3114
                         Tdz = Tdv - Tdy;
3115
                         TdN = Tdv + Tdy;
3116
                         Tb5 = T7X + T80;
3117
                         Tb6 = T84 - T85;
3118
                         Tb7 = FNMS(KP382683432, Tb6, KP923879532 * Tb5);
3119
                         Tbh = FMA(KP923879532, Tb6, KP382683432 * Tb5);
3120
                    }
3121
               }
3122
               {
3123
                    E T5u, TdW, T8S, T8V, T62, Te3, T94, T99, T5F, TdX, T8T, T8Y, T5R, Te2, T93;
3124
                    E T96;
3125
                    {
3126
                         E T5o, T8Q, T5t, T8R;
3127
                         {
3128
                              E T5l, T5n, T5k, T5m;
3129
                              T5l = ri[WS(rs, 3)];
3130
                              T5n = ii[WS(rs, 3)];
3131
                              T5k = W[4];
3132
                              T5m = W[5];
3133
                              T5o = FMA(T5k, T5l, T5m * T5n);
3134
                              T8Q = FNMS(T5m, T5l, T5k * T5n);
3135
                         }
3136
                         {
3137
                              E T5q, T5s, T5p, T5r;
3138
                              T5q = ri[WS(rs, 35)];
3139
                              T5s = ii[WS(rs, 35)];
3140
                              T5p = W[68];
3141
                              T5r = W[69];
3142
                              T5t = FMA(T5p, T5q, T5r * T5s);
3143
                              T8R = FNMS(T5r, T5q, T5p * T5s);
3144
                         }
3145
                         T5u = T5o + T5t;
3146
                         TdW = T8Q + T8R;
3147
                         T8S = T8Q - T8R;
3148
                         T8V = T5o - T5t;
3149
                    }
3150
                    {
3151
                         E T5W, T97, T61, T98;
3152
                         {
3153
                              E T5T, T5V, T5S, T5U;
3154
                              T5T = ri[WS(rs, 11)];
3155
                              T5V = ii[WS(rs, 11)];
3156
                              T5S = W[20];
3157
                              T5U = W[21];
3158
                              T5W = FMA(T5S, T5T, T5U * T5V);
3159
                              T97 = FNMS(T5U, T5T, T5S * T5V);
3160
                         }
3161
                         {
3162
                              E T5Y, T60, T5X, T5Z;
3163
                              T5Y = ri[WS(rs, 43)];
3164
                              T60 = ii[WS(rs, 43)];
3165
                              T5X = W[84];
3166
                              T5Z = W[85];
3167
                              T61 = FMA(T5X, T5Y, T5Z * T60);
3168
                              T98 = FNMS(T5Z, T5Y, T5X * T60);
3169
                         }
3170
                         T62 = T5W + T61;
3171
                         Te3 = T97 + T98;
3172
                         T94 = T5W - T61;
3173
                         T99 = T97 - T98;
3174
                    }
3175
                    {
3176
                         E T5z, T8W, T5E, T8X;
3177
                         {
3178
                              E T5w, T5y, T5v, T5x;
3179
                              T5w = ri[WS(rs, 19)];
3180
                              T5y = ii[WS(rs, 19)];
3181
                              T5v = W[36];
3182
                              T5x = W[37];
3183
                              T5z = FMA(T5v, T5w, T5x * T5y);
3184
                              T8W = FNMS(T5x, T5w, T5v * T5y);
3185
                         }
3186
                         {
3187
                              E T5B, T5D, T5A, T5C;
3188
                              T5B = ri[WS(rs, 51)];
3189
                              T5D = ii[WS(rs, 51)];
3190
                              T5A = W[100];
3191
                              T5C = W[101];
3192
                              T5E = FMA(T5A, T5B, T5C * T5D);
3193
                              T8X = FNMS(T5C, T5B, T5A * T5D);
3194
                         }
3195
                         T5F = T5z + T5E;
3196
                         TdX = T8W + T8X;
3197
                         T8T = T5z - T5E;
3198
                         T8Y = T8W - T8X;
3199
                    }
3200
                    {
3201
                         E T5L, T91, T5Q, T92;
3202
                         {
3203
                              E T5I, T5K, T5H, T5J;
3204
                              T5I = ri[WS(rs, 59)];
3205
                              T5K = ii[WS(rs, 59)];
3206
                              T5H = W[116];
3207
                              T5J = W[117];
3208
                              T5L = FMA(T5H, T5I, T5J * T5K);
3209
                              T91 = FNMS(T5J, T5I, T5H * T5K);
3210
                         }
3211
                         {
3212
                              E T5N, T5P, T5M, T5O;
3213
                              T5N = ri[WS(rs, 27)];
3214
                              T5P = ii[WS(rs, 27)];
3215
                              T5M = W[52];
3216
                              T5O = W[53];
3217
                              T5Q = FMA(T5M, T5N, T5O * T5P);
3218
                              T92 = FNMS(T5O, T5N, T5M * T5P);
3219
                         }
3220
                         T5R = T5L + T5Q;
3221
                         Te2 = T91 + T92;
3222
                         T93 = T91 - T92;
3223
                         T96 = T5L - T5Q;
3224
                    }
3225
                    {
3226
                         E T5G, T63, Te1, Te4;
3227
                         T5G = T5u + T5F;
3228
                         T63 = T5R + T62;
3229
                         T64 = T5G + T63;
3230
                         TfZ = T63 - T5G;
3231
                         Te1 = T5R - T62;
3232
                         Te4 = Te2 - Te3;
3233
                         Te5 = Te1 + Te4;
3234
                         Ted = Te1 - Te4;
3235
                    }
3236
                    {
3237
                         E TfS, TfT, T8U, T8Z;
3238
                         TfS = TdW + TdX;
3239
                         TfT = Te2 + Te3;
3240
                         TfU = TfS - TfT;
3241
                         Tgz = TfS + TfT;
3242
                         T8U = T8S + T8T;
3243
                         T8Z = T8V - T8Y;
3244
                         T90 = FNMS(KP923879532, T8Z, KP382683432 * T8U);
3245
                         T9o = FMA(KP923879532, T8U, KP382683432 * T8Z);
3246
                    }
3247
                    {
3248
                         E T95, T9a, Tbr, Tbs;
3249
                         T95 = T93 + T94;
3250
                         T9a = T96 - T99;
3251
                         T9b = FMA(KP382683432, T95, KP923879532 * T9a);
3252
                         T9n = FNMS(KP923879532, T95, KP382683432 * T9a);
3253
                         Tbr = T93 - T94;
3254
                         Tbs = T96 + T99;
3255
                         Tbt = FMA(KP923879532, Tbr, KP382683432 * Tbs);
3256
                         Tbz = FNMS(KP382683432, Tbr, KP923879532 * Tbs);
3257
                    }
3258
                    {
3259
                         E TdY, TdZ, Tbo, Tbp;
3260
                         TdY = TdW - TdX;
3261
                         TdZ = T5u - T5F;
3262
                         Te0 = TdY - TdZ;
3263
                         Tee = TdZ + TdY;
3264
                         Tbo = T8S - T8T;
3265
                         Tbp = T8V + T8Y;
3266
                         Tbq = FNMS(KP382683432, Tbp, KP923879532 * Tbo);
3267
                         TbA = FMA(KP382683432, Tbo, KP923879532 * Tbp);
3268
                    }
3269
               }
3270
               {
3271
                    E T1t, Tgn, TgK, TgL, TgV, Th1, T30, Th0, T66, TgX, Tgw, TgE, TgB, TgF, Tgq;
3272
                    E TgM;
3273
                    {
3274
                         E TH, T1s, TgI, TgJ;
3275
                         TH = Tj + TG;
3276
                         T1s = T14 + T1r;
3277
                         T1t = TH + T1s;
3278
                         Tgn = TH - T1s;
3279
                         TgI = Tgt + Tgu;
3280
                         TgJ = Tgy + Tgz;
3281
                         TgK = TgI - TgJ;
3282
                         TgL = TgI + TgJ;
3283
                    }
3284
                    {
3285
                         E TgN, TgU, T2e, T2Z;
3286
                         TgN = Tfq + Tfr;
3287
                         TgU = TgO + TgT;
3288
                         TgV = TgN + TgU;
3289
                         Th1 = TgU - TgN;
3290
                         T2e = T1Q + T2d;
3291
                         T2Z = T2B + T2Y;
3292
                         T30 = T2e + T2Z;
3293
                         Th0 = T2Z - T2e;
3294
                    }
3295
                    {
3296
                         E T4y, T65, Tgs, Tgv;
3297
                         T4y = T3M + T4x;
3298
                         T65 = T5j + T64;
3299
                         T66 = T4y + T65;
3300
                         TgX = T65 - T4y;
3301
                         Tgs = T3M - T4x;
3302
                         Tgv = Tgt - Tgu;
3303
                         Tgw = Tgs + Tgv;
3304
                         TgE = Tgv - Tgs;
3305
                    }
3306
                    {
3307
                         E Tgx, TgA, Tgo, Tgp;
3308
                         Tgx = T5j - T64;
3309
                         TgA = Tgy - Tgz;
3310
                         TgB = Tgx - TgA;
3311
                         TgF = Tgx + TgA;
3312
                         Tgo = Tfu + Tfv;
3313
                         Tgp = TfA + TfB;
3314
                         Tgq = Tgo - Tgp;
3315
                         TgM = Tgo + Tgp;
3316
                    }
3317
                    {
3318
                         E T31, TgW, TgH, TgY;
3319
                         T31 = T1t + T30;
3320
                         ri[WS(rs, 32)] = T31 - T66;
3321
                         ri[0] = T31 + T66;
3322
                         TgW = TgM + TgV;
3323
                         ii[0] = TgL + TgW;
3324
                         ii[WS(rs, 32)] = TgW - TgL;
3325
                         TgH = T1t - T30;
3326
                         ri[WS(rs, 48)] = TgH - TgK;
3327
                         ri[WS(rs, 16)] = TgH + TgK;
3328
                         TgY = TgV - TgM;
3329
                         ii[WS(rs, 16)] = TgX + TgY;
3330
                         ii[WS(rs, 48)] = TgY - TgX;
3331
                    }
3332
                    {
3333
                         E Tgr, TgC, TgZ, Th2;
3334
                         Tgr = Tgn + Tgq;
3335
                         TgC = KP707106781 * (Tgw + TgB);
3336
                         ri[WS(rs, 40)] = Tgr - TgC;
3337
                         ri[WS(rs, 8)] = Tgr + TgC;
3338
                         TgZ = KP707106781 * (TgE + TgF);
3339
                         Th2 = Th0 + Th1;
3340
                         ii[WS(rs, 8)] = TgZ + Th2;
3341
                         ii[WS(rs, 40)] = Th2 - TgZ;
3342
                    }
3343
                    {
3344
                         E TgD, TgG, Th3, Th4;
3345
                         TgD = Tgn - Tgq;
3346
                         TgG = KP707106781 * (TgE - TgF);
3347
                         ri[WS(rs, 56)] = TgD - TgG;
3348
                         ri[WS(rs, 24)] = TgD + TgG;
3349
                         Th3 = KP707106781 * (TgB - Tgw);
3350
                         Th4 = Th1 - Th0;
3351
                         ii[WS(rs, 24)] = Th3 + Th4;
3352
                         ii[WS(rs, 56)] = Th4 - Th3;
3353
                    }
3354
               }
3355
               {
3356
                    E Tft, Tg7, Tgh, Tgl, Th9, Thf, TfE, Th6, TfQ, Tg4, Tga, The, Tge, Tgk, Tg1;
3357
                    E Tg5;
3358
                    {
3359
                         E Tfp, Tfs, Tgf, Tgg;
3360
                         Tfp = Tj - TG;
3361
                         Tfs = Tfq - Tfr;
3362
                         Tft = Tfp - Tfs;
3363
                         Tg7 = Tfp + Tfs;
3364
                         Tgf = TfR + TfU;
3365
                         Tgg = TfY + TfZ;
3366
                         Tgh = FNMS(KP382683432, Tgg, KP923879532 * Tgf);
3367
                         Tgl = FMA(KP923879532, Tgg, KP382683432 * Tgf);
3368
                    }
3369
                    {
3370
                         E Th7, Th8, Tfy, TfD;
3371
                         Th7 = T1r - T14;
3372
                         Th8 = TgT - TgO;
3373
                         Th9 = Th7 + Th8;
3374
                         Thf = Th8 - Th7;
3375
                         Tfy = Tfw - Tfx;
3376
                         TfD = Tfz + TfC;
3377
                         TfE = KP707106781 * (Tfy - TfD);
3378
                         Th6 = KP707106781 * (Tfy + TfD);
3379
                    }
3380
                    {
3381
                         E TfK, TfP, Tg8, Tg9;
3382
                         TfK = TfI - TfJ;
3383
                         TfP = TfL - TfO;
3384
                         TfQ = FMA(KP923879532, TfK, KP382683432 * TfP);
3385
                         Tg4 = FNMS(KP923879532, TfP, KP382683432 * TfK);
3386
                         Tg8 = Tfx + Tfw;
3387
                         Tg9 = Tfz - TfC;
3388
                         Tga = KP707106781 * (Tg8 + Tg9);
3389
                         The = KP707106781 * (Tg9 - Tg8);
3390
                    }
3391
                    {
3392
                         E Tgc, Tgd, TfV, Tg0;
3393
                         Tgc = TfI + TfJ;
3394
                         Tgd = TfL + TfO;
3395
                         Tge = FMA(KP382683432, Tgc, KP923879532 * Tgd);
3396
                         Tgk = FNMS(KP382683432, Tgd, KP923879532 * Tgc);
3397
                         TfV = TfR - TfU;
3398
                         Tg0 = TfY - TfZ;
3399
                         Tg1 = FNMS(KP923879532, Tg0, KP382683432 * TfV);
3400
                         Tg5 = FMA(KP382683432, Tg0, KP923879532 * TfV);
3401
                    }
3402
                    {
3403
                         E TfF, Tg2, Thd, Thg;
3404
                         TfF = Tft + TfE;
3405
                         Tg2 = TfQ + Tg1;
3406
                         ri[WS(rs, 44)] = TfF - Tg2;
3407
                         ri[WS(rs, 12)] = TfF + Tg2;
3408
                         Thd = Tg4 + Tg5;
3409
                         Thg = The + Thf;
3410
                         ii[WS(rs, 12)] = Thd + Thg;
3411
                         ii[WS(rs, 44)] = Thg - Thd;
3412
                    }
3413
                    {
3414
                         E Tg3, Tg6, Thh, Thi;
3415
                         Tg3 = Tft - TfE;
3416
                         Tg6 = Tg4 - Tg5;
3417
                         ri[WS(rs, 60)] = Tg3 - Tg6;
3418
                         ri[WS(rs, 28)] = Tg3 + Tg6;
3419
                         Thh = Tg1 - TfQ;
3420
                         Thi = Thf - The;
3421
                         ii[WS(rs, 28)] = Thh + Thi;
3422
                         ii[WS(rs, 60)] = Thi - Thh;
3423
                    }
3424
                    {
3425
                         E Tgb, Tgi, Th5, Tha;
3426
                         Tgb = Tg7 + Tga;
3427
                         Tgi = Tge + Tgh;
3428
                         ri[WS(rs, 36)] = Tgb - Tgi;
3429
                         ri[WS(rs, 4)] = Tgb + Tgi;
3430
                         Th5 = Tgk + Tgl;
3431
                         Tha = Th6 + Th9;
3432
                         ii[WS(rs, 4)] = Th5 + Tha;
3433
                         ii[WS(rs, 36)] = Tha - Th5;
3434
                    }
3435
                    {
3436
                         E Tgj, Tgm, Thb, Thc;
3437
                         Tgj = Tg7 - Tga;
3438
                         Tgm = Tgk - Tgl;
3439
                         ri[WS(rs, 52)] = Tgj - Tgm;
3440
                         ri[WS(rs, 20)] = Tgj + Tgm;
3441
                         Thb = Tgh - Tge;
3442
                         Thc = Th9 - Th6;
3443
                         ii[WS(rs, 20)] = Thb + Thc;
3444
                         ii[WS(rs, 52)] = Thc - Thb;
3445
                    }
3446
               }
3447
               {
3448
                    E Td1, Ten, Tdo, ThA, ThD, ThJ, Teq, ThI, Teh, TeB, Tel, Tex, TdQ, TeA, Tek;
3449
                    E Teu;
3450
                    {
3451
                         E TcP, Td0, Teo, Tep;
3452
                         TcP = TcL - TcO;
3453
                         Td0 = KP707106781 * (TcU - TcZ);
3454
                         Td1 = TcP - Td0;
3455
                         Ten = TcP + Td0;
3456
                         {
3457
                              E Tdc, Tdn, ThB, ThC;
3458
                              Tdc = FNMS(KP923879532, Tdb, KP382683432 * Td6);
3459
                              Tdn = FMA(KP382683432, Tdh, KP923879532 * Tdm);
3460
                              Tdo = Tdc - Tdn;
3461
                              ThA = Tdc + Tdn;
3462
                              ThB = KP707106781 * (TeF - TeE);
3463
                              ThC = Thn - Thm;
3464
                              ThD = ThB + ThC;
3465
                              ThJ = ThC - ThB;
3466
                         }
3467
                         Teo = FMA(KP923879532, Td6, KP382683432 * Tdb);
3468
                         Tep = FNMS(KP923879532, Tdh, KP382683432 * Tdm);
3469
                         Teq = Teo + Tep;
3470
                         ThI = Tep - Teo;
3471
                         {
3472
                              E Te7, Tev, Teg, Tew, Te6, Tef;
3473
                              Te6 = KP707106781 * (Te0 - Te5);
3474
                              Te7 = TdV - Te6;
3475
                              Tev = TdV + Te6;
3476
                              Tef = KP707106781 * (Ted - Tee);
3477
                              Teg = Tec - Tef;
3478
                              Tew = Tec + Tef;
3479
                              Teh = FNMS(KP980785280, Teg, KP195090322 * Te7);
3480
                              TeB = FMA(KP831469612, Tew, KP555570233 * Tev);
3481
                              Tel = FMA(KP195090322, Teg, KP980785280 * Te7);
3482
                              Tex = FNMS(KP555570233, Tew, KP831469612 * Tev);
3483
                         }
3484
                         {
3485
                              E TdG, Tes, TdP, Tet, TdF, TdO;
3486
                              TdF = KP707106781 * (Tdz - TdE);
3487
                              TdG = Tdu - TdF;
3488
                              Tes = Tdu + TdF;
3489
                              TdO = KP707106781 * (TdM - TdN);
3490
                              TdP = TdL - TdO;
3491
                              Tet = TdL + TdO;
3492
                              TdQ = FMA(KP980785280, TdG, KP195090322 * TdP);
3493
                              TeA = FNMS(KP555570233, Tet, KP831469612 * Tes);
3494
                              Tek = FNMS(KP980785280, TdP, KP195090322 * TdG);
3495
                              Teu = FMA(KP555570233, Tes, KP831469612 * Tet);
3496
                         }
3497
                    }
3498
                    {
3499
                         E Tdp, Tei, ThH, ThK;
3500
                         Tdp = Td1 + Tdo;
3501
                         Tei = TdQ + Teh;
3502
                         ri[WS(rs, 46)] = Tdp - Tei;
3503
                         ri[WS(rs, 14)] = Tdp + Tei;
3504
                         ThH = Tek + Tel;
3505
                         ThK = ThI + ThJ;
3506
                         ii[WS(rs, 14)] = ThH + ThK;
3507
                         ii[WS(rs, 46)] = ThK - ThH;
3508
                    }
3509
                    {
3510
                         E Tej, Tem, ThL, ThM;
3511
                         Tej = Td1 - Tdo;
3512
                         Tem = Tek - Tel;
3513
                         ri[WS(rs, 62)] = Tej - Tem;
3514
                         ri[WS(rs, 30)] = Tej + Tem;
3515
                         ThL = Teh - TdQ;
3516
                         ThM = ThJ - ThI;
3517
                         ii[WS(rs, 30)] = ThL + ThM;
3518
                         ii[WS(rs, 62)] = ThM - ThL;
3519
                    }
3520
                    {
3521
                         E Ter, Tey, Thz, ThE;
3522
                         Ter = Ten + Teq;
3523
                         Tey = Teu + Tex;
3524
                         ri[WS(rs, 38)] = Ter - Tey;
3525
                         ri[WS(rs, 6)] = Ter + Tey;
3526
                         Thz = TeA + TeB;
3527
                         ThE = ThA + ThD;
3528
                         ii[WS(rs, 6)] = Thz + ThE;
3529
                         ii[WS(rs, 38)] = ThE - Thz;
3530
                    }
3531
                    {
3532
                         E Tez, TeC, ThF, ThG;
3533
                         Tez = Ten - Teq;
3534
                         TeC = TeA - TeB;
3535
                         ri[WS(rs, 54)] = Tez - TeC;
3536
                         ri[WS(rs, 22)] = Tez + TeC;
3537
                         ThF = Tex - Teu;
3538
                         ThG = ThD - ThA;
3539
                         ii[WS(rs, 22)] = ThF + ThG;
3540
                         ii[WS(rs, 54)] = ThG - ThF;
3541
                    }
3542
               }
3543
               {
3544
                    E TeH, Tf9, TeO, Thk, Thp, Thv, Tfc, Thu, Tf3, Tfn, Tf7, Tfj, TeW, Tfm, Tf6;
3545
                    E Tfg;
3546
                    {
3547
                         E TeD, TeG, Tfa, Tfb;
3548
                         TeD = TcL + TcO;
3549
                         TeG = KP707106781 * (TeE + TeF);
3550
                         TeH = TeD - TeG;
3551
                         Tf9 = TeD + TeG;
3552
                         {
3553
                              E TeK, TeN, Thl, Tho;
3554
                              TeK = FNMS(KP382683432, TeJ, KP923879532 * TeI);
3555
                              TeN = FMA(KP923879532, TeL, KP382683432 * TeM);
3556
                              TeO = TeK - TeN;
3557
                              Thk = TeK + TeN;
3558
                              Thl = KP707106781 * (TcU + TcZ);
3559
                              Tho = Thm + Thn;
3560
                              Thp = Thl + Tho;
3561
                              Thv = Tho - Thl;
3562
                         }
3563
                         Tfa = FMA(KP382683432, TeI, KP923879532 * TeJ);
3564
                         Tfb = FNMS(KP382683432, TeL, KP923879532 * TeM);
3565
                         Tfc = Tfa + Tfb;
3566
                         Thu = Tfb - Tfa;
3567
                         {
3568
                              E TeZ, Tfh, Tf2, Tfi, TeY, Tf1;
3569
                              TeY = KP707106781 * (Tee + Ted);
3570
                              TeZ = TeX - TeY;
3571
                              Tfh = TeX + TeY;
3572
                              Tf1 = KP707106781 * (Te0 + Te5);
3573
                              Tf2 = Tf0 - Tf1;
3574
                              Tfi = Tf0 + Tf1;
3575
                              Tf3 = FNMS(KP831469612, Tf2, KP555570233 * TeZ);
3576
                              Tfn = FMA(KP195090322, Tfh, KP980785280 * Tfi);
3577
                              Tf7 = FMA(KP831469612, TeZ, KP555570233 * Tf2);
3578
                              Tfj = FNMS(KP195090322, Tfi, KP980785280 * Tfh);
3579
                         }
3580
                         {
3581
                              E TeS, Tfe, TeV, Tff, TeR, TeU;
3582
                              TeR = KP707106781 * (TdE + Tdz);
3583
                              TeS = TeQ - TeR;
3584
                              Tfe = TeQ + TeR;
3585
                              TeU = KP707106781 * (TdM + TdN);
3586
                              TeV = TeT - TeU;
3587
                              Tff = TeT + TeU;
3588
                              TeW = FMA(KP555570233, TeS, KP831469612 * TeV);
3589
                              Tfm = FNMS(KP195090322, Tfe, KP980785280 * Tff);
3590
                              Tf6 = FNMS(KP831469612, TeS, KP555570233 * TeV);
3591
                              Tfg = FMA(KP980785280, Tfe, KP195090322 * Tff);
3592
                         }
3593
                    }
3594
                    {
3595
                         E TeP, Tf4, Tht, Thw;
3596
                         TeP = TeH + TeO;
3597
                         Tf4 = TeW + Tf3;
3598
                         ri[WS(rs, 42)] = TeP - Tf4;
3599
                         ri[WS(rs, 10)] = TeP + Tf4;
3600
                         Tht = Tf6 + Tf7;
3601
                         Thw = Thu + Thv;
3602
                         ii[WS(rs, 10)] = Tht + Thw;
3603
                         ii[WS(rs, 42)] = Thw - Tht;
3604
                    }
3605
                    {
3606
                         E Tf5, Tf8, Thx, Thy;
3607
                         Tf5 = TeH - TeO;
3608
                         Tf8 = Tf6 - Tf7;
3609
                         ri[WS(rs, 58)] = Tf5 - Tf8;
3610
                         ri[WS(rs, 26)] = Tf5 + Tf8;
3611
                         Thx = Tf3 - TeW;
3612
                         Thy = Thv - Thu;
3613
                         ii[WS(rs, 26)] = Thx + Thy;
3614
                         ii[WS(rs, 58)] = Thy - Thx;
3615
                    }
3616
                    {
3617
                         E Tfd, Tfk, Thj, Thq;
3618
                         Tfd = Tf9 + Tfc;
3619
                         Tfk = Tfg + Tfj;
3620
                         ri[WS(rs, 34)] = Tfd - Tfk;
3621
                         ri[WS(rs, 2)] = Tfd + Tfk;
3622
                         Thj = Tfm + Tfn;
3623
                         Thq = Thk + Thp;
3624
                         ii[WS(rs, 2)] = Thj + Thq;
3625
                         ii[WS(rs, 34)] = Thq - Thj;
3626
                    }
3627
                    {
3628
                         E Tfl, Tfo, Thr, Ths;
3629
                         Tfl = Tf9 - Tfc;
3630
                         Tfo = Tfm - Tfn;
3631
                         ri[WS(rs, 50)] = Tfl - Tfo;
3632
                         ri[WS(rs, 18)] = Tfl + Tfo;
3633
                         Thr = Tfj - Tfg;
3634
                         Ths = Thp - Thk;
3635
                         ii[WS(rs, 18)] = Thr + Ths;
3636
                         ii[WS(rs, 50)] = Ths - Thr;
3637
                    }
3638
               }
3639
               {
3640
                    E T6L, T9x, TiD, TiJ, T7E, TiI, T9A, TiA, T8y, T9K, T9u, T9E, T9r, T9L, T9v;
3641
                    E T9H;
3642
                    {
3643
                         E T6n, T6K, TiB, TiC;
3644
                         T6n = T6b - T6m;
3645
                         T6K = T6y - T6J;
3646
                         T6L = T6n - T6K;
3647
                         T9x = T6n + T6K;
3648
                         TiB = T9P - T9O;
3649
                         TiC = Tin - Tim;
3650
                         TiD = TiB + TiC;
3651
                         TiJ = TiC - TiB;
3652
                    }
3653
                    {
3654
                         E T7c, T9y, T7D, T9z;
3655
                         {
3656
                              E T72, T7b, T7t, T7C;
3657
                              T72 = T6Q - T71;
3658
                              T7b = T77 - T7a;
3659
                              T7c = FNMS(KP980785280, T7b, KP195090322 * T72);
3660
                              T9y = FMA(KP980785280, T72, KP195090322 * T7b);
3661
                              T7t = T7h - T7s;
3662
                              T7C = T7y - T7B;
3663
                              T7D = FMA(KP195090322, T7t, KP980785280 * T7C);
3664
                              T9z = FNMS(KP980785280, T7t, KP195090322 * T7C);
3665
                         }
3666
                         T7E = T7c - T7D;
3667
                         TiI = T9z - T9y;
3668
                         T9A = T9y + T9z;
3669
                         TiA = T7c + T7D;
3670
                    }
3671
                    {
3672
                         E T8k, T9C, T8x, T9D;
3673
                         {
3674
                              E T7W, T8j, T8t, T8w;
3675
                              T7W = T7K - T7V;
3676
                              T8j = T87 - T8i;
3677
                              T8k = T7W - T8j;
3678
                              T9C = T7W + T8j;
3679
                              T8t = T8p - T8s;
3680
                              T8w = T8u - T8v;
3681
                              T8x = T8t - T8w;
3682
                              T9D = T8t + T8w;
3683
                         }
3684
                         T8y = FMA(KP995184726, T8k, KP098017140 * T8x);
3685
                         T9K = FNMS(KP634393284, T9D, KP773010453 * T9C);
3686
                         T9u = FNMS(KP995184726, T8x, KP098017140 * T8k);
3687
                         T9E = FMA(KP634393284, T9C, KP773010453 * T9D);
3688
                    }
3689
                    {
3690
                         E T9d, T9F, T9q, T9G;
3691
                         {
3692
                              E T8P, T9c, T9m, T9p;
3693
                              T8P = T8D - T8O;
3694
                              T9c = T90 - T9b;
3695
                              T9d = T8P - T9c;
3696
                              T9F = T8P + T9c;
3697
                              T9m = T9i - T9l;
3698
                              T9p = T9n - T9o;
3699
                              T9q = T9m - T9p;
3700
                              T9G = T9m + T9p;
3701
                         }
3702
                         T9r = FNMS(KP995184726, T9q, KP098017140 * T9d);
3703
                         T9L = FMA(KP773010453, T9G, KP634393284 * T9F);
3704
                         T9v = FMA(KP098017140, T9q, KP995184726 * T9d);
3705
                         T9H = FNMS(KP634393284, T9G, KP773010453 * T9F);
3706
                    }
3707
                    {
3708
                         E T7F, T9s, TiH, TiK;
3709
                         T7F = T6L + T7E;
3710
                         T9s = T8y + T9r;
3711
                         ri[WS(rs, 47)] = T7F - T9s;
3712
                         ri[WS(rs, 15)] = T7F + T9s;
3713
                         TiH = T9u + T9v;
3714
                         TiK = TiI + TiJ;
3715
                         ii[WS(rs, 15)] = TiH + TiK;
3716
                         ii[WS(rs, 47)] = TiK - TiH;
3717
                    }
3718
                    {
3719
                         E T9t, T9w, TiL, TiM;
3720
                         T9t = T6L - T7E;
3721
                         T9w = T9u - T9v;
3722
                         ri[WS(rs, 63)] = T9t - T9w;
3723
                         ri[WS(rs, 31)] = T9t + T9w;
3724
                         TiL = T9r - T8y;
3725
                         TiM = TiJ - TiI;
3726
                         ii[WS(rs, 31)] = TiL + TiM;
3727
                         ii[WS(rs, 63)] = TiM - TiL;
3728
                    }
3729
                    {
3730
                         E T9B, T9I, Tiz, TiE;
3731
                         T9B = T9x + T9A;
3732
                         T9I = T9E + T9H;
3733
                         ri[WS(rs, 39)] = T9B - T9I;
3734
                         ri[WS(rs, 7)] = T9B + T9I;
3735
                         Tiz = T9K + T9L;
3736
                         TiE = TiA + TiD;
3737
                         ii[WS(rs, 7)] = Tiz + TiE;
3738
                         ii[WS(rs, 39)] = TiE - Tiz;
3739
                    }
3740
                    {
3741
                         E T9J, T9M, TiF, TiG;
3742
                         T9J = T9x - T9A;
3743
                         T9M = T9K - T9L;
3744
                         ri[WS(rs, 55)] = T9J - T9M;
3745
                         ri[WS(rs, 23)] = T9J + T9M;
3746
                         TiF = T9H - T9E;
3747
                         TiG = TiD - TiA;
3748
                         ii[WS(rs, 23)] = TiF + TiG;
3749
                         ii[WS(rs, 55)] = TiG - TiF;
3750
                    }
3751
               }
3752
               {
3753
                    E TaL, TbJ, Ti9, Tif, Tb0, Tie, TbM, Ti6, Tbk, TbW, TbG, TbQ, TbD, TbX, TbH;
3754
                    E TbT;
3755
                    {
3756
                         E TaD, TaK, Ti7, Ti8;
3757
                         TaD = Taz - TaC;
3758
                         TaK = TaG - TaJ;
3759
                         TaL = TaD - TaK;
3760
                         TbJ = TaD + TaK;
3761
                         Ti7 = Tc1 - Tc0;
3762
                         Ti8 = ThT - ThQ;
3763
                         Ti9 = Ti7 + Ti8;
3764
                         Tif = Ti8 - Ti7;
3765
                    }
3766
                    {
3767
                         E TaS, TbK, TaZ, TbL;
3768
                         {
3769
                              E TaO, TaR, TaV, TaY;
3770
                              TaO = TaM - TaN;
3771
                              TaR = TaP - TaQ;
3772
                              TaS = FNMS(KP831469612, TaR, KP555570233 * TaO);
3773
                              TbK = FMA(KP555570233, TaR, KP831469612 * TaO);
3774
                              TaV = TaT - TaU;
3775
                              TaY = TaW - TaX;
3776
                              TaZ = FMA(KP831469612, TaV, KP555570233 * TaY);
3777
                              TbL = FNMS(KP831469612, TaY, KP555570233 * TaV);
3778
                         }
3779
                         Tb0 = TaS - TaZ;
3780
                         Tie = TbL - TbK;
3781
                         TbM = TbK + TbL;
3782
                         Ti6 = TaS + TaZ;
3783
                    }
3784
                    {
3785
                         E Tbc, TbO, Tbj, TbP;
3786
                         {
3787
                              E Tb4, Tbb, Tbf, Tbi;
3788
                              Tb4 = Tb2 - Tb3;
3789
                              Tbb = Tb7 - Tba;
3790
                              Tbc = Tb4 - Tbb;
3791
                              TbO = Tb4 + Tbb;
3792
                              Tbf = Tbd - Tbe;
3793
                              Tbi = Tbg - Tbh;
3794
                              Tbj = Tbf - Tbi;
3795
                              TbP = Tbf + Tbi;
3796
                         }
3797
                         Tbk = FMA(KP956940335, Tbc, KP290284677 * Tbj);
3798
                         TbW = FNMS(KP471396736, TbP, KP881921264 * TbO);
3799
                         TbG = FNMS(KP956940335, Tbj, KP290284677 * Tbc);
3800
                         TbQ = FMA(KP471396736, TbO, KP881921264 * TbP);
3801
                    }
3802
                    {
3803
                         E Tbv, TbR, TbC, TbS;
3804
                         {
3805
                              E Tbn, Tbu, Tby, TbB;
3806
                              Tbn = Tbl - Tbm;
3807
                              Tbu = Tbq - Tbt;
3808
                              Tbv = Tbn - Tbu;
3809
                              TbR = Tbn + Tbu;
3810
                              Tby = Tbw - Tbx;
3811
                              TbB = Tbz - TbA;
3812
                              TbC = Tby - TbB;
3813
                              TbS = Tby + TbB;
3814
                         }
3815
                         TbD = FNMS(KP956940335, TbC, KP290284677 * Tbv);
3816
                         TbX = FMA(KP881921264, TbS, KP471396736 * TbR);
3817
                         TbH = FMA(KP290284677, TbC, KP956940335 * Tbv);
3818
                         TbT = FNMS(KP471396736, TbS, KP881921264 * TbR);
3819
                    }
3820
                    {
3821
                         E Tb1, TbE, Tid, Tig;
3822
                         Tb1 = TaL + Tb0;
3823
                         TbE = Tbk + TbD;
3824
                         ri[WS(rs, 45)] = Tb1 - TbE;
3825
                         ri[WS(rs, 13)] = Tb1 + TbE;
3826
                         Tid = TbG + TbH;
3827
                         Tig = Tie + Tif;
3828
                         ii[WS(rs, 13)] = Tid + Tig;
3829
                         ii[WS(rs, 45)] = Tig - Tid;
3830
                    }
3831
                    {
3832
                         E TbF, TbI, Tih, Tii;
3833
                         TbF = TaL - Tb0;
3834
                         TbI = TbG - TbH;
3835
                         ri[WS(rs, 61)] = TbF - TbI;
3836
                         ri[WS(rs, 29)] = TbF + TbI;
3837
                         Tih = TbD - Tbk;
3838
                         Tii = Tif - Tie;
3839
                         ii[WS(rs, 29)] = Tih + Tii;
3840
                         ii[WS(rs, 61)] = Tii - Tih;
3841
                    }
3842
                    {
3843
                         E TbN, TbU, Ti5, Tia;
3844
                         TbN = TbJ + TbM;
3845
                         TbU = TbQ + TbT;
3846
                         ri[WS(rs, 37)] = TbN - TbU;
3847
                         ri[WS(rs, 5)] = TbN + TbU;
3848
                         Ti5 = TbW + TbX;
3849
                         Tia = Ti6 + Ti9;
3850
                         ii[WS(rs, 5)] = Ti5 + Tia;
3851
                         ii[WS(rs, 37)] = Tia - Ti5;
3852
                    }
3853
                    {
3854
                         E TbV, TbY, Tib, Tic;
3855
                         TbV = TbJ - TbM;
3856
                         TbY = TbW - TbX;
3857
                         ri[WS(rs, 53)] = TbV - TbY;
3858
                         ri[WS(rs, 21)] = TbV + TbY;
3859
                         Tib = TbT - TbQ;
3860
                         Tic = Ti9 - Ti6;
3861
                         ii[WS(rs, 21)] = Tib + Tic;
3862
                         ii[WS(rs, 53)] = Tic - Tib;
3863
                    }
3864
               }
3865
               {
3866
                    E Tc3, Tcv, ThV, Ti1, Tca, Ti0, Tcy, ThO, Tci, TcI, Tcs, TcC, Tcp, TcJ, Tct;
3867
                    E TcF;
3868
                    {
3869
                         E TbZ, Tc2, ThP, ThU;
3870
                         TbZ = Taz + TaC;
3871
                         Tc2 = Tc0 + Tc1;
3872
                         Tc3 = TbZ - Tc2;
3873
                         Tcv = TbZ + Tc2;
3874
                         ThP = TaG + TaJ;
3875
                         ThU = ThQ + ThT;
3876
                         ThV = ThP + ThU;
3877
                         Ti1 = ThU - ThP;
3878
                    }
3879
                    {
3880
                         E Tc6, Tcw, Tc9, Tcx;
3881
                         {
3882
                              E Tc4, Tc5, Tc7, Tc8;
3883
                              Tc4 = TaM + TaN;
3884
                              Tc5 = TaP + TaQ;
3885
                              Tc6 = FNMS(KP195090322, Tc5, KP980785280 * Tc4);
3886
                              Tcw = FMA(KP980785280, Tc5, KP195090322 * Tc4);
3887
                              Tc7 = TaT + TaU;
3888
                              Tc8 = TaW + TaX;
3889
                              Tc9 = FMA(KP195090322, Tc7, KP980785280 * Tc8);
3890
                              Tcx = FNMS(KP195090322, Tc8, KP980785280 * Tc7);
3891
                         }
3892
                         Tca = Tc6 - Tc9;
3893
                         Ti0 = Tcx - Tcw;
3894
                         Tcy = Tcw + Tcx;
3895
                         ThO = Tc6 + Tc9;
3896
                    }
3897
                    {
3898
                         E Tce, TcA, Tch, TcB;
3899
                         {
3900
                              E Tcc, Tcd, Tcf, Tcg;
3901
                              Tcc = Tbd + Tbe;
3902
                              Tcd = Tba + Tb7;
3903
                              Tce = Tcc - Tcd;
3904
                              TcA = Tcc + Tcd;
3905
                              Tcf = Tb2 + Tb3;
3906
                              Tcg = Tbg + Tbh;
3907
                              Tch = Tcf - Tcg;
3908
                              TcB = Tcf + Tcg;
3909
                         }
3910
                         Tci = FMA(KP634393284, Tce, KP773010453 * Tch);
3911
                         TcI = FNMS(KP098017140, TcA, KP995184726 * TcB);
3912
                         Tcs = FNMS(KP773010453, Tce, KP634393284 * Tch);
3913
                         TcC = FMA(KP995184726, TcA, KP098017140 * TcB);
3914
                    }
3915
                    {
3916
                         E Tcl, TcD, Tco, TcE;
3917
                         {
3918
                              E Tcj, Tck, Tcm, Tcn;
3919
                              Tcj = Tbl + Tbm;
3920
                              Tck = TbA + Tbz;
3921
                              Tcl = Tcj - Tck;
3922
                              TcD = Tcj + Tck;
3923
                              Tcm = Tbw + Tbx;
3924
                              Tcn = Tbq + Tbt;
3925
                              Tco = Tcm - Tcn;
3926
                              TcE = Tcm + Tcn;
3927
                         }
3928
                         Tcp = FNMS(KP773010453, Tco, KP634393284 * Tcl);
3929
                         TcJ = FMA(KP098017140, TcD, KP995184726 * TcE);
3930
                         Tct = FMA(KP773010453, Tcl, KP634393284 * Tco);
3931
                         TcF = FNMS(KP098017140, TcE, KP995184726 * TcD);
3932
                    }
3933
                    {
3934
                         E Tcb, Tcq, ThZ, Ti2;
3935
                         Tcb = Tc3 + Tca;
3936
                         Tcq = Tci + Tcp;
3937
                         ri[WS(rs, 41)] = Tcb - Tcq;
3938
                         ri[WS(rs, 9)] = Tcb + Tcq;
3939
                         ThZ = Tcs + Tct;
3940
                         Ti2 = Ti0 + Ti1;
3941
                         ii[WS(rs, 9)] = ThZ + Ti2;
3942
                         ii[WS(rs, 41)] = Ti2 - ThZ;
3943
                    }
3944
                    {
3945
                         E Tcr, Tcu, Ti3, Ti4;
3946
                         Tcr = Tc3 - Tca;
3947
                         Tcu = Tcs - Tct;
3948
                         ri[WS(rs, 57)] = Tcr - Tcu;
3949
                         ri[WS(rs, 25)] = Tcr + Tcu;
3950
                         Ti3 = Tcp - Tci;
3951
                         Ti4 = Ti1 - Ti0;
3952
                         ii[WS(rs, 25)] = Ti3 + Ti4;
3953
                         ii[WS(rs, 57)] = Ti4 - Ti3;
3954
                    }
3955
                    {
3956
                         E Tcz, TcG, ThN, ThW;
3957
                         Tcz = Tcv + Tcy;
3958
                         TcG = TcC + TcF;
3959
                         ri[WS(rs, 33)] = Tcz - TcG;
3960
                         ri[WS(rs, 1)] = Tcz + TcG;
3961
                         ThN = TcI + TcJ;
3962
                         ThW = ThO + ThV;
3963
                         ii[WS(rs, 1)] = ThN + ThW;
3964
                         ii[WS(rs, 33)] = ThW - ThN;
3965
                    }
3966
                    {
3967
                         E TcH, TcK, ThX, ThY;
3968
                         TcH = Tcv - Tcy;
3969
                         TcK = TcI - TcJ;
3970
                         ri[WS(rs, 49)] = TcH - TcK;
3971
                         ri[WS(rs, 17)] = TcH + TcK;
3972
                         ThX = TcF - TcC;
3973
                         ThY = ThV - ThO;
3974
                         ii[WS(rs, 17)] = ThX + ThY;
3975
                         ii[WS(rs, 49)] = ThY - ThX;
3976
                    }
3977
               }
3978
               {
3979
                    E T9R, Taj, Tip, Tiv, T9Y, Tiu, Tam, Tik, Ta6, Taw, Tag, Taq, Tad, Tax, Tah;
3980
                    E Tat;
3981
                    {
3982
                         E T9N, T9Q, Til, Tio;
3983
                         T9N = T6b + T6m;
3984
                         T9Q = T9O + T9P;
3985
                         T9R = T9N - T9Q;
3986
                         Taj = T9N + T9Q;
3987
                         Til = T6y + T6J;
3988
                         Tio = Tim + Tin;
3989
                         Tip = Til + Tio;
3990
                         Tiv = Tio - Til;
3991
                    }
3992
                    {
3993
                         E T9U, Tak, T9X, Tal;
3994
                         {
3995
                              E T9S, T9T, T9V, T9W;
3996
                              T9S = T6Q + T71;
3997
                              T9T = T77 + T7a;
3998
                              T9U = FNMS(KP555570233, T9T, KP831469612 * T9S);
3999
                              Tak = FMA(KP555570233, T9S, KP831469612 * T9T);
4000
                              T9V = T7h + T7s;
4001
                              T9W = T7y + T7B;
4002
                              T9X = FMA(KP831469612, T9V, KP555570233 * T9W);
4003
                              Tal = FNMS(KP555570233, T9V, KP831469612 * T9W);
4004
                         }
4005
                         T9Y = T9U - T9X;
4006
                         Tiu = Tal - Tak;
4007
                         Tam = Tak + Tal;
4008
                         Tik = T9U + T9X;
4009
                    }
4010
                    {
4011
                         E Ta2, Tao, Ta5, Tap;
4012
                         {
4013
                              E Ta0, Ta1, Ta3, Ta4;
4014
                              Ta0 = T8p + T8s;
4015
                              Ta1 = T8i + T87;
4016
                              Ta2 = Ta0 - Ta1;
4017
                              Tao = Ta0 + Ta1;
4018
                              Ta3 = T7K + T7V;
4019
                              Ta4 = T8u + T8v;
4020
                              Ta5 = Ta3 - Ta4;
4021
                              Tap = Ta3 + Ta4;
4022
                         }
4023
                         Ta6 = FMA(KP471396736, Ta2, KP881921264 * Ta5);
4024
                         Taw = FNMS(KP290284677, Tao, KP956940335 * Tap);
4025
                         Tag = FNMS(KP881921264, Ta2, KP471396736 * Ta5);
4026
                         Taq = FMA(KP956940335, Tao, KP290284677 * Tap);
4027
                    }
4028
                    {
4029
                         E Ta9, Tar, Tac, Tas;
4030
                         {
4031
                              E Ta7, Ta8, Taa, Tab;
4032
                              Ta7 = T8D + T8O;
4033
                              Ta8 = T9o + T9n;
4034
                              Ta9 = Ta7 - Ta8;
4035
                              Tar = Ta7 + Ta8;
4036
                              Taa = T9i + T9l;
4037
                              Tab = T90 + T9b;
4038
                              Tac = Taa - Tab;
4039
                              Tas = Taa + Tab;
4040
                         }
4041
                         Tad = FNMS(KP881921264, Tac, KP471396736 * Ta9);
4042
                         Tax = FMA(KP290284677, Tar, KP956940335 * Tas);
4043
                         Tah = FMA(KP881921264, Ta9, KP471396736 * Tac);
4044
                         Tat = FNMS(KP290284677, Tas, KP956940335 * Tar);
4045
                    }
4046
                    {
4047
                         E T9Z, Tae, Tit, Tiw;
4048
                         T9Z = T9R + T9Y;
4049
                         Tae = Ta6 + Tad;
4050
                         ri[WS(rs, 43)] = T9Z - Tae;
4051
                         ri[WS(rs, 11)] = T9Z + Tae;
4052
                         Tit = Tag + Tah;
4053
                         Tiw = Tiu + Tiv;
4054
                         ii[WS(rs, 11)] = Tit + Tiw;
4055
                         ii[WS(rs, 43)] = Tiw - Tit;
4056
                    }
4057
                    {
4058
                         E Taf, Tai, Tix, Tiy;
4059
                         Taf = T9R - T9Y;
4060
                         Tai = Tag - Tah;
4061
                         ri[WS(rs, 59)] = Taf - Tai;
4062
                         ri[WS(rs, 27)] = Taf + Tai;
4063
                         Tix = Tad - Ta6;
4064
                         Tiy = Tiv - Tiu;
4065
                         ii[WS(rs, 27)] = Tix + Tiy;
4066
                         ii[WS(rs, 59)] = Tiy - Tix;
4067
                    }
4068
                    {
4069
                         E Tan, Tau, Tij, Tiq;
4070
                         Tan = Taj + Tam;
4071
                         Tau = Taq + Tat;
4072
                         ri[WS(rs, 35)] = Tan - Tau;
4073
                         ri[WS(rs, 3)] = Tan + Tau;
4074
                         Tij = Taw + Tax;
4075
                         Tiq = Tik + Tip;
4076
                         ii[WS(rs, 3)] = Tij + Tiq;
4077
                         ii[WS(rs, 35)] = Tiq - Tij;
4078
                    }
4079
                    {
4080
                         E Tav, Tay, Tir, Tis;
4081
                         Tav = Taj - Tam;
4082
                         Tay = Taw - Tax;
4083
                         ri[WS(rs, 51)] = Tav - Tay;
4084
                         ri[WS(rs, 19)] = Tav + Tay;
4085
                         Tir = Tat - Taq;
4086
                         Tis = Tip - Tik;
4087
                         ii[WS(rs, 19)] = Tir + Tis;
4088
                         ii[WS(rs, 51)] = Tis - Tir;
4089
                    }
4090
               }
4091
          }
4092
     }
4093
}
4094

    
4095
static const tw_instr twinstr[] = {
4096
     {TW_FULL, 0, 64},
4097
     {TW_NEXT, 1, 0}
4098
};
4099

    
4100
/*
 * Descriptor passed to the registration call for the t1_64 codelet.
 *
 * ct_desc is declared in project headers outside this chunk, so the
 * initializer is positional and the field meanings noted below are
 * assumptions to be confirmed against that declaration.
 */
static const ct_desc desc = {
     64,                  /* presumably the size/radix handled by this codelet */
     "t1_64",             /* codelet name string */
     twinstr,             /* twiddle instruction table defined in this file */
     &GENUS,              /* GENUS is supplied by an included project header */
     {808, 270, 230, 0},  /* presumably operation counts -- TODO confirm order */
     0, 0, 0              /* trailing fields left at zero; meaning not visible here */
};
4101

    
4102
/*
 * Registration entry point for the t1_64 codelet.
 *
 * Passes the planner `p`, the codelet function t1_64 and its descriptor
 * `desc` to X(kdft_dit_register).  X(...) is the project's name-wrapping
 * macro and kdft_dit_register is declared in project headers; neither
 * definition is visible in this chunk.
 */
void X(codelet_t1_64) (planner *p) {
     X(kdft_dit_register) (p, t1_64, &desc);
}
4105
#endif