To check out this repository, please run `hg clone` on the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / t2_64.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (116 KB)

1
/*
2
 * Copyright (c) 2003, 2007-14 Matteo Frigo
3
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18
 *
19
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:21 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_twiddle.native -fma -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 64 -name t2_64 -include dft/scalar/t.h */
29

    
30
/*
31
 * This function contains 1154 FP additions, 840 FP multiplications,
32
 * (or, 520 additions, 206 multiplications, 634 fused multiply/add),
33
 * 316 stack variables, 15 constants, and 256 memory accesses
34
 */
35
#include "dft/scalar/t.h"
36

    
37
static void t2_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
38
{
39
     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
40
     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
41
     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
42
     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
43
     DK(KP098491403, +0.098491403357164253077197521291327432293052451);
44
     DK(KP820678790, +0.820678790828660330972281985331011598767386482);
45
     DK(KP303346683, +0.303346683607342391675883946941299872384187453);
46
     DK(KP534511135, +0.534511135950791641089685961295362908582039528);
47
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
48
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
49
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);
50
     DK(KP668178637, +0.668178637919298919997757686523080761552472251);
51
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
52
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
53
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
54
     {
55
          INT m;
56
          for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(128, rs)) {
57
               E T2, T3, Tc, T8, Te, T5, T6, Tr, T7, TJ, T14, T3d, T3i, TG, T10;
58
               E T3a, T3g, TL, TP, Tb, Td, T17, Tt, Tu, T1i, Ti, T2U, T1t, T7B, T5O;
59
               E T3N, T3U, T1I, T3G, T3R, T79, T1x, T3D, T2l, T3X, T2d, T1M, T4B, T4x, T4T;
60
               E T2h, T29, T5s, T81, T5w, T7X, T7N, T7h, T64, T6a, T6e, T7l, T60, T7R, T5A;
61
               E T6h, T6J, T7o, T5E, T6k, T6N, T7r, T2X, T6t, T6x, TO, TK, TQ, T7c, TU;
62
               E T2x, T2u, T2y, T7E, T2C, T4b, T48, T4c, T5R, T4g, T3m, T3j, T3n, T4W, T3r;
63
               E Tx, Ty, TC, T1Z, T23, T4s, T4p, T70, T6W, T19, T41, T44, T1a, T1e, T35;
64
               E T31, T59, T55, T1k, T1R, T1V, T1l, T1p, T2Q, T2N, T8i, T8e, Th, T4E, T4H;
65
               E Tj, Tn, T3A, T3w, T5n, T5j;
66
               {
67
                    E T1H, Tg, Tw, T1s, T2g, TH, T2t, T47, T3h, T28, T4w, T3M, T2c, T4A, T3Q;
68
                    E T1w, T2k, T1L, T5r, T80;
69
                    {
70
                         E TI, T13, TF, TZ, Ta, T4, T9, Ts;
71
                         T2 = W[0];
72
                         T3 = W[2];
73
                         T4 = T2 * T3;
74
                         Tc = W[5];
75
                         TI = T3 * Tc;
76
                         T13 = T2 * Tc;
77
                         T8 = W[4];
78
                         Te = W[6];
79
                         TF = T3 * T8;
80
                         T1H = T8 * Te;
81
                         TZ = T2 * T8;
82
                         T5 = W[1];
83
                         T6 = W[3];
84
                         Ta = T2 * T6;
85
                         Tr = FMA(T5, T6, T4);
86
                         T7 = FNMS(T5, T6, T4);
87
                         Tg = T7 * Tc;
88
                         Tw = Tr * Tc;
89
                         T1s = T3 * Te;
90
                         T2g = T2 * Te;
91
                         TJ = FMA(T6, T8, TI);
92
                         T14 = FNMS(T5, T8, T13);
93
                         T3d = FMA(T5, T8, T13);
94
                         T3i = FNMS(T6, T8, TI);
95
                         TG = FNMS(T6, Tc, TF);
96
                         TH = TG * Te;
97
                         T10 = FMA(T5, Tc, TZ);
98
                         T2t = T10 * Te;
99
                         T3a = FNMS(T5, Tc, TZ);
100
                         T47 = T3a * Te;
101
                         T3g = FMA(T6, Tc, TF);
102
                         T3h = T3g * Te;
103
                         TL = W[8];
104
                         T28 = T3 * TL;
105
                         T4w = T8 * TL;
106
                         T3M = T2 * TL;
107
                         TP = W[9];
108
                         T2c = T3 * TP;
109
                         T4A = T8 * TP;
110
                         T3Q = T2 * TP;
111
                         T9 = T7 * T8;
112
                         Tb = FMA(T5, T3, Ta);
113
                         Td = FMA(Tb, Tc, T9);
114
                         T17 = FNMS(Tb, Tc, T9);
115
                         Ts = Tr * T8;
116
                         Tt = FNMS(T5, T3, Ta);
117
                         Tu = FNMS(Tt, Tc, Ts);
118
                         T1i = FMA(Tt, Tc, Ts);
119
                         Ti = W[7];
120
                         T1w = T3 * Ti;
121
                         T2k = T2 * Ti;
122
                         T1L = T8 * Ti;
123
                         T2U = FMA(Tc, Ti, T1H);
124
                    }
125
                    T1t = FMA(T6, Ti, T1s);
126
                    T7B = FNMS(T14, Ti, T2t);
127
                    T5O = FNMS(T3d, Ti, T47);
128
                    T3N = FMA(T5, TP, T3M);
129
                    T3U = FNMS(T6, Ti, T1s);
130
                    T1I = FNMS(Tc, Ti, T1H);
131
                    T3G = FNMS(T5, Te, T2k);
132
                    T3R = FNMS(T5, TL, T3Q);
133
                    T79 = FNMS(TJ, Ti, TH);
134
                    T1x = FNMS(T6, Te, T1w);
135
                    T3D = FMA(T5, Ti, T2g);
136
                    T2l = FMA(T5, Te, T2k);
137
                    T3X = FMA(T6, Te, T1w);
138
                    T2d = FNMS(T6, TL, T2c);
139
                    T1M = FMA(Tc, Te, T1L);
140
                    T4B = FNMS(Tc, TL, T4A);
141
                    T4x = FMA(Tc, TP, T4w);
142
                    T4T = FNMS(T3i, Ti, T3h);
143
                    T2h = FNMS(T5, Ti, T2g);
144
                    T29 = FMA(T6, TP, T28);
145
                    T5r = T3g * TL;
146
                    T5s = FMA(T3i, TP, T5r);
147
                    T80 = T7 * TP;
148
                    T81 = FNMS(Tb, TL, T80);
149
                    {
150
                         E T5v, T7W, T7M, T7g, T63;
151
                         T5v = T3g * TP;
152
                         T5w = FNMS(T3i, TL, T5v);
153
                         T7W = T7 * TL;
154
                         T7X = FMA(Tb, TP, T7W);
155
                         T7M = TG * TL;
156
                         T7N = FMA(TJ, TP, T7M);
157
                         T7g = T10 * TL;
158
                         T7h = FMA(T14, TP, T7g);
159
                         T63 = T3a * TP;
160
                         T64 = FNMS(T3d, TL, T63);
161
                    }
162
                    {
163
                         E T69, T6d, T7k, T5Z, T7Q, T5z;
164
                         T69 = Tr * TL;
165
                         T6a = FMA(Tt, TP, T69);
166
                         T6d = Tr * TP;
167
                         T6e = FNMS(Tt, TL, T6d);
168
                         T7k = T10 * TP;
169
                         T7l = FNMS(T14, TL, T7k);
170
                         T5Z = T3a * TL;
171
                         T60 = FMA(T3d, TP, T5Z);
172
                         T7Q = TG * TP;
173
                         T7R = FNMS(TJ, TL, T7Q);
174
                         T5z = Tr * Te;
175
                         T5A = FMA(Tt, Ti, T5z);
176
                         T6h = FNMS(Tt, Ti, T5z);
177
                    }
178
                    {
179
                         E T6I, T5D, T6M, T6s, T6w;
180
                         T6I = T7 * Te;
181
                         T6J = FNMS(Tb, Ti, T6I);
182
                         T7o = FMA(Tb, Ti, T6I);
183
                         T5D = Tr * Ti;
184
                         T5E = FNMS(Tt, Te, T5D);
185
                         T6k = FMA(Tt, Te, T5D);
186
                         T6M = T7 * Ti;
187
                         T6N = FMA(Tb, Te, T6M);
188
                         T7r = FNMS(Tb, Te, T6M);
189
                         T6s = T2U * TL;
190
                         T6w = T2U * TP;
191
                         T2X = FNMS(Tc, Te, T1L);
192
                         T6t = FMA(T2X, TP, T6s);
193
                         T6x = FNMS(T2X, TL, T6w);
194
                         {
195
                              E TN, TM, TT, T2w, T2v, T2B;
196
                              TN = TG * Ti;
197
                              TO = FNMS(TJ, Te, TN);
198
                              TK = FMA(TJ, Ti, TH);
199
                              TM = TK * TL;
200
                              TT = TK * TP;
201
                              TQ = FMA(TO, TP, TM);
202
                              T7c = FMA(TJ, Te, TN);
203
                              TU = FNMS(TO, TL, TT);
204
                              T2w = T10 * Ti;
205
                              T2x = FNMS(T14, Te, T2w);
206
                              T2u = FMA(T14, Ti, T2t);
207
                              T2v = T2u * TL;
208
                              T2B = T2u * TP;
209
                              T2y = FMA(T2x, TP, T2v);
210
                              T7E = FMA(T14, Te, T2w);
211
                              T2C = FNMS(T2x, TL, T2B);
212
                         }
213
                    }
214
                    {
215
                         E T4a, T49, T4f, T3l, T3k, T3q;
216
                         T4a = T3a * Ti;
217
                         T4b = FNMS(T3d, Te, T4a);
218
                         T48 = FMA(T3d, Ti, T47);
219
                         T49 = T48 * TL;
220
                         T4f = T48 * TP;
221
                         T4c = FMA(T4b, TP, T49);
222
                         T5R = FMA(T3d, Te, T4a);
223
                         T4g = FNMS(T4b, TL, T4f);
224
                         T3l = T3g * Ti;
225
                         T3m = FNMS(T3i, Te, T3l);
226
                         T3j = FMA(T3i, Ti, T3h);
227
                         T3k = T3j * TL;
228
                         T3q = T3j * TP;
229
                         T3n = FMA(T3m, TP, T3k);
230
                         T4W = FMA(T3i, Te, T3l);
231
                         T3r = FNMS(T3m, TL, T3q);
232
                         {
233
                              E T1Y, T22, Tv, TB, T6Z, T6V;
234
                              T1Y = Tu * TL;
235
                              T22 = Tu * TP;
236
                              Tv = Tu * Te;
237
                              TB = Tu * Ti;
238
                              Tx = FMA(Tt, T8, Tw);
239
                              Ty = FMA(Tx, Ti, Tv);
240
                              TC = FNMS(Tx, Te, TB);
241
                              T1Z = FMA(Tx, TP, T1Y);
242
                              T23 = FNMS(Tx, TL, T22);
243
                              T4s = FMA(Tx, Te, TB);
244
                              T4p = FNMS(Tx, Ti, Tv);
245
                              T6Z = Ty * TP;
246
                              T70 = FNMS(TC, TL, T6Z);
247
                              T6V = Ty * TL;
248
                              T6W = FMA(TC, TP, T6V);
249
                         }
250
                    }
251
                    {
252
                         E T30, T34, T18, T1d, T58, T54;
253
                         T30 = T17 * TL;
254
                         T34 = T17 * TP;
255
                         T18 = T17 * Te;
256
                         T1d = T17 * Ti;
257
                         T19 = FMA(Tb, T8, Tg);
258
                         T41 = FMA(T19, Ti, T18);
259
                         T44 = FNMS(T19, Te, T1d);
260
                         T1a = FNMS(T19, Ti, T18);
261
                         T1e = FMA(T19, Te, T1d);
262
                         T35 = FNMS(T19, TL, T34);
263
                         T31 = FMA(T19, TP, T30);
264
                         T58 = T41 * TP;
265
                         T59 = FNMS(T44, TL, T58);
266
                         T54 = T41 * TL;
267
                         T55 = FMA(T44, TP, T54);
268
                    }
269
                    {
270
                         E T1j, T1o, T1Q, T1U, T8h, T8d;
271
                         T1j = T1i * TL;
272
                         T1o = T1i * TP;
273
                         T1Q = T1i * Te;
274
                         T1U = T1i * Ti;
275
                         T1k = FNMS(Tt, T8, Tw);
276
                         T1R = FMA(T1k, Ti, T1Q);
277
                         T1V = FNMS(T1k, Te, T1U);
278
                         T1l = FMA(T1k, TP, T1j);
279
                         T1p = FNMS(T1k, TL, T1o);
280
                         T2Q = FMA(T1k, Te, T1U);
281
                         T2N = FNMS(T1k, Ti, T1Q);
282
                         T8h = T1R * TP;
283
                         T8i = FNMS(T1V, TL, T8h);
284
                         T8d = T1R * TL;
285
                         T8e = FMA(T1V, TP, T8d);
286
                    }
287
                    {
288
                         E T3v, T3z, Tf, Tm, T5m, T5i;
289
                         T3v = Td * TL;
290
                         T3z = Td * TP;
291
                         Tf = Td * Te;
292
                         Tm = Td * Ti;
293
                         Th = FNMS(Tb, T8, Tg);
294
                         T4E = FMA(Th, Ti, Tf);
295
                         T4H = FNMS(Th, Te, Tm);
296
                         Tj = FNMS(Th, Ti, Tf);
297
                         Tn = FMA(Th, Te, Tm);
298
                         T3A = FNMS(Th, TL, T3z);
299
                         T3w = FMA(Th, TP, T3v);
300
                         T5m = T4E * TP;
301
                         T5n = FNMS(T4H, TL, T5m);
302
                         T5i = T4E * TL;
303
                         T5j = FMA(T4H, TP, T5i);
304
                    }
305
               }
306
               {
307
                    E TY, Tg4, Tl9, TlD, T8w, TdS, Tkd, TkE, T2G, Tge, Tgh, TiK, T98, Te1, T9f;
308
                    E Te0, T39, Tgq, Tgn, TiN, T9p, Te5, T9M, Te8, T74, Thr, Thc, Tja, TbI, TeE;
309
                    E TcB, TeP, T1B, TkD, Tg7, Tk7, T8D, TdT, T8K, TdU, T27, Tg9, Tgc, TiJ, T8T;
310
                    E TdY, T90, TdX, T4k, TgB, Tgy, TiT, T9Y, Tec, Tal, Tef, T5d, Th0, TgL, TiZ;
311
                    E Taz, Tel, Tbs, Tew, T3K, Tgo, Tgt, TiO, T9E, Te9, T9P, Te6, T4L, Tgz, TgE;
312
                    E TiU, Tad, Teg, Tao, Ted, T5I, TgM, Th3, Tj0, TaO, Tex, Tbv, Tem, T7v, Thd;
313
                    E Thu, Tjb, TbX, TeQ, TcE, TeF, T68, Tj5, TgS, Th5, Tbj, Tez, Tbx, Teq, T6B;
314
                    E Tj6, TgX, Th6, Tb4, TeA, Tby, Tet, T7V, Tjg, Thj, Thw, Tcs, TeS, TcG, TeJ;
315
                    E T8m, Tjh, Tho, Thx, Tcd, TeT, TcH, TeM;
316
                    {
317
                         E T1, Tkb, Tp, Tka, TE, T8s, TW, T8u;
318
                         T1 = ri[0];
319
                         Tkb = ii[0];
320
                         {
321
                              E Tk, Tl, To, Tk9;
322
                              Tk = ri[WS(rs, 32)];
323
                              Tl = Tj * Tk;
324
                              To = ii[WS(rs, 32)];
325
                              Tk9 = Tj * To;
326
                              Tp = FMA(Tn, To, Tl);
327
                              Tka = FNMS(Tn, Tk, Tk9);
328
                         }
329
                         {
330
                              E Tz, TA, TD, T8r;
331
                              Tz = ri[WS(rs, 16)];
332
                              TA = Ty * Tz;
333
                              TD = ii[WS(rs, 16)];
334
                              T8r = Ty * TD;
335
                              TE = FMA(TC, TD, TA);
336
                              T8s = FNMS(TC, Tz, T8r);
337
                         }
338
                         {
339
                              E TR, TS, TV, T8t;
340
                              TR = ri[WS(rs, 48)];
341
                              TS = TQ * TR;
342
                              TV = ii[WS(rs, 48)];
343
                              T8t = TQ * TV;
344
                              TW = FMA(TU, TV, TS);
345
                              T8u = FNMS(TU, TR, T8t);
346
                         }
347
                         {
348
                              E Tq, TX, Tl7, Tl8;
349
                              Tq = T1 + Tp;
350
                              TX = TE + TW;
351
                              TY = Tq + TX;
352
                              Tg4 = Tq - TX;
353
                              Tl7 = Tkb - Tka;
354
                              Tl8 = TE - TW;
355
                              Tl9 = Tl7 - Tl8;
356
                              TlD = Tl8 + Tl7;
357
                         }
358
                         {
359
                              E T8q, T8v, Tk8, Tkc;
360
                              T8q = T1 - Tp;
361
                              T8v = T8s - T8u;
362
                              T8w = T8q - T8v;
363
                              TdS = T8q + T8v;
364
                              Tk8 = T8s + T8u;
365
                              Tkc = Tka + Tkb;
366
                              Tkd = Tk8 + Tkc;
367
                              TkE = Tkc - Tk8;
368
                         }
369
                    }
370
                    {
371
                         E T2f, T93, T2E, T9d, T2n, T95, T2s, T9b;
372
                         {
373
                              E T2a, T2b, T2e, T92;
374
                              T2a = ri[WS(rs, 60)];
375
                              T2b = T29 * T2a;
376
                              T2e = ii[WS(rs, 60)];
377
                              T92 = T29 * T2e;
378
                              T2f = FMA(T2d, T2e, T2b);
379
                              T93 = FNMS(T2d, T2a, T92);
380
                         }
381
                         {
382
                              E T2z, T2A, T2D, T9c;
383
                              T2z = ri[WS(rs, 44)];
384
                              T2A = T2y * T2z;
385
                              T2D = ii[WS(rs, 44)];
386
                              T9c = T2y * T2D;
387
                              T2E = FMA(T2C, T2D, T2A);
388
                              T9d = FNMS(T2C, T2z, T9c);
389
                         }
390
                         {
391
                              E T2i, T2j, T2m, T94;
392
                              T2i = ri[WS(rs, 28)];
393
                              T2j = T2h * T2i;
394
                              T2m = ii[WS(rs, 28)];
395
                              T94 = T2h * T2m;
396
                              T2n = FMA(T2l, T2m, T2j);
397
                              T95 = FNMS(T2l, T2i, T94);
398
                         }
399
                         {
400
                              E T2p, T2q, T2r, T9a;
401
                              T2p = ri[WS(rs, 12)];
402
                              T2q = TG * T2p;
403
                              T2r = ii[WS(rs, 12)];
404
                              T9a = TG * T2r;
405
                              T2s = FMA(TJ, T2r, T2q);
406
                              T9b = FNMS(TJ, T2p, T9a);
407
                         }
408
                         {
409
                              E T2o, T2F, Tgf, Tgg;
410
                              T2o = T2f + T2n;
411
                              T2F = T2s + T2E;
412
                              T2G = T2o + T2F;
413
                              Tge = T2o - T2F;
414
                              Tgf = T93 + T95;
415
                              Tgg = T9b + T9d;
416
                              Tgh = Tgf - Tgg;
417
                              TiK = Tgf + Tgg;
418
                         }
419
                         {
420
                              E T96, T97, T99, T9e;
421
                              T96 = T93 - T95;
422
                              T97 = T2s - T2E;
423
                              T98 = T96 + T97;
424
                              Te1 = T96 - T97;
425
                              T99 = T2f - T2n;
426
                              T9e = T9b - T9d;
427
                              T9f = T99 - T9e;
428
                              Te0 = T99 + T9e;
429
                         }
430
                    }
431
                    {
432
                         E T2M, T9k, T37, T9K, T2S, T9m, T2Z, T9I;
433
                         {
434
                              E T2J, T2K, T2L, T9j;
435
                              T2J = ri[WS(rs, 2)];
436
                              T2K = Tr * T2J;
437
                              T2L = ii[WS(rs, 2)];
438
                              T9j = Tr * T2L;
439
                              T2M = FMA(Tt, T2L, T2K);
440
                              T9k = FNMS(Tt, T2J, T9j);
441
                         }
442
                         {
443
                              E T32, T33, T36, T9J;
444
                              T32 = ri[WS(rs, 50)];
445
                              T33 = T31 * T32;
446
                              T36 = ii[WS(rs, 50)];
447
                              T9J = T31 * T36;
448
                              T37 = FMA(T35, T36, T33);
449
                              T9K = FNMS(T35, T32, T9J);
450
                         }
451
                         {
452
                              E T2O, T2P, T2R, T9l;
453
                              T2O = ri[WS(rs, 34)];
454
                              T2P = T2N * T2O;
455
                              T2R = ii[WS(rs, 34)];
456
                              T9l = T2N * T2R;
457
                              T2S = FMA(T2Q, T2R, T2P);
458
                              T9m = FNMS(T2Q, T2O, T9l);
459
                         }
460
                         {
461
                              E T2V, T2W, T2Y, T9H;
462
                              T2V = ri[WS(rs, 18)];
463
                              T2W = T2U * T2V;
464
                              T2Y = ii[WS(rs, 18)];
465
                              T9H = T2U * T2Y;
466
                              T2Z = FMA(T2X, T2Y, T2W);
467
                              T9I = FNMS(T2X, T2V, T9H);
468
                         }
469
                         {
470
                              E T2T, T38, Tgl, Tgm;
471
                              T2T = T2M + T2S;
472
                              T38 = T2Z + T37;
473
                              T39 = T2T + T38;
474
                              Tgq = T2T - T38;
475
                              Tgl = T9k + T9m;
476
                              Tgm = T9I + T9K;
477
                              Tgn = Tgl - Tgm;
478
                              TiN = Tgl + Tgm;
479
                         }
480
                         {
481
                              E T9n, T9o, T9G, T9L;
482
                              T9n = T9k - T9m;
483
                              T9o = T2Z - T37;
484
                              T9p = T9n + T9o;
485
                              Te5 = T9n - T9o;
486
                              T9G = T2M - T2S;
487
                              T9L = T9I - T9K;
488
                              T9M = T9G - T9L;
489
                              Te8 = T9G + T9L;
490
                         }
491
                    }
492
                    {
493
                         E T6H, TbD, T72, Tcz, T6P, TbF, T6U, Tcx;
494
                         {
495
                              E T6E, T6F, T6G, TbC;
496
                              T6E = ri[WS(rs, 63)];
497
                              T6F = TL * T6E;
498
                              T6G = ii[WS(rs, 63)];
499
                              TbC = TL * T6G;
500
                              T6H = FMA(TP, T6G, T6F);
501
                              TbD = FNMS(TP, T6E, TbC);
502
                         }
503
                         {
504
                              E T6X, T6Y, T71, Tcy;
505
                              T6X = ri[WS(rs, 47)];
506
                              T6Y = T6W * T6X;
507
                              T71 = ii[WS(rs, 47)];
508
                              Tcy = T6W * T71;
509
                              T72 = FMA(T70, T71, T6Y);
510
                              Tcz = FNMS(T70, T6X, Tcy);
511
                         }
512
                         {
513
                              E T6K, T6L, T6O, TbE;
514
                              T6K = ri[WS(rs, 31)];
515
                              T6L = T6J * T6K;
516
                              T6O = ii[WS(rs, 31)];
517
                              TbE = T6J * T6O;
518
                              T6P = FMA(T6N, T6O, T6L);
519
                              TbF = FNMS(T6N, T6K, TbE);
520
                         }
521
                         {
522
                              E T6R, T6S, T6T, Tcw;
523
                              T6R = ri[WS(rs, 15)];
524
                              T6S = TK * T6R;
525
                              T6T = ii[WS(rs, 15)];
526
                              Tcw = TK * T6T;
527
                              T6U = FMA(TO, T6T, T6S);
528
                              Tcx = FNMS(TO, T6R, Tcw);
529
                         }
530
                         {
531
                              E T6Q, T73, Tha, Thb;
532
                              T6Q = T6H + T6P;
533
                              T73 = T6U + T72;
534
                              T74 = T6Q + T73;
535
                              Thr = T6Q - T73;
536
                              Tha = TbD + TbF;
537
                              Thb = Tcx + Tcz;
538
                              Thc = Tha - Thb;
539
                              Tja = Tha + Thb;
540
                         }
541
                         {
542
                              E TbG, TbH, Tcv, TcA;
543
                              TbG = TbD - TbF;
544
                              TbH = T6U - T72;
545
                              TbI = TbG + TbH;
546
                              TeE = TbG - TbH;
547
                              Tcv = T6H - T6P;
548
                              TcA = Tcx - Tcz;
549
                              TcB = Tcv - TcA;
550
                              TeP = Tcv + TcA;
551
                         }
552
                    }
553
                    {
554
                         E T16, T8y, T1z, T8I, T1g, T8A, T1r, T8G;
555
                         {
556
                              E T11, T12, T15, T8x;
557
                              T11 = ri[WS(rs, 8)];
558
                              T12 = T10 * T11;
559
                              T15 = ii[WS(rs, 8)];
560
                              T8x = T10 * T15;
561
                              T16 = FMA(T14, T15, T12);
562
                              T8y = FNMS(T14, T11, T8x);
563
                         }
564
                         {
565
                              E T1u, T1v, T1y, T8H;
566
                              T1u = ri[WS(rs, 24)];
567
                              T1v = T1t * T1u;
568
                              T1y = ii[WS(rs, 24)];
569
                              T8H = T1t * T1y;
570
                              T1z = FMA(T1x, T1y, T1v);
571
                              T8I = FNMS(T1x, T1u, T8H);
572
                         }
573
                         {
574
                              E T1b, T1c, T1f, T8z;
575
                              T1b = ri[WS(rs, 40)];
576
                              T1c = T1a * T1b;
577
                              T1f = ii[WS(rs, 40)];
578
                              T8z = T1a * T1f;
579
                              T1g = FMA(T1e, T1f, T1c);
580
                              T8A = FNMS(T1e, T1b, T8z);
581
                         }
582
                         {
583
                              E T1m, T1n, T1q, T8F;
584
                              T1m = ri[WS(rs, 56)];
585
                              T1n = T1l * T1m;
586
                              T1q = ii[WS(rs, 56)];
587
                              T8F = T1l * T1q;
588
                              T1r = FMA(T1p, T1q, T1n);
589
                              T8G = FNMS(T1p, T1m, T8F);
590
                         }
591
                         {
592
                              E T1h, T1A, Tg5, Tg6;
593
                              T1h = T16 + T1g;
594
                              T1A = T1r + T1z;
595
                              T1B = T1h + T1A;
596
                              TkD = T1A - T1h;
597
                              Tg5 = T8y + T8A;
598
                              Tg6 = T8G + T8I;
599
                              Tg7 = Tg5 - Tg6;
600
                              Tk7 = Tg5 + Tg6;
601
                         }
602
                         {
603
                              E T8B, T8C, T8E, T8J;
604
                              T8B = T8y - T8A;
605
                              T8C = T16 - T1g;
606
                              T8D = T8B - T8C;
607
                              TdT = T8C + T8B;
608
                              T8E = T1r - T1z;
609
                              T8J = T8G - T8I;
610
                              T8K = T8E + T8J;
611
                              TdU = T8E - T8J;
612
                         }
613
                    }
614
                    {
615
                         E T1G, T8O, T25, T8Y, T1O, T8Q, T1X, T8W;
616
                         {
617
                              E T1D, T1E, T1F, T8N;
618
                              T1D = ri[WS(rs, 4)];
619
                              T1E = T7 * T1D;
620
                              T1F = ii[WS(rs, 4)];
621
                              T8N = T7 * T1F;
622
                              T1G = FMA(Tb, T1F, T1E);
623
                              T8O = FNMS(Tb, T1D, T8N);
624
                         }
625
                         {
626
                              E T20, T21, T24, T8X;
627
                              T20 = ri[WS(rs, 52)];
628
                              T21 = T1Z * T20;
629
                              T24 = ii[WS(rs, 52)];
630
                              T8X = T1Z * T24;
631
                              T25 = FMA(T23, T24, T21);
632
                              T8Y = FNMS(T23, T20, T8X);
633
                         }
634
                         {
635
                              E T1J, T1K, T1N, T8P;
636
                              T1J = ri[WS(rs, 36)];
637
                              T1K = T1I * T1J;
638
                              T1N = ii[WS(rs, 36)];
639
                              T8P = T1I * T1N;
640
                              T1O = FMA(T1M, T1N, T1K);
641
                              T8Q = FNMS(T1M, T1J, T8P);
642
                         }
643
                         {
644
                              E T1S, T1T, T1W, T8V;
645
                              T1S = ri[WS(rs, 20)];
646
                              T1T = T1R * T1S;
647
                              T1W = ii[WS(rs, 20)];
648
                              T8V = T1R * T1W;
649
                              T1X = FMA(T1V, T1W, T1T);
650
                              T8W = FNMS(T1V, T1S, T8V);
651
                         }
652
                         {
653
                              E T1P, T26, Tga, Tgb;
654
                              T1P = T1G + T1O;
655
                              T26 = T1X + T25;
656
                              T27 = T1P + T26;
657
                              Tg9 = T1P - T26;
658
                              Tga = T8O + T8Q;
659
                              Tgb = T8W + T8Y;
660
                              Tgc = Tga - Tgb;
661
                              TiJ = Tga + Tgb;
662
                         }
663
                         {
664
                              E T8R, T8S, T8U, T8Z;
665
                              T8R = T8O - T8Q;
666
                              T8S = T1X - T25;
667
                              T8T = T8R + T8S;
668
                              TdY = T8R - T8S;
669
                              T8U = T1G - T1O;
670
                              T8Z = T8W - T8Y;
671
                              T90 = T8U - T8Z;
672
                              TdX = T8U + T8Z;
673
                         }
674
                    }
675
                    {
676
                         E T3T, T9T, T4i, Taj, T3Z, T9V, T46, Tah;
677
                         {
678
                              E T3O, T3P, T3S, T9S;
679
                              T3O = ri[WS(rs, 62)];
680
                              T3P = T3N * T3O;
681
                              T3S = ii[WS(rs, 62)];
682
                              T9S = T3N * T3S;
683
                              T3T = FMA(T3R, T3S, T3P);
684
                              T9T = FNMS(T3R, T3O, T9S);
685
                         }
686
                         {
687
                              E T4d, T4e, T4h, Tai;
688
                              T4d = ri[WS(rs, 46)];
689
                              T4e = T4c * T4d;
690
                              T4h = ii[WS(rs, 46)];
691
                              Tai = T4c * T4h;
692
                              T4i = FMA(T4g, T4h, T4e);
693
                              Taj = FNMS(T4g, T4d, Tai);
694
                         }
695
                         {
696
                              E T3V, T3W, T3Y, T9U;
697
                              T3V = ri[WS(rs, 30)];
698
                              T3W = T3U * T3V;
699
                              T3Y = ii[WS(rs, 30)];
700
                              T9U = T3U * T3Y;
701
                              T3Z = FMA(T3X, T3Y, T3W);
702
                              T9V = FNMS(T3X, T3V, T9U);
703
                         }
704
                         {
705
                              E T42, T43, T45, Tag;
706
                              T42 = ri[WS(rs, 14)];
707
                              T43 = T41 * T42;
708
                              T45 = ii[WS(rs, 14)];
709
                              Tag = T41 * T45;
710
                              T46 = FMA(T44, T45, T43);
711
                              Tah = FNMS(T44, T42, Tag);
712
                         }
713
                         {
714
                              E T40, T4j, Tgw, Tgx;
715
                              T40 = T3T + T3Z;
716
                              T4j = T46 + T4i;
717
                              T4k = T40 + T4j;
718
                              TgB = T40 - T4j;
719
                              Tgw = T9T + T9V;
720
                              Tgx = Tah + Taj;
721
                              Tgy = Tgw - Tgx;
722
                              TiT = Tgw + Tgx;
723
                         }
724
                         {
725
                              E T9W, T9X, Taf, Tak;
726
                              T9W = T9T - T9V;
727
                              T9X = T46 - T4i;
728
                              T9Y = T9W + T9X;
729
                              Tec = T9W - T9X;
730
                              Taf = T3T - T3Z;
731
                              Tak = Tah - Taj;
732
                              Tal = Taf - Tak;
733
                              Tef = Taf + Tak;
734
                         }
735
                    }
736
                    {
737
                         E T4S, Tau, T5b, Tbq, T4Y, Taw, T53, Tbo;
738
                         {
739
                              E T4P, T4Q, T4R, Tat;
740
                              T4P = ri[WS(rs, 1)];
741
                              T4Q = T2 * T4P;
742
                              T4R = ii[WS(rs, 1)];
743
                              Tat = T2 * T4R;
744
                              T4S = FMA(T5, T4R, T4Q);
745
                              Tau = FNMS(T5, T4P, Tat);
746
                         }
747
                         {
748
                              E T56, T57, T5a, Tbp;
749
                              T56 = ri[WS(rs, 49)];
750
                              T57 = T55 * T56;
751
                              T5a = ii[WS(rs, 49)];
752
                              Tbp = T55 * T5a;
753
                              T5b = FMA(T59, T5a, T57);
754
                              Tbq = FNMS(T59, T56, Tbp);
755
                         }
756
                         {
757
                              E T4U, T4V, T4X, Tav;
758
                              T4U = ri[WS(rs, 33)];
759
                              T4V = T4T * T4U;
760
                              T4X = ii[WS(rs, 33)];
761
                              Tav = T4T * T4X;
762
                              T4Y = FMA(T4W, T4X, T4V);
763
                              Taw = FNMS(T4W, T4U, Tav);
764
                         }
765
                         {
766
                              E T50, T51, T52, Tbn;
767
                              T50 = ri[WS(rs, 17)];
768
                              T51 = T48 * T50;
769
                              T52 = ii[WS(rs, 17)];
770
                              Tbn = T48 * T52;
771
                              T53 = FMA(T4b, T52, T51);
772
                              Tbo = FNMS(T4b, T50, Tbn);
773
                         }
774
                         {
775
                              E T4Z, T5c, TgJ, TgK;
776
                              T4Z = T4S + T4Y;
777
                              T5c = T53 + T5b;
778
                              T5d = T4Z + T5c;
779
                              Th0 = T4Z - T5c;
780
                              TgJ = Tau + Taw;
781
                              TgK = Tbo + Tbq;
782
                              TgL = TgJ - TgK;
783
                              TiZ = TgJ + TgK;
784
                         }
785
                         {
786
                              E Tax, Tay, Tbm, Tbr;
787
                              Tax = Tau - Taw;
788
                              Tay = T53 - T5b;
789
                              Taz = Tax + Tay;
790
                              Tel = Tax - Tay;
791
                              Tbm = T4S - T4Y;
792
                              Tbr = Tbo - Tbq;
793
                              Tbs = Tbm - Tbr;
794
                              Tew = Tbm + Tbr;
795
                         }
796
                    }
797
                    {
798
                         E T3f, T9s, T3I, T9B, T3t, T9u, T3C, T9z;
799
                         {
800
                              E T3b, T3c, T3e, T9r;
801
                              T3b = ri[WS(rs, 10)];
802
                              T3c = T3a * T3b;
803
                              T3e = ii[WS(rs, 10)];
804
                              T9r = T3a * T3e;
805
                              T3f = FMA(T3d, T3e, T3c);
806
                              T9s = FNMS(T3d, T3b, T9r);
807
                         }
808
                         {
809
                              E T3E, T3F, T3H, T9A;
810
                              T3E = ri[WS(rs, 26)];
811
                              T3F = T3D * T3E;
812
                              T3H = ii[WS(rs, 26)];
813
                              T9A = T3D * T3H;
814
                              T3I = FMA(T3G, T3H, T3F);
815
                              T9B = FNMS(T3G, T3E, T9A);
816
                         }
817
                         {
818
                              E T3o, T3p, T3s, T9t;
819
                              T3o = ri[WS(rs, 42)];
820
                              T3p = T3n * T3o;
821
                              T3s = ii[WS(rs, 42)];
822
                              T9t = T3n * T3s;
823
                              T3t = FMA(T3r, T3s, T3p);
824
                              T9u = FNMS(T3r, T3o, T9t);
825
                         }
826
                         {
827
                              E T3x, T3y, T3B, T9y;
828
                              T3x = ri[WS(rs, 58)];
829
                              T3y = T3w * T3x;
830
                              T3B = ii[WS(rs, 58)];
831
                              T9y = T3w * T3B;
832
                              T3C = FMA(T3A, T3B, T3y);
833
                              T9z = FNMS(T3A, T3x, T9y);
834
                         }
835
                         {
836
                              E T3u, T3J, Tgr, Tgs;
837
                              T3u = T3f + T3t;
838
                              T3J = T3C + T3I;
839
                              T3K = T3u + T3J;
840
                              Tgo = T3J - T3u;
841
                              Tgr = T9s + T9u;
842
                              Tgs = T9z + T9B;
843
                              Tgt = Tgr - Tgs;
844
                              TiO = Tgr + Tgs;
845
                              {
846
                                   E T9w, T9O, T9D, T9N;
847
                                   {
848
                                        E T9q, T9v, T9x, T9C;
849
                                        T9q = T3f - T3t;
850
                                        T9v = T9s - T9u;
851
                                        T9w = T9q + T9v;
852
                                        T9O = T9v - T9q;
853
                                        T9x = T3C - T3I;
854
                                        T9C = T9z - T9B;
855
                                        T9D = T9x - T9C;
856
                                        T9N = T9x + T9C;
857
                                   }
858
                                   T9E = T9w - T9D;
859
                                   Te9 = T9w + T9D;
860
                                   T9P = T9N - T9O;
861
                                   Te6 = T9O + T9N;
862
                              }
863
                         }
864
                    }
865
                    {
866
                         E T4o, Ta1, T4J, Taa, T4u, Ta3, T4D, Ta8;
867
                         {
868
                              E T4l, T4m, T4n, Ta0;
869
                              T4l = ri[WS(rs, 6)];
870
                              T4m = T3g * T4l;
871
                              T4n = ii[WS(rs, 6)];
872
                              Ta0 = T3g * T4n;
873
                              T4o = FMA(T3i, T4n, T4m);
874
                              Ta1 = FNMS(T3i, T4l, Ta0);
875
                         }
876
                         {
877
                              E T4F, T4G, T4I, Ta9;
878
                              T4F = ri[WS(rs, 22)];
879
                              T4G = T4E * T4F;
880
                              T4I = ii[WS(rs, 22)];
881
                              Ta9 = T4E * T4I;
882
                              T4J = FMA(T4H, T4I, T4G);
883
                              Taa = FNMS(T4H, T4F, Ta9);
884
                         }
885
                         {
886
                              E T4q, T4r, T4t, Ta2;
887
                              T4q = ri[WS(rs, 38)];
888
                              T4r = T4p * T4q;
889
                              T4t = ii[WS(rs, 38)];
890
                              Ta2 = T4p * T4t;
891
                              T4u = FMA(T4s, T4t, T4r);
892
                              Ta3 = FNMS(T4s, T4q, Ta2);
893
                         }
894
                         {
895
                              E T4y, T4z, T4C, Ta7;
896
                              T4y = ri[WS(rs, 54)];
897
                              T4z = T4x * T4y;
898
                              T4C = ii[WS(rs, 54)];
899
                              Ta7 = T4x * T4C;
900
                              T4D = FMA(T4B, T4C, T4z);
901
                              Ta8 = FNMS(T4B, T4y, Ta7);
902
                         }
903
                         {
904
                              E T4v, T4K, TgC, TgD;
905
                              T4v = T4o + T4u;
906
                              T4K = T4D + T4J;
907
                              T4L = T4v + T4K;
908
                              Tgz = T4K - T4v;
909
                              TgC = Ta1 + Ta3;
910
                              TgD = Ta8 + Taa;
911
                              TgE = TgC - TgD;
912
                              TiU = TgC + TgD;
913
                              {
914
                                   E Ta5, Tan, Tac, Tam;
915
                                   {
916
                                        E T9Z, Ta4, Ta6, Tab;
917
                                        T9Z = T4o - T4u;
918
                                        Ta4 = Ta1 - Ta3;
919
                                        Ta5 = T9Z + Ta4;
920
                                        Tan = Ta4 - T9Z;
921
                                        Ta6 = T4D - T4J;
922
                                        Tab = Ta8 - Taa;
923
                                        Tac = Ta6 - Tab;
924
                                        Tam = Ta6 + Tab;
925
                                   }
926
                                   Tad = Ta5 - Tac;
927
                                   Teg = Ta5 + Tac;
928
                                   Tao = Tam - Tan;
929
                                   Ted = Tan + Tam;
930
                              }
931
                         }
932
                    }
933
                    {
934
                         E T5h, TaC, T5G, TaL, T5p, TaE, T5y, TaJ;
935
                         {
936
                              E T5e, T5f, T5g, TaB;
937
                              T5e = ri[WS(rs, 9)];
938
                              T5f = T8 * T5e;
939
                              T5g = ii[WS(rs, 9)];
940
                              TaB = T8 * T5g;
941
                              T5h = FMA(Tc, T5g, T5f);
942
                              TaC = FNMS(Tc, T5e, TaB);
943
                         }
944
                         {
945
                              E T5B, T5C, T5F, TaK;
946
                              T5B = ri[WS(rs, 25)];
947
                              T5C = T5A * T5B;
948
                              T5F = ii[WS(rs, 25)];
949
                              TaK = T5A * T5F;
950
                              T5G = FMA(T5E, T5F, T5C);
951
                              TaL = FNMS(T5E, T5B, TaK);
952
                         }
953
                         {
954
                              E T5k, T5l, T5o, TaD;
955
                              T5k = ri[WS(rs, 41)];
956
                              T5l = T5j * T5k;
957
                              T5o = ii[WS(rs, 41)];
958
                              TaD = T5j * T5o;
959
                              T5p = FMA(T5n, T5o, T5l);
960
                              TaE = FNMS(T5n, T5k, TaD);
961
                         }
962
                         {
963
                              E T5t, T5u, T5x, TaI;
964
                              T5t = ri[WS(rs, 57)];
965
                              T5u = T5s * T5t;
966
                              T5x = ii[WS(rs, 57)];
967
                              TaI = T5s * T5x;
968
                              T5y = FMA(T5w, T5x, T5u);
969
                              TaJ = FNMS(T5w, T5t, TaI);
970
                         }
971
                         {
972
                              E T5q, T5H, Th1, Th2;
973
                              T5q = T5h + T5p;
974
                              T5H = T5y + T5G;
975
                              T5I = T5q + T5H;
976
                              TgM = T5H - T5q;
977
                              Th1 = TaC + TaE;
978
                              Th2 = TaJ + TaL;
979
                              Th3 = Th1 - Th2;
980
                              Tj0 = Th1 + Th2;
981
                              {
982
                                   E TaG, Tbu, TaN, Tbt;
983
                                   {
984
                                        E TaA, TaF, TaH, TaM;
985
                                        TaA = T5h - T5p;
986
                                        TaF = TaC - TaE;
987
                                        TaG = TaA + TaF;
988
                                        Tbu = TaF - TaA;
989
                                        TaH = T5y - T5G;
990
                                        TaM = TaJ - TaL;
991
                                        TaN = TaH - TaM;
992
                                        Tbt = TaH + TaM;
993
                                   }
994
                                   TaO = TaG - TaN;
995
                                   Tex = TaG + TaN;
996
                                   Tbv = Tbt - Tbu;
997
                                   Tem = Tbu + Tbt;
998
                              }
999
                         }
1000
                    }
1001
                    {
1002
                         E T78, TbL, T7t, TbU, T7e, TbN, T7n, TbS;
1003
                         {
1004
                              E T75, T76, T77, TbK;
1005
                              T75 = ri[WS(rs, 7)];
1006
                              T76 = T1i * T75;
1007
                              T77 = ii[WS(rs, 7)];
1008
                              TbK = T1i * T77;
1009
                              T78 = FMA(T1k, T77, T76);
1010
                              TbL = FNMS(T1k, T75, TbK);
1011
                         }
1012
                         {
1013
                              E T7p, T7q, T7s, TbT;
1014
                              T7p = ri[WS(rs, 23)];
1015
                              T7q = T7o * T7p;
1016
                              T7s = ii[WS(rs, 23)];
1017
                              TbT = T7o * T7s;
1018
                              T7t = FMA(T7r, T7s, T7q);
1019
                              TbU = FNMS(T7r, T7p, TbT);
1020
                         }
1021
                         {
1022
                              E T7a, T7b, T7d, TbM;
1023
                              T7a = ri[WS(rs, 39)];
1024
                              T7b = T79 * T7a;
1025
                              T7d = ii[WS(rs, 39)];
1026
                              TbM = T79 * T7d;
1027
                              T7e = FMA(T7c, T7d, T7b);
1028
                              TbN = FNMS(T7c, T7a, TbM);
1029
                         }
1030
                         {
1031
                              E T7i, T7j, T7m, TbR;
1032
                              T7i = ri[WS(rs, 55)];
1033
                              T7j = T7h * T7i;
1034
                              T7m = ii[WS(rs, 55)];
1035
                              TbR = T7h * T7m;
1036
                              T7n = FMA(T7l, T7m, T7j);
1037
                              TbS = FNMS(T7l, T7i, TbR);
1038
                         }
1039
                         {
1040
                              E T7f, T7u, Ths, Tht;
1041
                              T7f = T78 + T7e;
1042
                              T7u = T7n + T7t;
1043
                              T7v = T7f + T7u;
1044
                              Thd = T7u - T7f;
1045
                              Ths = TbL + TbN;
1046
                              Tht = TbS + TbU;
1047
                              Thu = Ths - Tht;
1048
                              Tjb = Ths + Tht;
1049
                              {
1050
                                   E TbP, TcD, TbW, TcC;
1051
                                   {
1052
                                        E TbJ, TbO, TbQ, TbV;
1053
                                        TbJ = T78 - T7e;
1054
                                        TbO = TbL - TbN;
1055
                                        TbP = TbJ + TbO;
1056
                                        TcD = TbO - TbJ;
1057
                                        TbQ = T7n - T7t;
1058
                                        TbV = TbS - TbU;
1059
                                        TbW = TbQ - TbV;
1060
                                        TcC = TbQ + TbV;
1061
                                   }
1062
                                   TbX = TbP - TbW;
1063
                                   TeQ = TbP + TbW;
1064
                                   TcE = TcC - TcD;
1065
                                   TeF = TcD + TcC;
1066
                              }
1067
                         }
1068
                    }
1069
                    {
1070
                         E T5N, Tbd, T66, Tb9, T5T, Tbf, T5Y, Tb7;
1071
                         {
1072
                              E T5K, T5L, T5M, Tbc;
1073
                              T5K = ri[WS(rs, 5)];
1074
                              T5L = Td * T5K;
1075
                              T5M = ii[WS(rs, 5)];
1076
                              Tbc = Td * T5M;
1077
                              T5N = FMA(Th, T5M, T5L);
1078
                              Tbd = FNMS(Th, T5K, Tbc);
1079
                         }
1080
                         {
1081
                              E T61, T62, T65, Tb8;
1082
                              T61 = ri[WS(rs, 53)];
1083
                              T62 = T60 * T61;
1084
                              T65 = ii[WS(rs, 53)];
1085
                              Tb8 = T60 * T65;
1086
                              T66 = FMA(T64, T65, T62);
1087
                              Tb9 = FNMS(T64, T61, Tb8);
1088
                         }
1089
                         {
1090
                              E T5P, T5Q, T5S, Tbe;
1091
                              T5P = ri[WS(rs, 37)];
1092
                              T5Q = T5O * T5P;
1093
                              T5S = ii[WS(rs, 37)];
1094
                              Tbe = T5O * T5S;
1095
                              T5T = FMA(T5R, T5S, T5Q);
1096
                              Tbf = FNMS(T5R, T5P, Tbe);
1097
                         }
1098
                         {
1099
                              E T5V, T5W, T5X, Tb6;
1100
                              T5V = ri[WS(rs, 21)];
1101
                              T5W = T3j * T5V;
1102
                              T5X = ii[WS(rs, 21)];
1103
                              Tb6 = T3j * T5X;
1104
                              T5Y = FMA(T3m, T5X, T5W);
1105
                              Tb7 = FNMS(T3m, T5V, Tb6);
1106
                         }
1107
                         {
1108
                              E T5U, T67, TgR, TgO, TgP, TgQ;
1109
                              T5U = T5N + T5T;
1110
                              T67 = T5Y + T66;
1111
                              TgR = T5U - T67;
1112
                              TgO = Tbd + Tbf;
1113
                              TgP = Tb7 + Tb9;
1114
                              TgQ = TgO - TgP;
1115
                              T68 = T5U + T67;
1116
                              Tj5 = TgO + TgP;
1117
                              TgS = TgQ - TgR;
1118
                              Th5 = TgR + TgQ;
1119
                         }
1120
                         {
1121
                              E Tbb, Tep, Tbi, Teo;
1122
                              {
1123
                                   E Tb5, Tba, Tbg, Tbh;
1124
                                   Tb5 = T5N - T5T;
1125
                                   Tba = Tb7 - Tb9;
1126
                                   Tbb = Tb5 - Tba;
1127
                                   Tep = Tb5 + Tba;
1128
                                   Tbg = Tbd - Tbf;
1129
                                   Tbh = T5Y - T66;
1130
                                   Tbi = Tbg + Tbh;
1131
                                   Teo = Tbg - Tbh;
1132
                              }
1133
                              Tbj = FNMS(KP414213562, Tbi, Tbb);
1134
                              Tez = FMA(KP414213562, Teo, Tep);
1135
                              Tbx = FMA(KP414213562, Tbb, Tbi);
1136
                              Teq = FNMS(KP414213562, Tep, Teo);
1137
                         }
1138
                    }
1139
                    {
1140
                         E T6g, TaY, T6z, TaU, T6m, Tb0, T6r, TaS;
1141
                         {
1142
                              E T6b, T6c, T6f, TaX;
1143
                              T6b = ri[WS(rs, 61)];
1144
                              T6c = T6a * T6b;
1145
                              T6f = ii[WS(rs, 61)];
1146
                              TaX = T6a * T6f;
1147
                              T6g = FMA(T6e, T6f, T6c);
1148
                              TaY = FNMS(T6e, T6b, TaX);
1149
                         }
1150
                         {
1151
                              E T6u, T6v, T6y, TaT;
1152
                              T6u = ri[WS(rs, 45)];
1153
                              T6v = T6t * T6u;
1154
                              T6y = ii[WS(rs, 45)];
1155
                              TaT = T6t * T6y;
1156
                              T6z = FMA(T6x, T6y, T6v);
1157
                              TaU = FNMS(T6x, T6u, TaT);
1158
                         }
1159
                         {
1160
                              E T6i, T6j, T6l, TaZ;
1161
                              T6i = ri[WS(rs, 29)];
1162
                              T6j = T6h * T6i;
1163
                              T6l = ii[WS(rs, 29)];
1164
                              TaZ = T6h * T6l;
1165
                              T6m = FMA(T6k, T6l, T6j);
1166
                              Tb0 = FNMS(T6k, T6i, TaZ);
1167
                         }
1168
                         {
1169
                              E T6o, T6p, T6q, TaR;
1170
                              T6o = ri[WS(rs, 13)];
1171
                              T6p = T17 * T6o;
1172
                              T6q = ii[WS(rs, 13)];
1173
                              TaR = T17 * T6q;
1174
                              T6r = FMA(T19, T6q, T6p);
1175
                              TaS = FNMS(T19, T6o, TaR);
1176
                         }
1177
                         {
1178
                              E T6n, T6A, TgT, TgU, TgV, TgW;
1179
                              T6n = T6g + T6m;
1180
                              T6A = T6r + T6z;
1181
                              TgT = T6n - T6A;
1182
                              TgU = TaY + Tb0;
1183
                              TgV = TaS + TaU;
1184
                              TgW = TgU - TgV;
1185
                              T6B = T6n + T6A;
1186
                              Tj6 = TgU + TgV;
1187
                              TgX = TgT + TgW;
1188
                              Th6 = TgT - TgW;
1189
                         }
1190
                         {
1191
                              E TaW, Tes, Tb3, Ter;
1192
                              {
1193
                                   E TaQ, TaV, Tb1, Tb2;
1194
                                   TaQ = T6g - T6m;
1195
                                   TaV = TaS - TaU;
1196
                                   TaW = TaQ - TaV;
1197
                                   Tes = TaQ + TaV;
1198
                                   Tb1 = TaY - Tb0;
1199
                                   Tb2 = T6r - T6z;
1200
                                   Tb3 = Tb1 + Tb2;
1201
                                   Ter = Tb1 - Tb2;
1202
                              }
1203
                              Tb4 = FMA(KP414213562, Tb3, TaW);
1204
                              TeA = FNMS(KP414213562, Ter, Tes);
1205
                              Tby = FNMS(KP414213562, TaW, Tb3);
1206
                              Tet = FMA(KP414213562, Tes, Ter);
1207
                         }
1208
                    }
1209
                    {
1210
                         E T7A, Tcm, T7T, Tci, T7G, Tco, T7L, Tcg;
1211
                         {
1212
                              E T7x, T7y, T7z, Tcl;
1213
                              T7x = ri[WS(rs, 3)];
1214
                              T7y = T3 * T7x;
1215
                              T7z = ii[WS(rs, 3)];
1216
                              Tcl = T3 * T7z;
1217
                              T7A = FMA(T6, T7z, T7y);
1218
                              Tcm = FNMS(T6, T7x, Tcl);
1219
                         }
1220
                         {
1221
                              E T7O, T7P, T7S, Tch;
1222
                              T7O = ri[WS(rs, 51)];
1223
                              T7P = T7N * T7O;
1224
                              T7S = ii[WS(rs, 51)];
1225
                              Tch = T7N * T7S;
1226
                              T7T = FMA(T7R, T7S, T7P);
1227
                              Tci = FNMS(T7R, T7O, Tch);
1228
                         }
1229
                         {
1230
                              E T7C, T7D, T7F, Tcn;
1231
                              T7C = ri[WS(rs, 35)];
1232
                              T7D = T7B * T7C;
1233
                              T7F = ii[WS(rs, 35)];
1234
                              Tcn = T7B * T7F;
1235
                              T7G = FMA(T7E, T7F, T7D);
1236
                              Tco = FNMS(T7E, T7C, Tcn);
1237
                         }
1238
                         {
1239
                              E T7I, T7J, T7K, Tcf;
1240
                              T7I = ri[WS(rs, 19)];
1241
                              T7J = T2u * T7I;
1242
                              T7K = ii[WS(rs, 19)];
1243
                              Tcf = T2u * T7K;
1244
                              T7L = FMA(T2x, T7K, T7J);
1245
                              Tcg = FNMS(T2x, T7I, Tcf);
1246
                         }
1247
                         {
1248
                              E T7H, T7U, Thi, Thf, Thg, Thh;
1249
                              T7H = T7A + T7G;
1250
                              T7U = T7L + T7T;
1251
                              Thi = T7H - T7U;
1252
                              Thf = Tcm + Tco;
1253
                              Thg = Tcg + Tci;
1254
                              Thh = Thf - Thg;
1255
                              T7V = T7H + T7U;
1256
                              Tjg = Thf + Thg;
1257
                              Thj = Thh - Thi;
1258
                              Thw = Thi + Thh;
1259
                         }
1260
                         {
1261
                              E Tck, TeI, Tcr, TeH;
1262
                              {
1263
                                   E Tce, Tcj, Tcp, Tcq;
1264
                                   Tce = T7A - T7G;
1265
                                   Tcj = Tcg - Tci;
1266
                                   Tck = Tce - Tcj;
1267
                                   TeI = Tce + Tcj;
1268
                                   Tcp = Tcm - Tco;
1269
                                   Tcq = T7L - T7T;
1270
                                   Tcr = Tcp + Tcq;
1271
                                   TeH = Tcp - Tcq;
1272
                              }
1273
                              Tcs = FNMS(KP414213562, Tcr, Tck);
1274
                              TeS = FMA(KP414213562, TeH, TeI);
1275
                              TcG = FMA(KP414213562, Tck, Tcr);
1276
                              TeJ = FNMS(KP414213562, TeI, TeH);
1277
                         }
1278
                    }
1279
                    {
1280
                         E T83, Tc7, T8k, Tc3, T87, Tc9, T8c, Tc1;
1281
                         {
1282
                              E T7Y, T7Z, T82, Tc6;
1283
                              T7Y = ri[WS(rs, 59)];
1284
                              T7Z = T7X * T7Y;
1285
                              T82 = ii[WS(rs, 59)];
1286
                              Tc6 = T7X * T82;
1287
                              T83 = FMA(T81, T82, T7Z);
1288
                              Tc7 = FNMS(T81, T7Y, Tc6);
1289
                         }
1290
                         {
1291
                              E T8f, T8g, T8j, Tc2;
1292
                              T8f = ri[WS(rs, 43)];
1293
                              T8g = T8e * T8f;
1294
                              T8j = ii[WS(rs, 43)];
1295
                              Tc2 = T8e * T8j;
1296
                              T8k = FMA(T8i, T8j, T8g);
1297
                              Tc3 = FNMS(T8i, T8f, Tc2);
1298
                         }
1299
                         {
1300
                              E T84, T85, T86, Tc8;
1301
                              T84 = ri[WS(rs, 27)];
1302
                              T85 = Te * T84;
1303
                              T86 = ii[WS(rs, 27)];
1304
                              Tc8 = Te * T86;
1305
                              T87 = FMA(Ti, T86, T85);
1306
                              Tc9 = FNMS(Ti, T84, Tc8);
1307
                         }
1308
                         {
1309
                              E T89, T8a, T8b, Tc0;
1310
                              T89 = ri[WS(rs, 11)];
1311
                              T8a = Tu * T89;
1312
                              T8b = ii[WS(rs, 11)];
1313
                              Tc0 = Tu * T8b;
1314
                              T8c = FMA(Tx, T8b, T8a);
1315
                              Tc1 = FNMS(Tx, T89, Tc0);
1316
                         }
1317
                         {
1318
                              E T88, T8l, Thk, Thl, Thm, Thn;
1319
                              T88 = T83 + T87;
1320
                              T8l = T8c + T8k;
1321
                              Thk = T88 - T8l;
1322
                              Thl = Tc7 + Tc9;
1323
                              Thm = Tc1 + Tc3;
1324
                              Thn = Thl - Thm;
1325
                              T8m = T88 + T8l;
1326
                              Tjh = Thl + Thm;
1327
                              Tho = Thk + Thn;
1328
                              Thx = Thk - Thn;
1329
                         }
1330
                         {
1331
                              E Tc5, TeL, Tcc, TeK;
1332
                              {
1333
                                   E TbZ, Tc4, Tca, Tcb;
1334
                                   TbZ = T83 - T87;
1335
                                   Tc4 = Tc1 - Tc3;
1336
                                   Tc5 = TbZ - Tc4;
1337
                                   TeL = TbZ + Tc4;
1338
                                   Tca = Tc7 - Tc9;
1339
                                   Tcb = T8c - T8k;
1340
                                   Tcc = Tca + Tcb;
1341
                                   TeK = Tca - Tcb;
1342
                              }
1343
                              Tcd = FMA(KP414213562, Tcc, Tc5);
1344
                              TeT = FNMS(KP414213562, TeK, TeL);
1345
                              TcH = FNMS(KP414213562, Tc5, Tcc);
1346
                              TeM = FMA(KP414213562, TeL, TeK);
1347
                         }
1348
                    }
1349
                    {
1350
                         E T2I, TjG, T4N, Tkj, Tkf, Tkk, TjJ, Tk5, T8o, Tk2, TjU, TjY, T6D, Tk1, TjP;
1351
                         E TjX;
1352
                         {
1353
                              E T1C, T2H, TjH, TjI;
1354
                              T1C = TY + T1B;
1355
                              T2H = T27 + T2G;
1356
                              T2I = T1C + T2H;
1357
                              TjG = T1C - T2H;
1358
                              {
1359
                                   E T3L, T4M, Tk6, Tke;
1360
                                   T3L = T39 + T3K;
1361
                                   T4M = T4k + T4L;
1362
                                   T4N = T3L + T4M;
1363
                                   Tkj = T4M - T3L;
1364
                                   Tk6 = TiJ + TiK;
1365
                                   Tke = Tk7 + Tkd;
1366
                                   Tkf = Tk6 + Tke;
1367
                                   Tkk = Tke - Tk6;
1368
                              }
1369
                              TjH = TiN + TiO;
1370
                              TjI = TiT + TiU;
1371
                              TjJ = TjH - TjI;
1372
                              Tk5 = TjH + TjI;
1373
                              {
1374
                                   E T7w, T8n, TjQ, TjR, TjS, TjT;
1375
                                   T7w = T74 + T7v;
1376
                                   T8n = T7V + T8m;
1377
                                   TjQ = T7w - T8n;
1378
                                   TjR = Tja + Tjb;
1379
                                   TjS = Tjg + Tjh;
1380
                                   TjT = TjR - TjS;
1381
                                   T8o = T7w + T8n;
1382
                                   Tk2 = TjR + TjS;
1383
                                   TjU = TjQ - TjT;
1384
                                   TjY = TjQ + TjT;
1385
                              }
1386
                              {
1387
                                   E T5J, T6C, TjL, TjM, TjN, TjO;
1388
                                   T5J = T5d + T5I;
1389
                                   T6C = T68 + T6B;
1390
                                   TjL = T5J - T6C;
1391
                                   TjM = TiZ + Tj0;
1392
                                   TjN = Tj5 + Tj6;
1393
                                   TjO = TjM - TjN;
1394
                                   T6D = T5J + T6C;
1395
                                   Tk1 = TjM + TjN;
1396
                                   TjP = TjL + TjO;
1397
                                   TjX = TjO - TjL;
1398
                              }
1399
                         }
1400
                         {
1401
                              E T4O, T8p, Tk4, Tkg;
1402
                              T4O = T2I + T4N;
1403
                              T8p = T6D + T8o;
1404
                              ri[WS(rs, 32)] = T4O - T8p;
1405
                              ri[0] = T4O + T8p;
1406
                              Tk4 = Tk1 + Tk2;
1407
                              Tkg = Tk5 + Tkf;
1408
                              ii[0] = Tk4 + Tkg;
1409
                              ii[WS(rs, 32)] = Tkg - Tk4;
1410
                         }
1411
                         {
1412
                              E TjK, TjV, Tkl, Tkm;
1413
                              TjK = TjG + TjJ;
1414
                              TjV = TjP + TjU;
1415
                              ri[WS(rs, 40)] = FNMS(KP707106781, TjV, TjK);
1416
                              ri[WS(rs, 8)] = FMA(KP707106781, TjV, TjK);
1417
                              Tkl = Tkj + Tkk;
1418
                              Tkm = TjX + TjY;
1419
                              ii[WS(rs, 8)] = FMA(KP707106781, Tkm, Tkl);
1420
                              ii[WS(rs, 40)] = FNMS(KP707106781, Tkm, Tkl);
1421
                         }
1422
                         {
1423
                              E TjW, TjZ, Tkn, Tko;
1424
                              TjW = TjG - TjJ;
1425
                              TjZ = TjX - TjY;
1426
                              ri[WS(rs, 56)] = FNMS(KP707106781, TjZ, TjW);
1427
                              ri[WS(rs, 24)] = FMA(KP707106781, TjZ, TjW);
1428
                              Tkn = Tkk - Tkj;
1429
                              Tko = TjU - TjP;
1430
                              ii[WS(rs, 24)] = FMA(KP707106781, Tko, Tkn);
1431
                              ii[WS(rs, 56)] = FNMS(KP707106781, Tko, Tkn);
1432
                         }
1433
                         {
1434
                              E Tk0, Tk3, Tkh, Tki;
1435
                              Tk0 = T2I - T4N;
1436
                              Tk3 = Tk1 - Tk2;
1437
                              ri[WS(rs, 48)] = Tk0 - Tk3;
1438
                              ri[WS(rs, 16)] = Tk0 + Tk3;
1439
                              Tkh = T8o - T6D;
1440
                              Tki = Tkf - Tk5;
1441
                              ii[WS(rs, 16)] = Tkh + Tki;
1442
                              ii[WS(rs, 48)] = Tki - Tkh;
1443
                         }
1444
                    }
1445
                    {
1446
                         E TiM, Tjq, Tkr, Tkx, TiX, Tky, Tjt, Tks, Tj9, TjD, Tjn, Tjx, Tjk, TjE, Tjo;
1447
                         E TjA;
1448
                         {
1449
                              E TiI, TiL, Tkp, Tkq;
1450
                              TiI = TY - T1B;
1451
                              TiL = TiJ - TiK;
1452
                              TiM = TiI - TiL;
1453
                              Tjq = TiI + TiL;
1454
                              Tkp = T2G - T27;
1455
                              Tkq = Tkd - Tk7;
1456
                              Tkr = Tkp + Tkq;
1457
                              Tkx = Tkq - Tkp;
1458
                         }
1459
                         {
1460
                              E TiR, Tjr, TiW, Tjs;
1461
                              {
1462
                                   E TiP, TiQ, TiS, TiV;
1463
                                   TiP = TiN - TiO;
1464
                                   TiQ = T39 - T3K;
1465
                                   TiR = TiP - TiQ;
1466
                                   Tjr = TiQ + TiP;
1467
                                   TiS = T4k - T4L;
1468
                                   TiV = TiT - TiU;
1469
                                   TiW = TiS + TiV;
1470
                                   Tjs = TiS - TiV;
1471
                              }
1472
                              TiX = TiR - TiW;
1473
                              Tky = Tjs - Tjr;
1474
                              Tjt = Tjr + Tjs;
1475
                              Tks = TiR + TiW;
1476
                         }
1477
                         {
1478
                              E Tj3, Tjw, Tj8, Tjv;
1479
                              {
1480
                                   E Tj1, Tj2, Tj4, Tj7;
1481
                                   Tj1 = TiZ - Tj0;
1482
                                   Tj2 = T6B - T68;
1483
                                   Tj3 = Tj1 - Tj2;
1484
                                   Tjw = Tj1 + Tj2;
1485
                                   Tj4 = T5d - T5I;
1486
                                   Tj7 = Tj5 - Tj6;
1487
                                   Tj8 = Tj4 - Tj7;
1488
                                   Tjv = Tj4 + Tj7;
1489
                              }
1490
                              Tj9 = FMA(KP414213562, Tj8, Tj3);
1491
                              TjD = FNMS(KP414213562, Tjv, Tjw);
1492
                              Tjn = FNMS(KP414213562, Tj3, Tj8);
1493
                              Tjx = FMA(KP414213562, Tjw, Tjv);
1494
                         }
1495
                         {
1496
                              E Tje, Tjz, Tjj, Tjy;
1497
                              {
1498
                                   E Tjc, Tjd, Tjf, Tji;
1499
                                   Tjc = Tja - Tjb;
1500
                                   Tjd = T8m - T7V;
1501
                                   Tje = Tjc - Tjd;
1502
                                   Tjz = Tjc + Tjd;
1503
                                   Tjf = T74 - T7v;
1504
                                   Tji = Tjg - Tjh;
1505
                                   Tjj = Tjf - Tji;
1506
                                   Tjy = Tjf + Tji;
1507
                              }
1508
                              Tjk = FNMS(KP414213562, Tjj, Tje);
1509
                              TjE = FMA(KP414213562, Tjy, Tjz);
1510
                              Tjo = FMA(KP414213562, Tje, Tjj);
1511
                              TjA = FNMS(KP414213562, Tjz, Tjy);
1512
                         }
1513
                         {
1514
                              E TiY, Tjl, Tkz, TkA;
1515
                              TiY = FMA(KP707106781, TiX, TiM);
1516
                              Tjl = Tj9 - Tjk;
1517
                              ri[WS(rs, 44)] = FNMS(KP923879532, Tjl, TiY);
1518
                              ri[WS(rs, 12)] = FMA(KP923879532, Tjl, TiY);
1519
                              Tkz = FMA(KP707106781, Tky, Tkx);
1520
                              TkA = Tjo - Tjn;
1521
                              ii[WS(rs, 12)] = FMA(KP923879532, TkA, Tkz);
1522
                              ii[WS(rs, 44)] = FNMS(KP923879532, TkA, Tkz);
1523
                         }
1524
                         {
1525
                              E Tjm, Tjp, TkB, TkC;
1526
                              Tjm = FNMS(KP707106781, TiX, TiM);
1527
                              Tjp = Tjn + Tjo;
1528
                              ri[WS(rs, 28)] = FNMS(KP923879532, Tjp, Tjm);
1529
                              ri[WS(rs, 60)] = FMA(KP923879532, Tjp, Tjm);
1530
                              TkB = FNMS(KP707106781, Tky, Tkx);
1531
                              TkC = Tj9 + Tjk;
1532
                              ii[WS(rs, 28)] = FNMS(KP923879532, TkC, TkB);
1533
                              ii[WS(rs, 60)] = FMA(KP923879532, TkC, TkB);
1534
                         }
1535
                         {
1536
                              E Tju, TjB, Tkt, Tku;
1537
                              Tju = FMA(KP707106781, Tjt, Tjq);
1538
                              TjB = Tjx + TjA;
1539
                              ri[WS(rs, 36)] = FNMS(KP923879532, TjB, Tju);
1540
                              ri[WS(rs, 4)] = FMA(KP923879532, TjB, Tju);
1541
                              Tkt = FMA(KP707106781, Tks, Tkr);
1542
                              Tku = TjD + TjE;
1543
                              ii[WS(rs, 4)] = FMA(KP923879532, Tku, Tkt);
1544
                              ii[WS(rs, 36)] = FNMS(KP923879532, Tku, Tkt);
1545
                         }
1546
                         {
1547
                              E TjC, TjF, Tkv, Tkw;
1548
                              TjC = FNMS(KP707106781, Tjt, Tjq);
1549
                              TjF = TjD - TjE;
1550
                              ri[WS(rs, 52)] = FNMS(KP923879532, TjF, TjC);
1551
                              ri[WS(rs, 20)] = FMA(KP923879532, TjF, TjC);
1552
                              Tkv = FNMS(KP707106781, Tks, Tkr);
1553
                              Tkw = TjA - Tjx;
1554
                              ii[WS(rs, 20)] = FMA(KP923879532, Tkw, Tkv);
1555
                              ii[WS(rs, 52)] = FNMS(KP923879532, Tkw, Tkv);
1556
                         }
1557
                    }
1558
                    {
1559
                         E Tgk, Tl1, ThG, TkV, Ti0, TkN, Tis, TkH, TgH, TkO, ThJ, TkI, Tim, TiG, Tiq;
1560
                         E TiC, Th9, ThT, ThD, ThN, Ti7, Tl2, Tiv, TkW, Tif, TiF, Tip, Tiz, ThA, ThU;
1561
                         E ThE, ThQ;
1562
                         {
1563
                              E Tg8, TkT, Tgj, TkU, Tgd, Tgi;
1564
                              Tg8 = Tg4 + Tg7;
1565
                              TkT = TkE - TkD;
1566
                              Tgd = Tg9 + Tgc;
1567
                              Tgi = Tge - Tgh;
1568
                              Tgj = Tgd + Tgi;
1569
                              TkU = Tgi - Tgd;
1570
                              Tgk = FNMS(KP707106781, Tgj, Tg8);
1571
                              Tl1 = FNMS(KP707106781, TkU, TkT);
1572
                              ThG = FMA(KP707106781, Tgj, Tg8);
1573
                              TkV = FMA(KP707106781, TkU, TkT);
1574
                         }
1575
                         {
1576
                              E ThW, TkF, ThZ, TkG, ThX, ThY;
1577
                              ThW = Tg4 - Tg7;
1578
                              TkF = TkD + TkE;
1579
                              ThX = Tgc - Tg9;
1580
                              ThY = Tge + Tgh;
1581
                              ThZ = ThX - ThY;
1582
                              TkG = ThX + ThY;
1583
                              Ti0 = FMA(KP707106781, ThZ, ThW);
1584
                              TkN = FNMS(KP707106781, TkG, TkF);
1585
                              Tis = FNMS(KP707106781, ThZ, ThW);
1586
                              TkH = FMA(KP707106781, TkG, TkF);
1587
                         }
1588
                         {
1589
                              E Tgv, ThH, TgG, ThI;
1590
                              {
1591
                                   E Tgp, Tgu, TgA, TgF;
1592
                                   Tgp = Tgn + Tgo;
1593
                                   Tgu = Tgq + Tgt;
1594
                                   Tgv = FNMS(KP414213562, Tgu, Tgp);
1595
                                   ThH = FMA(KP414213562, Tgp, Tgu);
1596
                                   TgA = Tgy + Tgz;
1597
                                   TgF = TgB + TgE;
1598
                                   TgG = FMA(KP414213562, TgF, TgA);
1599
                                   ThI = FNMS(KP414213562, TgA, TgF);
1600
                              }
1601
                              TgH = Tgv - TgG;
1602
                              TkO = ThI - ThH;
1603
                              ThJ = ThH + ThI;
1604
                              TkI = Tgv + TgG;
1605
                         }
1606
                         {
1607
                              E Tii, TiB, Til, TiA;
1608
                              {
1609
                                   E Tig, Tih, Tij, Tik;
1610
                                   Tig = Thr - Thu;
1611
                                   Tih = Tho - Thj;
1612
                                   Tii = FNMS(KP707106781, Tih, Tig);
1613
                                   TiB = FMA(KP707106781, Tih, Tig);
1614
                                   Tij = Thc - Thd;
1615
                                   Tik = Thw - Thx;
1616
                                   Til = FNMS(KP707106781, Tik, Tij);
1617
                                   TiA = FMA(KP707106781, Tik, Tij);
1618
                              }
1619
                              Tim = FNMS(KP668178637, Til, Tii);
1620
                              TiG = FMA(KP198912367, TiA, TiB);
1621
                              Tiq = FMA(KP668178637, Tii, Til);
1622
                              TiC = FNMS(KP198912367, TiB, TiA);
1623
                         }
1624
                         {
1625
                              E TgZ, ThM, Th8, ThL;
1626
                              {
1627
                                   E TgN, TgY, Th4, Th7;
1628
                                   TgN = TgL + TgM;
1629
                                   TgY = TgS + TgX;
1630
                                   TgZ = FNMS(KP707106781, TgY, TgN);
1631
                                   ThM = FMA(KP707106781, TgY, TgN);
1632
                                   Th4 = Th0 + Th3;
1633
                                   Th7 = Th5 + Th6;
1634
                                   Th8 = FNMS(KP707106781, Th7, Th4);
1635
                                   ThL = FMA(KP707106781, Th7, Th4);
1636
                              }
1637
                              Th9 = FMA(KP668178637, Th8, TgZ);
1638
                              ThT = FNMS(KP198912367, ThL, ThM);
1639
                              ThD = FNMS(KP668178637, TgZ, Th8);
1640
                              ThN = FMA(KP198912367, ThM, ThL);
1641
                         }
1642
                         {
1643
                              E Ti3, Tit, Ti6, Tiu;
1644
                              {
1645
                                   E Ti1, Ti2, Ti4, Ti5;
1646
                                   Ti1 = Tgn - Tgo;
1647
                                   Ti2 = Tgq - Tgt;
1648
                                   Ti3 = FMA(KP414213562, Ti2, Ti1);
1649
                                   Tit = FNMS(KP414213562, Ti1, Ti2);
1650
                                   Ti4 = Tgy - Tgz;
1651
                                   Ti5 = TgB - TgE;
1652
                                   Ti6 = FNMS(KP414213562, Ti5, Ti4);
1653
                                   Tiu = FMA(KP414213562, Ti4, Ti5);
1654
                              }
1655
                              Ti7 = Ti3 - Ti6;
1656
                              Tl2 = Ti3 + Ti6;
1657
                              Tiv = Tit + Tiu;
1658
                              TkW = Tiu - Tit;
1659
                         }
1660
                         {
1661
                              E Tib, Tiy, Tie, Tix;
1662
                              {
1663
                                   E Ti9, Tia, Tic, Tid;
1664
                                   Ti9 = Th0 - Th3;
1665
                                   Tia = TgX - TgS;
1666
                                   Tib = FNMS(KP707106781, Tia, Ti9);
1667
                                   Tiy = FMA(KP707106781, Tia, Ti9);
1668
                                   Tic = TgL - TgM;
1669
                                   Tid = Th5 - Th6;
1670
                                   Tie = FNMS(KP707106781, Tid, Tic);
1671
                                   Tix = FMA(KP707106781, Tid, Tic);
1672
                              }
1673
                              Tif = FMA(KP668178637, Tie, Tib);
1674
                              TiF = FNMS(KP198912367, Tix, Tiy);
1675
                              Tip = FNMS(KP668178637, Tib, Tie);
1676
                              Tiz = FMA(KP198912367, Tiy, Tix);
1677
                         }
1678
                         {
1679
                              E Thq, ThP, Thz, ThO;
1680
                              {
1681
                                   E The, Thp, Thv, Thy;
1682
                                   The = Thc + Thd;
1683
                                   Thp = Thj + Tho;
1684
                                   Thq = FNMS(KP707106781, Thp, The);
1685
                                   ThP = FMA(KP707106781, Thp, The);
1686
                                   Thv = Thr + Thu;
1687
                                   Thy = Thw + Thx;
1688
                                   Thz = FNMS(KP707106781, Thy, Thv);
1689
                                   ThO = FMA(KP707106781, Thy, Thv);
1690
                              }
1691
                              ThA = FNMS(KP668178637, Thz, Thq);
1692
                              ThU = FMA(KP198912367, ThO, ThP);
1693
                              ThE = FMA(KP668178637, Thq, Thz);
1694
                              ThQ = FNMS(KP198912367, ThP, ThO);
1695
                         }
1696
                         {
1697
                              E TgI, ThB, TkP, TkQ;
1698
                              TgI = FMA(KP923879532, TgH, Tgk);
1699
                              ThB = Th9 - ThA;
1700
                              ri[WS(rs, 42)] = FNMS(KP831469612, ThB, TgI);
1701
                              ri[WS(rs, 10)] = FMA(KP831469612, ThB, TgI);
1702
                              TkP = FMA(KP923879532, TkO, TkN);
1703
                              TkQ = ThE - ThD;
1704
                              ii[WS(rs, 10)] = FMA(KP831469612, TkQ, TkP);
1705
                              ii[WS(rs, 42)] = FNMS(KP831469612, TkQ, TkP);
1706
                         }
1707
                         {
1708
                              E ThC, ThF, TkR, TkS;
1709
                              ThC = FNMS(KP923879532, TgH, Tgk);
1710
                              ThF = ThD + ThE;
1711
                              ri[WS(rs, 26)] = FNMS(KP831469612, ThF, ThC);
1712
                              ri[WS(rs, 58)] = FMA(KP831469612, ThF, ThC);
1713
                              TkR = FNMS(KP923879532, TkO, TkN);
1714
                              TkS = Th9 + ThA;
1715
                              ii[WS(rs, 26)] = FNMS(KP831469612, TkS, TkR);
1716
                              ii[WS(rs, 58)] = FMA(KP831469612, TkS, TkR);
1717
                         }
1718
                         {
1719
                              E ThK, ThR, TkJ, TkK;
1720
                              ThK = FMA(KP923879532, ThJ, ThG);
1721
                              ThR = ThN + ThQ;
1722
                              ri[WS(rs, 34)] = FNMS(KP980785280, ThR, ThK);
1723
                              ri[WS(rs, 2)] = FMA(KP980785280, ThR, ThK);
1724
                              TkJ = FMA(KP923879532, TkI, TkH);
1725
                              TkK = ThT + ThU;
1726
                              ii[WS(rs, 2)] = FMA(KP980785280, TkK, TkJ);
1727
                              ii[WS(rs, 34)] = FNMS(KP980785280, TkK, TkJ);
1728
                         }
1729
                         {
1730
                              E ThS, ThV, TkL, TkM;
1731
                              ThS = FNMS(KP923879532, ThJ, ThG);
1732
                              ThV = ThT - ThU;
1733
                              ri[WS(rs, 50)] = FNMS(KP980785280, ThV, ThS);
1734
                              ri[WS(rs, 18)] = FMA(KP980785280, ThV, ThS);
1735
                              TkL = FNMS(KP923879532, TkI, TkH);
1736
                              TkM = ThQ - ThN;
1737
                              ii[WS(rs, 18)] = FMA(KP980785280, TkM, TkL);
1738
                              ii[WS(rs, 50)] = FNMS(KP980785280, TkM, TkL);
1739
                         }
1740
                         {
1741
                              E Ti8, Tin, TkX, TkY;
1742
                              Ti8 = FMA(KP923879532, Ti7, Ti0);
1743
                              Tin = Tif + Tim;
1744
                              ri[WS(rs, 38)] = FNMS(KP831469612, Tin, Ti8);
1745
                              ri[WS(rs, 6)] = FMA(KP831469612, Tin, Ti8);
1746
                              TkX = FMA(KP923879532, TkW, TkV);
1747
                              TkY = Tip + Tiq;
1748
                              ii[WS(rs, 6)] = FMA(KP831469612, TkY, TkX);
1749
                              ii[WS(rs, 38)] = FNMS(KP831469612, TkY, TkX);
1750
                         }
1751
                         {
1752
                              E Tio, Tir, TkZ, Tl0;
1753
                              Tio = FNMS(KP923879532, Ti7, Ti0);
1754
                              Tir = Tip - Tiq;
1755
                              ri[WS(rs, 54)] = FNMS(KP831469612, Tir, Tio);
1756
                              ri[WS(rs, 22)] = FMA(KP831469612, Tir, Tio);
1757
                              TkZ = FNMS(KP923879532, TkW, TkV);
1758
                              Tl0 = Tim - Tif;
1759
                              ii[WS(rs, 22)] = FMA(KP831469612, Tl0, TkZ);
1760
                              ii[WS(rs, 54)] = FNMS(KP831469612, Tl0, TkZ);
1761
                         }
1762
                         {
1763
                              E Tiw, TiD, Tl3, Tl4;
1764
                              Tiw = FNMS(KP923879532, Tiv, Tis);
1765
                              TiD = Tiz - TiC;
1766
                              ri[WS(rs, 46)] = FNMS(KP980785280, TiD, Tiw);
1767
                              ri[WS(rs, 14)] = FMA(KP980785280, TiD, Tiw);
1768
                              Tl3 = FNMS(KP923879532, Tl2, Tl1);
1769
                              Tl4 = TiG - TiF;
1770
                              ii[WS(rs, 14)] = FMA(KP980785280, Tl4, Tl3);
1771
                              ii[WS(rs, 46)] = FNMS(KP980785280, Tl4, Tl3);
1772
                         }
1773
                         {
1774
                              E TiE, TiH, Tl5, Tl6;
1775
                              TiE = FMA(KP923879532, Tiv, Tis);
1776
                              TiH = TiF + TiG;
1777
                              ri[WS(rs, 30)] = FNMS(KP980785280, TiH, TiE);
1778
                              ri[WS(rs, 62)] = FMA(KP980785280, TiH, TiE);
1779
                              Tl5 = FMA(KP923879532, Tl2, Tl1);
1780
                              Tl6 = Tiz + TiC;
1781
                              ii[WS(rs, 30)] = FNMS(KP980785280, Tl6, Tl5);
1782
                              ii[WS(rs, 62)] = FMA(KP980785280, Tl6, Tl5);
1783
                         }
1784
                    }
1785
                    {
1786
                         E Tar, TlO, TcT, TlI, TbB, Td3, TcN, TcX, Tdw, TdQ, TdA, TdM, Tdp, TdP, Tdz;
1787
                         E TdJ, Tdh, Tm2, TdF, TlW, TcK, Td4, TcO, Td0, T9i, TlV, Tm1, TcQ, Tda, TlH;
1788
                         E TlN, TdC;
1789
                         {
1790
                              E T9R, TcR, Taq, TcS;
1791
                              {
1792
                                   E T9F, T9Q, Tae, Tap;
1793
                                   T9F = FNMS(KP707106781, T9E, T9p);
1794
                                   T9Q = FNMS(KP707106781, T9P, T9M);
1795
                                   T9R = FNMS(KP668178637, T9Q, T9F);
1796
                                   TcR = FMA(KP668178637, T9F, T9Q);
1797
                                   Tae = FNMS(KP707106781, Tad, T9Y);
1798
                                   Tap = FNMS(KP707106781, Tao, Tal);
1799
                                   Taq = FMA(KP668178637, Tap, Tae);
1800
                                   TcS = FNMS(KP668178637, Tae, Tap);
1801
                              }
1802
                              Tar = T9R - Taq;
1803
                              TlO = TcS - TcR;
1804
                              TcT = TcR + TcS;
1805
                              TlI = T9R + Taq;
1806
                         }
1807
                         {
1808
                              E Tbl, TcW, TbA, TcV;
1809
                              {
1810
                                   E TaP, Tbk, Tbw, Tbz;
1811
                                   TaP = FNMS(KP707106781, TaO, Taz);
1812
                                   Tbk = Tb4 - Tbj;
1813
                                   Tbl = FNMS(KP923879532, Tbk, TaP);
1814
                                   TcW = FMA(KP923879532, Tbk, TaP);
1815
                                   Tbw = FNMS(KP707106781, Tbv, Tbs);
1816
                                   Tbz = Tbx - Tby;
1817
                                   TbA = FNMS(KP923879532, Tbz, Tbw);
1818
                                   TcV = FMA(KP923879532, Tbz, Tbw);
1819
                              }
1820
                              TbB = FMA(KP534511135, TbA, Tbl);
1821
                              Td3 = FNMS(KP303346683, TcV, TcW);
1822
                              TcN = FNMS(KP534511135, Tbl, TbA);
1823
                              TcX = FMA(KP303346683, TcW, TcV);
1824
                         }
1825
                         {
1826
                              E Tds, TdL, Tdv, TdK;
1827
                              {
1828
                                   E Tdq, Tdr, Tdt, Tdu;
1829
                                   Tdq = FMA(KP707106781, TcE, TcB);
1830
                                   Tdr = Tcs + Tcd;
1831
                                   Tds = FNMS(KP923879532, Tdr, Tdq);
1832
                                   TdL = FMA(KP923879532, Tdr, Tdq);
1833
                                   Tdt = FMA(KP707106781, TbX, TbI);
1834
                                   Tdu = TcG + TcH;
1835
                                   Tdv = FNMS(KP923879532, Tdu, Tdt);
1836
                                   TdK = FMA(KP923879532, Tdu, Tdt);
1837
                              }
1838
                              Tdw = FNMS(KP820678790, Tdv, Tds);
1839
                              TdQ = FMA(KP098491403, TdK, TdL);
1840
                              TdA = FMA(KP820678790, Tds, Tdv);
1841
                              TdM = FNMS(KP098491403, TdL, TdK);
1842
                         }
1843
                         {
1844
                              E Tdl, TdI, Tdo, TdH;
1845
                              {
1846
                                   E Tdj, Tdk, Tdm, Tdn;
1847
                                   Tdj = FMA(KP707106781, Tbv, Tbs);
1848
                                   Tdk = Tbj + Tb4;
1849
                                   Tdl = FNMS(KP923879532, Tdk, Tdj);
1850
                                   TdI = FMA(KP923879532, Tdk, Tdj);
1851
                                   Tdm = FMA(KP707106781, TaO, Taz);
1852
                                   Tdn = Tbx + Tby;
1853
                                   Tdo = FNMS(KP923879532, Tdn, Tdm);
1854
                                   TdH = FMA(KP923879532, Tdn, Tdm);
1855
                              }
1856
                              Tdp = FMA(KP820678790, Tdo, Tdl);
1857
                              TdP = FNMS(KP098491403, TdH, TdI);
1858
                              Tdz = FNMS(KP820678790, Tdl, Tdo);
1859
                              TdJ = FMA(KP098491403, TdI, TdH);
1860
                         }
1861
                         {
1862
                              E Tdd, TdD, Tdg, TdE;
1863
                              {
1864
                                   E Tdb, Tdc, Tde, Tdf;
1865
                                   Tdb = FMA(KP707106781, T9E, T9p);
1866
                                   Tdc = FMA(KP707106781, T9P, T9M);
1867
                                   Tdd = FMA(KP198912367, Tdc, Tdb);
1868
                                   TdD = FNMS(KP198912367, Tdb, Tdc);
1869
                                   Tde = FMA(KP707106781, Tad, T9Y);
1870
                                   Tdf = FMA(KP707106781, Tao, Tal);
1871
                                   Tdg = FNMS(KP198912367, Tdf, Tde);
1872
                                   TdE = FMA(KP198912367, Tde, Tdf);
1873
                              }
1874
                              Tdh = Tdd - Tdg;
1875
                              Tm2 = Tdd + Tdg;
1876
                              TdF = TdD + TdE;
1877
                              TlW = TdE - TdD;
1878
                         }
1879
                         {
1880
                              E Tcu, TcZ, TcJ, TcY;
1881
                              {
1882
                                   E TbY, Tct, TcF, TcI;
1883
                                   TbY = FNMS(KP707106781, TbX, TbI);
1884
                                   Tct = Tcd - Tcs;
1885
                                   Tcu = FNMS(KP923879532, Tct, TbY);
1886
                                   TcZ = FMA(KP923879532, Tct, TbY);
1887
                                   TcF = FNMS(KP707106781, TcE, TcB);
1888
                                   TcI = TcG - TcH;
1889
                                   TcJ = FNMS(KP923879532, TcI, TcF);
1890
                                   TcY = FMA(KP923879532, TcI, TcF);
1891
                              }
1892
                              TcK = FNMS(KP534511135, TcJ, Tcu);
1893
                              Td4 = FMA(KP303346683, TcY, TcZ);
1894
                              TcO = FMA(KP534511135, Tcu, TcJ);
1895
                              Td0 = FNMS(KP303346683, TcZ, TcY);
1896
                         }
1897
                         {
1898
                              E T8M, Td6, TlF, TlT, T9h, TlU, Td9, TlG, T8L, TlE;
1899
                              T8L = T8D - T8K;
1900
                              T8M = FMA(KP707106781, T8L, T8w);
1901
                              Td6 = FNMS(KP707106781, T8L, T8w);
1902
                              TlE = TdU - TdT;
1903
                              TlF = FMA(KP707106781, TlE, TlD);
1904
                              TlT = FNMS(KP707106781, TlE, TlD);
1905
                              {
1906
                                   E T91, T9g, Td7, Td8;
1907
                                   T91 = FMA(KP414213562, T90, T8T);
1908
                                   T9g = FNMS(KP414213562, T9f, T98);
1909
                                   T9h = T91 - T9g;
1910
                                   TlU = T91 + T9g;
1911
                                   Td7 = FNMS(KP414213562, T8T, T90);
1912
                                   Td8 = FMA(KP414213562, T98, T9f);
1913
                                   Td9 = Td7 + Td8;
1914
                                   TlG = Td8 - Td7;
1915
                              }
1916
                              T9i = FNMS(KP923879532, T9h, T8M);
1917
                              TlV = FNMS(KP923879532, TlU, TlT);
1918
                              Tm1 = FMA(KP923879532, TlU, TlT);
1919
                              TcQ = FMA(KP923879532, T9h, T8M);
1920
                              Tda = FNMS(KP923879532, Td9, Td6);
1921
                              TlH = FMA(KP923879532, TlG, TlF);
1922
                              TlN = FNMS(KP923879532, TlG, TlF);
1923
                              TdC = FMA(KP923879532, Td9, Td6);
1924
                         }
1925
                         {
1926
                              E Tas, TcL, TlP, TlQ;
1927
                              Tas = FMA(KP831469612, Tar, T9i);
1928
                              TcL = TbB - TcK;
1929
                              ri[WS(rs, 43)] = FNMS(KP881921264, TcL, Tas);
1930
                              ri[WS(rs, 11)] = FMA(KP881921264, TcL, Tas);
1931
                              TlP = FMA(KP831469612, TlO, TlN);
1932
                              TlQ = TcO - TcN;
1933
                              ii[WS(rs, 11)] = FMA(KP881921264, TlQ, TlP);
1934
                              ii[WS(rs, 43)] = FNMS(KP881921264, TlQ, TlP);
1935
                         }
1936
                         {
1937
                              E TcM, TcP, TlR, TlS;
1938
                              TcM = FNMS(KP831469612, Tar, T9i);
1939
                              TcP = TcN + TcO;
1940
                              ri[WS(rs, 27)] = FNMS(KP881921264, TcP, TcM);
1941
                              ri[WS(rs, 59)] = FMA(KP881921264, TcP, TcM);
1942
                              TlR = FNMS(KP831469612, TlO, TlN);
1943
                              TlS = TbB + TcK;
1944
                              ii[WS(rs, 27)] = FNMS(KP881921264, TlS, TlR);
1945
                              ii[WS(rs, 59)] = FMA(KP881921264, TlS, TlR);
1946
                         }
1947
                         {
1948
                              E TcU, Td1, TlJ, TlK;
1949
                              TcU = FMA(KP831469612, TcT, TcQ);
1950
                              Td1 = TcX + Td0;
1951
                              ri[WS(rs, 35)] = FNMS(KP956940335, Td1, TcU);
1952
                              ri[WS(rs, 3)] = FMA(KP956940335, Td1, TcU);
1953
                              TlJ = FMA(KP831469612, TlI, TlH);
1954
                              TlK = Td3 + Td4;
1955
                              ii[WS(rs, 3)] = FMA(KP956940335, TlK, TlJ);
1956
                              ii[WS(rs, 35)] = FNMS(KP956940335, TlK, TlJ);
1957
                         }
1958
                         {
1959
                              E Td2, Td5, TlL, TlM;
1960
                              Td2 = FNMS(KP831469612, TcT, TcQ);
1961
                              Td5 = Td3 - Td4;
1962
                              ri[WS(rs, 51)] = FNMS(KP956940335, Td5, Td2);
1963
                              ri[WS(rs, 19)] = FMA(KP956940335, Td5, Td2);
1964
                              TlL = FNMS(KP831469612, TlI, TlH);
1965
                              TlM = Td0 - TcX;
1966
                              ii[WS(rs, 19)] = FMA(KP956940335, TlM, TlL);
1967
                              ii[WS(rs, 51)] = FNMS(KP956940335, TlM, TlL);
1968
                         }
1969
                         {
1970
                              E Tdi, Tdx, TlX, TlY;
1971
                              Tdi = FMA(KP980785280, Tdh, Tda);
1972
                              Tdx = Tdp + Tdw;
1973
                              ri[WS(rs, 39)] = FNMS(KP773010453, Tdx, Tdi);
1974
                              ri[WS(rs, 7)] = FMA(KP773010453, Tdx, Tdi);
1975
                              TlX = FMA(KP980785280, TlW, TlV);
1976
                              TlY = Tdz + TdA;
1977
                              ii[WS(rs, 7)] = FMA(KP773010453, TlY, TlX);
1978
                              ii[WS(rs, 39)] = FNMS(KP773010453, TlY, TlX);
1979
                         }
1980
                         {
1981
                              E Tdy, TdB, TlZ, Tm0;
1982
                              Tdy = FNMS(KP980785280, Tdh, Tda);
1983
                              TdB = Tdz - TdA;
1984
                              ri[WS(rs, 55)] = FNMS(KP773010453, TdB, Tdy);
1985
                              ri[WS(rs, 23)] = FMA(KP773010453, TdB, Tdy);
1986
                              TlZ = FNMS(KP980785280, TlW, TlV);
1987
                              Tm0 = Tdw - Tdp;
1988
                              ii[WS(rs, 23)] = FMA(KP773010453, Tm0, TlZ);
1989
                              ii[WS(rs, 55)] = FNMS(KP773010453, Tm0, TlZ);
1990
                         }
1991
                         {
1992
                              E TdG, TdN, Tm3, Tm4;
1993
                              TdG = FNMS(KP980785280, TdF, TdC);
1994
                              TdN = TdJ - TdM;
1995
                              ri[WS(rs, 47)] = FNMS(KP995184726, TdN, TdG);
1996
                              ri[WS(rs, 15)] = FMA(KP995184726, TdN, TdG);
1997
                              Tm3 = FNMS(KP980785280, Tm2, Tm1);
1998
                              Tm4 = TdQ - TdP;
1999
                              ii[WS(rs, 15)] = FMA(KP995184726, Tm4, Tm3);
2000
                              ii[WS(rs, 47)] = FNMS(KP995184726, Tm4, Tm3);
2001
                         }
2002
                         {
2003
                              E TdO, TdR, Tm5, Tm6;
2004
                              TdO = FMA(KP980785280, TdF, TdC);
2005
                              TdR = TdP + TdQ;
2006
                              ri[WS(rs, 31)] = FNMS(KP995184726, TdR, TdO);
2007
                              ri[WS(rs, 63)] = FMA(KP995184726, TdR, TdO);
2008
                              Tm5 = FMA(KP980785280, Tm2, Tm1);
2009
                              Tm6 = TdJ + TdM;
2010
                              ii[WS(rs, 31)] = FNMS(KP995184726, Tm6, Tm5);
2011
                              ii[WS(rs, 63)] = FMA(KP995184726, Tm6, Tm5);
2012
                         }
2013
                    }
2014
                    {
2015
                         E Tej, Tlk, Tf5, Tle, TeD, Tff, TeZ, Tf9, TfI, Tg2, TfM, TfY, TfB, Tg1, TfL;
2016
                         E TfV, Tft, Tly, TfR, Tls, TeW, Tfg, Tf0, Tfc, Te4, Tlr, Tlx, Tf2, Tfm, Tld;
2017
                         E Tlj, TfO;
2018
                         {
2019
                              E Teb, Tf3, Tei, Tf4;
2020
                              {
2021
                                   E Te7, Tea, Tee, Teh;
2022
                                   Te7 = FMA(KP707106781, Te6, Te5);
2023
                                   Tea = FMA(KP707106781, Te9, Te8);
2024
                                   Teb = FNMS(KP198912367, Tea, Te7);
2025
                                   Tf3 = FMA(KP198912367, Te7, Tea);
2026
                                   Tee = FMA(KP707106781, Ted, Tec);
2027
                                   Teh = FMA(KP707106781, Teg, Tef);
2028
                                   Tei = FMA(KP198912367, Teh, Tee);
2029
                                   Tf4 = FNMS(KP198912367, Tee, Teh);
2030
                              }
2031
                              Tej = Teb - Tei;
2032
                              Tlk = Tf4 - Tf3;
2033
                              Tf5 = Tf3 + Tf4;
2034
                              Tle = Teb + Tei;
2035
                         }
2036
                         {
2037
                              E Tev, Tf8, TeC, Tf7;
2038
                              {
2039
                                   E Ten, Teu, Tey, TeB;
2040
                                   Ten = FMA(KP707106781, Tem, Tel);
2041
                                   Teu = Teq + Tet;
2042
                                   Tev = FNMS(KP923879532, Teu, Ten);
2043
                                   Tf8 = FMA(KP923879532, Teu, Ten);
2044
                                   Tey = FMA(KP707106781, Tex, Tew);
2045
                                   TeB = Tez + TeA;
2046
                                   TeC = FNMS(KP923879532, TeB, Tey);
2047
                                   Tf7 = FMA(KP923879532, TeB, Tey);
2048
                              }
2049
                              TeD = FMA(KP820678790, TeC, Tev);
2050
                              Tff = FNMS(KP098491403, Tf7, Tf8);
2051
                              TeZ = FNMS(KP820678790, Tev, TeC);
2052
                              Tf9 = FMA(KP098491403, Tf8, Tf7);
2053
                         }
2054
                         {
2055
                              E TfE, TfX, TfH, TfW;
2056
                              {
2057
                                   E TfC, TfD, TfF, TfG;
2058
                                   TfC = FNMS(KP707106781, TeQ, TeP);
2059
                                   TfD = TeM - TeJ;
2060
                                   TfE = FNMS(KP923879532, TfD, TfC);
2061
                                   TfX = FMA(KP923879532, TfD, TfC);
2062
                                   TfF = FNMS(KP707106781, TeF, TeE);
2063
                                   TfG = TeS - TeT;
2064
                                   TfH = FNMS(KP923879532, TfG, TfF);
2065
                                   TfW = FMA(KP923879532, TfG, TfF);
2066
                              }
2067
                              TfI = FNMS(KP534511135, TfH, TfE);
2068
                              Tg2 = FMA(KP303346683, TfW, TfX);
2069
                              TfM = FMA(KP534511135, TfE, TfH);
2070
                              TfY = FNMS(KP303346683, TfX, TfW);
2071
                         }
2072
                         {
2073
                              E Tfx, TfU, TfA, TfT;
2074
                              {
2075
                                   E Tfv, Tfw, Tfy, Tfz;
2076
                                   Tfv = FNMS(KP707106781, Tex, Tew);
2077
                                   Tfw = Tet - Teq;
2078
                                   Tfx = FNMS(KP923879532, Tfw, Tfv);
2079
                                   TfU = FMA(KP923879532, Tfw, Tfv);
2080
                                   Tfy = FNMS(KP707106781, Tem, Tel);
2081
                                   Tfz = Tez - TeA;
2082
                                   TfA = FNMS(KP923879532, Tfz, Tfy);
2083
                                   TfT = FMA(KP923879532, Tfz, Tfy);
2084
                              }
2085
                              TfB = FMA(KP534511135, TfA, Tfx);
2086
                              Tg1 = FNMS(KP303346683, TfT, TfU);
2087
                              TfL = FNMS(KP534511135, Tfx, TfA);
2088
                              TfV = FMA(KP303346683, TfU, TfT);
2089
                         }
2090
                         {
2091
                              E Tfp, TfP, Tfs, TfQ;
2092
                              {
2093
                                   E Tfn, Tfo, Tfq, Tfr;
2094
                                   Tfn = FNMS(KP707106781, Te6, Te5);
2095
                                   Tfo = FNMS(KP707106781, Te9, Te8);
2096
                                   Tfp = FMA(KP668178637, Tfo, Tfn);
2097
                                   TfP = FNMS(KP668178637, Tfn, Tfo);
2098
                                   Tfq = FNMS(KP707106781, Ted, Tec);
2099
                                   Tfr = FNMS(KP707106781, Teg, Tef);
2100
                                   Tfs = FNMS(KP668178637, Tfr, Tfq);
2101
                                   TfQ = FMA(KP668178637, Tfq, Tfr);
2102
                              }
2103
                              Tft = Tfp - Tfs;
2104
                              Tly = Tfp + Tfs;
2105
                              TfR = TfP + TfQ;
2106
                              Tls = TfQ - TfP;
2107
                         }
2108
                         {
2109
                              E TeO, Tfb, TeV, Tfa;
2110
                              {
2111
                                   E TeG, TeN, TeR, TeU;
2112
                                   TeG = FMA(KP707106781, TeF, TeE);
2113
                                   TeN = TeJ + TeM;
2114
                                   TeO = FNMS(KP923879532, TeN, TeG);
2115
                                   Tfb = FMA(KP923879532, TeN, TeG);
2116
                                   TeR = FMA(KP707106781, TeQ, TeP);
2117
                                   TeU = TeS + TeT;
2118
                                   TeV = FNMS(KP923879532, TeU, TeR);
2119
                                   Tfa = FMA(KP923879532, TeU, TeR);
2120
                              }
2121
                              TeW = FNMS(KP820678790, TeV, TeO);
2122
                              Tfg = FMA(KP098491403, Tfa, Tfb);
2123
                              Tf0 = FMA(KP820678790, TeO, TeV);
2124
                              Tfc = FNMS(KP098491403, Tfb, Tfa);
2125
                         }
2126
                         {
2127
                              E TdW, Tfi, Tlb, Tlp, Te3, Tlq, Tfl, Tlc, TdV, Tla;
2128
                              TdV = TdT + TdU;
2129
                              TdW = FMA(KP707106781, TdV, TdS);
2130
                              Tfi = FNMS(KP707106781, TdV, TdS);
2131
                              Tla = T8D + T8K;
2132
                              Tlb = FMA(KP707106781, Tla, Tl9);
2133
                              Tlp = FNMS(KP707106781, Tla, Tl9);
2134
                              {
2135
                                   E TdZ, Te2, Tfj, Tfk;
2136
                                   TdZ = FMA(KP414213562, TdY, TdX);
2137
                                   Te2 = FNMS(KP414213562, Te1, Te0);
2138
                                   Te3 = TdZ + Te2;
2139
                                   Tlq = Te2 - TdZ;
2140
                                   Tfj = FNMS(KP414213562, TdX, TdY);
2141
                                   Tfk = FMA(KP414213562, Te0, Te1);
2142
                                   Tfl = Tfj - Tfk;
2143
                                   Tlc = Tfj + Tfk;
2144
                              }
2145
                              Te4 = FNMS(KP923879532, Te3, TdW);
2146
                              Tlr = FMA(KP923879532, Tlq, Tlp);
2147
                              Tlx = FNMS(KP923879532, Tlq, Tlp);
2148
                              Tf2 = FMA(KP923879532, Te3, TdW);
2149
                              Tfm = FMA(KP923879532, Tfl, Tfi);
2150
                              Tld = FMA(KP923879532, Tlc, Tlb);
2151
                              Tlj = FNMS(KP923879532, Tlc, Tlb);
2152
                              TfO = FNMS(KP923879532, Tfl, Tfi);
2153
                         }
2154
                         {
2155
                              E Tek, TeX, Tll, Tlm;
2156
                              Tek = FMA(KP980785280, Tej, Te4);
2157
                              TeX = TeD - TeW;
2158
                              ri[WS(rs, 41)] = FNMS(KP773010453, TeX, Tek);
2159
                              ri[WS(rs, 9)] = FMA(KP773010453, TeX, Tek);
2160
                              Tll = FMA(KP980785280, Tlk, Tlj);
2161
                              Tlm = Tf0 - TeZ;
2162
                              ii[WS(rs, 9)] = FMA(KP773010453, Tlm, Tll);
2163
                              ii[WS(rs, 41)] = FNMS(KP773010453, Tlm, Tll);
2164
                         }
2165
                         {
2166
                              E TeY, Tf1, Tln, Tlo;
2167
                              TeY = FNMS(KP980785280, Tej, Te4);
2168
                              Tf1 = TeZ + Tf0;
2169
                              ri[WS(rs, 25)] = FNMS(KP773010453, Tf1, TeY);
2170
                              ri[WS(rs, 57)] = FMA(KP773010453, Tf1, TeY);
2171
                              Tln = FNMS(KP980785280, Tlk, Tlj);
2172
                              Tlo = TeD + TeW;
2173
                              ii[WS(rs, 25)] = FNMS(KP773010453, Tlo, Tln);
2174
                              ii[WS(rs, 57)] = FMA(KP773010453, Tlo, Tln);
2175
                         }
2176
                         {
2177
                              E Tf6, Tfd, Tlf, Tlg;
2178
                              Tf6 = FMA(KP980785280, Tf5, Tf2);
2179
                              Tfd = Tf9 + Tfc;
2180
                              ri[WS(rs, 33)] = FNMS(KP995184726, Tfd, Tf6);
2181
                              ri[WS(rs, 1)] = FMA(KP995184726, Tfd, Tf6);
2182
                              Tlf = FMA(KP980785280, Tle, Tld);
2183
                              Tlg = Tff + Tfg;
2184
                              ii[WS(rs, 1)] = FMA(KP995184726, Tlg, Tlf);
2185
                              ii[WS(rs, 33)] = FNMS(KP995184726, Tlg, Tlf);
2186
                         }
2187
                         {
2188
                              E Tfe, Tfh, Tlh, Tli;
2189
                              Tfe = FNMS(KP980785280, Tf5, Tf2);
2190
                              Tfh = Tff - Tfg;
2191
                              ri[WS(rs, 49)] = FNMS(KP995184726, Tfh, Tfe);
2192
                              ri[WS(rs, 17)] = FMA(KP995184726, Tfh, Tfe);
2193
                              Tlh = FNMS(KP980785280, Tle, Tld);
2194
                              Tli = Tfc - Tf9;
2195
                              ii[WS(rs, 17)] = FMA(KP995184726, Tli, Tlh);
2196
                              ii[WS(rs, 49)] = FNMS(KP995184726, Tli, Tlh);
2197
                         }
2198
                         {
2199
                              E Tfu, TfJ, Tlt, Tlu;
2200
                              Tfu = FMA(KP831469612, Tft, Tfm);
2201
                              TfJ = TfB + TfI;
2202
                              ri[WS(rs, 37)] = FNMS(KP881921264, TfJ, Tfu);
2203
                              ri[WS(rs, 5)] = FMA(KP881921264, TfJ, Tfu);
2204
                              Tlt = FMA(KP831469612, Tls, Tlr);
2205
                              Tlu = TfL + TfM;
2206
                              ii[WS(rs, 5)] = FMA(KP881921264, Tlu, Tlt);
2207
                              ii[WS(rs, 37)] = FNMS(KP881921264, Tlu, Tlt);
2208
                         }
2209
                         {
2210
                              E TfK, TfN, Tlv, Tlw;
2211
                              TfK = FNMS(KP831469612, Tft, Tfm);
2212
                              TfN = TfL - TfM;
2213
                              ri[WS(rs, 53)] = FNMS(KP881921264, TfN, TfK);
2214
                              ri[WS(rs, 21)] = FMA(KP881921264, TfN, TfK);
2215
                              Tlv = FNMS(KP831469612, Tls, Tlr);
2216
                              Tlw = TfI - TfB;
2217
                              ii[WS(rs, 21)] = FMA(KP881921264, Tlw, Tlv);
2218
                              ii[WS(rs, 53)] = FNMS(KP881921264, Tlw, Tlv);
2219
                         }
2220
                         {
2221
                              E TfS, TfZ, Tlz, TlA;
2222
                              TfS = FNMS(KP831469612, TfR, TfO);
2223
                              TfZ = TfV - TfY;
2224
                              ri[WS(rs, 45)] = FNMS(KP956940335, TfZ, TfS);
2225
                              ri[WS(rs, 13)] = FMA(KP956940335, TfZ, TfS);
2226
                              Tlz = FNMS(KP831469612, Tly, Tlx);
2227
                              TlA = Tg2 - Tg1;
2228
                              ii[WS(rs, 13)] = FMA(KP956940335, TlA, Tlz);
2229
                              ii[WS(rs, 45)] = FNMS(KP956940335, TlA, Tlz);
2230
                         }
2231
                         {
2232
                              E Tg0, Tg3, TlB, TlC;
2233
                              Tg0 = FMA(KP831469612, TfR, TfO);
2234
                              Tg3 = Tg1 + Tg2;
2235
                              ri[WS(rs, 29)] = FNMS(KP956940335, Tg3, Tg0);
2236
                              ri[WS(rs, 61)] = FMA(KP956940335, Tg3, Tg0);
2237
                              TlB = FMA(KP831469612, Tly, Tlx);
2238
                              TlC = TfV + TfY;
2239
                              ii[WS(rs, 29)] = FNMS(KP956940335, TlC, TlB);
2240
                              ii[WS(rs, 61)] = FMA(KP956940335, TlC, TlB);
2241
                         }
2242
                    }
2243
               }
2244
          }
2245
     }
2246
}
2247

    
2248
static const tw_instr twinstr[] = {
2249
     {TW_CEXP, 0, 1},
2250
     {TW_CEXP, 0, 3},
2251
     {TW_CEXP, 0, 9},
2252
     {TW_CEXP, 0, 27},
2253
     {TW_CEXP, 0, 63},
2254
     {TW_NEXT, 1, 0}
2255
};
2256

    
2257
/*
 * Descriptor passed to the planner together with t2_64 when the codelet
 * is registered.
 */
static const ct_desc desc = {
     64,                  /* agrees with the generator's "-n 64" */
     "t2_64",             /* codelet name, as given to the generator */
     twinstr,             /* twiddle instruction table defined above */
     &GENUS,              /* defined outside this file's visible part */
     {520, 206, 634, 0},  /* same figures as the op-count comment:
                             520 additions, 206 multiplications,
                             634 fused multiply/add */
     0,                   /* NOTE(review): meaning of the three trailing */
     0,                   /* zeros is set by ct_desc's declaration,      */
     0                    /* which is not shown here -- confirm          */
};
2258

    
2259
/*
 * Registration entry point for this codelet: hands the t2_64 kernel and
 * its descriptor `desc` to planner `p` through kdft_dit_register.
 *
 * X() is a project macro applied to both the function name and the
 * register routine (presumably a precision-dependent name prefix --
 * its definition is outside this file).
 */
void X(codelet_t2_64) (planner *p) {
     X(kdft_dit_register) (p, t2_64, &desc);
}
2262
#else
2263

    
2264
/* Generated by: ../../../genfft/gen_twiddle.native -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 64 -name t2_64 -include dft/scalar/t.h */
2265

    
2266
/*
 * This function contains 1154 FP additions, 660 FP multiplications,
 * (or, 880 additions, 386 multiplications, 274 fused multiply/add),
 * 302 stack variables, 15 constants, and 256 memory accesses
 */
2271
#include "dft/scalar/t.h"
2272

    
2273
static void t2_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
2274
{
2275
     DK(KP471396736, +0.471396736825997648556387625905254377657460319);
2276
     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
2277
     DK(KP290284677, +0.290284677254462367636192375817395274691476278);
2278
     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
2279
     DK(KP634393284, +0.634393284163645498215171613225493370675687095);
2280
     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
2281
     DK(KP098017140, +0.098017140329560601994195563888641845861136673);
2282
     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
2283
     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
2284
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
2285
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
2286
     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
2287
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
2288
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
2289
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
2290
     {
2291
          INT m;
2292
          for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(128, rs)) {
2293
               E T2, T5, T3, T6, Te, T9, TP, T3e, T1e, T39, T3c, TT, T1a, T37, T8;
2294
               E Tw, Td, Ty, Tm, Th, T1C, T3K, T1V, T3x, T3I, T1G, T1R, T3v, T2m, T2q;
2295
               E T5Y, T6u, T53, T5B, T62, T6w, T57, T5D, T2V, T2X, Tg, TE, T3Y, T3V, T3j;
2296
               E Tl, TA, T3g, T1j, T1t, TV, T2C, T2z, T1u, TZ, T1h, To, T1p, T6j, T6H;
2297
               E Ts, T1l, T6l, T6F, T2P, T4b, T4x, T5i, T2R, T49, T4z, T5g, TG, T4k, T4m;
2298
               E TK, T21, T3O, T3Q, T25, TW, T10, T11, T79, T6X, T5M, T6b, T1v, T30, T69;
2299
               E T77, T13, T2F, T2D, T6p, T6O, T1x, T2a, T2f, T6V, T28, T6r, T2h, T6Q, T32;
2300
               E T5K, T5w, T4G, T4Q, T3m, T4h, T4I, T5y, T3k, T4f, T41, T4S, T4Y, T3q, T3D;
2301
               E T3F, T5r, T3s, T4W, T3Z, T5p;
2302
               {
2303
                    E Ta, Tj, Tx, TC, Tf, Tk, Tz, TD, T1B, T1E, T2o, T2l, T1T, T1Q, T1A;
2304
                    E T1F, T2p, T2k, T1U, T1P;
2305
                    {
2306
                         E T4, T1d, T19, Tb, T1c, T7, Tc, T18, TR, TO, TS, TN;
2307
                         T2 = W[0];
2308
                         T5 = W[1];
2309
                         T3 = W[2];
2310
                         T6 = W[3];
2311
                         Te = W[5];
2312
                         T9 = W[4];
2313
                         T4 = T2 * T3;
2314
                         T1d = T5 * T9;
2315
                         T19 = T5 * Te;
2316
                         Tb = T2 * T6;
2317
                         T1c = T2 * Te;
2318
                         T7 = T5 * T6;
2319
                         Tc = T5 * T3;
2320
                         T18 = T2 * T9;
2321
                         TR = T3 * Te;
2322
                         TO = T6 * Te;
2323
                         TS = T6 * T9;
2324
                         TN = T3 * T9;
2325
                         TP = TN - TO;
2326
                         T3e = TR - TS;
2327
                         T1e = T1c - T1d;
2328
                         T39 = T1c + T1d;
2329
                         T3c = TN + TO;
2330
                         TT = TR + TS;
2331
                         T1a = T18 + T19;
2332
                         T37 = T18 - T19;
2333
                         T8 = T4 - T7;
2334
                         Ta = T8 * T9;
2335
                         Tj = T8 * Te;
2336
                         Tw = T4 + T7;
2337
                         Tx = Tw * T9;
2338
                         TC = Tw * Te;
2339
                         Td = Tb + Tc;
2340
                         Tf = Td * Te;
2341
                         Tk = Td * T9;
2342
                         Ty = Tb - Tc;
2343
                         Tz = Ty * Te;
2344
                         TD = Ty * T9;
2345
                         Tm = W[7];
2346
                         T1B = T6 * Tm;
2347
                         T1E = T3 * Tm;
2348
                         T2o = T2 * Tm;
2349
                         T2l = T5 * Tm;
2350
                         T1T = T9 * Tm;
2351
                         T1Q = Te * Tm;
2352
                         Th = W[6];
2353
                         T1A = T3 * Th;
2354
                         T1F = T6 * Th;
2355
                         T2p = T5 * Th;
2356
                         T2k = T2 * Th;
2357
                         T1U = Te * Th;
2358
                         T1P = T9 * Th;
2359
                    }
2360
                    T1C = T1A + T1B;
2361
                    T3K = T1E + T1F;
2362
                    T1V = T1T + T1U;
2363
                    T3x = T2o - T2p;
2364
                    T3I = T1A - T1B;
2365
                    T1G = T1E - T1F;
2366
                    T1R = T1P - T1Q;
2367
                    {
2368
                         E T5W, T5X, T55, T56;
2369
                         T3v = T2k + T2l;
2370
                         T2m = T2k - T2l;
2371
                         T2q = T2o + T2p;
2372
                         T5W = T8 * Th;
2373
                         T5X = Td * Tm;
2374
                         T5Y = T5W - T5X;
2375
                         T6u = T5W + T5X;
2376
                         {
2377
                              E T51, T52, T60, T61;
2378
                              T51 = Tw * Th;
2379
                              T52 = Ty * Tm;
2380
                              T53 = T51 + T52;
2381
                              T5B = T51 - T52;
2382
                              T60 = T8 * Tm;
2383
                              T61 = Td * Th;
2384
                              T62 = T60 + T61;
2385
                              T6w = T60 - T61;
2386
                         }
2387
                         T55 = Tw * Tm;
2388
                         T56 = Ty * Th;
2389
                         T57 = T55 - T56;
2390
                         T5D = T55 + T56;
2391
                         {
2392
                              E Ti, Tq, TF, TJ, T3W, T3X, T3T, T3U, T3h, T3i, Tn, Tr, TB, TI, T3d;
2393
                              E T3f, T1k, T1o, T1Z, T23, TQ, TU, T2A, T2B, T2x, T2y, T20, T24, TX, TY;
2394
                              E T1i, T1n;
2395
                              T2V = T1P + T1Q;
2396
                              T2X = T1T - T1U;
2397
                              Tg = Ta + Tf;
2398
                              Ti = Tg * Th;
2399
                              Tq = Tg * Tm;
2400
                              TE = TC + TD;
2401
                              TF = TE * Tm;
2402
                              TJ = TE * Th;
2403
                              T3W = T37 * Tm;
2404
                              T3X = T39 * Th;
2405
                              T3Y = T3W - T3X;
2406
                              T3T = T37 * Th;
2407
                              T3U = T39 * Tm;
2408
                              T3V = T3T + T3U;
2409
                              T3h = T3c * Tm;
2410
                              T3i = T3e * Th;
2411
                              T3j = T3h - T3i;
2412
                              Tl = Tj - Tk;
2413
                              Tn = Tl * Tm;
2414
                              Tr = Tl * Th;
2415
                              TA = Tx - Tz;
2416
                              TB = TA * Th;
2417
                              TI = TA * Tm;
2418
                              T3d = T3c * Th;
2419
                              T3f = T3e * Tm;
2420
                              T3g = T3d + T3f;
2421
                              T1j = Tj + Tk;
2422
                              T1k = T1j * Tm;
2423
                              T1o = T1j * Th;
2424
                              T1t = Tx + Tz;
2425
                              T1Z = T1t * Th;
2426
                              T23 = T1t * Tm;
2427
                              TQ = TP * Th;
2428
                              TU = TT * Tm;
2429
                              TV = TQ + TU;
2430
                              T2A = T1a * Tm;
2431
                              T2B = T1e * Th;
2432
                              T2C = T2A - T2B;
2433
                              T2x = T1a * Th;
2434
                              T2y = T1e * Tm;
2435
                              T2z = T2x + T2y;
2436
                              T1u = TC - TD;
2437
                              T20 = T1u * Tm;
2438
                              T24 = T1u * Th;
2439
                              TX = TP * Tm;
2440
                              TY = TT * Th;
2441
                              TZ = TX - TY;
2442
                              T1h = Ta - Tf;
2443
                              T1i = T1h * Th;
2444
                              T1n = T1h * Tm;
2445
                              To = Ti - Tn;
2446
                              T1p = T1n + T1o;
2447
                              T6j = TQ - TU;
2448
                              T6H = T2A + T2B;
2449
                              Ts = Tq + Tr;
2450
                              T1l = T1i - T1k;
2451
                              T6l = TX + TY;
2452
                              T6F = T2x - T2y;
2453
                              T2P = T1Z - T20;
2454
                              T4b = TI + TJ;
2455
                              T4x = T3d - T3f;
2456
                              T5i = T3W + T3X;
2457
                              T2R = T23 + T24;
2458
                              T49 = TB - TF;
2459
                              T4z = T3h + T3i;
2460
                              T5g = T3T - T3U;
2461
                              TG = TB + TF;
2462
                              T4k = Ti + Tn;
2463
                              T4m = Tq - Tr;
2464
                              TK = TI - TJ;
2465
                              T21 = T1Z + T20;
2466
                              T3O = T1i + T1k;
2467
                              T3Q = T1n - T1o;
2468
                              T25 = T23 - T24;
2469
                              TW = W[8];
2470
                              T10 = W[9];
2471
                              T11 = FMA(TV, TW, TZ * T10);
2472
                              T79 = FNMS(T25, TW, T21 * T10);
2473
                              T6X = FNMS(Td, TW, T8 * T10);
2474
                              T5M = FNMS(T2X, TW, T2V * T10);
2475
                              T6b = FNMS(TK, TW, TG * T10);
2476
                              T1v = FMA(T1t, TW, T1u * T10);
2477
                              T30 = FMA(T1h, TW, T1j * T10);
2478
                              T69 = FMA(TG, TW, TK * T10);
2479
                              T77 = FMA(T21, TW, T25 * T10);
2480
                              T13 = FNMS(TZ, TW, TV * T10);
2481
                              T2F = FNMS(T2C, TW, T2z * T10);
2482
                              T2D = FMA(T2z, TW, T2C * T10);
2483
                              T6p = FMA(T1a, TW, T1e * T10);
2484
                              T6O = FMA(TP, TW, TT * T10);
2485
                              T1x = FNMS(T1u, TW, T1t * T10);
2486
                              T2a = FNMS(TE, TW, TA * T10);
2487
                              T2f = FMA(T3, TW, T6 * T10);
2488
                              T6V = FMA(T8, TW, Td * T10);
2489
                              T28 = FMA(TA, TW, TE * T10);
2490
                              T6r = FNMS(T1e, TW, T1a * T10);
2491
                              T2h = FNMS(T6, TW, T3 * T10);
2492
                              T6Q = FNMS(TT, TW, TP * T10);
2493
                              T32 = FNMS(T1j, TW, T1h * T10);
2494
                              T5K = FMA(T2V, TW, T2X * T10);
2495
                              T5w = FMA(Tw, TW, Ty * T10);
2496
                              T4G = FMA(T3O, TW, T3Q * T10);
2497
                              T4Q = FMA(T4k, TW, T4m * T10);
2498
                              T3m = FNMS(T3j, TW, T3g * T10);
2499
                              T4h = FNMS(Te, TW, T9 * T10);
2500
                              T4I = FNMS(T3Q, TW, T3O * T10);
2501
                              T5y = FNMS(Ty, TW, Tw * T10);
2502
                              T3k = FMA(T3g, TW, T3j * T10);
2503
                              T4f = FMA(T9, TW, Te * T10);
2504
                              T41 = FNMS(T3Y, TW, T3V * T10);
2505
                              T4S = FNMS(T4m, TW, T4k * T10);
2506
                              T4Y = FNMS(T3e, TW, T3c * T10);
2507
                              T3q = FMA(Tg, TW, Tl * T10);
2508
                              T3D = FMA(T2, TW, T5 * T10);
2509
                              T3F = FNMS(T5, TW, T2 * T10);
2510
                              T5r = FNMS(T39, TW, T37 * T10);
2511
                              T3s = FNMS(Tl, TW, Tg * T10);
2512
                              T4W = FMA(T3c, TW, T3e * T10);
2513
                              T3Z = FMA(T3V, TW, T3Y * T10);
2514
                              T5p = FMA(T37, TW, T39 * T10);
2515
                         }
2516
                    }
2517
               }
2518
               {
2519
                    E T17, TdV, Tj3, Tjx, T7l, TbJ, Ti3, Tix, T1K, Tiw, TdY, ThY, T7w, Tj0, TbM;
2520
                    E Tjw, T2e, TgA, T7I, TaY, TbQ, Tda, Te4, TfO, T2J, TgB, T7T, TaZ, TbT, Tdb;
2521
                    E Te9, TfP, T36, T3B, TgH, TgE, TgF, TgG, T80, TbW, Tel, TfT, T8b, Tc0, T8k;
2522
                    E TbX, Teg, TfS, T8h, TbZ, T45, T4q, TgJ, TgK, TgL, TgM, T8r, Tc6, Tew, TfW;
2523
                    E T8C, Tc4, T8L, Tc7, Ter, TfV, T8I, Tc3, T6B, Th1, Tfm, Tga, Th8, ThI, T9N;
2524
                    E Tcv, T9Y, TcH, Tav, Tcw, Tf5, Tg7, Tas, TcG, T5c, TgV, TeV, Tg0, TgS, ThD;
2525
                    E T8U, Tcc, T95, Tco, T9C, Tcd, TeE, Tg3, T9z, Tcn, T5R, TgT, TeO, TeW, TgY;
2526
                    E ThE, T9h, T9F, T9s, T9E, Tck, Tcq, TeJ, TeX, Tch, Tcr, T7e, Th9, Tff, Tfn;
2527
                    E Th4, ThJ, Taa, Tay, Tal, Tax, TcD, TcJ, Tfa, Tfo, TcA, TcK;
2528
                    {
2529
                         E T1, Ti1, Tu, Ti0, TM, T7i, T15, T7j, Tp, Tt;
2530
                         T1 = ri[0];
2531
                         Ti1 = ii[0];
2532
                         Tp = ri[WS(rs, 32)];
2533
                         Tt = ii[WS(rs, 32)];
2534
                         Tu = FMA(To, Tp, Ts * Tt);
2535
                         Ti0 = FNMS(Ts, Tp, To * Tt);
2536
                         {
2537
                              E TH, TL, T12, T14;
2538
                              TH = ri[WS(rs, 16)];
2539
                              TL = ii[WS(rs, 16)];
2540
                              TM = FMA(TG, TH, TK * TL);
2541
                              T7i = FNMS(TK, TH, TG * TL);
2542
                              T12 = ri[WS(rs, 48)];
2543
                              T14 = ii[WS(rs, 48)];
2544
                              T15 = FMA(T11, T12, T13 * T14);
2545
                              T7j = FNMS(T13, T12, T11 * T14);
2546
                         }
2547
                         {
2548
                              E Tv, T16, Tj1, Tj2;
2549
                              Tv = T1 + Tu;
2550
                              T16 = TM + T15;
2551
                              T17 = Tv + T16;
2552
                              TdV = Tv - T16;
2553
                              Tj1 = Ti1 - Ti0;
2554
                              Tj2 = TM - T15;
2555
                              Tj3 = Tj1 - Tj2;
2556
                              Tjx = Tj2 + Tj1;
2557
                         }
2558
                         {
2559
                              E T7h, T7k, ThZ, Ti2;
2560
                              T7h = T1 - Tu;
2561
                              T7k = T7i - T7j;
2562
                              T7l = T7h - T7k;
2563
                              TbJ = T7h + T7k;
2564
                              ThZ = T7i + T7j;
2565
                              Ti2 = Ti0 + Ti1;
2566
                              Ti3 = ThZ + Ti2;
2567
                              Tix = Ti2 - ThZ;
2568
                         }
2569
                    }
2570
                    {
2571
                         E T1g, T7m, T1r, T7n, T7o, T7p, T1z, T7s, T1I, T7t, T7r, T7u;
2572
                         {
2573
                              E T1b, T1f, T1m, T1q;
2574
                              T1b = ri[WS(rs, 8)];
2575
                              T1f = ii[WS(rs, 8)];
2576
                              T1g = FMA(T1a, T1b, T1e * T1f);
2577
                              T7m = FNMS(T1e, T1b, T1a * T1f);
2578
                              T1m = ri[WS(rs, 40)];
2579
                              T1q = ii[WS(rs, 40)];
2580
                              T1r = FMA(T1l, T1m, T1p * T1q);
2581
                              T7n = FNMS(T1p, T1m, T1l * T1q);
2582
                         }
2583
                         T7o = T7m - T7n;
2584
                         T7p = T1g - T1r;
2585
                         {
2586
                              E T1w, T1y, T1D, T1H;
2587
                              T1w = ri[WS(rs, 56)];
2588
                              T1y = ii[WS(rs, 56)];
2589
                              T1z = FMA(T1v, T1w, T1x * T1y);
2590
                              T7s = FNMS(T1x, T1w, T1v * T1y);
2591
                              T1D = ri[WS(rs, 24)];
2592
                              T1H = ii[WS(rs, 24)];
2593
                              T1I = FMA(T1C, T1D, T1G * T1H);
2594
                              T7t = FNMS(T1G, T1D, T1C * T1H);
2595
                         }
2596
                         T7r = T1z - T1I;
2597
                         T7u = T7s - T7t;
2598
                         {
2599
                              E T1s, T1J, TdW, TdX;
2600
                              T1s = T1g + T1r;
2601
                              T1J = T1z + T1I;
2602
                              T1K = T1s + T1J;
2603
                              Tiw = T1J - T1s;
2604
                              TdW = T7m + T7n;
2605
                              TdX = T7s + T7t;
2606
                              TdY = TdW - TdX;
2607
                              ThY = TdW + TdX;
2608
                         }
2609
                         {
2610
                              E T7q, T7v, TbK, TbL;
2611
                              T7q = T7o - T7p;
2612
                              T7v = T7r + T7u;
2613
                              T7w = KP707106781 * (T7q - T7v);
2614
                              Tj0 = KP707106781 * (T7q + T7v);
2615
                              TbK = T7p + T7o;
2616
                              TbL = T7r - T7u;
2617
                              TbM = KP707106781 * (TbK + TbL);
2618
                              Tjw = KP707106781 * (TbL - TbK);
2619
                         }
2620
                    }
2621
                    {
2622
                         E T1Y, Te0, T7A, T7D, T2d, Te1, T7B, T7G, T7C, T7H;
2623
                         {
2624
                              E T1O, T7y, T1X, T7z;
2625
                              {
2626
                                   E T1M, T1N, T1S, T1W;
2627
                                   T1M = ri[WS(rs, 4)];
2628
                                   T1N = ii[WS(rs, 4)];
2629
                                   T1O = FMA(T8, T1M, Td * T1N);
2630
                                   T7y = FNMS(Td, T1M, T8 * T1N);
2631
                                   T1S = ri[WS(rs, 36)];
2632
                                   T1W = ii[WS(rs, 36)];
2633
                                   T1X = FMA(T1R, T1S, T1V * T1W);
2634
                                   T7z = FNMS(T1V, T1S, T1R * T1W);
2635
                              }
2636
                              T1Y = T1O + T1X;
2637
                              Te0 = T7y + T7z;
2638
                              T7A = T7y - T7z;
2639
                              T7D = T1O - T1X;
2640
                         }
2641
                         {
2642
                              E T27, T7E, T2c, T7F;
2643
                              {
2644
                                   E T22, T26, T29, T2b;
2645
                                   T22 = ri[WS(rs, 20)];
2646
                                   T26 = ii[WS(rs, 20)];
2647
                                   T27 = FMA(T21, T22, T25 * T26);
2648
                                   T7E = FNMS(T25, T22, T21 * T26);
2649
                                   T29 = ri[WS(rs, 52)];
2650
                                   T2b = ii[WS(rs, 52)];
2651
                                   T2c = FMA(T28, T29, T2a * T2b);
2652
                                   T7F = FNMS(T2a, T29, T28 * T2b);
2653
                              }
2654
                              T2d = T27 + T2c;
2655
                              Te1 = T7E + T7F;
2656
                              T7B = T27 - T2c;
2657
                              T7G = T7E - T7F;
2658
                         }
2659
                         T2e = T1Y + T2d;
2660
                         TgA = Te0 + Te1;
2661
                         T7C = T7A + T7B;
2662
                         T7H = T7D - T7G;
2663
                         T7I = FNMS(KP923879532, T7H, KP382683432 * T7C);
2664
                         TaY = FMA(KP923879532, T7C, KP382683432 * T7H);
2665
                         {
2666
                              E TbO, TbP, Te2, Te3;
2667
                              TbO = T7A - T7B;
2668
                              TbP = T7D + T7G;
2669
                              TbQ = FNMS(KP382683432, TbP, KP923879532 * TbO);
2670
                              Tda = FMA(KP382683432, TbO, KP923879532 * TbP);
2671
                              Te2 = Te0 - Te1;
2672
                              Te3 = T1Y - T2d;
2673
                              Te4 = Te2 - Te3;
2674
                              TfO = Te3 + Te2;
2675
                         }
2676
                    }
2677
                    {
2678
                         E T2t, Te6, T7L, T7O, T2I, Te7, T7M, T7R, T7N, T7S;
2679
                         {
2680
                              E T2j, T7J, T2s, T7K;
2681
                              {
2682
                                   E T2g, T2i, T2n, T2r;
2683
                                   T2g = ri[WS(rs, 60)];
2684
                                   T2i = ii[WS(rs, 60)];
2685
                                   T2j = FMA(T2f, T2g, T2h * T2i);
2686
                                   T7J = FNMS(T2h, T2g, T2f * T2i);
2687
                                   T2n = ri[WS(rs, 28)];
2688
                                   T2r = ii[WS(rs, 28)];
2689
                                   T2s = FMA(T2m, T2n, T2q * T2r);
2690
                                   T7K = FNMS(T2q, T2n, T2m * T2r);
2691
                              }
2692
                              T2t = T2j + T2s;
2693
                              Te6 = T7J + T7K;
2694
                              T7L = T7J - T7K;
2695
                              T7O = T2j - T2s;
2696
                         }
2697
                         {
2698
                              E T2w, T7P, T2H, T7Q;
2699
                              {
2700
                                   E T2u, T2v, T2E, T2G;
2701
                                   T2u = ri[WS(rs, 12)];
2702
                                   T2v = ii[WS(rs, 12)];
2703
                                   T2w = FMA(TP, T2u, TT * T2v);
2704
                                   T7P = FNMS(TT, T2u, TP * T2v);
2705
                                   T2E = ri[WS(rs, 44)];
2706
                                   T2G = ii[WS(rs, 44)];
2707
                                   T2H = FMA(T2D, T2E, T2F * T2G);
2708
                                   T7Q = FNMS(T2F, T2E, T2D * T2G);
2709
                              }
2710
                              T2I = T2w + T2H;
2711
                              Te7 = T7P + T7Q;
2712
                              T7M = T2w - T2H;
2713
                              T7R = T7P - T7Q;
2714
                         }
2715
                         T2J = T2t + T2I;
2716
                         TgB = Te6 + Te7;
2717
                         T7N = T7L + T7M;
2718
                         T7S = T7O - T7R;
2719
                         T7T = FMA(KP382683432, T7N, KP923879532 * T7S);
2720
                         TaZ = FNMS(KP923879532, T7N, KP382683432 * T7S);
2721
                         {
2722
                              E TbR, TbS, Te5, Te8;
2723
                              TbR = T7L - T7M;
2724
                              TbS = T7O + T7R;
2725
                              TbT = FMA(KP923879532, TbR, KP382683432 * TbS);
2726
                              Tdb = FNMS(KP382683432, TbR, KP923879532 * TbS);
2727
                              Te5 = T2t - T2I;
2728
                              Te8 = Te6 - Te7;
2729
                              Te9 = Te5 + Te8;
2730
                              TfP = Te5 - Te8;
2731
                         }
2732
                    }
2733
                    {
2734
                         E T2O, T7W, T2T, T7X, T2U, Tec, T2Z, T8e, T34, T8f, T35, Ted, T3p, Tei, T86;
2735
                         E T89, T3A, Tej, T81, T84;
2736
                         {
2737
                              E T2M, T2N, T2Q, T2S;
2738
                              T2M = ri[WS(rs, 2)];
2739
                              T2N = ii[WS(rs, 2)];
2740
                              T2O = FMA(Tw, T2M, Ty * T2N);
2741
                              T7W = FNMS(Ty, T2M, Tw * T2N);
2742
                              T2Q = ri[WS(rs, 34)];
2743
                              T2S = ii[WS(rs, 34)];
2744
                              T2T = FMA(T2P, T2Q, T2R * T2S);
2745
                              T7X = FNMS(T2R, T2Q, T2P * T2S);
2746
                         }
2747
                         T2U = T2O + T2T;
2748
                         Tec = T7W + T7X;
2749
                         {
2750
                              E T2W, T2Y, T31, T33;
2751
                              T2W = ri[WS(rs, 18)];
2752
                              T2Y = ii[WS(rs, 18)];
2753
                              T2Z = FMA(T2V, T2W, T2X * T2Y);
2754
                              T8e = FNMS(T2X, T2W, T2V * T2Y);
2755
                              T31 = ri[WS(rs, 50)];
2756
                              T33 = ii[WS(rs, 50)];
2757
                              T34 = FMA(T30, T31, T32 * T33);
2758
                              T8f = FNMS(T32, T31, T30 * T33);
2759
                         }
2760
                         T35 = T2Z + T34;
2761
                         Ted = T8e + T8f;
2762
                         {
2763
                              E T3b, T87, T3o, T88;
2764
                              {
2765
                                   E T38, T3a, T3l, T3n;
2766
                                   T38 = ri[WS(rs, 10)];
2767
                                   T3a = ii[WS(rs, 10)];
2768
                                   T3b = FMA(T37, T38, T39 * T3a);
2769
                                   T87 = FNMS(T39, T38, T37 * T3a);
2770
                                   T3l = ri[WS(rs, 42)];
2771
                                   T3n = ii[WS(rs, 42)];
2772
                                   T3o = FMA(T3k, T3l, T3m * T3n);
2773
                                   T88 = FNMS(T3m, T3l, T3k * T3n);
2774
                              }
2775
                              T3p = T3b + T3o;
2776
                              Tei = T87 + T88;
2777
                              T86 = T3b - T3o;
2778
                              T89 = T87 - T88;
2779
                         }
2780
                         {
2781
                              E T3u, T82, T3z, T83;
2782
                              {
2783
                                   E T3r, T3t, T3w, T3y;
2784
                                   T3r = ri[WS(rs, 58)];
2785
                                   T3t = ii[WS(rs, 58)];
2786
                                   T3u = FMA(T3q, T3r, T3s * T3t);
2787
                                   T82 = FNMS(T3s, T3r, T3q * T3t);
2788
                                   T3w = ri[WS(rs, 26)];
2789
                                   T3y = ii[WS(rs, 26)];
2790
                                   T3z = FMA(T3v, T3w, T3x * T3y);
2791
                                   T83 = FNMS(T3x, T3w, T3v * T3y);
2792
                              }
2793
                              T3A = T3u + T3z;
2794
                              Tej = T82 + T83;
2795
                              T81 = T3u - T3z;
2796
                              T84 = T82 - T83;
2797
                         }
2798
                         T36 = T2U + T35;
2799
                         T3B = T3p + T3A;
2800
                         TgH = T36 - T3B;
2801
                         TgE = Tec + Ted;
2802
                         TgF = Tei + Tej;
2803
                         TgG = TgE - TgF;
2804
                         {
2805
                              E T7Y, T7Z, Teh, Tek;
2806
                              T7Y = T7W - T7X;
2807
                              T7Z = T2Z - T34;
2808
                              T80 = T7Y + T7Z;
2809
                              TbW = T7Y - T7Z;
2810
                              Teh = T2U - T35;
2811
                              Tek = Tei - Tej;
2812
                              Tel = Teh - Tek;
2813
                              TfT = Teh + Tek;
2814
                         }
2815
                         {
2816
                              E T85, T8a, T8i, T8j;
2817
                              T85 = T81 - T84;
2818
                              T8a = T86 + T89;
2819
                              T8b = KP707106781 * (T85 - T8a);
2820
                              Tc0 = KP707106781 * (T8a + T85);
2821
                              T8i = T89 - T86;
2822
                              T8j = T81 + T84;
2823
                              T8k = KP707106781 * (T8i - T8j);
2824
                              TbX = KP707106781 * (T8i + T8j);
2825
                         }
2826
                         {
2827
                              E Tee, Tef, T8d, T8g;
2828
                              Tee = Tec - Ted;
2829
                              Tef = T3A - T3p;
2830
                              Teg = Tee - Tef;
2831
                              TfS = Tee + Tef;
2832
                              T8d = T2O - T2T;
2833
                              T8g = T8e - T8f;
2834
                              T8h = T8d - T8g;
2835
                              TbZ = T8d + T8g;
2836
                         }
2837
                    }
2838
                    {
2839
                         E T3H, T8n, T3M, T8o, T3N, Ten, T3S, T8F, T43, T8G, T44, Teo, T4e, Tet, T8x;
2840
                         E T8A, T4p, Teu, T8s, T8v;
2841
                         {
2842
                              E T3E, T3G, T3J, T3L;
2843
                              T3E = ri[WS(rs, 62)];
2844
                              T3G = ii[WS(rs, 62)];
2845
                              T3H = FMA(T3D, T3E, T3F * T3G);
2846
                              T8n = FNMS(T3F, T3E, T3D * T3G);
2847
                              T3J = ri[WS(rs, 30)];
2848
                              T3L = ii[WS(rs, 30)];
2849
                              T3M = FMA(T3I, T3J, T3K * T3L);
2850
                              T8o = FNMS(T3K, T3J, T3I * T3L);
2851
                         }
2852
                         T3N = T3H + T3M;
2853
                         Ten = T8n + T8o;
2854
                         {
2855
                              E T3P, T3R, T40, T42;
2856
                              T3P = ri[WS(rs, 14)];
2857
                              T3R = ii[WS(rs, 14)];
2858
                              T3S = FMA(T3O, T3P, T3Q * T3R);
2859
                              T8F = FNMS(T3Q, T3P, T3O * T3R);
2860
                              T40 = ri[WS(rs, 46)];
2861
                              T42 = ii[WS(rs, 46)];
2862
                              T43 = FMA(T3Z, T40, T41 * T42);
2863
                              T8G = FNMS(T41, T40, T3Z * T42);
2864
                         }
2865
                         T44 = T3S + T43;
2866
                         Teo = T8F + T8G;
2867
                         {
2868
                              E T48, T8y, T4d, T8z;
2869
                              {
2870
                                   E T46, T47, T4a, T4c;
2871
                                   T46 = ri[WS(rs, 6)];
2872
                                   T47 = ii[WS(rs, 6)];
2873
                                   T48 = FMA(T3c, T46, T3e * T47);
2874
                                   T8y = FNMS(T3e, T46, T3c * T47);
2875
                                   T4a = ri[WS(rs, 38)];
2876
                                   T4c = ii[WS(rs, 38)];
2877
                                   T4d = FMA(T49, T4a, T4b * T4c);
2878
                                   T8z = FNMS(T4b, T4a, T49 * T4c);
2879
                              }
2880
                              T4e = T48 + T4d;
2881
                              Tet = T8y + T8z;
2882
                              T8x = T48 - T4d;
2883
                              T8A = T8y - T8z;
2884
                         }
2885
                         {
2886
                              E T4j, T8t, T4o, T8u;
2887
                              {
2888
                                   E T4g, T4i, T4l, T4n;
2889
                                   T4g = ri[WS(rs, 54)];
2890
                                   T4i = ii[WS(rs, 54)];
2891
                                   T4j = FMA(T4f, T4g, T4h * T4i);
2892
                                   T8t = FNMS(T4h, T4g, T4f * T4i);
2893
                                   T4l = ri[WS(rs, 22)];
2894
                                   T4n = ii[WS(rs, 22)];
2895
                                   T4o = FMA(T4k, T4l, T4m * T4n);
2896
                                   T8u = FNMS(T4m, T4l, T4k * T4n);
2897
                              }
2898
                              T4p = T4j + T4o;
2899
                              Teu = T8t + T8u;
2900
                              T8s = T4j - T4o;
2901
                              T8v = T8t - T8u;
2902
                         }
2903
                         T45 = T3N + T44;
2904
                         T4q = T4e + T4p;
2905
                         TgJ = T45 - T4q;
2906
                         TgK = Ten + Teo;
2907
                         TgL = Tet + Teu;
2908
                         TgM = TgK - TgL;
2909
                         {
2910
                              E T8p, T8q, Tes, Tev;
2911
                              T8p = T8n - T8o;
2912
                              T8q = T3S - T43;
2913
                              T8r = T8p + T8q;
2914
                              Tc6 = T8p - T8q;
2915
                              Tes = T3N - T44;
2916
                              Tev = Tet - Teu;
2917
                              Tew = Tes - Tev;
2918
                              TfW = Tes + Tev;
2919
                         }
2920
                         {
2921
                              E T8w, T8B, T8J, T8K;
2922
                              T8w = T8s - T8v;
2923
                              T8B = T8x + T8A;
2924
                              T8C = KP707106781 * (T8w - T8B);
2925
                              Tc4 = KP707106781 * (T8B + T8w);
2926
                              T8J = T8A - T8x;
2927
                              T8K = T8s + T8v;
2928
                              T8L = KP707106781 * (T8J - T8K);
2929
                              Tc7 = KP707106781 * (T8J + T8K);
2930
                         }
2931
                         {
2932
                              E Tep, Teq, T8E, T8H;
2933
                              Tep = Ten - Teo;
2934
                              Teq = T4p - T4e;
2935
                              Ter = Tep - Teq;
2936
                              TfV = Tep + Teq;
2937
                              T8E = T3H - T3M;
2938
                              T8H = T8F - T8G;
2939
                              T8I = T8E - T8H;
2940
                              Tc3 = T8E + T8H;
2941
                         }
2942
                    }
2943
                    {
2944
                         E T5V, Tao, T64, Tap, T65, Tfi, T68, T9K, T6d, T9L, T6e, Tfj, T6o, Tf2, T9Q;
2945
                         E T9R, T6z, Tf3, T9T, T9W;
2946
                         {
2947
                              E T5T, T5U, T5Z, T63;
2948
                              T5T = ri[WS(rs, 63)];
2949
                              T5U = ii[WS(rs, 63)];
2950
                              T5V = FMA(TW, T5T, T10 * T5U);
2951
                              Tao = FNMS(T10, T5T, TW * T5U);
2952
                              T5Z = ri[WS(rs, 31)];
2953
                              T63 = ii[WS(rs, 31)];
2954
                              T64 = FMA(T5Y, T5Z, T62 * T63);
2955
                              Tap = FNMS(T62, T5Z, T5Y * T63);
2956
                         }
2957
                         T65 = T5V + T64;
2958
                         Tfi = Tao + Tap;
2959
                         {
2960
                              E T66, T67, T6a, T6c;
2961
                              T66 = ri[WS(rs, 15)];
2962
                              T67 = ii[WS(rs, 15)];
2963
                              T68 = FMA(TV, T66, TZ * T67);
2964
                              T9K = FNMS(TZ, T66, TV * T67);
2965
                              T6a = ri[WS(rs, 47)];
2966
                              T6c = ii[WS(rs, 47)];
2967
                              T6d = FMA(T69, T6a, T6b * T6c);
2968
                              T9L = FNMS(T6b, T6a, T69 * T6c);
2969
                         }
2970
                         T6e = T68 + T6d;
2971
                         Tfj = T9K + T9L;
2972
                         {
2973
                              E T6i, T9O, T6n, T9P;
2974
                              {
2975
                                   E T6g, T6h, T6k, T6m;
2976
                                   T6g = ri[WS(rs, 7)];
2977
                                   T6h = ii[WS(rs, 7)];
2978
                                   T6i = FMA(T1t, T6g, T1u * T6h);
2979
                                   T9O = FNMS(T1u, T6g, T1t * T6h);
2980
                                   T6k = ri[WS(rs, 39)];
2981
                                   T6m = ii[WS(rs, 39)];
2982
                                   T6n = FMA(T6j, T6k, T6l * T6m);
2983
                                   T9P = FNMS(T6l, T6k, T6j * T6m);
2984
                              }
2985
                              T6o = T6i + T6n;
2986
                              Tf2 = T9O + T9P;
2987
                              T9Q = T9O - T9P;
2988
                              T9R = T6i - T6n;
2989
                         }
2990
                         {
2991
                              E T6t, T9U, T6y, T9V;
2992
                              {
2993
                                   E T6q, T6s, T6v, T6x;
2994
                                   T6q = ri[WS(rs, 55)];
2995
                                   T6s = ii[WS(rs, 55)];
2996
                                   T6t = FMA(T6p, T6q, T6r * T6s);
2997
                                   T9U = FNMS(T6r, T6q, T6p * T6s);
2998
                                   T6v = ri[WS(rs, 23)];
2999
                                   T6x = ii[WS(rs, 23)];
3000
                                   T6y = FMA(T6u, T6v, T6w * T6x);
3001
                                   T9V = FNMS(T6w, T6v, T6u * T6x);
3002
                              }
3003
                              T6z = T6t + T6y;
3004
                              Tf3 = T9U + T9V;
3005
                              T9T = T6t - T6y;
3006
                              T9W = T9U - T9V;
3007
                         }
3008
                         {
3009
                              E T6f, T6A, Tfk, Tfl;
3010
                              T6f = T65 + T6e;
3011
                              T6A = T6o + T6z;
3012
                              T6B = T6f + T6A;
3013
                              Th1 = T6f - T6A;
3014
                              Tfk = Tfi - Tfj;
3015
                              Tfl = T6z - T6o;
3016
                              Tfm = Tfk - Tfl;
3017
                              Tga = Tfk + Tfl;
3018
                         }
3019
                         {
3020
                              E Th6, Th7, T9J, T9M;
3021
                              Th6 = Tfi + Tfj;
3022
                              Th7 = Tf2 + Tf3;
3023
                              Th8 = Th6 - Th7;
3024
                              ThI = Th6 + Th7;
3025
                              T9J = T5V - T64;
3026
                              T9M = T9K - T9L;
3027
                              T9N = T9J - T9M;
3028
                              Tcv = T9J + T9M;
3029
                         }
3030
                         {
3031
                              E T9S, T9X, Tat, Tau;
3032
                              T9S = T9Q - T9R;
3033
                              T9X = T9T + T9W;
3034
                              T9Y = KP707106781 * (T9S - T9X);
3035
                              TcH = KP707106781 * (T9S + T9X);
3036
                              Tat = T9T - T9W;
3037
                              Tau = T9R + T9Q;
3038
                              Tav = KP707106781 * (Tat - Tau);
3039
                              Tcw = KP707106781 * (Tau + Tat);
3040
                         }
3041
                         {
3042
                              E Tf1, Tf4, Taq, Tar;
3043
                              Tf1 = T65 - T6e;
3044
                              Tf4 = Tf2 - Tf3;
3045
                              Tf5 = Tf1 - Tf4;
3046
                              Tg7 = Tf1 + Tf4;
3047
                              Taq = Tao - Tap;
3048
                              Tar = T68 - T6d;
3049
                              Tas = Taq + Tar;
3050
                              TcG = Taq - Tar;
3051
                         }
3052
                    }
3053
                    {
3054
                         E T4w, T8Q, T4B, T8R, T4C, TeA, T4F, T9w, T4K, T9x, T4L, TeB, T4V, TeS, T90;
3055
                         E T93, T5a, TeT, T8V, T8Y;
3056
                         {
3057
                              E T4u, T4v, T4y, T4A;
3058
                              T4u = ri[WS(rs, 1)];
3059
                              T4v = ii[WS(rs, 1)];
3060
                              T4w = FMA(T2, T4u, T5 * T4v);
3061
                              T8Q = FNMS(T5, T4u, T2 * T4v);
3062
                              T4y = ri[WS(rs, 33)];
3063
                              T4A = ii[WS(rs, 33)];
3064
                              T4B = FMA(T4x, T4y, T4z * T4A);
3065
                              T8R = FNMS(T4z, T4y, T4x * T4A);
3066
                         }
3067
                         T4C = T4w + T4B;
3068
                         TeA = T8Q + T8R;
3069
                         {
3070
                              E T4D, T4E, T4H, T4J;
3071
                              T4D = ri[WS(rs, 17)];
3072
                              T4E = ii[WS(rs, 17)];
3073
                              T4F = FMA(T3V, T4D, T3Y * T4E);
3074
                              T9w = FNMS(T3Y, T4D, T3V * T4E);
3075
                              T4H = ri[WS(rs, 49)];
3076
                              T4J = ii[WS(rs, 49)];
3077
                              T4K = FMA(T4G, T4H, T4I * T4J);
3078
                              T9x = FNMS(T4I, T4H, T4G * T4J);
3079
                         }
3080
                         T4L = T4F + T4K;
3081
                         TeB = T9w + T9x;
3082
                         {
3083
                              E T4P, T91, T4U, T92;
3084
                              {
3085
                                   E T4N, T4O, T4R, T4T;
3086
                                   T4N = ri[WS(rs, 9)];
3087
                                   T4O = ii[WS(rs, 9)];
3088
                                   T4P = FMA(T9, T4N, Te * T4O);
3089
                                   T91 = FNMS(Te, T4N, T9 * T4O);
3090
                                   T4R = ri[WS(rs, 41)];
3091
                                   T4T = ii[WS(rs, 41)];
3092
                                   T4U = FMA(T4Q, T4R, T4S * T4T);
3093
                                   T92 = FNMS(T4S, T4R, T4Q * T4T);
3094
                              }
3095
                              T4V = T4P + T4U;
3096
                              TeS = T91 + T92;
3097
                              T90 = T4P - T4U;
3098
                              T93 = T91 - T92;
3099
                         }
3100
                         {
3101
                              E T50, T8W, T59, T8X;
3102
                              {
3103
                                   E T4X, T4Z, T54, T58;
3104
                                   T4X = ri[WS(rs, 57)];
3105
                                   T4Z = ii[WS(rs, 57)];
3106
                                   T50 = FMA(T4W, T4X, T4Y * T4Z);
3107
                                   T8W = FNMS(T4Y, T4X, T4W * T4Z);
3108
                                   T54 = ri[WS(rs, 25)];
3109
                                   T58 = ii[WS(rs, 25)];
3110
                                   T59 = FMA(T53, T54, T57 * T58);
3111
                                   T8X = FNMS(T57, T54, T53 * T58);
3112
                              }
3113
                              T5a = T50 + T59;
3114
                              TeT = T8W + T8X;
3115
                              T8V = T50 - T59;
3116
                              T8Y = T8W - T8X;
3117
                         }
3118
                         {
3119
                              E T4M, T5b, TeR, TeU;
3120
                              T4M = T4C + T4L;
3121
                              T5b = T4V + T5a;
3122
                              T5c = T4M + T5b;
3123
                              TgV = T4M - T5b;
3124
                              TeR = T4C - T4L;
3125
                              TeU = TeS - TeT;
3126
                              TeV = TeR - TeU;
3127
                              Tg0 = TeR + TeU;
3128
                         }
3129
                         {
3130
                              E TgQ, TgR, T8S, T8T;
3131
                              TgQ = TeA + TeB;
3132
                              TgR = TeS + TeT;
3133
                              TgS = TgQ - TgR;
3134
                              ThD = TgQ + TgR;
3135
                              T8S = T8Q - T8R;
3136
                              T8T = T4F - T4K;
3137
                              T8U = T8S + T8T;
3138
                              Tcc = T8S - T8T;
3139
                         }
3140
                         {
3141
                              E T8Z, T94, T9A, T9B;
3142
                              T8Z = T8V - T8Y;
3143
                              T94 = T90 + T93;
3144
                              T95 = KP707106781 * (T8Z - T94);
3145
                              Tco = KP707106781 * (T94 + T8Z);
3146
                              T9A = T93 - T90;
3147
                              T9B = T8V + T8Y;
3148
                              T9C = KP707106781 * (T9A - T9B);
3149
                              Tcd = KP707106781 * (T9A + T9B);
3150
                         }
3151
                         {
3152
                              E TeC, TeD, T9v, T9y;
3153
                              TeC = TeA - TeB;
3154
                              TeD = T5a - T4V;
3155
                              TeE = TeC - TeD;
3156
                              Tg3 = TeC + TeD;
3157
                              T9v = T4w - T4B;
3158
                              T9y = T9w - T9x;
3159
                              T9z = T9v - T9y;
3160
                              Tcn = T9v + T9y;
3161
                         }
3162
                    }
3163
                    {
3164
                         E T5l, TeL, T9k, T9n, T5P, TeH, T9a, T9f, T5u, TeM, T9l, T9q, T5G, TeG, T97;
3165
                         E T9e;
3166
                         {
3167
                              E T5f, T9i, T5k, T9j;
3168
                              {
3169
                                   E T5d, T5e, T5h, T5j;
3170
                                   T5d = ri[WS(rs, 5)];
3171
                                   T5e = ii[WS(rs, 5)];
3172
                                   T5f = FMA(Tg, T5d, Tl * T5e);
3173
                                   T9i = FNMS(Tl, T5d, Tg * T5e);
3174
                                   T5h = ri[WS(rs, 37)];
3175
                                   T5j = ii[WS(rs, 37)];
3176
                                   T5k = FMA(T5g, T5h, T5i * T5j);
3177
                                   T9j = FNMS(T5i, T5h, T5g * T5j);
3178
                              }
3179
                              T5l = T5f + T5k;
3180
                              TeL = T9i + T9j;
3181
                              T9k = T9i - T9j;
3182
                              T9n = T5f - T5k;
3183
                         }
3184
                         {
3185
                              E T5J, T98, T5O, T99;
3186
                              {
3187
                                   E T5H, T5I, T5L, T5N;
3188
                                   T5H = ri[WS(rs, 13)];
3189
                                   T5I = ii[WS(rs, 13)];
3190
                                   T5J = FMA(T1h, T5H, T1j * T5I);
3191
                                   T98 = FNMS(T1j, T5H, T1h * T5I);
3192
                                   T5L = ri[WS(rs, 45)];
3193
                                   T5N = ii[WS(rs, 45)];
3194
                                   T5O = FMA(T5K, T5L, T5M * T5N);
3195
                                   T99 = FNMS(T5M, T5L, T5K * T5N);
3196
                              }
3197
                              T5P = T5J + T5O;
3198
                              TeH = T98 + T99;
3199
                              T9a = T98 - T99;
3200
                              T9f = T5J - T5O;
3201
                         }
3202
                         {
3203
                              E T5o, T9o, T5t, T9p;
3204
                              {
3205
                                   E T5m, T5n, T5q, T5s;
3206
                                   T5m = ri[WS(rs, 21)];
3207
                                   T5n = ii[WS(rs, 21)];
3208
                                   T5o = FMA(T3g, T5m, T3j * T5n);
3209
                                   T9o = FNMS(T3j, T5m, T3g * T5n);
3210
                                   T5q = ri[WS(rs, 53)];
3211
                                   T5s = ii[WS(rs, 53)];
3212
                                   T5t = FMA(T5p, T5q, T5r * T5s);
3213
                                   T9p = FNMS(T5r, T5q, T5p * T5s);
3214
                              }
3215
                              T5u = T5o + T5t;
3216
                              TeM = T9o + T9p;
3217
                              T9l = T5o - T5t;
3218
                              T9q = T9o - T9p;
3219
                         }
3220
                         {
3221
                              E T5A, T9c, T5F, T9d;
3222
                              {
3223
                                   E T5x, T5z, T5C, T5E;
3224
                                   T5x = ri[WS(rs, 61)];
3225
                                   T5z = ii[WS(rs, 61)];
3226
                                   T5A = FMA(T5w, T5x, T5y * T5z);
3227
                                   T9c = FNMS(T5y, T5x, T5w * T5z);
3228
                                   T5C = ri[WS(rs, 29)];
3229
                                   T5E = ii[WS(rs, 29)];
3230
                                   T5F = FMA(T5B, T5C, T5D * T5E);
3231
                                   T9d = FNMS(T5D, T5C, T5B * T5E);
3232
                              }
3233
                              T5G = T5A + T5F;
3234
                              TeG = T9c + T9d;
3235
                              T97 = T5A - T5F;
3236
                              T9e = T9c - T9d;
3237
                         }
3238
                         {
3239
                              E T5v, T5Q, TeK, TeN;
3240
                              T5v = T5l + T5u;
3241
                              T5Q = T5G + T5P;
3242
                              T5R = T5v + T5Q;
3243
                              TgT = T5Q - T5v;
3244
                              TeK = T5l - T5u;
3245
                              TeN = TeL - TeM;
3246
                              TeO = TeK + TeN;
3247
                              TeW = TeN - TeK;
3248
                         }
3249
                         {
3250
                              E TgW, TgX, T9b, T9g;
3251
                              TgW = TeL + TeM;
3252
                              TgX = TeG + TeH;
3253
                              TgY = TgW - TgX;
3254
                              ThE = TgW + TgX;
3255
                              T9b = T97 - T9a;
3256
                              T9g = T9e + T9f;
3257
                              T9h = FNMS(KP923879532, T9g, KP382683432 * T9b);
3258
                              T9F = FMA(KP382683432, T9g, KP923879532 * T9b);
3259
                         }
3260
                         {
3261
                              E T9m, T9r, Tci, Tcj;
3262
                              T9m = T9k + T9l;
3263
                              T9r = T9n - T9q;
3264
                              T9s = FMA(KP923879532, T9m, KP382683432 * T9r);
3265
                              T9E = FNMS(KP923879532, T9r, KP382683432 * T9m);
3266
                              Tci = T9k - T9l;
3267
                              Tcj = T9n + T9q;
3268
                              Tck = FMA(KP382683432, Tci, KP923879532 * Tcj);
3269
                              Tcq = FNMS(KP382683432, Tcj, KP923879532 * Tci);
3270
                         }
3271
                         {
3272
                              E TeF, TeI, Tcf, Tcg;
3273
                              TeF = T5G - T5P;
3274
                              TeI = TeG - TeH;
3275
                              TeJ = TeF - TeI;
3276
                              TeX = TeF + TeI;
3277
                              Tcf = T97 + T9a;
3278
                              Tcg = T9e - T9f;
3279
                              Tch = FNMS(KP382683432, Tcg, KP923879532 * Tcf);
3280
                              Tcr = FMA(KP923879532, Tcg, KP382683432 * Tcf);
3281
                         }
3282
                    }
3283
                    {
3284
                         E T6K, Tf6, Ta2, Ta5, T7c, Tfd, Tae, Taj, T6T, Tf7, Ta3, Ta8, T73, Tfc, Tad;
3285
                         E Tag;
3286
                         {
3287
                              E T6E, Ta0, T6J, Ta1;
3288
                              {
3289
                                   E T6C, T6D, T6G, T6I;
3290
                                   T6C = ri[WS(rs, 3)];
3291
                                   T6D = ii[WS(rs, 3)];
3292
                                   T6E = FMA(T3, T6C, T6 * T6D);
3293
                                   Ta0 = FNMS(T6, T6C, T3 * T6D);
3294
                                   T6G = ri[WS(rs, 35)];
3295
                                   T6I = ii[WS(rs, 35)];
3296
                                   T6J = FMA(T6F, T6G, T6H * T6I);
3297
                                   Ta1 = FNMS(T6H, T6G, T6F * T6I);
3298
                              }
3299
                              T6K = T6E + T6J;
3300
                              Tf6 = Ta0 + Ta1;
3301
                              Ta2 = Ta0 - Ta1;
3302
                              Ta5 = T6E - T6J;
3303
                         }
3304
                         {
3305
                              E T76, Tah, T7b, Tai;
3306
                              {
3307
                                   E T74, T75, T78, T7a;
3308
                                   T74 = ri[WS(rs, 11)];
3309
                                   T75 = ii[WS(rs, 11)];
3310
                                   T76 = FMA(TA, T74, TE * T75);
3311
                                   Tah = FNMS(TE, T74, TA * T75);
3312
                                   T78 = ri[WS(rs, 43)];
3313
                                   T7a = ii[WS(rs, 43)];
3314
                                   T7b = FMA(T77, T78, T79 * T7a);
3315
                                   Tai = FNMS(T79, T78, T77 * T7a);
3316
                              }
3317
                              T7c = T76 + T7b;
3318
                              Tfd = Tah + Tai;
3319
                              Tae = T76 - T7b;
3320
                              Taj = Tah - Tai;
3321
                         }
3322
                         {
3323
                              E T6N, Ta6, T6S, Ta7;
3324
                              {
3325
                                   E T6L, T6M, T6P, T6R;
3326
                                   T6L = ri[WS(rs, 19)];
3327
                                   T6M = ii[WS(rs, 19)];
3328
                                   T6N = FMA(T2z, T6L, T2C * T6M);
3329
                                   Ta6 = FNMS(T2C, T6L, T2z * T6M);
3330
                                   T6P = ri[WS(rs, 51)];
3331
                                   T6R = ii[WS(rs, 51)];
3332
                                   T6S = FMA(T6O, T6P, T6Q * T6R);
3333
                                   Ta7 = FNMS(T6Q, T6P, T6O * T6R);
3334
                              }
3335
                              T6T = T6N + T6S;
3336
                              Tf7 = Ta6 + Ta7;
3337
                              Ta3 = T6N - T6S;
3338
                              Ta8 = Ta6 - Ta7;
3339
                         }
3340
                         {
3341
                              E T6Z, Tab, T72, Tac;
3342
                              {
3343
                                   E T6W, T6Y, T70, T71;
3344
                                   T6W = ri[WS(rs, 59)];
3345
                                   T6Y = ii[WS(rs, 59)];
3346
                                   T6Z = FMA(T6V, T6W, T6X * T6Y);
3347
                                   Tab = FNMS(T6X, T6W, T6V * T6Y);
3348
                                   T70 = ri[WS(rs, 27)];
3349
                                   T71 = ii[WS(rs, 27)];
3350
                                   T72 = FMA(Th, T70, Tm * T71);
3351
                                   Tac = FNMS(Tm, T70, Th * T71);
3352
                              }
3353
                              T73 = T6Z + T72;
3354
                              Tfc = Tab + Tac;
3355
                              Tad = Tab - Tac;
3356
                              Tag = T6Z - T72;
3357
                         }
3358
                         {
3359
                              E T6U, T7d, Tfb, Tfe;
3360
                              T6U = T6K + T6T;
3361
                              T7d = T73 + T7c;
3362
                              T7e = T6U + T7d;
3363
                              Th9 = T7d - T6U;
3364
                              Tfb = T73 - T7c;
3365
                              Tfe = Tfc - Tfd;
3366
                              Tff = Tfb + Tfe;
3367
                              Tfn = Tfb - Tfe;
3368
                         }
3369
                         {
3370
                              E Th2, Th3, Ta4, Ta9;
3371
                              Th2 = Tf6 + Tf7;
3372
                              Th3 = Tfc + Tfd;
3373
                              Th4 = Th2 - Th3;
3374
                              ThJ = Th2 + Th3;
3375
                              Ta4 = Ta2 + Ta3;
3376
                              Ta9 = Ta5 - Ta8;
3377
                              Taa = FNMS(KP923879532, Ta9, KP382683432 * Ta4);
3378
                              Tay = FMA(KP923879532, Ta4, KP382683432 * Ta9);
3379
                         }
3380
                         {
3381
                              E Taf, Tak, TcB, TcC;
3382
                              Taf = Tad + Tae;
3383
                              Tak = Tag - Taj;
3384
                              Tal = FMA(KP382683432, Taf, KP923879532 * Tak);
3385
                              Tax = FNMS(KP923879532, Taf, KP382683432 * Tak);
3386
                              TcB = Tad - Tae;
3387
                              TcC = Tag + Taj;
3388
                              TcD = FMA(KP923879532, TcB, KP382683432 * TcC);
3389
                              TcJ = FNMS(KP382683432, TcB, KP923879532 * TcC);
3390
                         }
3391
                         {
3392
                              E Tf8, Tf9, Tcy, Tcz;
3393
                              Tf8 = Tf6 - Tf7;
3394
                              Tf9 = T6K - T6T;
3395
                              Tfa = Tf8 - Tf9;
3396
                              Tfo = Tf9 + Tf8;
3397
                              Tcy = Ta2 - Ta3;
3398
                              Tcz = Ta5 + Ta8;
3399
                              TcA = FNMS(KP382683432, Tcz, KP923879532 * Tcy);
3400
                              TcK = FMA(KP382683432, Tcy, KP923879532 * Tcz);
3401
                         }
3402
                    }
3403
                    {
3404
                         E T2L, Thx, ThU, ThV, Ti5, Tib, T4s, Tia, T7g, Ti7, ThG, ThO, ThL, ThP, ThA;
3405
                         E ThW;
3406
                         {
3407
                              E T1L, T2K, ThS, ThT;
3408
                              T1L = T17 + T1K;
3409
                              T2K = T2e + T2J;
3410
                              T2L = T1L + T2K;
3411
                              Thx = T1L - T2K;
3412
                              ThS = ThD + ThE;
3413
                              ThT = ThI + ThJ;
3414
                              ThU = ThS - ThT;
3415
                              ThV = ThS + ThT;
3416
                         }
3417
                         {
3418
                              E ThX, Ti4, T3C, T4r;
3419
                              ThX = TgA + TgB;
3420
                              Ti4 = ThY + Ti3;
3421
                              Ti5 = ThX + Ti4;
3422
                              Tib = Ti4 - ThX;
3423
                              T3C = T36 + T3B;
3424
                              T4r = T45 + T4q;
3425
                              T4s = T3C + T4r;
3426
                              Tia = T4r - T3C;
3427
                         }
3428
                         {
3429
                              E T5S, T7f, ThC, ThF;
3430
                              T5S = T5c + T5R;
3431
                              T7f = T6B + T7e;
3432
                              T7g = T5S + T7f;
3433
                              Ti7 = T7f - T5S;
3434
                              ThC = T5c - T5R;
3435
                              ThF = ThD - ThE;
3436
                              ThG = ThC + ThF;
3437
                              ThO = ThF - ThC;
3438
                         }
3439
                         {
3440
                              E ThH, ThK, Thy, Thz;
3441
                              ThH = T6B - T7e;
3442
                              ThK = ThI - ThJ;
3443
                              ThL = ThH - ThK;
3444
                              ThP = ThH + ThK;
3445
                              Thy = TgE + TgF;
3446
                              Thz = TgK + TgL;
3447
                              ThA = Thy - Thz;
3448
                              ThW = Thy + Thz;
3449
                         }
3450
                         {
3451
                              E T4t, Ti6, ThR, Ti8;
3452
                              T4t = T2L + T4s;
3453
                              ri[WS(rs, 32)] = T4t - T7g;
3454
                              ri[0] = T4t + T7g;
3455
                              Ti6 = ThW + Ti5;
3456
                              ii[0] = ThV + Ti6;
3457
                              ii[WS(rs, 32)] = Ti6 - ThV;
3458
                              ThR = T2L - T4s;
3459
                              ri[WS(rs, 48)] = ThR - ThU;
3460
                              ri[WS(rs, 16)] = ThR + ThU;
3461
                              Ti8 = Ti5 - ThW;
3462
                              ii[WS(rs, 16)] = Ti7 + Ti8;
3463
                              ii[WS(rs, 48)] = Ti8 - Ti7;
3464
                         }
3465
                         {
3466
                              E ThB, ThM, Ti9, Tic;
3467
                              ThB = Thx + ThA;
3468
                              ThM = KP707106781 * (ThG + ThL);
3469
                              ri[WS(rs, 40)] = ThB - ThM;
3470
                              ri[WS(rs, 8)] = ThB + ThM;
3471
                              Ti9 = KP707106781 * (ThO + ThP);
3472
                              Tic = Tia + Tib;
3473
                              ii[WS(rs, 8)] = Ti9 + Tic;
3474
                              ii[WS(rs, 40)] = Tic - Ti9;
3475
                         }
3476
                         {
3477
                              E ThN, ThQ, Tid, Tie;
3478
                              ThN = Thx - ThA;
3479
                              ThQ = KP707106781 * (ThO - ThP);
3480
                              ri[WS(rs, 56)] = ThN - ThQ;
3481
                              ri[WS(rs, 24)] = ThN + ThQ;
3482
                              Tid = KP707106781 * (ThL - ThG);
3483
                              Tie = Tib - Tia;
3484
                              ii[WS(rs, 24)] = Tid + Tie;
3485
                              ii[WS(rs, 56)] = Tie - Tid;
3486
                         }
3487
                    }
3488
                    {
3489
                         E TgD, Thh, Thr, Thv, Tij, Tip, TgO, Tig, Th0, The, Thk, Tio, Tho, Thu, Thb;
3490
                         E Thf;
3491
                         {
3492
                              E Tgz, TgC, Thp, Thq;
3493
                              Tgz = T17 - T1K;
3494
                              TgC = TgA - TgB;
3495
                              TgD = Tgz - TgC;
3496
                              Thh = Tgz + TgC;
3497
                              Thp = Th1 + Th4;
3498
                              Thq = Th8 + Th9;
3499
                              Thr = FNMS(KP382683432, Thq, KP923879532 * Thp);
3500
                              Thv = FMA(KP923879532, Thq, KP382683432 * Thp);
3501
                         }
3502
                         {
3503
                              E Tih, Tii, TgI, TgN;
3504
                              Tih = T2J - T2e;
3505
                              Tii = Ti3 - ThY;
3506
                              Tij = Tih + Tii;
3507
                              Tip = Tii - Tih;
3508
                              TgI = TgG - TgH;
3509
                              TgN = TgJ + TgM;
3510
                              TgO = KP707106781 * (TgI - TgN);
3511
                              Tig = KP707106781 * (TgI + TgN);
3512
                         }
3513
                         {
3514
                              E TgU, TgZ, Thi, Thj;
3515
                              TgU = TgS - TgT;
3516
                              TgZ = TgV - TgY;
3517
                              Th0 = FMA(KP923879532, TgU, KP382683432 * TgZ);
3518
                              The = FNMS(KP923879532, TgZ, KP382683432 * TgU);
3519
                              Thi = TgH + TgG;
3520
                              Thj = TgJ - TgM;
3521
                              Thk = KP707106781 * (Thi + Thj);
3522
                              Tio = KP707106781 * (Thj - Thi);
3523
                         }
3524
                         {
3525
                              E Thm, Thn, Th5, Tha;
3526
                              Thm = TgS + TgT;
3527
                              Thn = TgV + TgY;
3528
                              Tho = FMA(KP382683432, Thm, KP923879532 * Thn);
3529
                              Thu = FNMS(KP382683432, Thn, KP923879532 * Thm);
3530
                              Th5 = Th1 - Th4;
3531
                              Tha = Th8 - Th9;
3532
                              Thb = FNMS(KP923879532, Tha, KP382683432 * Th5);
3533
                              Thf = FMA(KP382683432, Tha, KP923879532 * Th5);
3534
                         }
3535
                         {
3536
                              E TgP, Thc, Tin, Tiq;
3537
                              TgP = TgD + TgO;
3538
                              Thc = Th0 + Thb;
3539
                              ri[WS(rs, 44)] = TgP - Thc;
3540
                              ri[WS(rs, 12)] = TgP + Thc;
3541
                              Tin = The + Thf;
3542
                              Tiq = Tio + Tip;
3543
                              ii[WS(rs, 12)] = Tin + Tiq;
3544
                              ii[WS(rs, 44)] = Tiq - Tin;
3545
                         }
3546
                         {
3547
                              E Thd, Thg, Tir, Tis;
3548
                              Thd = TgD - TgO;
3549
                              Thg = The - Thf;
3550
                              ri[WS(rs, 60)] = Thd - Thg;
3551
                              ri[WS(rs, 28)] = Thd + Thg;
3552
                              Tir = Thb - Th0;
3553
                              Tis = Tip - Tio;
3554
                              ii[WS(rs, 28)] = Tir + Tis;
3555
                              ii[WS(rs, 60)] = Tis - Tir;
3556
                         }
3557
                         {
3558
                              E Thl, Ths, Tif, Tik;
3559
                              Thl = Thh + Thk;
3560
                              Ths = Tho + Thr;
3561
                              ri[WS(rs, 36)] = Thl - Ths;
3562
                              ri[WS(rs, 4)] = Thl + Ths;
3563
                              Tif = Thu + Thv;
3564
                              Tik = Tig + Tij;
3565
                              ii[WS(rs, 4)] = Tif + Tik;
3566
                              ii[WS(rs, 36)] = Tik - Tif;
3567
                         }
3568
                         {
3569
                              E Tht, Thw, Til, Tim;
3570
                              Tht = Thh - Thk;
3571
                              Thw = Thu - Thv;
3572
                              ri[WS(rs, 52)] = Tht - Thw;
3573
                              ri[WS(rs, 20)] = Tht + Thw;
3574
                              Til = Thr - Tho;
3575
                              Tim = Tij - Tig;
3576
                              ii[WS(rs, 20)] = Til + Tim;
3577
                              ii[WS(rs, 52)] = Tim - Til;
3578
                         }
3579
                    }
3580
                    {
3581
                         E Teb, Tfx, Tey, TiK, TiN, TiT, TfA, TiS, Tfr, TfL, Tfv, TfH, Tf0, TfK, Tfu;
3582
                         E TfE;
3583
                         {
3584
                              E TdZ, Tea, Tfy, Tfz;
3585
                              TdZ = TdV - TdY;
3586
                              Tea = KP707106781 * (Te4 - Te9);
3587
                              Teb = TdZ - Tea;
3588
                              Tfx = TdZ + Tea;
3589
                              {
3590
                                   E Tem, Tex, TiL, TiM;
3591
                                   Tem = FNMS(KP923879532, Tel, KP382683432 * Teg);
3592
                                   Tex = FMA(KP382683432, Ter, KP923879532 * Tew);
3593
                                   Tey = Tem - Tex;
3594
                                   TiK = Tem + Tex;
3595
                                   TiL = KP707106781 * (TfP - TfO);
3596
                                   TiM = Tix - Tiw;
3597
                                   TiN = TiL + TiM;
3598
                                   TiT = TiM - TiL;
3599
                              }
3600
                              Tfy = FMA(KP923879532, Teg, KP382683432 * Tel);
3601
                              Tfz = FNMS(KP923879532, Ter, KP382683432 * Tew);
3602
                              TfA = Tfy + Tfz;
3603
                              TiS = Tfz - Tfy;
3604
                              {
3605
                                   E Tfh, TfF, Tfq, TfG, Tfg, Tfp;
3606
                                   Tfg = KP707106781 * (Tfa - Tff);
3607
                                   Tfh = Tf5 - Tfg;
3608
                                   TfF = Tf5 + Tfg;
3609
                                   Tfp = KP707106781 * (Tfn - Tfo);
3610
                                   Tfq = Tfm - Tfp;
3611
                                   TfG = Tfm + Tfp;
3612
                                   Tfr = FNMS(KP980785280, Tfq, KP195090322 * Tfh);
3613
                                   TfL = FMA(KP831469612, TfG, KP555570233 * TfF);
3614
                                   Tfv = FMA(KP195090322, Tfq, KP980785280 * Tfh);
3615
                                   TfH = FNMS(KP555570233, TfG, KP831469612 * TfF);
3616
                              }
3617
                              {
3618
                                   E TeQ, TfC, TeZ, TfD, TeP, TeY;
3619
                                   TeP = KP707106781 * (TeJ - TeO);
3620
                                   TeQ = TeE - TeP;
3621
                                   TfC = TeE + TeP;
3622
                                   TeY = KP707106781 * (TeW - TeX);
3623
                                   TeZ = TeV - TeY;
3624
                                   TfD = TeV + TeY;
3625
                                   Tf0 = FMA(KP980785280, TeQ, KP195090322 * TeZ);
3626
                                   TfK = FNMS(KP555570233, TfD, KP831469612 * TfC);
3627
                                   Tfu = FNMS(KP980785280, TeZ, KP195090322 * TeQ);
3628
                                   TfE = FMA(KP555570233, TfC, KP831469612 * TfD);
3629
                              }
3630
                         }
3631
                         {
3632
                              E Tez, Tfs, TiR, TiU;
3633
                              Tez = Teb + Tey;
3634
                              Tfs = Tf0 + Tfr;
3635
                              ri[WS(rs, 46)] = Tez - Tfs;
3636
                              ri[WS(rs, 14)] = Tez + Tfs;
3637
                              TiR = Tfu + Tfv;
3638
                              TiU = TiS + TiT;
3639
                              ii[WS(rs, 14)] = TiR + TiU;
3640
                              ii[WS(rs, 46)] = TiU - TiR;
3641
                         }
3642
                         {
3643
                              E Tft, Tfw, TiV, TiW;
3644
                              Tft = Teb - Tey;
3645
                              Tfw = Tfu - Tfv;
3646
                              ri[WS(rs, 62)] = Tft - Tfw;
3647
                              ri[WS(rs, 30)] = Tft + Tfw;
3648
                              TiV = Tfr - Tf0;
3649
                              TiW = TiT - TiS;
3650
                              ii[WS(rs, 30)] = TiV + TiW;
3651
                              ii[WS(rs, 62)] = TiW - TiV;
3652
                         }
3653
                         {
3654
                              E TfB, TfI, TiJ, TiO;
3655
                              TfB = Tfx + TfA;
3656
                              TfI = TfE + TfH;
3657
                              ri[WS(rs, 38)] = TfB - TfI;
3658
                              ri[WS(rs, 6)] = TfB + TfI;
3659
                              TiJ = TfK + TfL;
3660
                              TiO = TiK + TiN;
3661
                              ii[WS(rs, 6)] = TiJ + TiO;
3662
                              ii[WS(rs, 38)] = TiO - TiJ;
3663
                         }
3664
                         {
3665
                              E TfJ, TfM, TiP, TiQ;
3666
                              TfJ = Tfx - TfA;
3667
                              TfM = TfK - TfL;
3668
                              ri[WS(rs, 54)] = TfJ - TfM;
3669
                              ri[WS(rs, 22)] = TfJ + TfM;
3670
                              TiP = TfH - TfE;
3671
                              TiQ = TiN - TiK;
3672
                              ii[WS(rs, 22)] = TiP + TiQ;
3673
                              ii[WS(rs, 54)] = TiQ - TiP;
3674
                         }
3675
                    }
3676
                    {
3677
                         E TfR, Tgj, TfY, Tiu, Tiz, TiF, Tgm, TiE, Tgd, Tgx, Tgh, Tgt, Tg6, Tgw, Tgg;
3678
                         E Tgq;
3679
                         {
3680
                              E TfN, TfQ, Tgk, Tgl;
3681
                              TfN = TdV + TdY;
3682
                              TfQ = KP707106781 * (TfO + TfP);
3683
                              TfR = TfN - TfQ;
3684
                              Tgj = TfN + TfQ;
3685
                              {
3686
                                   E TfU, TfX, Tiv, Tiy;
3687
                                   TfU = FNMS(KP382683432, TfT, KP923879532 * TfS);
3688
                                   TfX = FMA(KP923879532, TfV, KP382683432 * TfW);
3689
                                   TfY = TfU - TfX;
3690
                                   Tiu = TfU + TfX;
3691
                                   Tiv = KP707106781 * (Te4 + Te9);
3692
                                   Tiy = Tiw + Tix;
3693
                                   Tiz = Tiv + Tiy;
3694
                                   TiF = Tiy - Tiv;
3695
                              }
3696
                              Tgk = FMA(KP382683432, TfS, KP923879532 * TfT);
3697
                              Tgl = FNMS(KP382683432, TfV, KP923879532 * TfW);
3698
                              Tgm = Tgk + Tgl;
3699
                              TiE = Tgl - Tgk;
3700
                              {
3701
                                   E Tg9, Tgr, Tgc, Tgs, Tg8, Tgb;
3702
                                   Tg8 = KP707106781 * (Tfo + Tfn);
3703
                                   Tg9 = Tg7 - Tg8;
3704
                                   Tgr = Tg7 + Tg8;
3705
                                   Tgb = KP707106781 * (Tfa + Tff);
3706
                                   Tgc = Tga - Tgb;
3707
                                   Tgs = Tga + Tgb;
3708
                                   Tgd = FNMS(KP831469612, Tgc, KP555570233 * Tg9);
3709
                                   Tgx = FMA(KP195090322, Tgr, KP980785280 * Tgs);
3710
                                   Tgh = FMA(KP831469612, Tg9, KP555570233 * Tgc);
3711
                                   Tgt = FNMS(KP195090322, Tgs, KP980785280 * Tgr);
3712
                              }
3713
                              {
3714
                                   E Tg2, Tgo, Tg5, Tgp, Tg1, Tg4;
3715
                                   Tg1 = KP707106781 * (TeO + TeJ);
3716
                                   Tg2 = Tg0 - Tg1;
3717
                                   Tgo = Tg0 + Tg1;
3718
                                   Tg4 = KP707106781 * (TeW + TeX);
3719
                                   Tg5 = Tg3 - Tg4;
3720
                                   Tgp = Tg3 + Tg4;
3721
                                   Tg6 = FMA(KP555570233, Tg2, KP831469612 * Tg5);
3722
                                   Tgw = FNMS(KP195090322, Tgo, KP980785280 * Tgp);
3723
                                   Tgg = FNMS(KP831469612, Tg2, KP555570233 * Tg5);
3724
                                   Tgq = FMA(KP980785280, Tgo, KP195090322 * Tgp);
3725
                              }
3726
                         }
3727
                         {
3728
                              E TfZ, Tge, TiD, TiG;
3729
                              TfZ = TfR + TfY;
3730
                              Tge = Tg6 + Tgd;
3731
                              ri[WS(rs, 42)] = TfZ - Tge;
3732
                              ri[WS(rs, 10)] = TfZ + Tge;
3733
                              TiD = Tgg + Tgh;
3734
                              TiG = TiE + TiF;
3735
                              ii[WS(rs, 10)] = TiD + TiG;
3736
                              ii[WS(rs, 42)] = TiG - TiD;
3737
                         }
3738
                         {
3739
                              E Tgf, Tgi, TiH, TiI;
3740
                              Tgf = TfR - TfY;
3741
                              Tgi = Tgg - Tgh;
3742
                              ri[WS(rs, 58)] = Tgf - Tgi;
3743
                              ri[WS(rs, 26)] = Tgf + Tgi;
3744
                              TiH = Tgd - Tg6;
3745
                              TiI = TiF - TiE;
3746
                              ii[WS(rs, 26)] = TiH + TiI;
3747
                              ii[WS(rs, 58)] = TiI - TiH;
3748
                         }
3749
                         {
3750
                              E Tgn, Tgu, Tit, TiA;
3751
                              Tgn = Tgj + Tgm;
3752
                              Tgu = Tgq + Tgt;
3753
                              ri[WS(rs, 34)] = Tgn - Tgu;
3754
                              ri[WS(rs, 2)] = Tgn + Tgu;
3755
                              Tit = Tgw + Tgx;
3756
                              TiA = Tiu + Tiz;
3757
                              ii[WS(rs, 2)] = Tit + TiA;
3758
                              ii[WS(rs, 34)] = TiA - Tit;
3759
                         }
3760
                         {
3761
                              E Tgv, Tgy, TiB, TiC;
3762
                              Tgv = Tgj - Tgm;
3763
                              Tgy = Tgw - Tgx;
3764
                              ri[WS(rs, 50)] = Tgv - Tgy;
3765
                              ri[WS(rs, 18)] = Tgv + Tgy;
3766
                              TiB = Tgt - Tgq;
3767
                              TiC = Tiz - Tiu;
3768
                              ii[WS(rs, 18)] = TiB + TiC;
3769
                              ii[WS(rs, 50)] = TiC - TiB;
3770
                         }
3771
                    }
3772
                    {
3773
                         E T7V, TaH, TjN, TjT, T8O, TjS, TaK, TjK, T9I, TaU, TaE, TaO, TaB, TaV, TaF;
3774
                         E TaR;
3775
                         {
3776
                              E T7x, T7U, TjL, TjM;
3777
                              T7x = T7l - T7w;
3778
                              T7U = T7I - T7T;
3779
                              T7V = T7x - T7U;
3780
                              TaH = T7x + T7U;
3781
                              TjL = TaZ - TaY;
3782
                              TjM = Tjx - Tjw;
3783
                              TjN = TjL + TjM;
3784
                              TjT = TjM - TjL;
3785
                         }
3786
                         {
3787
                              E T8m, TaI, T8N, TaJ;
3788
                              {
3789
                                   E T8c, T8l, T8D, T8M;
3790
                                   T8c = T80 - T8b;
3791
                                   T8l = T8h - T8k;
3792
                                   T8m = FNMS(KP980785280, T8l, KP195090322 * T8c);
3793
                                   TaI = FMA(KP980785280, T8c, KP195090322 * T8l);
3794
                                   T8D = T8r - T8C;
3795
                                   T8M = T8I - T8L;
3796
                                   T8N = FMA(KP195090322, T8D, KP980785280 * T8M);
3797
                                   TaJ = FNMS(KP980785280, T8D, KP195090322 * T8M);
3798
                              }
3799
                              T8O = T8m - T8N;
3800
                              TjS = TaJ - TaI;
3801
                              TaK = TaI + TaJ;
3802
                              TjK = T8m + T8N;
3803
                         }
3804
                         {
3805
                              E T9u, TaM, T9H, TaN;
3806
                              {
3807
                                   E T96, T9t, T9D, T9G;
3808
                                   T96 = T8U - T95;
3809
                                   T9t = T9h - T9s;
3810
                                   T9u = T96 - T9t;
3811
                                   TaM = T96 + T9t;
3812
                                   T9D = T9z - T9C;
3813
                                   T9G = T9E - T9F;
3814
                                   T9H = T9D - T9G;
3815
                                   TaN = T9D + T9G;
3816
                              }
3817
                              T9I = FMA(KP995184726, T9u, KP098017140 * T9H);
3818
                              TaU = FNMS(KP634393284, TaN, KP773010453 * TaM);
3819
                              TaE = FNMS(KP995184726, T9H, KP098017140 * T9u);
3820
                              TaO = FMA(KP634393284, TaM, KP773010453 * TaN);
3821
                         }
3822
                         {
3823
                              E Tan, TaP, TaA, TaQ;
3824
                              {
3825
                                   E T9Z, Tam, Taw, Taz;
3826
                                   T9Z = T9N - T9Y;
3827
                                   Tam = Taa - Tal;
3828
                                   Tan = T9Z - Tam;
3829
                                   TaP = T9Z + Tam;
3830
                                   Taw = Tas - Tav;
3831
                                   Taz = Tax - Tay;
3832
                                   TaA = Taw - Taz;
3833
                                   TaQ = Taw + Taz;
3834
                              }
3835
                              TaB = FNMS(KP995184726, TaA, KP098017140 * Tan);
3836
                              TaV = FMA(KP773010453, TaQ, KP634393284 * TaP);
3837
                              TaF = FMA(KP098017140, TaA, KP995184726 * Tan);
3838
                              TaR = FNMS(KP634393284, TaQ, KP773010453 * TaP);
3839
                         }
3840
                         {
3841
                              E T8P, TaC, TjR, TjU;
3842
                              T8P = T7V + T8O;
3843
                              TaC = T9I + TaB;
3844
                              ri[WS(rs, 47)] = T8P - TaC;
3845
                              ri[WS(rs, 15)] = T8P + TaC;
3846
                              TjR = TaE + TaF;
3847
                              TjU = TjS + TjT;
3848
                              ii[WS(rs, 15)] = TjR + TjU;
3849
                              ii[WS(rs, 47)] = TjU - TjR;
3850
                         }
3851
                         {
3852
                              E TaD, TaG, TjV, TjW;
3853
                              TaD = T7V - T8O;
3854
                              TaG = TaE - TaF;
3855
                              ri[WS(rs, 63)] = TaD - TaG;
3856
                              ri[WS(rs, 31)] = TaD + TaG;
3857
                              TjV = TaB - T9I;
3858
                              TjW = TjT - TjS;
3859
                              ii[WS(rs, 31)] = TjV + TjW;
3860
                              ii[WS(rs, 63)] = TjW - TjV;
3861
                         }
3862
                         {
3863
                              E TaL, TaS, TjJ, TjO;
3864
                              TaL = TaH + TaK;
3865
                              TaS = TaO + TaR;
3866
                              ri[WS(rs, 39)] = TaL - TaS;
3867
                              ri[WS(rs, 7)] = TaL + TaS;
3868
                              TjJ = TaU + TaV;
3869
                              TjO = TjK + TjN;
3870
                              ii[WS(rs, 7)] = TjJ + TjO;
3871
                              ii[WS(rs, 39)] = TjO - TjJ;
3872
                         }
3873
                         {
3874
                              E TaT, TaW, TjP, TjQ;
3875
                              TaT = TaH - TaK;
3876
                              TaW = TaU - TaV;
3877
                              ri[WS(rs, 55)] = TaT - TaW;
3878
                              ri[WS(rs, 23)] = TaT + TaW;
3879
                              TjP = TaR - TaO;
3880
                              TjQ = TjN - TjK;
3881
                              ii[WS(rs, 23)] = TjP + TjQ;
3882
                              ii[WS(rs, 55)] = TjQ - TjP;
3883
                         }
3884
                    }
3885
                    {
3886
                         E TbV, TcT, Tjj, Tjp, Tca, Tjo, TcW, Tjg, Tcu, Td6, TcQ, Td0, TcN, Td7, TcR;
3887
                         E Td3;
3888
                         {
3889
                              E TbN, TbU, Tjh, Tji;
3890
                              TbN = TbJ - TbM;
3891
                              TbU = TbQ - TbT;
3892
                              TbV = TbN - TbU;
3893
                              TcT = TbN + TbU;
3894
                              Tjh = Tdb - Tda;
3895
                              Tji = Tj3 - Tj0;
3896
                              Tjj = Tjh + Tji;
3897
                              Tjp = Tji - Tjh;
3898
                         }
3899
                         {
3900
                              E Tc2, TcU, Tc9, TcV;
3901
                              {
3902
                                   E TbY, Tc1, Tc5, Tc8;
3903
                                   TbY = TbW - TbX;
3904
                                   Tc1 = TbZ - Tc0;
3905
                                   Tc2 = FNMS(KP831469612, Tc1, KP555570233 * TbY);
3906
                                   TcU = FMA(KP555570233, Tc1, KP831469612 * TbY);
3907
                                   Tc5 = Tc3 - Tc4;
3908
                                   Tc8 = Tc6 - Tc7;
3909
                                   Tc9 = FMA(KP831469612, Tc5, KP555570233 * Tc8);
3910
                                   TcV = FNMS(KP831469612, Tc8, KP555570233 * Tc5);
3911
                              }
3912
                              Tca = Tc2 - Tc9;
3913
                              Tjo = TcV - TcU;
3914
                              TcW = TcU + TcV;
3915
                              Tjg = Tc2 + Tc9;
3916
                         }
3917
                         {
3918
                              E Tcm, TcY, Tct, TcZ;
3919
                              {
3920
                                   E Tce, Tcl, Tcp, Tcs;
3921
                                   Tce = Tcc - Tcd;
3922
                                   Tcl = Tch - Tck;
3923
                                   Tcm = Tce - Tcl;
3924
                                   TcY = Tce + Tcl;
3925
                                   Tcp = Tcn - Tco;
3926
                                   Tcs = Tcq - Tcr;
3927
                                   Tct = Tcp - Tcs;
3928
                                   TcZ = Tcp + Tcs;
3929
                              }
3930
                              Tcu = FMA(KP956940335, Tcm, KP290284677 * Tct);
3931
                              Td6 = FNMS(KP471396736, TcZ, KP881921264 * TcY);
3932
                              TcQ = FNMS(KP956940335, Tct, KP290284677 * Tcm);
3933
                              Td0 = FMA(KP471396736, TcY, KP881921264 * TcZ);
3934
                         }
3935
                         {
3936
                              E TcF, Td1, TcM, Td2;
3937
                              {
3938
                                   E Tcx, TcE, TcI, TcL;
3939
                                   Tcx = Tcv - Tcw;
3940
                                   TcE = TcA - TcD;
3941
                                   TcF = Tcx - TcE;
3942
                                   Td1 = Tcx + TcE;
3943
                                   TcI = TcG - TcH;
3944
                                   TcL = TcJ - TcK;
3945
                                   TcM = TcI - TcL;
3946
                                   Td2 = TcI + TcL;
3947
                              }
3948
                              TcN = FNMS(KP956940335, TcM, KP290284677 * TcF);
3949
                              Td7 = FMA(KP881921264, Td2, KP471396736 * Td1);
3950
                              TcR = FMA(KP290284677, TcM, KP956940335 * TcF);
3951
                              Td3 = FNMS(KP471396736, Td2, KP881921264 * Td1);
3952
                         }
3953
                         {
3954
                              E Tcb, TcO, Tjn, Tjq;
3955
                              Tcb = TbV + Tca;
3956
                              TcO = Tcu + TcN;
3957
                              ri[WS(rs, 45)] = Tcb - TcO;
3958
                              ri[WS(rs, 13)] = Tcb + TcO;
3959
                              Tjn = TcQ + TcR;
3960
                              Tjq = Tjo + Tjp;
3961
                              ii[WS(rs, 13)] = Tjn + Tjq;
3962
                              ii[WS(rs, 45)] = Tjq - Tjn;
3963
                         }
3964
                         {
3965
                              E TcP, TcS, Tjr, Tjs;
3966
                              TcP = TbV - Tca;
3967
                              TcS = TcQ - TcR;
3968
                              ri[WS(rs, 61)] = TcP - TcS;
3969
                              ri[WS(rs, 29)] = TcP + TcS;
3970
                              Tjr = TcN - Tcu;
3971
                              Tjs = Tjp - Tjo;
3972
                              ii[WS(rs, 29)] = Tjr + Tjs;
3973
                              ii[WS(rs, 61)] = Tjs - Tjr;
3974
                         }
3975
                         {
3976
                              E TcX, Td4, Tjf, Tjk;
3977
                              TcX = TcT + TcW;
3978
                              Td4 = Td0 + Td3;
3979
                              ri[WS(rs, 37)] = TcX - Td4;
3980
                              ri[WS(rs, 5)] = TcX + Td4;
3981
                              Tjf = Td6 + Td7;
3982
                              Tjk = Tjg + Tjj;
3983
                              ii[WS(rs, 5)] = Tjf + Tjk;
3984
                              ii[WS(rs, 37)] = Tjk - Tjf;
3985
                         }
3986
                         {
3987
                              E Td5, Td8, Tjl, Tjm;
3988
                              Td5 = TcT - TcW;
3989
                              Td8 = Td6 - Td7;
3990
                              ri[WS(rs, 53)] = Td5 - Td8;
3991
                              ri[WS(rs, 21)] = Td5 + Td8;
3992
                              Tjl = Td3 - Td0;
3993
                              Tjm = Tjj - Tjg;
3994
                              ii[WS(rs, 21)] = Tjl + Tjm;
3995
                              ii[WS(rs, 53)] = Tjm - Tjl;
3996
                         }
3997
                    }
3998
                    {
3999
                         E Tdd, TdF, Tj5, Tjb, Tdk, Tja, TdI, TiY, Tds, TdS, TdC, TdM, Tdz, TdT, TdD;
4000
                         E TdP;
4001
                         {
4002
                              E Td9, Tdc, TiZ, Tj4;
4003
                              Td9 = TbJ + TbM;
4004
                              Tdc = Tda + Tdb;
4005
                              Tdd = Td9 - Tdc;
4006
                              TdF = Td9 + Tdc;
4007
                              TiZ = TbQ + TbT;
4008
                              Tj4 = Tj0 + Tj3;
4009
                              Tj5 = TiZ + Tj4;
4010
                              Tjb = Tj4 - TiZ;
4011
                         }
4012
                         {
4013
                              E Tdg, TdG, Tdj, TdH;
4014
                              {
4015
                                   E Tde, Tdf, Tdh, Tdi;
4016
                                   Tde = TbW + TbX;
4017
                                   Tdf = TbZ + Tc0;
4018
                                   Tdg = FNMS(KP195090322, Tdf, KP980785280 * Tde);
4019
                                   TdG = FMA(KP980785280, Tdf, KP195090322 * Tde);
4020
                                   Tdh = Tc3 + Tc4;
4021
                                   Tdi = Tc6 + Tc7;
4022
                                   Tdj = FMA(KP195090322, Tdh, KP980785280 * Tdi);
4023
                                   TdH = FNMS(KP195090322, Tdi, KP980785280 * Tdh);
4024
                              }
4025
                              Tdk = Tdg - Tdj;
4026
                              Tja = TdH - TdG;
4027
                              TdI = TdG + TdH;
4028
                              TiY = Tdg + Tdj;
4029
                         }
4030
                         {
4031
                              E Tdo, TdK, Tdr, TdL;
4032
                              {
4033
                                   E Tdm, Tdn, Tdp, Tdq;
4034
                                   Tdm = Tcn + Tco;
4035
                                   Tdn = Tck + Tch;
4036
                                   Tdo = Tdm - Tdn;
4037
                                   TdK = Tdm + Tdn;
4038
                                   Tdp = Tcc + Tcd;
4039
                                   Tdq = Tcq + Tcr;
4040
                                   Tdr = Tdp - Tdq;
4041
                                   TdL = Tdp + Tdq;
4042
                              }
4043
                              Tds = FMA(KP634393284, Tdo, KP773010453 * Tdr);
4044
                              TdS = FNMS(KP098017140, TdK, KP995184726 * TdL);
4045
                              TdC = FNMS(KP773010453, Tdo, KP634393284 * Tdr);
4046
                              TdM = FMA(KP995184726, TdK, KP098017140 * TdL);
4047
                         }
4048
                         {
4049
                              E Tdv, TdN, Tdy, TdO;
4050
                              {
4051
                                   E Tdt, Tdu, Tdw, Tdx;
4052
                                   Tdt = Tcv + Tcw;
4053
                                   Tdu = TcK + TcJ;
4054
                                   Tdv = Tdt - Tdu;
4055
                                   TdN = Tdt + Tdu;
4056
                                   Tdw = TcG + TcH;
4057
                                   Tdx = TcA + TcD;
4058
                                   Tdy = Tdw - Tdx;
4059
                                   TdO = Tdw + Tdx;
4060
                              }
4061
                              Tdz = FNMS(KP773010453, Tdy, KP634393284 * Tdv);
4062
                              TdT = FMA(KP098017140, TdN, KP995184726 * TdO);
4063
                              TdD = FMA(KP773010453, Tdv, KP634393284 * Tdy);
4064
                              TdP = FNMS(KP098017140, TdO, KP995184726 * TdN);
4065
                         }
4066
                         {
4067
                              E Tdl, TdA, Tj9, Tjc;
4068
                              Tdl = Tdd + Tdk;
4069
                              TdA = Tds + Tdz;
4070
                              ri[WS(rs, 41)] = Tdl - TdA;
4071
                              ri[WS(rs, 9)] = Tdl + TdA;
4072
                              Tj9 = TdC + TdD;
4073
                              Tjc = Tja + Tjb;
4074
                              ii[WS(rs, 9)] = Tj9 + Tjc;
4075
                              ii[WS(rs, 41)] = Tjc - Tj9;
4076
                         }
4077
                         {
4078
                              E TdB, TdE, Tjd, Tje;
4079
                              TdB = Tdd - Tdk;
4080
                              TdE = TdC - TdD;
4081
                              ri[WS(rs, 57)] = TdB - TdE;
4082
                              ri[WS(rs, 25)] = TdB + TdE;
4083
                              Tjd = Tdz - Tds;
4084
                              Tje = Tjb - Tja;
4085
                              ii[WS(rs, 25)] = Tjd + Tje;
4086
                              ii[WS(rs, 57)] = Tje - Tjd;
4087
                         }
4088
                         {
4089
                              E TdJ, TdQ, TiX, Tj6;
4090
                              TdJ = TdF + TdI;
4091
                              TdQ = TdM + TdP;
4092
                              ri[WS(rs, 33)] = TdJ - TdQ;
4093
                              ri[WS(rs, 1)] = TdJ + TdQ;
4094
                              TiX = TdS + TdT;
4095
                              Tj6 = TiY + Tj5;
4096
                              ii[WS(rs, 1)] = TiX + Tj6;
4097
                              ii[WS(rs, 33)] = Tj6 - TiX;
4098
                         }
4099
                         {
4100
                              E TdR, TdU, Tj7, Tj8;
4101
                              TdR = TdF - TdI;
4102
                              TdU = TdS - TdT;
4103
                              ri[WS(rs, 49)] = TdR - TdU;
4104
                              ri[WS(rs, 17)] = TdR + TdU;
4105
                              Tj7 = TdP - TdM;
4106
                              Tj8 = Tj5 - TiY;
4107
                              ii[WS(rs, 17)] = Tj7 + Tj8;
4108
                              ii[WS(rs, 49)] = Tj8 - Tj7;
4109
                         }
4110
                    }
4111
                    {
4112
                         E Tb1, Tbt, Tjz, TjF, Tb8, TjE, Tbw, Tju, Tbg, TbG, Tbq, TbA, Tbn, TbH, Tbr;
4113
                         E TbD;
4114
                         {
4115
                              E TaX, Tb0, Tjv, Tjy;
4116
                              TaX = T7l + T7w;
4117
                              Tb0 = TaY + TaZ;
4118
                              Tb1 = TaX - Tb0;
4119
                              Tbt = TaX + Tb0;
4120
                              Tjv = T7I + T7T;
4121
                              Tjy = Tjw + Tjx;
4122
                              Tjz = Tjv + Tjy;
4123
                              TjF = Tjy - Tjv;
4124
                         }
4125
                         {
4126
                              E Tb4, Tbu, Tb7, Tbv;
4127
                              {
4128
                                   E Tb2, Tb3, Tb5, Tb6;
4129
                                   Tb2 = T80 + T8b;
4130
                                   Tb3 = T8h + T8k;
4131
                                   Tb4 = FNMS(KP555570233, Tb3, KP831469612 * Tb2);
4132
                                   Tbu = FMA(KP555570233, Tb2, KP831469612 * Tb3);
4133
                                   Tb5 = T8r + T8C;
4134
                                   Tb6 = T8I + T8L;
4135
                                   Tb7 = FMA(KP831469612, Tb5, KP555570233 * Tb6);
4136
                                   Tbv = FNMS(KP555570233, Tb5, KP831469612 * Tb6);
4137
                              }
4138
                              Tb8 = Tb4 - Tb7;
4139
                              TjE = Tbv - Tbu;
4140
                              Tbw = Tbu + Tbv;
4141
                              Tju = Tb4 + Tb7;
4142
                         }
4143
                         {
4144
                              E Tbc, Tby, Tbf, Tbz;
4145
                              {
4146
                                   E Tba, Tbb, Tbd, Tbe;
4147
                                   Tba = T9z + T9C;
4148
                                   Tbb = T9s + T9h;
4149
                                   Tbc = Tba - Tbb;
4150
                                   Tby = Tba + Tbb;
4151
                                   Tbd = T8U + T95;
4152
                                   Tbe = T9E + T9F;
4153
                                   Tbf = Tbd - Tbe;
4154
                                   Tbz = Tbd + Tbe;
4155
                              }
4156
                              Tbg = FMA(KP471396736, Tbc, KP881921264 * Tbf);
4157
                              TbG = FNMS(KP290284677, Tby, KP956940335 * Tbz);
4158
                              Tbq = FNMS(KP881921264, Tbc, KP471396736 * Tbf);
4159
                              TbA = FMA(KP956940335, Tby, KP290284677 * Tbz);
4160
                         }
4161
                         {
4162
                              E Tbj, TbB, Tbm, TbC;
4163
                              {
4164
                                   E Tbh, Tbi, Tbk, Tbl;
4165
                                   Tbh = T9N + T9Y;
4166
                                   Tbi = Tay + Tax;
4167
                                   Tbj = Tbh - Tbi;
4168
                                   TbB = Tbh + Tbi;
4169
                                   Tbk = Tas + Tav;
4170
                                   Tbl = Taa + Tal;
4171
                                   Tbm = Tbk - Tbl;
4172
                                   TbC = Tbk + Tbl;
4173
                              }
4174
                              Tbn = FNMS(KP881921264, Tbm, KP471396736 * Tbj);
4175
                              TbH = FMA(KP290284677, TbB, KP956940335 * TbC);
4176
                              Tbr = FMA(KP881921264, Tbj, KP471396736 * Tbm);
4177
                              TbD = FNMS(KP290284677, TbC, KP956940335 * TbB);
4178
                         }
4179
                         {
4180
                              E Tb9, Tbo, TjD, TjG;
4181
                              Tb9 = Tb1 + Tb8;
4182
                              Tbo = Tbg + Tbn;
4183
                              ri[WS(rs, 43)] = Tb9 - Tbo;
4184
                              ri[WS(rs, 11)] = Tb9 + Tbo;
4185
                              TjD = Tbq + Tbr;
4186
                              TjG = TjE + TjF;
4187
                              ii[WS(rs, 11)] = TjD + TjG;
4188
                              ii[WS(rs, 43)] = TjG - TjD;
4189
                         }
4190
                         {
4191
                              E Tbp, Tbs, TjH, TjI;
4192
                              Tbp = Tb1 - Tb8;
4193
                              Tbs = Tbq - Tbr;
4194
                              ri[WS(rs, 59)] = Tbp - Tbs;
4195
                              ri[WS(rs, 27)] = Tbp + Tbs;
4196
                              TjH = Tbn - Tbg;
4197
                              TjI = TjF - TjE;
4198
                              ii[WS(rs, 27)] = TjH + TjI;
4199
                              ii[WS(rs, 59)] = TjI - TjH;
4200
                         }
4201
                         {
4202
                              E Tbx, TbE, Tjt, TjA;
4203
                              Tbx = Tbt + Tbw;
4204
                              TbE = TbA + TbD;
4205
                              ri[WS(rs, 35)] = Tbx - TbE;
4206
                              ri[WS(rs, 3)] = Tbx + TbE;
4207
                              Tjt = TbG + TbH;
4208
                              TjA = Tju + Tjz;
4209
                              ii[WS(rs, 3)] = Tjt + TjA;
4210
                              ii[WS(rs, 35)] = TjA - Tjt;
4211
                         }
4212
                         {
4213
                              E TbF, TbI, TjB, TjC;
4214
                              TbF = Tbt - Tbw;
4215
                              TbI = TbG - TbH;
4216
                              ri[WS(rs, 51)] = TbF - TbI;
4217
                              ri[WS(rs, 19)] = TbF + TbI;
4218
                              TjB = TbD - TbA;
4219
                              TjC = Tjz - Tju;
4220
                              ii[WS(rs, 19)] = TjB + TjC;
4221
                              ii[WS(rs, 51)] = TjC - TjB;
4222
                         }
4223
                    }
4224
               }
4225
          }
4226
     }
4227
}
4228

    
4229
static const tw_instr twinstr[] = {
4230
     {TW_CEXP, 0, 1},
4231
     {TW_CEXP, 0, 3},
4232
     {TW_CEXP, 0, 9},
4233
     {TW_CEXP, 0, 27},
4234
     {TW_CEXP, 0, 63},
4235
     {TW_NEXT, 1, 0}
4236
};
4237

    
4238
/*
 * Descriptor passed to X(kdft_dit_register) when this codelet is registered.
 * Fields are given positionally, in the order declared by ct_desc.
 *
 * NOTE(review): the per-field remarks below are inferred from the values,
 * not from the ct_desc declaration (which is not visible here) -- confirm.
 */
static const ct_desc desc = {
    64,                     /* presumably the radix handled by t2_64 */
    "t2_64",                /* codelet name string */
    twinstr,                /* twiddle instruction table defined above */
    &GENUS,                 /* genus object supplied by the included header */
    { 880, 386, 274, 0 },   /* presumably operation counts -- TODO confirm order */
    0,
    0,
    0
};
4239

    
4240
/*
 * Registration hook for the t2_64 codelet.
 *
 * Forwards the planner `p`, the t2_64 kernel function and its descriptor
 * `desc` to X(kdft_dit_register).  Returns nothing.
 */
void X(codelet_t2_64)(planner *p)
{
    X(kdft_dit_register)(p, t2_64, &desc);
}
4243
#endif