To check out this repository, please run `hg clone` on the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / fftw-3.3.8 / dft / scalar / codelets / n1_64.c @ 167:bd3cc4d1df30

History | View | Annotate | Download (79.1 KB)

1
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
20

    
21
/* This file was automatically generated --- DO NOT EDIT */
22
/* Generated on Thu May 24 08:04:12 EDT 2018 */
23

    
24
#include "dft/codelet-dft.h"
25

    
26
#if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
27

    
28
/* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 64 -name n1_64 -include dft/scalar/n.h */
29

    
30
/*
 * This function contains 912 FP additions, 392 FP multiplications,
 * (or, 520 additions, 0 multiplications, 392 fused multiply/add),
 * 172 stack variables, 15 constants, and 256 memory accesses
 */
35
#include "dft/scalar/n.h"
36

    
37
static void n1_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
38
{
39
     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
40
     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
41
     DK(KP534511135, +0.534511135950791641089685961295362908582039528);
42
     DK(KP303346683, +0.303346683607342391675883946941299872384187453);
43
     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
44
     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
45
     DK(KP820678790, +0.820678790828660330972281985331011598767386482);
46
     DK(KP098491403, +0.098491403357164253077197521291327432293052451);
47
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
48
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
49
     DK(KP668178637, +0.668178637919298919997757686523080761552472251);
50
     DK(KP198912367, +0.198912367379658006911597622644676228597850501);
51
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
52
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
53
     DK(KP414213562, +0.414213562373095048801688724209698078569671875);
54
     {
55
          INT i;
56
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(256, is), MAKE_VOLATILE_STRIDE(256, os)) {
57
               E T37, T7B, T8F, T5Z, Tf, Td9, TbB, TcB, T62, T7C, T2i, TdH, Tah, Tcb, T3e;
58
               E T8G, Tu, TdI, Tak, TbC, Tan, TbD, T2x, Tda, T3m, T65, T7G, T8I, T7J, T8J;
59
               E T3t, T64, TK, Tdd, Tas, Tce, Tav, Tcf, T2N, Tdc, T3G, T6G, T7O, T9k, T7R;
60
               E T9l, T3N, T6H, T1L, TdA, Tbs, Tct, Tdx, Teo, T5j, T6Y, T5Q, T6V, T8y, T9z;
61
               E Tbb, Tcw, T8n, T9C, TZ, Tdf, Taz, Tch, TaC, Tci, T32, Tdg, T3Z, T6J, T7V;
62
               E T9n, T7Y, T9o, T46, T6K, T1g, Tdp, Tb1, Tcm, Tdm, Tej, T4q, T6R, T4X, T6O;
63
               E T8f, T9s, TaK, Tcp, T84, T9v, T1v, Tdn, Tb4, Tcq, Tds, Tek, T4N, T6P, T50;
64
               E T6S, T8i, T9w, TaV, Tcn, T8b, T9t, T20, Tdy, Tbv, Tcx, TdD, Tep, T5G, T6W;
65
               E T5T, T6Z, T8B, T9D, Tbm, Tcu, T8u, T9A;
66
               {
67
                    E T3, T35, T26, T5Y, T6, T5X, T29, T36, Ta, T39, T2d, T38, Td, T3b, T2g;
68
                    E T3c;
69
                    {
70
                         E T1, T2, T24, T25;
71
                         T1 = ri[0];
72
                         T2 = ri[WS(is, 32)];
73
                         T3 = T1 + T2;
74
                         T35 = T1 - T2;
75
                         T24 = ii[0];
76
                         T25 = ii[WS(is, 32)];
77
                         T26 = T24 + T25;
78
                         T5Y = T24 - T25;
79
                    }
80
                    {
81
                         E T4, T5, T27, T28;
82
                         T4 = ri[WS(is, 16)];
83
                         T5 = ri[WS(is, 48)];
84
                         T6 = T4 + T5;
85
                         T5X = T4 - T5;
86
                         T27 = ii[WS(is, 16)];
87
                         T28 = ii[WS(is, 48)];
88
                         T29 = T27 + T28;
89
                         T36 = T27 - T28;
90
                    }
91
                    {
92
                         E T8, T9, T2b, T2c;
93
                         T8 = ri[WS(is, 8)];
94
                         T9 = ri[WS(is, 40)];
95
                         Ta = T8 + T9;
96
                         T39 = T8 - T9;
97
                         T2b = ii[WS(is, 8)];
98
                         T2c = ii[WS(is, 40)];
99
                         T2d = T2b + T2c;
100
                         T38 = T2b - T2c;
101
                    }
102
                    {
103
                         E Tb, Tc, T2e, T2f;
104
                         Tb = ri[WS(is, 56)];
105
                         Tc = ri[WS(is, 24)];
106
                         Td = Tb + Tc;
107
                         T3b = Tb - Tc;
108
                         T2e = ii[WS(is, 56)];
109
                         T2f = ii[WS(is, 24)];
110
                         T2g = T2e + T2f;
111
                         T3c = T2e - T2f;
112
                    }
113
                    {
114
                         E T7, Te, T2a, T2h;
115
                         T37 = T35 - T36;
116
                         T7B = T35 + T36;
117
                         T8F = T5Y - T5X;
118
                         T5Z = T5X + T5Y;
119
                         T7 = T3 + T6;
120
                         Te = Ta + Td;
121
                         Tf = T7 + Te;
122
                         Td9 = T7 - Te;
123
                         {
124
                              E Tbz, TbA, T60, T61;
125
                              Tbz = Td - Ta;
126
                              TbA = T26 - T29;
127
                              TbB = Tbz + TbA;
128
                              TcB = TbA - Tbz;
129
                              T60 = T3b - T3c;
130
                              T61 = T39 + T38;
131
                              T62 = T60 - T61;
132
                              T7C = T61 + T60;
133
                         }
134
                         T2a = T26 + T29;
135
                         T2h = T2d + T2g;
136
                         T2i = T2a + T2h;
137
                         TdH = T2a - T2h;
138
                         {
139
                              E Taf, Tag, T3a, T3d;
140
                              Taf = T3 - T6;
141
                              Tag = T2d - T2g;
142
                              Tah = Taf + Tag;
143
                              Tcb = Taf - Tag;
144
                              T3a = T38 - T39;
145
                              T3d = T3b + T3c;
146
                              T3e = T3a - T3d;
147
                              T8G = T3a + T3d;
148
                         }
149
                    }
150
               }
151
               {
152
                    E Ti, T3j, T2l, T3h, Tl, T3g, T2o, T3k, Tp, T3q, T2s, T3o, Ts, T3n, T2v;
153
                    E T3r;
154
                    {
155
                         E Tg, Th, T2j, T2k;
156
                         Tg = ri[WS(is, 4)];
157
                         Th = ri[WS(is, 36)];
158
                         Ti = Tg + Th;
159
                         T3j = Tg - Th;
160
                         T2j = ii[WS(is, 4)];
161
                         T2k = ii[WS(is, 36)];
162
                         T2l = T2j + T2k;
163
                         T3h = T2j - T2k;
164
                    }
165
                    {
166
                         E Tj, Tk, T2m, T2n;
167
                         Tj = ri[WS(is, 20)];
168
                         Tk = ri[WS(is, 52)];
169
                         Tl = Tj + Tk;
170
                         T3g = Tj - Tk;
171
                         T2m = ii[WS(is, 20)];
172
                         T2n = ii[WS(is, 52)];
173
                         T2o = T2m + T2n;
174
                         T3k = T2m - T2n;
175
                    }
176
                    {
177
                         E Tn, To, T2q, T2r;
178
                         Tn = ri[WS(is, 60)];
179
                         To = ri[WS(is, 28)];
180
                         Tp = Tn + To;
181
                         T3q = Tn - To;
182
                         T2q = ii[WS(is, 60)];
183
                         T2r = ii[WS(is, 28)];
184
                         T2s = T2q + T2r;
185
                         T3o = T2q - T2r;
186
                    }
187
                    {
188
                         E Tq, Tr, T2t, T2u;
189
                         Tq = ri[WS(is, 12)];
190
                         Tr = ri[WS(is, 44)];
191
                         Ts = Tq + Tr;
192
                         T3n = Tq - Tr;
193
                         T2t = ii[WS(is, 12)];
194
                         T2u = ii[WS(is, 44)];
195
                         T2v = T2t + T2u;
196
                         T3r = T2t - T2u;
197
                    }
198
                    {
199
                         E Tm, Tt, Tai, Taj;
200
                         Tm = Ti + Tl;
201
                         Tt = Tp + Ts;
202
                         Tu = Tm + Tt;
203
                         TdI = Tt - Tm;
204
                         Tai = Ti - Tl;
205
                         Taj = T2l - T2o;
206
                         Tak = Tai + Taj;
207
                         TbC = Taj - Tai;
208
                    }
209
                    {
210
                         E Tal, Tam, T2p, T2w;
211
                         Tal = Tp - Ts;
212
                         Tam = T2s - T2v;
213
                         Tan = Tal - Tam;
214
                         TbD = Tal + Tam;
215
                         T2p = T2l + T2o;
216
                         T2w = T2s + T2v;
217
                         T2x = T2p + T2w;
218
                         Tda = T2p - T2w;
219
                    }
220
                    {
221
                         E T3i, T3l, T7E, T7F;
222
                         T3i = T3g + T3h;
223
                         T3l = T3j - T3k;
224
                         T3m = FMA(KP414213562, T3l, T3i);
225
                         T65 = FNMS(KP414213562, T3i, T3l);
226
                         T7E = T3j + T3k;
227
                         T7F = T3h - T3g;
228
                         T7G = FMA(KP414213562, T7F, T7E);
229
                         T8I = FNMS(KP414213562, T7E, T7F);
230
                    }
231
                    {
232
                         E T7H, T7I, T3p, T3s;
233
                         T7H = T3q + T3r;
234
                         T7I = T3o - T3n;
235
                         T7J = FNMS(KP414213562, T7I, T7H);
236
                         T8J = FMA(KP414213562, T7H, T7I);
237
                         T3p = T3n + T3o;
238
                         T3s = T3q - T3r;
239
                         T3t = FNMS(KP414213562, T3s, T3p);
240
                         T64 = FMA(KP414213562, T3p, T3s);
241
                    }
242
               }
243
               {
244
                    E Ty, T3H, T2B, T3x, TB, T3w, T2E, T3I, TI, T3K, T2L, T3E, TF, T3L, T2I;
245
                    E T3B;
246
                    {
247
                         E Tw, Tx, T2C, T2D;
248
                         Tw = ri[WS(is, 2)];
249
                         Tx = ri[WS(is, 34)];
250
                         Ty = Tw + Tx;
251
                         T3H = Tw - Tx;
252
                         {
253
                              E T2z, T2A, Tz, TA;
254
                              T2z = ii[WS(is, 2)];
255
                              T2A = ii[WS(is, 34)];
256
                              T2B = T2z + T2A;
257
                              T3x = T2z - T2A;
258
                              Tz = ri[WS(is, 18)];
259
                              TA = ri[WS(is, 50)];
260
                              TB = Tz + TA;
261
                              T3w = Tz - TA;
262
                         }
263
                         T2C = ii[WS(is, 18)];
264
                         T2D = ii[WS(is, 50)];
265
                         T2E = T2C + T2D;
266
                         T3I = T2C - T2D;
267
                         {
268
                              E TG, TH, T3C, T2J, T2K, T3D;
269
                              TG = ri[WS(is, 58)];
270
                              TH = ri[WS(is, 26)];
271
                              T3C = TG - TH;
272
                              T2J = ii[WS(is, 58)];
273
                              T2K = ii[WS(is, 26)];
274
                              T3D = T2J - T2K;
275
                              TI = TG + TH;
276
                              T3K = T3C + T3D;
277
                              T2L = T2J + T2K;
278
                              T3E = T3C - T3D;
279
                         }
280
                         {
281
                              E TD, TE, T3z, T2G, T2H, T3A;
282
                              TD = ri[WS(is, 10)];
283
                              TE = ri[WS(is, 42)];
284
                              T3z = TD - TE;
285
                              T2G = ii[WS(is, 10)];
286
                              T2H = ii[WS(is, 42)];
287
                              T3A = T2G - T2H;
288
                              TF = TD + TE;
289
                              T3L = T3A - T3z;
290
                              T2I = T2G + T2H;
291
                              T3B = T3z + T3A;
292
                         }
293
                    }
294
                    {
295
                         E TC, TJ, Taq, Tar;
296
                         TC = Ty + TB;
297
                         TJ = TF + TI;
298
                         TK = TC + TJ;
299
                         Tdd = TC - TJ;
300
                         Taq = TI - TF;
301
                         Tar = T2B - T2E;
302
                         Tas = Taq + Tar;
303
                         Tce = Tar - Taq;
304
                    }
305
                    {
306
                         E Tat, Tau, T2F, T2M;
307
                         Tat = Ty - TB;
308
                         Tau = T2I - T2L;
309
                         Tav = Tat + Tau;
310
                         Tcf = Tat - Tau;
311
                         T2F = T2B + T2E;
312
                         T2M = T2I + T2L;
313
                         T2N = T2F + T2M;
314
                         Tdc = T2F - T2M;
315
                    }
316
                    {
317
                         E T3y, T3F, T7M, T7N;
318
                         T3y = T3w + T3x;
319
                         T3F = T3B - T3E;
320
                         T3G = FNMS(KP707106781, T3F, T3y);
321
                         T6G = FMA(KP707106781, T3F, T3y);
322
                         T7M = T3x - T3w;
323
                         T7N = T3L + T3K;
324
                         T7O = FMA(KP707106781, T7N, T7M);
325
                         T9k = FNMS(KP707106781, T7N, T7M);
326
                    }
327
                    {
328
                         E T7P, T7Q, T3J, T3M;
329
                         T7P = T3H + T3I;
330
                         T7Q = T3B + T3E;
331
                         T7R = FMA(KP707106781, T7Q, T7P);
332
                         T9l = FNMS(KP707106781, T7Q, T7P);
333
                         T3J = T3H - T3I;
334
                         T3M = T3K - T3L;
335
                         T3N = FNMS(KP707106781, T3M, T3J);
336
                         T6H = FMA(KP707106781, T3M, T3J);
337
                    }
338
               }
339
               {
340
                    E T1z, T5I, T56, Tb8, T1C, T53, T5L, Tb9, T1J, Tbq, T5h, T5N, T1G, Tbp, T5c;
341
                    E T5O;
342
                    {
343
                         E T1x, T1y, T5J, T5K;
344
                         T1x = ri[WS(is, 63)];
345
                         T1y = ri[WS(is, 31)];
346
                         T1z = T1x + T1y;
347
                         T5I = T1x - T1y;
348
                         {
349
                              E T54, T55, T1A, T1B;
350
                              T54 = ii[WS(is, 63)];
351
                              T55 = ii[WS(is, 31)];
352
                              T56 = T54 - T55;
353
                              Tb8 = T54 + T55;
354
                              T1A = ri[WS(is, 15)];
355
                              T1B = ri[WS(is, 47)];
356
                              T1C = T1A + T1B;
357
                              T53 = T1A - T1B;
358
                         }
359
                         T5J = ii[WS(is, 15)];
360
                         T5K = ii[WS(is, 47)];
361
                         T5L = T5J - T5K;
362
                         Tb9 = T5J + T5K;
363
                         {
364
                              E T1H, T1I, T5d, T5e, T5f, T5g;
365
                              T1H = ri[WS(is, 55)];
366
                              T1I = ri[WS(is, 23)];
367
                              T5d = T1H - T1I;
368
                              T5e = ii[WS(is, 55)];
369
                              T5f = ii[WS(is, 23)];
370
                              T5g = T5e - T5f;
371
                              T1J = T1H + T1I;
372
                              Tbq = T5e + T5f;
373
                              T5h = T5d - T5g;
374
                              T5N = T5d + T5g;
375
                         }
376
                         {
377
                              E T1E, T1F, T58, T59, T5a, T5b;
378
                              T1E = ri[WS(is, 7)];
379
                              T1F = ri[WS(is, 39)];
380
                              T58 = T1E - T1F;
381
                              T59 = ii[WS(is, 7)];
382
                              T5a = ii[WS(is, 39)];
383
                              T5b = T59 - T5a;
384
                              T1G = T1E + T1F;
385
                              Tbp = T59 + T5a;
386
                              T5c = T58 + T5b;
387
                              T5O = T5b - T58;
388
                         }
389
                    }
390
                    {
391
                         E T1D, T1K, Tbo, Tbr;
392
                         T1D = T1z + T1C;
393
                         T1K = T1G + T1J;
394
                         T1L = T1D + T1K;
395
                         TdA = T1D - T1K;
396
                         Tbo = T1z - T1C;
397
                         Tbr = Tbp - Tbq;
398
                         Tbs = Tbo + Tbr;
399
                         Tct = Tbo - Tbr;
400
                    }
401
                    {
402
                         E Tdv, Tdw, T57, T5i;
403
                         Tdv = Tb8 + Tb9;
404
                         Tdw = Tbp + Tbq;
405
                         Tdx = Tdv - Tdw;
406
                         Teo = Tdv + Tdw;
407
                         T57 = T53 + T56;
408
                         T5i = T5c - T5h;
409
                         T5j = FNMS(KP707106781, T5i, T57);
410
                         T6Y = FMA(KP707106781, T5i, T57);
411
                    }
412
                    {
413
                         E T5M, T5P, T8w, T8x;
414
                         T5M = T5I - T5L;
415
                         T5P = T5N - T5O;
416
                         T5Q = FNMS(KP707106781, T5P, T5M);
417
                         T6V = FMA(KP707106781, T5P, T5M);
418
                         T8w = T5I + T5L;
419
                         T8x = T5c + T5h;
420
                         T8y = FMA(KP707106781, T8x, T8w);
421
                         T9z = FNMS(KP707106781, T8x, T8w);
422
                    }
423
                    {
424
                         E Tb7, Tba, T8l, T8m;
425
                         Tb7 = T1J - T1G;
426
                         Tba = Tb8 - Tb9;
427
                         Tbb = Tb7 + Tba;
428
                         Tcw = Tba - Tb7;
429
                         T8l = T56 - T53;
430
                         T8m = T5O + T5N;
431
                         T8n = FMA(KP707106781, T8m, T8l);
432
                         T9C = FNMS(KP707106781, T8m, T8l);
433
                    }
434
               }
435
               {
436
                    E TN, T40, T2Q, T3Q, TQ, T3P, T2T, T41, TX, T43, T30, T3X, TU, T44, T2X;
437
                    E T3U;
438
                    {
439
                         E TL, TM, T2R, T2S;
440
                         TL = ri[WS(is, 62)];
441
                         TM = ri[WS(is, 30)];
442
                         TN = TL + TM;
443
                         T40 = TL - TM;
444
                         {
445
                              E T2O, T2P, TO, TP;
446
                              T2O = ii[WS(is, 62)];
447
                              T2P = ii[WS(is, 30)];
448
                              T2Q = T2O + T2P;
449
                              T3Q = T2O - T2P;
450
                              TO = ri[WS(is, 14)];
451
                              TP = ri[WS(is, 46)];
452
                              TQ = TO + TP;
453
                              T3P = TO - TP;
454
                         }
455
                         T2R = ii[WS(is, 14)];
456
                         T2S = ii[WS(is, 46)];
457
                         T2T = T2R + T2S;
458
                         T41 = T2R - T2S;
459
                         {
460
                              E TV, TW, T3V, T2Y, T2Z, T3W;
461
                              TV = ri[WS(is, 54)];
462
                              TW = ri[WS(is, 22)];
463
                              T3V = TV - TW;
464
                              T2Y = ii[WS(is, 54)];
465
                              T2Z = ii[WS(is, 22)];
466
                              T3W = T2Y - T2Z;
467
                              TX = TV + TW;
468
                              T43 = T3V + T3W;
469
                              T30 = T2Y + T2Z;
470
                              T3X = T3V - T3W;
471
                         }
472
                         {
473
                              E TS, TT, T3S, T2V, T2W, T3T;
474
                              TS = ri[WS(is, 6)];
475
                              TT = ri[WS(is, 38)];
476
                              T3S = TS - TT;
477
                              T2V = ii[WS(is, 6)];
478
                              T2W = ii[WS(is, 38)];
479
                              T3T = T2V - T2W;
480
                              TU = TS + TT;
481
                              T44 = T3T - T3S;
482
                              T2X = T2V + T2W;
483
                              T3U = T3S + T3T;
484
                         }
485
                    }
486
                    {
487
                         E TR, TY, Tax, Tay;
488
                         TR = TN + TQ;
489
                         TY = TU + TX;
490
                         TZ = TR + TY;
491
                         Tdf = TR - TY;
492
                         Tax = TX - TU;
493
                         Tay = T2Q - T2T;
494
                         Taz = Tax + Tay;
495
                         Tch = Tay - Tax;
496
                    }
497
                    {
498
                         E TaA, TaB, T2U, T31;
499
                         TaA = TN - TQ;
500
                         TaB = T2X - T30;
501
                         TaC = TaA + TaB;
502
                         Tci = TaA - TaB;
503
                         T2U = T2Q + T2T;
504
                         T31 = T2X + T30;
505
                         T32 = T2U + T31;
506
                         Tdg = T2U - T31;
507
                    }
508
                    {
509
                         E T3R, T3Y, T7T, T7U;
510
                         T3R = T3P + T3Q;
511
                         T3Y = T3U - T3X;
512
                         T3Z = FNMS(KP707106781, T3Y, T3R);
513
                         T6J = FMA(KP707106781, T3Y, T3R);
514
                         T7T = T3Q - T3P;
515
                         T7U = T44 + T43;
516
                         T7V = FMA(KP707106781, T7U, T7T);
517
                         T9n = FNMS(KP707106781, T7U, T7T);
518
                    }
519
                    {
520
                         E T7W, T7X, T42, T45;
521
                         T7W = T40 + T41;
522
                         T7X = T3U + T3X;
523
                         T7Y = FMA(KP707106781, T7X, T7W);
524
                         T9o = FNMS(KP707106781, T7X, T7W);
525
                         T42 = T40 - T41;
526
                         T45 = T43 - T44;
527
                         T46 = FNMS(KP707106781, T45, T42);
528
                         T6K = FMA(KP707106781, T45, T42);
529
                    }
530
               }
531
               {
532
                    E T14, T4P, T4d, TaH, T17, T4a, T4S, TaI, T1e, TaZ, T4o, T4U, T1b, TaY, T4j;
533
                    E T4V;
534
                    {
535
                         E T12, T13, T4Q, T4R;
536
                         T12 = ri[WS(is, 1)];
537
                         T13 = ri[WS(is, 33)];
538
                         T14 = T12 + T13;
539
                         T4P = T12 - T13;
540
                         {
541
                              E T4b, T4c, T15, T16;
542
                              T4b = ii[WS(is, 1)];
543
                              T4c = ii[WS(is, 33)];
544
                              T4d = T4b - T4c;
545
                              TaH = T4b + T4c;
546
                              T15 = ri[WS(is, 17)];
547
                              T16 = ri[WS(is, 49)];
548
                              T17 = T15 + T16;
549
                              T4a = T15 - T16;
550
                         }
551
                         T4Q = ii[WS(is, 17)];
552
                         T4R = ii[WS(is, 49)];
553
                         T4S = T4Q - T4R;
554
                         TaI = T4Q + T4R;
555
                         {
556
                              E T1c, T1d, T4k, T4l, T4m, T4n;
557
                              T1c = ri[WS(is, 57)];
558
                              T1d = ri[WS(is, 25)];
559
                              T4k = T1c - T1d;
560
                              T4l = ii[WS(is, 57)];
561
                              T4m = ii[WS(is, 25)];
562
                              T4n = T4l - T4m;
563
                              T1e = T1c + T1d;
564
                              TaZ = T4l + T4m;
565
                              T4o = T4k - T4n;
566
                              T4U = T4k + T4n;
567
                         }
568
                         {
569
                              E T19, T1a, T4f, T4g, T4h, T4i;
570
                              T19 = ri[WS(is, 9)];
571
                              T1a = ri[WS(is, 41)];
572
                              T4f = T19 - T1a;
573
                              T4g = ii[WS(is, 9)];
574
                              T4h = ii[WS(is, 41)];
575
                              T4i = T4g - T4h;
576
                              T1b = T19 + T1a;
577
                              TaY = T4g + T4h;
578
                              T4j = T4f + T4i;
579
                              T4V = T4i - T4f;
580
                         }
581
                    }
582
                    {
583
                         E T18, T1f, TaX, Tb0;
584
                         T18 = T14 + T17;
585
                         T1f = T1b + T1e;
586
                         T1g = T18 + T1f;
587
                         Tdp = T18 - T1f;
588
                         TaX = T14 - T17;
589
                         Tb0 = TaY - TaZ;
590
                         Tb1 = TaX + Tb0;
591
                         Tcm = TaX - Tb0;
592
                    }
593
                    {
594
                         E Tdk, Tdl, T4e, T4p;
595
                         Tdk = TaH + TaI;
596
                         Tdl = TaY + TaZ;
597
                         Tdm = Tdk - Tdl;
598
                         Tej = Tdk + Tdl;
599
                         T4e = T4a + T4d;
600
                         T4p = T4j - T4o;
601
                         T4q = FNMS(KP707106781, T4p, T4e);
602
                         T6R = FMA(KP707106781, T4p, T4e);
603
                    }
604
                    {
605
                         E T4T, T4W, T8d, T8e;
606
                         T4T = T4P - T4S;
607
                         T4W = T4U - T4V;
608
                         T4X = FNMS(KP707106781, T4W, T4T);
609
                         T6O = FMA(KP707106781, T4W, T4T);
610
                         T8d = T4P + T4S;
611
                         T8e = T4j + T4o;
612
                         T8f = FMA(KP707106781, T8e, T8d);
613
                         T9s = FNMS(KP707106781, T8e, T8d);
614
                    }
615
                    {
616
                         E TaG, TaJ, T82, T83;
617
                         TaG = T1e - T1b;
618
                         TaJ = TaH - TaI;
619
                         TaK = TaG + TaJ;
620
                         Tcp = TaJ - TaG;
621
                         T82 = T4d - T4a;
622
                         T83 = T4V + T4U;
623
                         T84 = FMA(KP707106781, T83, T82);
624
                         T9v = FNMS(KP707106781, T83, T82);
625
                    }
626
               }
627
               {
628
                    E T1j, TaL, T1m, TaM, T4G, T4L, TaO, TaN, T86, T85, T1q, TaR, T1t, TaS, T4v;
629
                    E T4A, TaT, TaQ, T89, T88;
630
                    {
631
                         E T4C, T4K, T4H, T4F;
632
                         {
633
                              E T1h, T1i, T4I, T4J;
634
                              T1h = ri[WS(is, 5)];
635
                              T1i = ri[WS(is, 37)];
636
                              T1j = T1h + T1i;
637
                              T4C = T1h - T1i;
638
                              T4I = ii[WS(is, 5)];
639
                              T4J = ii[WS(is, 37)];
640
                              T4K = T4I - T4J;
641
                              TaL = T4I + T4J;
642
                         }
643
                         {
644
                              E T1k, T1l, T4D, T4E;
645
                              T1k = ri[WS(is, 21)];
646
                              T1l = ri[WS(is, 53)];
647
                              T1m = T1k + T1l;
648
                              T4H = T1k - T1l;
649
                              T4D = ii[WS(is, 21)];
650
                              T4E = ii[WS(is, 53)];
651
                              T4F = T4D - T4E;
652
                              TaM = T4D + T4E;
653
                         }
654
                         T4G = T4C - T4F;
655
                         T4L = T4H + T4K;
656
                         TaO = T1j - T1m;
657
                         TaN = TaL - TaM;
658
                         T86 = T4C + T4F;
659
                         T85 = T4K - T4H;
660
                    }
661
                    {
662
                         E T4r, T4z, T4w, T4u;
663
                         {
664
                              E T1o, T1p, T4x, T4y;
665
                              T1o = ri[WS(is, 61)];
666
                              T1p = ri[WS(is, 29)];
667
                              T1q = T1o + T1p;
668
                              T4r = T1o - T1p;
669
                              T4x = ii[WS(is, 61)];
670
                              T4y = ii[WS(is, 29)];
671
                              T4z = T4x - T4y;
672
                              TaR = T4x + T4y;
673
                         }
674
                         {
675
                              E T1r, T1s, T4s, T4t;
676
                              T1r = ri[WS(is, 13)];
677
                              T1s = ri[WS(is, 45)];
678
                              T1t = T1r + T1s;
679
                              T4w = T1r - T1s;
680
                              T4s = ii[WS(is, 13)];
681
                              T4t = ii[WS(is, 45)];
682
                              T4u = T4s - T4t;
683
                              TaS = T4s + T4t;
684
                         }
685
                         T4v = T4r - T4u;
686
                         T4A = T4w + T4z;
687
                         TaT = TaR - TaS;
688
                         TaQ = T1q - T1t;
689
                         T89 = T4r + T4u;
690
                         T88 = T4z - T4w;
691
                    }
692
                    {
693
                         E T1n, T1u, Tb2, Tb3;
694
                         T1n = T1j + T1m;
695
                         T1u = T1q + T1t;
696
                         T1v = T1n + T1u;
697
                         Tdn = T1u - T1n;
698
                         Tb2 = TaO + TaN;
699
                         Tb3 = TaQ - TaT;
700
                         Tb4 = Tb2 + Tb3;
701
                         Tcq = Tb2 - Tb3;
702
                    }
703
                    {
704
                         E Tdq, Tdr, T4B, T4M;
705
                         Tdq = TaL + TaM;
706
                         Tdr = TaR + TaS;
707
                         Tds = Tdq - Tdr;
708
                         Tek = Tdq + Tdr;
709
                         T4B = FMA(KP414213562, T4A, T4v);
710
                         T4M = FNMS(KP414213562, T4L, T4G);
711
                         T4N = T4B - T4M;
712
                         T6P = T4M + T4B;
713
                    }
714
                    {
715
                         E T4Y, T4Z, T8g, T8h;
716
                         T4Y = FMA(KP414213562, T4G, T4L);
717
                         T4Z = FNMS(KP414213562, T4v, T4A);
718
                         T50 = T4Y - T4Z;
719
                         T6S = T4Y + T4Z;
720
                         T8g = FMA(KP414213562, T85, T86);
721
                         T8h = FNMS(KP414213562, T88, T89);
722
                         T8i = T8g + T8h;
723
                         T9w = T8g - T8h;
724
                    }
725
                    {
726
                         E TaP, TaU, T87, T8a;
727
                         TaP = TaN - TaO;
728
                         TaU = TaQ + TaT;
729
                         TaV = TaP + TaU;
730
                         Tcn = TaU - TaP;
731
                         T87 = FNMS(KP414213562, T86, T85);
732
                         T8a = FMA(KP414213562, T89, T88);
733
                         T8b = T87 + T8a;
734
                         T9t = T8a - T87;
735
                    }
736
               }
737
               {
738
                    E T1O, Tbc, T1R, Tbd, T5z, T5E, Tbf, Tbe, T8p, T8o, T1V, Tbi, T1Y, Tbj, T5o;
739
                    E T5t, Tbk, Tbh, T8s, T8r;
740
                    {
741
                         E T5v, T5D, T5A, T5y;
742
                         {
743
                              E T1M, T1N, T5B, T5C;
744
                              T1M = ri[WS(is, 3)];
745
                              T1N = ri[WS(is, 35)];
746
                              T1O = T1M + T1N;
747
                              T5v = T1M - T1N;
748
                              T5B = ii[WS(is, 3)];
749
                              T5C = ii[WS(is, 35)];
750
                              T5D = T5B - T5C;
751
                              Tbc = T5B + T5C;
752
                         }
753
                         {
754
                              E T1P, T1Q, T5w, T5x;
755
                              T1P = ri[WS(is, 19)];
756
                              T1Q = ri[WS(is, 51)];
757
                              T1R = T1P + T1Q;
758
                              T5A = T1P - T1Q;
759
                              T5w = ii[WS(is, 19)];
760
                              T5x = ii[WS(is, 51)];
761
                              T5y = T5w - T5x;
762
                              Tbd = T5w + T5x;
763
                         }
764
                         T5z = T5v - T5y;
765
                         T5E = T5A + T5D;
766
                         Tbf = T1O - T1R;
767
                         Tbe = Tbc - Tbd;
768
                         T8p = T5v + T5y;
769
                         T8o = T5D - T5A;
770
                    }
771
                    {
772
                         E T5k, T5s, T5p, T5n;
773
                         {
774
                              E T1T, T1U, T5q, T5r;
775
                              T1T = ri[WS(is, 59)];
776
                              T1U = ri[WS(is, 27)];
777
                              T1V = T1T + T1U;
778
                              T5k = T1T - T1U;
779
                              T5q = ii[WS(is, 59)];
780
                              T5r = ii[WS(is, 27)];
781
                              T5s = T5q - T5r;
782
                              Tbi = T5q + T5r;
783
                         }
784
                         {
785
                              E T1W, T1X, T5l, T5m;
786
                              T1W = ri[WS(is, 11)];
787
                              T1X = ri[WS(is, 43)];
788
                              T1Y = T1W + T1X;
789
                              T5p = T1W - T1X;
790
                              T5l = ii[WS(is, 11)];
791
                              T5m = ii[WS(is, 43)];
792
                              T5n = T5l - T5m;
793
                              Tbj = T5l + T5m;
794
                         }
795
                         T5o = T5k - T5n;
796
                         T5t = T5p + T5s;
797
                         Tbk = Tbi - Tbj;
798
                         Tbh = T1V - T1Y;
799
                         T8s = T5k + T5n;
800
                         T8r = T5s - T5p;
801
                    }
802
                    {
803
                         E T1S, T1Z, Tbt, Tbu;
804
                         T1S = T1O + T1R;
805
                         T1Z = T1V + T1Y;
806
                         T20 = T1S + T1Z;
807
                         Tdy = T1Z - T1S;
808
                         Tbt = Tbf + Tbe;
809
                         Tbu = Tbh - Tbk;
810
                         Tbv = Tbt + Tbu;
811
                         Tcx = Tbt - Tbu;
812
                    }
813
                    {
814
                         E TdB, TdC, T5u, T5F;
815
                         TdB = Tbc + Tbd;
816
                         TdC = Tbi + Tbj;
817
                         TdD = TdB - TdC;
818
                         Tep = TdB + TdC;
819
                         T5u = FMA(KP414213562, T5t, T5o);
820
                         T5F = FNMS(KP414213562, T5E, T5z);
821
                         T5G = T5u - T5F;
822
                         T6W = T5F + T5u;
823
                    }
824
                    {
825
                         E T5R, T5S, T8z, T8A;
826
                         T5R = FMA(KP414213562, T5z, T5E);
827
                         T5S = FNMS(KP414213562, T5o, T5t);
828
                         T5T = T5R - T5S;
829
                         T6Z = T5R + T5S;
830
                         T8z = FMA(KP414213562, T8o, T8p);
831
                         T8A = FNMS(KP414213562, T8r, T8s);
832
                         T8B = T8z + T8A;
833
                         T9D = T8z - T8A;
834
                    }
835
                    {
836
                         E Tbg, Tbl, T8q, T8t;
837
                         Tbg = Tbe - Tbf;
838
                         Tbl = Tbh + Tbk;
839
                         Tbm = Tbg + Tbl;
840
                         Tcu = Tbl - Tbg;
841
                         T8q = FNMS(KP414213562, T8p, T8o);
842
                         T8t = FMA(KP414213562, T8s, T8r);
843
                         T8u = T8q + T8t;
844
                         T9A = T8t - T8q;
845
                    }
846
               }
847
               {
848
                    E T11, TeD, TeG, TeI, T22, T23, T34, TeH;
849
                    {
850
                         E Tv, T10, TeE, TeF;
851
                         Tv = Tf + Tu;
852
                         T10 = TK + TZ;
853
                         T11 = Tv + T10;
854
                         TeD = Tv - T10;
855
                         TeE = Tej + Tek;
856
                         TeF = Teo + Tep;
857
                         TeG = TeE - TeF;
858
                         TeI = TeE + TeF;
859
                    }
860
                    {
861
                         E T1w, T21, T2y, T33;
862
                         T1w = T1g + T1v;
863
                         T21 = T1L + T20;
864
                         T22 = T1w + T21;
865
                         T23 = T21 - T1w;
866
                         T2y = T2i + T2x;
867
                         T33 = T2N + T32;
868
                         T34 = T2y - T33;
869
                         TeH = T2y + T33;
870
                    }
871
                    ro[WS(os, 32)] = T11 - T22;
872
                    io[WS(os, 32)] = TeH - TeI;
873
                    ro[0] = T11 + T22;
874
                    io[0] = TeH + TeI;
875
                    io[WS(os, 16)] = T23 + T34;
876
                    ro[WS(os, 16)] = TeD + TeG;
877
                    io[WS(os, 48)] = T34 - T23;
878
                    ro[WS(os, 48)] = TeD - TeG;
879
               }
880
               {
881
                    E Teh, Tex, Tev, TeB, Tem, Tey, Ter, Tez;
882
                    {
883
                         E Tef, Teg, Tet, Teu;
884
                         Tef = Tf - Tu;
885
                         Teg = T2N - T32;
886
                         Teh = Tef + Teg;
887
                         Tex = Tef - Teg;
888
                         Tet = T2i - T2x;
889
                         Teu = TZ - TK;
890
                         Tev = Tet - Teu;
891
                         TeB = Teu + Tet;
892
                    }
893
                    {
894
                         E Tei, Tel, Ten, Teq;
895
                         Tei = T1g - T1v;
896
                         Tel = Tej - Tek;
897
                         Tem = Tei + Tel;
898
                         Tey = Tel - Tei;
899
                         Ten = T1L - T20;
900
                         Teq = Teo - Tep;
901
                         Ter = Ten - Teq;
902
                         Tez = Ten + Teq;
903
                    }
904
                    {
905
                         E Tes, TeC, Tew, TeA;
906
                         Tes = Tem + Ter;
907
                         ro[WS(os, 40)] = FNMS(KP707106781, Tes, Teh);
908
                         ro[WS(os, 8)] = FMA(KP707106781, Tes, Teh);
909
                         TeC = Tey + Tez;
910
                         io[WS(os, 40)] = FNMS(KP707106781, TeC, TeB);
911
                         io[WS(os, 8)] = FMA(KP707106781, TeC, TeB);
912
                         Tew = Ter - Tem;
913
                         io[WS(os, 56)] = FNMS(KP707106781, Tew, Tev);
914
                         io[WS(os, 24)] = FMA(KP707106781, Tew, Tev);
915
                         TeA = Tey - Tez;
916
                         ro[WS(os, 56)] = FNMS(KP707106781, TeA, Tex);
917
                         ro[WS(os, 24)] = FMA(KP707106781, TeA, Tex);
918
                    }
919
               }
920
               {
921
                    E Tdb, TdV, Te5, TdJ, Tdi, Te6, Te3, Teb, TdM, TdW, Tdu, TdR, Te0, Tea, TdF;
922
                    E TdQ;
923
                    {
924
                         E Tde, Tdh, Tdo, Tdt;
925
                         Tdb = Td9 - Tda;
926
                         TdV = Td9 + Tda;
927
                         Te5 = TdI + TdH;
928
                         TdJ = TdH - TdI;
929
                         Tde = Tdc - Tdd;
930
                         Tdh = Tdf + Tdg;
931
                         Tdi = Tde - Tdh;
932
                         Te6 = Tde + Tdh;
933
                         {
934
                              E Te1, Te2, TdK, TdL;
935
                              Te1 = TdA + TdD;
936
                              Te2 = Tdy + Tdx;
937
                              Te3 = FNMS(KP414213562, Te2, Te1);
938
                              Teb = FMA(KP414213562, Te1, Te2);
939
                              TdK = Tdf - Tdg;
940
                              TdL = Tdd + Tdc;
941
                              TdM = TdK - TdL;
942
                              TdW = TdL + TdK;
943
                         }
944
                         Tdo = Tdm - Tdn;
945
                         Tdt = Tdp - Tds;
946
                         Tdu = FMA(KP414213562, Tdt, Tdo);
947
                         TdR = FNMS(KP414213562, Tdo, Tdt);
948
                         {
949
                              E TdY, TdZ, Tdz, TdE;
950
                              TdY = Tdp + Tds;
951
                              TdZ = Tdn + Tdm;
952
                              Te0 = FMA(KP414213562, TdZ, TdY);
953
                              Tea = FNMS(KP414213562, TdY, TdZ);
954
                              Tdz = Tdx - Tdy;
955
                              TdE = TdA - TdD;
956
                              TdF = FNMS(KP414213562, TdE, Tdz);
957
                              TdQ = FMA(KP414213562, Tdz, TdE);
958
                         }
959
                    }
960
                    {
961
                         E Tdj, TdG, TdP, TdS;
962
                         Tdj = FMA(KP707106781, Tdi, Tdb);
963
                         TdG = Tdu - TdF;
964
                         ro[WS(os, 44)] = FNMS(KP923879532, TdG, Tdj);
965
                         ro[WS(os, 12)] = FMA(KP923879532, TdG, Tdj);
966
                         TdP = FMA(KP707106781, TdM, TdJ);
967
                         TdS = TdQ - TdR;
968
                         io[WS(os, 44)] = FNMS(KP923879532, TdS, TdP);
969
                         io[WS(os, 12)] = FMA(KP923879532, TdS, TdP);
970
                    }
971
                    {
972
                         E TdN, TdO, TdT, TdU;
973
                         TdN = FNMS(KP707106781, TdM, TdJ);
974
                         TdO = Tdu + TdF;
975
                         io[WS(os, 28)] = FNMS(KP923879532, TdO, TdN);
976
                         io[WS(os, 60)] = FMA(KP923879532, TdO, TdN);
977
                         TdT = FNMS(KP707106781, Tdi, Tdb);
978
                         TdU = TdR + TdQ;
979
                         ro[WS(os, 28)] = FNMS(KP923879532, TdU, TdT);
980
                         ro[WS(os, 60)] = FMA(KP923879532, TdU, TdT);
981
                    }
982
                    {
983
                         E TdX, Te4, Ted, Tee;
984
                         TdX = FMA(KP707106781, TdW, TdV);
985
                         Te4 = Te0 + Te3;
986
                         ro[WS(os, 36)] = FNMS(KP923879532, Te4, TdX);
987
                         ro[WS(os, 4)] = FMA(KP923879532, Te4, TdX);
988
                         Ted = FMA(KP707106781, Te6, Te5);
989
                         Tee = Tea + Teb;
990
                         io[WS(os, 36)] = FNMS(KP923879532, Tee, Ted);
991
                         io[WS(os, 4)] = FMA(KP923879532, Tee, Ted);
992
                    }
993
                    {
994
                         E Te7, Te8, Te9, Tec;
995
                         Te7 = FNMS(KP707106781, Te6, Te5);
996
                         Te8 = Te3 - Te0;
997
                         io[WS(os, 52)] = FNMS(KP923879532, Te8, Te7);
998
                         io[WS(os, 20)] = FMA(KP923879532, Te8, Te7);
999
                         Te9 = FNMS(KP707106781, TdW, TdV);
1000
                         Tec = Tea - Teb;
1001
                         ro[WS(os, 52)] = FNMS(KP923879532, Tec, Te9);
1002
                         ro[WS(os, 20)] = FMA(KP923879532, Tec, Te9);
1003
                    }
1004
               }
1005
               {
1006
                    E Tcd, TcP, TcD, TcZ, Tck, Td0, TcX, Td4, Tcs, TcK, TcG, TcQ, TcU, Td5, Tcz;
1007
                    E TcL, Tcc, TcC;
1008
                    Tcc = TbC - TbD;
1009
                    Tcd = FMA(KP707106781, Tcc, Tcb);
1010
                    TcP = FNMS(KP707106781, Tcc, Tcb);
1011
                    TcC = Tan - Tak;
1012
                    TcD = FMA(KP707106781, TcC, TcB);
1013
                    TcZ = FNMS(KP707106781, TcC, TcB);
1014
                    {
1015
                         E Tcg, Tcj, TcV, TcW;
1016
                         Tcg = FMA(KP414213562, Tcf, Tce);
1017
                         Tcj = FNMS(KP414213562, Tci, Tch);
1018
                         Tck = Tcg - Tcj;
1019
                         Td0 = Tcg + Tcj;
1020
                         TcV = FMA(KP707106781, Tcx, Tcw);
1021
                         TcW = FMA(KP707106781, Tcu, Tct);
1022
                         TcX = FNMS(KP198912367, TcW, TcV);
1023
                         Td4 = FMA(KP198912367, TcV, TcW);
1024
                    }
1025
                    {
1026
                         E Tco, Tcr, TcE, TcF;
1027
                         Tco = FNMS(KP707106781, Tcn, Tcm);
1028
                         Tcr = FNMS(KP707106781, Tcq, Tcp);
1029
                         Tcs = FMA(KP668178637, Tcr, Tco);
1030
                         TcK = FNMS(KP668178637, Tco, Tcr);
1031
                         TcE = FMA(KP414213562, Tch, Tci);
1032
                         TcF = FNMS(KP414213562, Tce, Tcf);
1033
                         TcG = TcE - TcF;
1034
                         TcQ = TcF + TcE;
1035
                    }
1036
                    {
1037
                         E TcS, TcT, Tcv, Tcy;
1038
                         TcS = FMA(KP707106781, Tcq, Tcp);
1039
                         TcT = FMA(KP707106781, Tcn, Tcm);
1040
                         TcU = FMA(KP198912367, TcT, TcS);
1041
                         Td5 = FNMS(KP198912367, TcS, TcT);
1042
                         Tcv = FNMS(KP707106781, Tcu, Tct);
1043
                         Tcy = FNMS(KP707106781, Tcx, Tcw);
1044
                         Tcz = FNMS(KP668178637, Tcy, Tcv);
1045
                         TcL = FMA(KP668178637, Tcv, Tcy);
1046
                    }
1047
                    {
1048
                         E Tcl, TcA, TcN, TcO;
1049
                         Tcl = FMA(KP923879532, Tck, Tcd);
1050
                         TcA = Tcs + Tcz;
1051
                         ro[WS(os, 38)] = FNMS(KP831469612, TcA, Tcl);
1052
                         ro[WS(os, 6)] = FMA(KP831469612, TcA, Tcl);
1053
                         TcN = FMA(KP923879532, TcG, TcD);
1054
                         TcO = TcK + TcL;
1055
                         io[WS(os, 38)] = FNMS(KP831469612, TcO, TcN);
1056
                         io[WS(os, 6)] = FMA(KP831469612, TcO, TcN);
1057
                    }
1058
                    {
1059
                         E TcH, TcI, TcJ, TcM;
1060
                         TcH = FNMS(KP923879532, TcG, TcD);
1061
                         TcI = Tcz - Tcs;
1062
                         io[WS(os, 54)] = FNMS(KP831469612, TcI, TcH);
1063
                         io[WS(os, 22)] = FMA(KP831469612, TcI, TcH);
1064
                         TcJ = FNMS(KP923879532, Tck, Tcd);
1065
                         TcM = TcK - TcL;
1066
                         ro[WS(os, 54)] = FNMS(KP831469612, TcM, TcJ);
1067
                         ro[WS(os, 22)] = FMA(KP831469612, TcM, TcJ);
1068
                    }
1069
                    {
1070
                         E TcR, TcY, Td3, Td6;
1071
                         TcR = FNMS(KP923879532, TcQ, TcP);
1072
                         TcY = TcU - TcX;
1073
                         ro[WS(os, 46)] = FNMS(KP980785280, TcY, TcR);
1074
                         ro[WS(os, 14)] = FMA(KP980785280, TcY, TcR);
1075
                         Td3 = FNMS(KP923879532, Td0, TcZ);
1076
                         Td6 = Td4 - Td5;
1077
                         io[WS(os, 46)] = FNMS(KP980785280, Td6, Td3);
1078
                         io[WS(os, 14)] = FMA(KP980785280, Td6, Td3);
1079
                    }
1080
                    {
1081
                         E Td1, Td2, Td7, Td8;
1082
                         Td1 = FMA(KP923879532, Td0, TcZ);
1083
                         Td2 = TcU + TcX;
1084
                         io[WS(os, 30)] = FNMS(KP980785280, Td2, Td1);
1085
                         io[WS(os, 62)] = FMA(KP980785280, Td2, Td1);
1086
                         Td7 = FMA(KP923879532, TcQ, TcP);
1087
                         Td8 = Td5 + Td4;
1088
                         ro[WS(os, 30)] = FNMS(KP980785280, Td8, Td7);
1089
                         ro[WS(os, 62)] = FMA(KP980785280, Td8, Td7);
1090
                    }
1091
               }
1092
               {
1093
                    E Tap, TbR, TbF, Tc1, TaE, Tc2, TbZ, Tc7, Tb6, TbN, TbI, TbS, TbW, Tc6, Tbx;
1094
                    E TbM, Tao, TbE;
1095
                    Tao = Tak + Tan;
1096
                    Tap = FNMS(KP707106781, Tao, Tah);
1097
                    TbR = FMA(KP707106781, Tao, Tah);
1098
                    TbE = TbC + TbD;
1099
                    TbF = FNMS(KP707106781, TbE, TbB);
1100
                    Tc1 = FMA(KP707106781, TbE, TbB);
1101
                    {
1102
                         E Taw, TaD, TbX, TbY;
1103
                         Taw = FNMS(KP414213562, Tav, Tas);
1104
                         TaD = FMA(KP414213562, TaC, Taz);
1105
                         TaE = Taw - TaD;
1106
                         Tc2 = Taw + TaD;
1107
                         TbX = FMA(KP707106781, Tbv, Tbs);
1108
                         TbY = FMA(KP707106781, Tbm, Tbb);
1109
                         TbZ = FNMS(KP198912367, TbY, TbX);
1110
                         Tc7 = FMA(KP198912367, TbX, TbY);
1111
                    }
1112
                    {
1113
                         E TaW, Tb5, TbG, TbH;
1114
                         TaW = FNMS(KP707106781, TaV, TaK);
1115
                         Tb5 = FNMS(KP707106781, Tb4, Tb1);
1116
                         Tb6 = FMA(KP668178637, Tb5, TaW);
1117
                         TbN = FNMS(KP668178637, TaW, Tb5);
1118
                         TbG = FNMS(KP414213562, Taz, TaC);
1119
                         TbH = FMA(KP414213562, Tas, Tav);
1120
                         TbI = TbG - TbH;
1121
                         TbS = TbH + TbG;
1122
                    }
1123
                    {
1124
                         E TbU, TbV, Tbn, Tbw;
1125
                         TbU = FMA(KP707106781, Tb4, Tb1);
1126
                         TbV = FMA(KP707106781, TaV, TaK);
1127
                         TbW = FMA(KP198912367, TbV, TbU);
1128
                         Tc6 = FNMS(KP198912367, TbU, TbV);
1129
                         Tbn = FNMS(KP707106781, Tbm, Tbb);
1130
                         Tbw = FNMS(KP707106781, Tbv, Tbs);
1131
                         Tbx = FNMS(KP668178637, Tbw, Tbn);
1132
                         TbM = FMA(KP668178637, Tbn, Tbw);
1133
                    }
1134
                    {
1135
                         E TaF, Tby, TbL, TbO;
1136
                         TaF = FMA(KP923879532, TaE, Tap);
1137
                         Tby = Tb6 - Tbx;
1138
                         ro[WS(os, 42)] = FNMS(KP831469612, Tby, TaF);
1139
                         ro[WS(os, 10)] = FMA(KP831469612, Tby, TaF);
1140
                         TbL = FMA(KP923879532, TbI, TbF);
1141
                         TbO = TbM - TbN;
1142
                         io[WS(os, 42)] = FNMS(KP831469612, TbO, TbL);
1143
                         io[WS(os, 10)] = FMA(KP831469612, TbO, TbL);
1144
                    }
1145
                    {
1146
                         E TbJ, TbK, TbP, TbQ;
1147
                         TbJ = FNMS(KP923879532, TbI, TbF);
1148
                         TbK = Tb6 + Tbx;
1149
                         io[WS(os, 26)] = FNMS(KP831469612, TbK, TbJ);
1150
                         io[WS(os, 58)] = FMA(KP831469612, TbK, TbJ);
1151
                         TbP = FNMS(KP923879532, TaE, Tap);
1152
                         TbQ = TbN + TbM;
1153
                         ro[WS(os, 26)] = FNMS(KP831469612, TbQ, TbP);
1154
                         ro[WS(os, 58)] = FMA(KP831469612, TbQ, TbP);
1155
                    }
1156
                    {
1157
                         E TbT, Tc0, Tc9, Tca;
1158
                         TbT = FMA(KP923879532, TbS, TbR);
1159
                         Tc0 = TbW + TbZ;
1160
                         ro[WS(os, 34)] = FNMS(KP980785280, Tc0, TbT);
1161
                         ro[WS(os, 2)] = FMA(KP980785280, Tc0, TbT);
1162
                         Tc9 = FMA(KP923879532, Tc2, Tc1);
1163
                         Tca = Tc6 + Tc7;
1164
                         io[WS(os, 34)] = FNMS(KP980785280, Tca, Tc9);
1165
                         io[WS(os, 2)] = FMA(KP980785280, Tca, Tc9);
1166
                    }
1167
                    {
1168
                         E Tc3, Tc4, Tc5, Tc8;
1169
                         Tc3 = FNMS(KP923879532, Tc2, Tc1);
1170
                         Tc4 = TbZ - TbW;
1171
                         io[WS(os, 50)] = FNMS(KP980785280, Tc4, Tc3);
1172
                         io[WS(os, 18)] = FMA(KP980785280, Tc4, Tc3);
1173
                         Tc5 = FNMS(KP923879532, TbS, TbR);
1174
                         Tc8 = Tc6 - Tc7;
1175
                         ro[WS(os, 50)] = FNMS(KP980785280, Tc8, Tc5);
1176
                         ro[WS(os, 18)] = FMA(KP980785280, Tc8, Tc5);
1177
                    }
1178
               }
1179
               {
1180
                    E T6F, T7h, T7m, T7x, T7p, T7w, T6M, T7s, T6U, T7c, T75, T7r, T78, T7i, T71;
1181
                    E T7d;
1182
                    {
1183
                         E T6D, T6E, T7k, T7l;
1184
                         T6D = FNMS(KP707106781, T3e, T37);
1185
                         T6E = T65 + T64;
1186
                         T6F = FNMS(KP923879532, T6E, T6D);
1187
                         T7h = FMA(KP923879532, T6E, T6D);
1188
                         T7k = FMA(KP923879532, T6S, T6R);
1189
                         T7l = FMA(KP923879532, T6P, T6O);
1190
                         T7m = FMA(KP098491403, T7l, T7k);
1191
                         T7x = FNMS(KP098491403, T7k, T7l);
1192
                    }
1193
                    {
1194
                         E T7n, T7o, T6I, T6L;
1195
                         T7n = FMA(KP923879532, T6Z, T6Y);
1196
                         T7o = FMA(KP923879532, T6W, T6V);
1197
                         T7p = FNMS(KP098491403, T7o, T7n);
1198
                         T7w = FMA(KP098491403, T7n, T7o);
1199
                         T6I = FMA(KP198912367, T6H, T6G);
1200
                         T6L = FNMS(KP198912367, T6K, T6J);
1201
                         T6M = T6I - T6L;
1202
                         T7s = T6I + T6L;
1203
                    }
1204
                    {
1205
                         E T6Q, T6T, T73, T74;
1206
                         T6Q = FNMS(KP923879532, T6P, T6O);
1207
                         T6T = FNMS(KP923879532, T6S, T6R);
1208
                         T6U = FMA(KP820678790, T6T, T6Q);
1209
                         T7c = FNMS(KP820678790, T6Q, T6T);
1210
                         T73 = FNMS(KP707106781, T62, T5Z);
1211
                         T74 = T3m + T3t;
1212
                         T75 = FNMS(KP923879532, T74, T73);
1213
                         T7r = FMA(KP923879532, T74, T73);
1214
                    }
1215
                    {
1216
                         E T76, T77, T6X, T70;
1217
                         T76 = FMA(KP198912367, T6J, T6K);
1218
                         T77 = FNMS(KP198912367, T6G, T6H);
1219
                         T78 = T76 - T77;
1220
                         T7i = T77 + T76;
1221
                         T6X = FNMS(KP923879532, T6W, T6V);
1222
                         T70 = FNMS(KP923879532, T6Z, T6Y);
1223
                         T71 = FNMS(KP820678790, T70, T6X);
1224
                         T7d = FMA(KP820678790, T6X, T70);
1225
                    }
1226
                    {
1227
                         E T6N, T72, T7f, T7g;
1228
                         T6N = FMA(KP980785280, T6M, T6F);
1229
                         T72 = T6U + T71;
1230
                         ro[WS(os, 39)] = FNMS(KP773010453, T72, T6N);
1231
                         ro[WS(os, 7)] = FMA(KP773010453, T72, T6N);
1232
                         T7f = FMA(KP980785280, T78, T75);
1233
                         T7g = T7c + T7d;
1234
                         io[WS(os, 39)] = FNMS(KP773010453, T7g, T7f);
1235
                         io[WS(os, 7)] = FMA(KP773010453, T7g, T7f);
1236
                    }
1237
                    {
1238
                         E T79, T7a, T7b, T7e;
1239
                         T79 = FNMS(KP980785280, T78, T75);
1240
                         T7a = T71 - T6U;
1241
                         io[WS(os, 55)] = FNMS(KP773010453, T7a, T79);
1242
                         io[WS(os, 23)] = FMA(KP773010453, T7a, T79);
1243
                         T7b = FNMS(KP980785280, T6M, T6F);
1244
                         T7e = T7c - T7d;
1245
                         ro[WS(os, 55)] = FNMS(KP773010453, T7e, T7b);
1246
                         ro[WS(os, 23)] = FMA(KP773010453, T7e, T7b);
1247
                    }
1248
                    {
1249
                         E T7j, T7q, T7v, T7y;
1250
                         T7j = FNMS(KP980785280, T7i, T7h);
1251
                         T7q = T7m - T7p;
1252
                         ro[WS(os, 47)] = FNMS(KP995184726, T7q, T7j);
1253
                         ro[WS(os, 15)] = FMA(KP995184726, T7q, T7j);
1254
                         T7v = FNMS(KP980785280, T7s, T7r);
1255
                         T7y = T7w - T7x;
1256
                         io[WS(os, 47)] = FNMS(KP995184726, T7y, T7v);
1257
                         io[WS(os, 15)] = FMA(KP995184726, T7y, T7v);
1258
                    }
1259
                    {
1260
                         E T7t, T7u, T7z, T7A;
1261
                         T7t = FMA(KP980785280, T7s, T7r);
1262
                         T7u = T7m + T7p;
1263
                         io[WS(os, 31)] = FNMS(KP995184726, T7u, T7t);
1264
                         io[WS(os, 63)] = FMA(KP995184726, T7u, T7t);
1265
                         T7z = FMA(KP980785280, T7i, T7h);
1266
                         T7A = T7x + T7w;
1267
                         ro[WS(os, 31)] = FNMS(KP995184726, T7A, T7z);
1268
                         ro[WS(os, 63)] = FMA(KP995184726, T7A, T7z);
1269
                    }
1270
               }
1271
               {
1272
                    E T9j, T9V, Ta0, Tab, Ta3, Taa, T9q, Ta6, T9y, T9Q, T9J, Ta5, T9M, T9W, T9F;
1273
                    E T9R;
1274
                    {
1275
                         E T9h, T9i, T9Y, T9Z;
1276
                         T9h = FNMS(KP707106781, T7C, T7B);
1277
                         T9i = T8I - T8J;
1278
                         T9j = FMA(KP923879532, T9i, T9h);
1279
                         T9V = FNMS(KP923879532, T9i, T9h);
1280
                         T9Y = FMA(KP923879532, T9w, T9v);
1281
                         T9Z = FMA(KP923879532, T9t, T9s);
1282
                         Ta0 = FMA(KP303346683, T9Z, T9Y);
1283
                         Tab = FNMS(KP303346683, T9Y, T9Z);
1284
                    }
1285
                    {
1286
                         E Ta1, Ta2, T9m, T9p;
1287
                         Ta1 = FMA(KP923879532, T9D, T9C);
1288
                         Ta2 = FMA(KP923879532, T9A, T9z);
1289
                         Ta3 = FNMS(KP303346683, Ta2, Ta1);
1290
                         Taa = FMA(KP303346683, Ta1, Ta2);
1291
                         T9m = FMA(KP668178637, T9l, T9k);
1292
                         T9p = FNMS(KP668178637, T9o, T9n);
1293
                         T9q = T9m - T9p;
1294
                         Ta6 = T9m + T9p;
1295
                    }
1296
                    {
1297
                         E T9u, T9x, T9H, T9I;
1298
                         T9u = FNMS(KP923879532, T9t, T9s);
1299
                         T9x = FNMS(KP923879532, T9w, T9v);
1300
                         T9y = FMA(KP534511135, T9x, T9u);
1301
                         T9Q = FNMS(KP534511135, T9u, T9x);
1302
                         T9H = FNMS(KP707106781, T8G, T8F);
1303
                         T9I = T7J - T7G;
1304
                         T9J = FMA(KP923879532, T9I, T9H);
1305
                         Ta5 = FNMS(KP923879532, T9I, T9H);
1306
                    }
1307
                    {
1308
                         E T9K, T9L, T9B, T9E;
1309
                         T9K = FMA(KP668178637, T9n, T9o);
1310
                         T9L = FNMS(KP668178637, T9k, T9l);
1311
                         T9M = T9K - T9L;
1312
                         T9W = T9L + T9K;
1313
                         T9B = FNMS(KP923879532, T9A, T9z);
1314
                         T9E = FNMS(KP923879532, T9D, T9C);
1315
                         T9F = FNMS(KP534511135, T9E, T9B);
1316
                         T9R = FMA(KP534511135, T9B, T9E);
1317
                    }
1318
                    {
1319
                         E T9r, T9G, T9T, T9U;
1320
                         T9r = FMA(KP831469612, T9q, T9j);
1321
                         T9G = T9y + T9F;
1322
                         ro[WS(os, 37)] = FNMS(KP881921264, T9G, T9r);
1323
                         ro[WS(os, 5)] = FMA(KP881921264, T9G, T9r);
1324
                         T9T = FMA(KP831469612, T9M, T9J);
1325
                         T9U = T9Q + T9R;
1326
                         io[WS(os, 37)] = FNMS(KP881921264, T9U, T9T);
1327
                         io[WS(os, 5)] = FMA(KP881921264, T9U, T9T);
1328
                    }
1329
                    {
1330
                         E T9N, T9O, T9P, T9S;
1331
                         T9N = FNMS(KP831469612, T9M, T9J);
1332
                         T9O = T9F - T9y;
1333
                         io[WS(os, 53)] = FNMS(KP881921264, T9O, T9N);
1334
                         io[WS(os, 21)] = FMA(KP881921264, T9O, T9N);
1335
                         T9P = FNMS(KP831469612, T9q, T9j);
1336
                         T9S = T9Q - T9R;
1337
                         ro[WS(os, 53)] = FNMS(KP881921264, T9S, T9P);
1338
                         ro[WS(os, 21)] = FMA(KP881921264, T9S, T9P);
1339
                    }
1340
                    {
1341
                         E T9X, Ta4, Ta9, Tac;
1342
                         T9X = FNMS(KP831469612, T9W, T9V);
1343
                         Ta4 = Ta0 - Ta3;
1344
                         ro[WS(os, 45)] = FNMS(KP956940335, Ta4, T9X);
1345
                         ro[WS(os, 13)] = FMA(KP956940335, Ta4, T9X);
1346
                         Ta9 = FNMS(KP831469612, Ta6, Ta5);
1347
                         Tac = Taa - Tab;
1348
                         io[WS(os, 45)] = FNMS(KP956940335, Tac, Ta9);
1349
                         io[WS(os, 13)] = FMA(KP956940335, Tac, Ta9);
1350
                    }
1351
                    {
1352
                         E Ta7, Ta8, Tad, Tae;
1353
                         Ta7 = FMA(KP831469612, Ta6, Ta5);
1354
                         Ta8 = Ta0 + Ta3;
1355
                         io[WS(os, 29)] = FNMS(KP956940335, Ta8, Ta7);
1356
                         io[WS(os, 61)] = FMA(KP956940335, Ta8, Ta7);
1357
                         Tad = FMA(KP831469612, T9W, T9V);
1358
                         Tae = Tab + Taa;
1359
                         ro[WS(os, 29)] = FNMS(KP956940335, Tae, Tad);
1360
                         ro[WS(os, 61)] = FMA(KP956940335, Tae, Tad);
1361
                    }
1362
               }
1363
               {
1364
                    E T3v, T6j, T6o, T6y, T6r, T6z, T48, T6u, T52, T6f, T67, T6t, T6a, T6k, T5V;
1365
                    E T6e;
1366
                    {
1367
                         E T3f, T3u, T6m, T6n;
1368
                         T3f = FMA(KP707106781, T3e, T37);
1369
                         T3u = T3m - T3t;
1370
                         T3v = FNMS(KP923879532, T3u, T3f);
1371
                         T6j = FMA(KP923879532, T3u, T3f);
1372
                         T6m = FMA(KP923879532, T50, T4X);
1373
                         T6n = FMA(KP923879532, T4N, T4q);
1374
                         T6o = FMA(KP303346683, T6n, T6m);
1375
                         T6y = FNMS(KP303346683, T6m, T6n);
1376
                    }
1377
                    {
1378
                         E T6p, T6q, T3O, T47;
1379
                         T6p = FMA(KP923879532, T5T, T5Q);
1380
                         T6q = FMA(KP923879532, T5G, T5j);
1381
                         T6r = FNMS(KP303346683, T6q, T6p);
1382
                         T6z = FMA(KP303346683, T6p, T6q);
1383
                         T3O = FNMS(KP668178637, T3N, T3G);
1384
                         T47 = FMA(KP668178637, T46, T3Z);
1385
                         T48 = T3O - T47;
1386
                         T6u = T3O + T47;
1387
                    }
1388
                    {
1389
                         E T4O, T51, T63, T66;
1390
                         T4O = FNMS(KP923879532, T4N, T4q);
1391
                         T51 = FNMS(KP923879532, T50, T4X);
1392
                         T52 = FMA(KP534511135, T51, T4O);
1393
                         T6f = FNMS(KP534511135, T4O, T51);
1394
                         T63 = FMA(KP707106781, T62, T5Z);
1395
                         T66 = T64 - T65;
1396
                         T67 = FNMS(KP923879532, T66, T63);
1397
                         T6t = FMA(KP923879532, T66, T63);
1398
                    }
1399
                    {
1400
                         E T68, T69, T5H, T5U;
1401
                         T68 = FNMS(KP668178637, T3Z, T46);
1402
                         T69 = FMA(KP668178637, T3G, T3N);
1403
                         T6a = T68 - T69;
1404
                         T6k = T69 + T68;
1405
                         T5H = FNMS(KP923879532, T5G, T5j);
1406
                         T5U = FNMS(KP923879532, T5T, T5Q);
1407
                         T5V = FNMS(KP534511135, T5U, T5H);
1408
                         T6e = FMA(KP534511135, T5H, T5U);
1409
                    }
1410
                    {
1411
                         E T49, T5W, T6d, T6g;
1412
                         T49 = FMA(KP831469612, T48, T3v);
1413
                         T5W = T52 - T5V;
1414
                         ro[WS(os, 43)] = FNMS(KP881921264, T5W, T49);
1415
                         ro[WS(os, 11)] = FMA(KP881921264, T5W, T49);
1416
                         T6d = FMA(KP831469612, T6a, T67);
1417
                         T6g = T6e - T6f;
1418
                         io[WS(os, 43)] = FNMS(KP881921264, T6g, T6d);
1419
                         io[WS(os, 11)] = FMA(KP881921264, T6g, T6d);
1420
                    }
1421
                    {
1422
                         E T6b, T6c, T6h, T6i;
1423
                         T6b = FNMS(KP831469612, T6a, T67);
1424
                         T6c = T52 + T5V;
1425
                         io[WS(os, 27)] = FNMS(KP881921264, T6c, T6b);
1426
                         io[WS(os, 59)] = FMA(KP881921264, T6c, T6b);
1427
                         T6h = FNMS(KP831469612, T48, T3v);
1428
                         T6i = T6f + T6e;
1429
                         ro[WS(os, 27)] = FNMS(KP881921264, T6i, T6h);
1430
                         ro[WS(os, 59)] = FMA(KP881921264, T6i, T6h);
1431
                    }
1432
                    {
1433
                         E T6l, T6s, T6B, T6C;
1434
                         T6l = FMA(KP831469612, T6k, T6j);
1435
                         T6s = T6o + T6r;
1436
                         ro[WS(os, 35)] = FNMS(KP956940335, T6s, T6l);
1437
                         ro[WS(os, 3)] = FMA(KP956940335, T6s, T6l);
1438
                         T6B = FMA(KP831469612, T6u, T6t);
1439
                         T6C = T6y + T6z;
1440
                         io[WS(os, 35)] = FNMS(KP956940335, T6C, T6B);
1441
                         io[WS(os, 3)] = FMA(KP956940335, T6C, T6B);
1442
                    }
1443
                    {
1444
                         E T6v, T6w, T6x, T6A;
1445
                         T6v = FNMS(KP831469612, T6u, T6t);
1446
                         T6w = T6r - T6o;
1447
                         io[WS(os, 51)] = FNMS(KP956940335, T6w, T6v);
1448
                         io[WS(os, 19)] = FMA(KP956940335, T6w, T6v);
1449
                         T6x = FNMS(KP831469612, T6k, T6j);
1450
                         T6A = T6y - T6z;
1451
                         ro[WS(os, 51)] = FNMS(KP956940335, T6A, T6x);
1452
                         ro[WS(os, 19)] = FMA(KP956940335, T6A, T6x);
1453
                    }
1454
               }
1455
               {
1456
                    E T7L, T8X, T92, T9c, T95, T9d, T80, T98, T8k, T8T, T8L, T97, T8O, T8Y, T8D;
1457
                    E T8S;
1458
                    {
1459
                         E T7D, T7K, T90, T91;
1460
                         T7D = FMA(KP707106781, T7C, T7B);
1461
                         T7K = T7G + T7J;
1462
                         T7L = FNMS(KP923879532, T7K, T7D);
1463
                         T8X = FMA(KP923879532, T7K, T7D);
1464
                         T90 = FMA(KP923879532, T8i, T8f);
1465
                         T91 = FMA(KP923879532, T8b, T84);
1466
                         T92 = FMA(KP098491403, T91, T90);
1467
                         T9c = FNMS(KP098491403, T90, T91);
1468
                    }
1469
                    {
1470
                         E T93, T94, T7S, T7Z;
1471
                         T93 = FMA(KP923879532, T8B, T8y);
1472
                         T94 = FMA(KP923879532, T8u, T8n);
1473
                         T95 = FNMS(KP098491403, T94, T93);
1474
                         T9d = FMA(KP098491403, T93, T94);
1475
                         T7S = FNMS(KP198912367, T7R, T7O);
1476
                         T7Z = FMA(KP198912367, T7Y, T7V);
1477
                         T80 = T7S - T7Z;
1478
                         T98 = T7S + T7Z;
1479
                    }
1480
                    {
1481
                         E T8c, T8j, T8H, T8K;
1482
                         T8c = FNMS(KP923879532, T8b, T84);
1483
                         T8j = FNMS(KP923879532, T8i, T8f);
1484
                         T8k = FMA(KP820678790, T8j, T8c);
1485
                         T8T = FNMS(KP820678790, T8c, T8j);
1486
                         T8H = FMA(KP707106781, T8G, T8F);
1487
                         T8K = T8I + T8J;
1488
                         T8L = FNMS(KP923879532, T8K, T8H);
1489
                         T97 = FMA(KP923879532, T8K, T8H);
1490
                    }
1491
                    {
1492
                         E T8M, T8N, T8v, T8C;
1493
                         T8M = FNMS(KP198912367, T7V, T7Y);
1494
                         T8N = FMA(KP198912367, T7O, T7R);
1495
                         T8O = T8M - T8N;
1496
                         T8Y = T8N + T8M;
1497
                         T8v = FNMS(KP923879532, T8u, T8n);
1498
                         T8C = FNMS(KP923879532, T8B, T8y);
1499
                         T8D = FNMS(KP820678790, T8C, T8v);
1500
                         T8S = FMA(KP820678790, T8v, T8C);
1501
                    }
1502
                    {
1503
                         E T81, T8E, T8R, T8U;
1504
                         T81 = FMA(KP980785280, T80, T7L);
1505
                         T8E = T8k - T8D;
1506
                         ro[WS(os, 41)] = FNMS(KP773010453, T8E, T81);
1507
                         ro[WS(os, 9)] = FMA(KP773010453, T8E, T81);
1508
                         T8R = FMA(KP980785280, T8O, T8L);
1509
                         T8U = T8S - T8T;
1510
                         io[WS(os, 41)] = FNMS(KP773010453, T8U, T8R);
1511
                         io[WS(os, 9)] = FMA(KP773010453, T8U, T8R);
1512
                    }
1513
                    {
1514
                         E T8P, T8Q, T8V, T8W;
1515
                         T8P = FNMS(KP980785280, T8O, T8L);
1516
                         T8Q = T8k + T8D;
1517
                         io[WS(os, 25)] = FNMS(KP773010453, T8Q, T8P);
1518
                         io[WS(os, 57)] = FMA(KP773010453, T8Q, T8P);
1519
                         T8V = FNMS(KP980785280, T80, T7L);
1520
                         T8W = T8T + T8S;
1521
                         ro[WS(os, 25)] = FNMS(KP773010453, T8W, T8V);
1522
                         ro[WS(os, 57)] = FMA(KP773010453, T8W, T8V);
1523
                    }
1524
                    {
1525
                         E T8Z, T96, T9f, T9g;
1526
                         T8Z = FMA(KP980785280, T8Y, T8X);
1527
                         T96 = T92 + T95;
1528
                         ro[WS(os, 33)] = FNMS(KP995184726, T96, T8Z);
1529
                         ro[WS(os, 1)] = FMA(KP995184726, T96, T8Z);
1530
                         T9f = FMA(KP980785280, T98, T97);
1531
                         T9g = T9c + T9d;
1532
                         io[WS(os, 33)] = FNMS(KP995184726, T9g, T9f);
1533
                         io[WS(os, 1)] = FMA(KP995184726, T9g, T9f);
1534
                    }
1535
                    {
1536
                         E T99, T9a, T9b, T9e;
1537
                         T99 = FNMS(KP980785280, T98, T97);
1538
                         T9a = T95 - T92;
1539
                         io[WS(os, 49)] = FNMS(KP995184726, T9a, T99);
1540
                         io[WS(os, 17)] = FMA(KP995184726, T9a, T99);
1541
                         T9b = FNMS(KP980785280, T8Y, T8X);
1542
                         T9e = T9c - T9d;
1543
                         ro[WS(os, 49)] = FNMS(KP995184726, T9e, T9b);
1544
                         ro[WS(os, 17)] = FMA(KP995184726, T9e, T9b);
1545
                    }
1546
               }
1547
          }
1548
     }
1549
}
1550

    
1551
/*
 * Descriptor for the n1_64 codelet defined in the FMA branch of this file;
 * its address is passed to X(kdft_register) together with n1_64.
 * The initializer carries the size 64, the name "n1_64", and the operation
 * counts {520, 0, 392, 0}; the first three match the header comment of this
 * variant (520 additions, 0 multiplications, 392 fused multiply/add).
 * NOTE(review): the meaning of the fourth count and of the four trailing
 * zeros is fixed by the kdft_desc declaration, which is not visible here --
 * confirm against dft/codelet-dft.h.
 */
static const kdft_desc desc = { 64, "n1_64", {520, 0, 392, 0}, &GENUS, 0, 0, 0, 0 };
1552

    
1553
/*
 * Registration entry point for this codelet: hands the n1_64 function and
 * its descriptor `desc` to X(kdft_register) for planner `p`.
 */
void X(codelet_n1_64) (planner *p) {
     X(kdft_register) (p, n1_64, &desc);
}
1556

    
1557
#else
1558

    
1559
/* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 64 -name n1_64 -include dft/scalar/n.h */
1560

    
1561
/*
 * This function contains 912 FP additions, 248 FP multiplications,
 * (or, 808 additions, 144 multiplications, 104 fused multiply/add),
 * 172 stack variables, 15 constants, and 256 memory accesses
 */
1566
#include "dft/scalar/n.h"
1567

    
1568
static void n1_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
1569
{
1570
     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
1571
     DK(KP634393284, +0.634393284163645498215171613225493370675687095);
1572
     DK(KP098017140, +0.098017140329560601994195563888641845861136673);
1573
     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
1574
     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
1575
     DK(KP471396736, +0.471396736825997648556387625905254377657460319);
1576
     DK(KP290284677, +0.290284677254462367636192375817395274691476278);
1577
     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
1578
     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
1579
     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
1580
     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
1581
     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
1582
     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
1583
     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
1584
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
1585
     {
1586
          INT i;
1587
          for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(256, is), MAKE_VOLATILE_STRIDE(256, os)) {
1588
               E T37, T7B, T8F, T5Z, Tf, Td9, TbB, TcB, T62, T7C, T2i, TdH, Tah, Tcb, T3e;
1589
               E T8G, Tu, TdI, Tak, TbD, Tan, TbC, T2x, Tda, T3m, T65, T7G, T8J, T7J, T8I;
1590
               E T3t, T64, TK, Tdd, Tas, Tce, Tav, Tcf, T2N, Tdc, T3G, T6G, T7O, T9k, T7R;
1591
               E T9l, T3N, T6H, T1L, Tdv, Tbs, Tcw, TdC, Teo, T5j, T6V, T5Q, T6Y, T8y, T9C;
1592
               E Tbb, Tct, T8n, T9z, TZ, Tdf, Taz, Tch, TaC, Tci, T32, Tdg, T3Z, T6J, T7V;
1593
               E T9n, T7Y, T9o, T46, T6K, T1g, Tdp, Tb1, Tcm, Tdm, Tej, T4q, T6R, T4X, T6O;
1594
               E T8f, T9s, TaK, Tcp, T84, T9v, T1v, Tdn, Tb4, Tcq, Tds, Tek, T4N, T6P, T50;
1595
               E T6S, T8i, T9w, TaV, Tcn, T8b, T9t, T20, TdD, Tbv, Tcu, Tdy, Tep, T5G, T6Z;
1596
               E T5T, T6W, T8B, T9A, Tbm, Tcx, T8u, T9D;
1597
               {
1598
                    E T3, T35, T26, T5Y, T6, T5X, T29, T36, Ta, T39, T2d, T38, Td, T3b, T2g;
1599
                    E T3c;
1600
                    {
1601
                         E T1, T2, T24, T25;
1602
                         T1 = ri[0];
1603
                         T2 = ri[WS(is, 32)];
1604
                         T3 = T1 + T2;
1605
                         T35 = T1 - T2;
1606
                         T24 = ii[0];
1607
                         T25 = ii[WS(is, 32)];
1608
                         T26 = T24 + T25;
1609
                         T5Y = T24 - T25;
1610
                    }
1611
                    {
1612
                         E T4, T5, T27, T28;
1613
                         T4 = ri[WS(is, 16)];
1614
                         T5 = ri[WS(is, 48)];
1615
                         T6 = T4 + T5;
1616
                         T5X = T4 - T5;
1617
                         T27 = ii[WS(is, 16)];
1618
                         T28 = ii[WS(is, 48)];
1619
                         T29 = T27 + T28;
1620
                         T36 = T27 - T28;
1621
                    }
1622
                    {
1623
                         E T8, T9, T2b, T2c;
1624
                         T8 = ri[WS(is, 8)];
1625
                         T9 = ri[WS(is, 40)];
1626
                         Ta = T8 + T9;
1627
                         T39 = T8 - T9;
1628
                         T2b = ii[WS(is, 8)];
1629
                         T2c = ii[WS(is, 40)];
1630
                         T2d = T2b + T2c;
1631
                         T38 = T2b - T2c;
1632
                    }
1633
                    {
1634
                         E Tb, Tc, T2e, T2f;
1635
                         Tb = ri[WS(is, 56)];
1636
                         Tc = ri[WS(is, 24)];
1637
                         Td = Tb + Tc;
1638
                         T3b = Tb - Tc;
1639
                         T2e = ii[WS(is, 56)];
1640
                         T2f = ii[WS(is, 24)];
1641
                         T2g = T2e + T2f;
1642
                         T3c = T2e - T2f;
1643
                    }
1644
                    {
1645
                         E T7, Te, T2a, T2h;
1646
                         T37 = T35 - T36;
1647
                         T7B = T35 + T36;
1648
                         T8F = T5Y - T5X;
1649
                         T5Z = T5X + T5Y;
1650
                         T7 = T3 + T6;
1651
                         Te = Ta + Td;
1652
                         Tf = T7 + Te;
1653
                         Td9 = T7 - Te;
1654
                         {
1655
                              E Tbz, TbA, T60, T61;
1656
                              Tbz = T26 - T29;
1657
                              TbA = Td - Ta;
1658
                              TbB = Tbz - TbA;
1659
                              TcB = TbA + Tbz;
1660
                              T60 = T3b - T3c;
1661
                              T61 = T39 + T38;
1662
                              T62 = KP707106781 * (T60 - T61);
1663
                              T7C = KP707106781 * (T61 + T60);
1664
                         }
1665
                         T2a = T26 + T29;
1666
                         T2h = T2d + T2g;
1667
                         T2i = T2a + T2h;
1668
                         TdH = T2a - T2h;
1669
                         {
1670
                              E Taf, Tag, T3a, T3d;
1671
                              Taf = T3 - T6;
1672
                              Tag = T2d - T2g;
1673
                              Tah = Taf - Tag;
1674
                              Tcb = Taf + Tag;
1675
                              T3a = T38 - T39;
1676
                              T3d = T3b + T3c;
1677
                              T3e = KP707106781 * (T3a - T3d);
1678
                              T8G = KP707106781 * (T3a + T3d);
1679
                         }
1680
                    }
1681
               }
1682
               {
1683
                    E Ti, T3j, T2l, T3h, Tl, T3g, T2o, T3k, Tp, T3q, T2s, T3o, Ts, T3n, T2v;
1684
                    E T3r;
1685
                    {
1686
                         E Tg, Th, T2j, T2k;
1687
                         Tg = ri[WS(is, 4)];
1688
                         Th = ri[WS(is, 36)];
1689
                         Ti = Tg + Th;
1690
                         T3j = Tg - Th;
1691
                         T2j = ii[WS(is, 4)];
1692
                         T2k = ii[WS(is, 36)];
1693
                         T2l = T2j + T2k;
1694
                         T3h = T2j - T2k;
1695
                    }
1696
                    {
1697
                         E Tj, Tk, T2m, T2n;
1698
                         Tj = ri[WS(is, 20)];
1699
                         Tk = ri[WS(is, 52)];
1700
                         Tl = Tj + Tk;
1701
                         T3g = Tj - Tk;
1702
                         T2m = ii[WS(is, 20)];
1703
                         T2n = ii[WS(is, 52)];
1704
                         T2o = T2m + T2n;
1705
                         T3k = T2m - T2n;
1706
                    }
1707
                    {
1708
                         E Tn, To, T2q, T2r;
1709
                         Tn = ri[WS(is, 60)];
1710
                         To = ri[WS(is, 28)];
1711
                         Tp = Tn + To;
1712
                         T3q = Tn - To;
1713
                         T2q = ii[WS(is, 60)];
1714
                         T2r = ii[WS(is, 28)];
1715
                         T2s = T2q + T2r;
1716
                         T3o = T2q - T2r;
1717
                    }
1718
                    {
1719
                         E Tq, Tr, T2t, T2u;
1720
                         Tq = ri[WS(is, 12)];
1721
                         Tr = ri[WS(is, 44)];
1722
                         Ts = Tq + Tr;
1723
                         T3n = Tq - Tr;
1724
                         T2t = ii[WS(is, 12)];
1725
                         T2u = ii[WS(is, 44)];
1726
                         T2v = T2t + T2u;
1727
                         T3r = T2t - T2u;
1728
                    }
1729
                    {
1730
                         E Tm, Tt, Tai, Taj;
1731
                         Tm = Ti + Tl;
1732
                         Tt = Tp + Ts;
1733
                         Tu = Tm + Tt;
1734
                         TdI = Tt - Tm;
1735
                         Tai = T2l - T2o;
1736
                         Taj = Ti - Tl;
1737
                         Tak = Tai - Taj;
1738
                         TbD = Taj + Tai;
1739
                    }
1740
                    {
1741
                         E Tal, Tam, T2p, T2w;
1742
                         Tal = Tp - Ts;
1743
                         Tam = T2s - T2v;
1744
                         Tan = Tal + Tam;
1745
                         TbC = Tal - Tam;
1746
                         T2p = T2l + T2o;
1747
                         T2w = T2s + T2v;
1748
                         T2x = T2p + T2w;
1749
                         Tda = T2p - T2w;
1750
                    }
1751
                    {
1752
                         E T3i, T3l, T7E, T7F;
1753
                         T3i = T3g + T3h;
1754
                         T3l = T3j - T3k;
1755
                         T3m = FNMS(KP923879532, T3l, KP382683432 * T3i);
1756
                         T65 = FMA(KP923879532, T3i, KP382683432 * T3l);
1757
                         T7E = T3h - T3g;
1758
                         T7F = T3j + T3k;
1759
                         T7G = FNMS(KP382683432, T7F, KP923879532 * T7E);
1760
                         T8J = FMA(KP382683432, T7E, KP923879532 * T7F);
1761
                    }
1762
                    {
1763
                         E T7H, T7I, T3p, T3s;
1764
                         T7H = T3o - T3n;
1765
                         T7I = T3q + T3r;
1766
                         T7J = FMA(KP923879532, T7H, KP382683432 * T7I);
1767
                         T8I = FNMS(KP382683432, T7H, KP923879532 * T7I);
1768
                         T3p = T3n + T3o;
1769
                         T3s = T3q - T3r;
1770
                         T3t = FMA(KP382683432, T3p, KP923879532 * T3s);
1771
                         T64 = FNMS(KP923879532, T3p, KP382683432 * T3s);
1772
                    }
1773
               }
1774
               {
1775
                    E Ty, T3H, T2B, T3x, TB, T3w, T2E, T3I, TI, T3L, T2L, T3B, TF, T3K, T2I;
1776
                    E T3E;
1777
                    {
1778
                         E Tw, Tx, T2C, T2D;
1779
                         Tw = ri[WS(is, 2)];
1780
                         Tx = ri[WS(is, 34)];
1781
                         Ty = Tw + Tx;
1782
                         T3H = Tw - Tx;
1783
                         {
1784
                              E T2z, T2A, Tz, TA;
1785
                              T2z = ii[WS(is, 2)];
1786
                              T2A = ii[WS(is, 34)];
1787
                              T2B = T2z + T2A;
1788
                              T3x = T2z - T2A;
1789
                              Tz = ri[WS(is, 18)];
1790
                              TA = ri[WS(is, 50)];
1791
                              TB = Tz + TA;
1792
                              T3w = Tz - TA;
1793
                         }
1794
                         T2C = ii[WS(is, 18)];
1795
                         T2D = ii[WS(is, 50)];
1796
                         T2E = T2C + T2D;
1797
                         T3I = T2C - T2D;
1798
                         {
1799
                              E TG, TH, T3z, T2J, T2K, T3A;
1800
                              TG = ri[WS(is, 58)];
1801
                              TH = ri[WS(is, 26)];
1802
                              T3z = TG - TH;
1803
                              T2J = ii[WS(is, 58)];
1804
                              T2K = ii[WS(is, 26)];
1805
                              T3A = T2J - T2K;
1806
                              TI = TG + TH;
1807
                              T3L = T3z + T3A;
1808
                              T2L = T2J + T2K;
1809
                              T3B = T3z - T3A;
1810
                         }
1811
                         {
1812
                              E TD, TE, T3C, T2G, T2H, T3D;
1813
                              TD = ri[WS(is, 10)];
1814
                              TE = ri[WS(is, 42)];
1815
                              T3C = TD - TE;
1816
                              T2G = ii[WS(is, 10)];
1817
                              T2H = ii[WS(is, 42)];
1818
                              T3D = T2G - T2H;
1819
                              TF = TD + TE;
1820
                              T3K = T3D - T3C;
1821
                              T2I = T2G + T2H;
1822
                              T3E = T3C + T3D;
1823
                         }
1824
                    }
1825
                    {
1826
                         E TC, TJ, Taq, Tar;
1827
                         TC = Ty + TB;
1828
                         TJ = TF + TI;
1829
                         TK = TC + TJ;
1830
                         Tdd = TC - TJ;
1831
                         Taq = T2B - T2E;
1832
                         Tar = TI - TF;
1833
                         Tas = Taq - Tar;
1834
                         Tce = Tar + Taq;
1835
                    }
1836
                    {
1837
                         E Tat, Tau, T2F, T2M;
1838
                         Tat = Ty - TB;
1839
                         Tau = T2I - T2L;
1840
                         Tav = Tat - Tau;
1841
                         Tcf = Tat + Tau;
1842
                         T2F = T2B + T2E;
1843
                         T2M = T2I + T2L;
1844
                         T2N = T2F + T2M;
1845
                         Tdc = T2F - T2M;
1846
                    }
1847
                    {
1848
                         E T3y, T3F, T7M, T7N;
1849
                         T3y = T3w + T3x;
1850
                         T3F = KP707106781 * (T3B - T3E);
1851
                         T3G = T3y - T3F;
1852
                         T6G = T3y + T3F;
1853
                         T7M = T3x - T3w;
1854
                         T7N = KP707106781 * (T3K + T3L);
1855
                         T7O = T7M - T7N;
1856
                         T9k = T7M + T7N;
1857
                    }
1858
                    {
1859
                         E T7P, T7Q, T3J, T3M;
1860
                         T7P = T3H + T3I;
1861
                         T7Q = KP707106781 * (T3E + T3B);
1862
                         T7R = T7P - T7Q;
1863
                         T9l = T7P + T7Q;
1864
                         T3J = T3H - T3I;
1865
                         T3M = KP707106781 * (T3K - T3L);
1866
                         T3N = T3J - T3M;
1867
                         T6H = T3J + T3M;
1868
                    }
1869
               }
1870
               {
1871
                    E T1z, T53, T5L, Tbo, T1C, T5I, T56, Tbp, T1J, Tb9, T5h, T5N, T1G, Tb8, T5c;
1872
                    E T5O;
1873
                    {
1874
                         E T1x, T1y, T54, T55;
1875
                         T1x = ri[WS(is, 63)];
1876
                         T1y = ri[WS(is, 31)];
1877
                         T1z = T1x + T1y;
1878
                         T53 = T1x - T1y;
1879
                         {
1880
                              E T5J, T5K, T1A, T1B;
1881
                              T5J = ii[WS(is, 63)];
1882
                              T5K = ii[WS(is, 31)];
1883
                              T5L = T5J - T5K;
1884
                              Tbo = T5J + T5K;
1885
                              T1A = ri[WS(is, 15)];
1886
                              T1B = ri[WS(is, 47)];
1887
                              T1C = T1A + T1B;
1888
                              T5I = T1A - T1B;
1889
                         }
1890
                         T54 = ii[WS(is, 15)];
1891
                         T55 = ii[WS(is, 47)];
1892
                         T56 = T54 - T55;
1893
                         Tbp = T54 + T55;
1894
                         {
1895
                              E T1H, T1I, T5d, T5e, T5f, T5g;
1896
                              T1H = ri[WS(is, 55)];
1897
                              T1I = ri[WS(is, 23)];
1898
                              T5d = T1H - T1I;
1899
                              T5e = ii[WS(is, 55)];
1900
                              T5f = ii[WS(is, 23)];
1901
                              T5g = T5e - T5f;
1902
                              T1J = T1H + T1I;
1903
                              Tb9 = T5e + T5f;
1904
                              T5h = T5d + T5g;
1905
                              T5N = T5d - T5g;
1906
                         }
1907
                         {
1908
                              E T1E, T1F, T5b, T58, T59, T5a;
1909
                              T1E = ri[WS(is, 7)];
1910
                              T1F = ri[WS(is, 39)];
1911
                              T5b = T1E - T1F;
1912
                              T58 = ii[WS(is, 7)];
1913
                              T59 = ii[WS(is, 39)];
1914
                              T5a = T58 - T59;
1915
                              T1G = T1E + T1F;
1916
                              Tb8 = T58 + T59;
1917
                              T5c = T5a - T5b;
1918
                              T5O = T5b + T5a;
1919
                         }
1920
                    }
1921
                    {
1922
                         E T1D, T1K, Tbq, Tbr;
1923
                         T1D = T1z + T1C;
1924
                         T1K = T1G + T1J;
1925
                         T1L = T1D + T1K;
1926
                         Tdv = T1D - T1K;
1927
                         Tbq = Tbo - Tbp;
1928
                         Tbr = T1J - T1G;
1929
                         Tbs = Tbq - Tbr;
1930
                         Tcw = Tbr + Tbq;
1931
                    }
1932
                    {
1933
                         E TdA, TdB, T57, T5i;
1934
                         TdA = Tbo + Tbp;
1935
                         TdB = Tb8 + Tb9;
1936
                         TdC = TdA - TdB;
1937
                         Teo = TdA + TdB;
1938
                         T57 = T53 - T56;
1939
                         T5i = KP707106781 * (T5c - T5h);
1940
                         T5j = T57 - T5i;
1941
                         T6V = T57 + T5i;
1942
                    }
1943
                    {
1944
                         E T5M, T5P, T8w, T8x;
1945
                         T5M = T5I + T5L;
1946
                         T5P = KP707106781 * (T5N - T5O);
1947
                         T5Q = T5M - T5P;
1948
                         T6Y = T5M + T5P;
1949
                         T8w = T5L - T5I;
1950
                         T8x = KP707106781 * (T5c + T5h);
1951
                         T8y = T8w - T8x;
1952
                         T9C = T8w + T8x;
1953
                    }
1954
                    {
1955
                         E Tb7, Tba, T8l, T8m;
1956
                         Tb7 = T1z - T1C;
1957
                         Tba = Tb8 - Tb9;
1958
                         Tbb = Tb7 - Tba;
1959
                         Tct = Tb7 + Tba;
1960
                         T8l = T53 + T56;
1961
                         T8m = KP707106781 * (T5O + T5N);
1962
                         T8n = T8l - T8m;
1963
                         T9z = T8l + T8m;
1964
                    }
1965
               }
1966
               {
1967
                    E TN, T40, T2Q, T3Q, TQ, T3P, T2T, T41, TX, T44, T30, T3U, TU, T43, T2X;
1968
                    E T3X;
1969
                    {
1970
                         E TL, TM, T2R, T2S;
1971
                         TL = ri[WS(is, 62)];
1972
                         TM = ri[WS(is, 30)];
1973
                         TN = TL + TM;
1974
                         T40 = TL - TM;
1975
                         {
1976
                              E T2O, T2P, TO, TP;
1977
                              T2O = ii[WS(is, 62)];
1978
                              T2P = ii[WS(is, 30)];
1979
                              T2Q = T2O + T2P;
1980
                              T3Q = T2O - T2P;
1981
                              TO = ri[WS(is, 14)];
1982
                              TP = ri[WS(is, 46)];
1983
                              TQ = TO + TP;
1984
                              T3P = TO - TP;
1985
                         }
1986
                         T2R = ii[WS(is, 14)];
1987
                         T2S = ii[WS(is, 46)];
1988
                         T2T = T2R + T2S;
1989
                         T41 = T2R - T2S;
1990
                         {
1991
                              E TV, TW, T3S, T2Y, T2Z, T3T;
1992
                              TV = ri[WS(is, 54)];
1993
                              TW = ri[WS(is, 22)];
1994
                              T3S = TV - TW;
1995
                              T2Y = ii[WS(is, 54)];
1996
                              T2Z = ii[WS(is, 22)];
1997
                              T3T = T2Y - T2Z;
1998
                              TX = TV + TW;
1999
                              T44 = T3S + T3T;
2000
                              T30 = T2Y + T2Z;
2001
                              T3U = T3S - T3T;
2002
                         }
2003
                         {
2004
                              E TS, TT, T3V, T2V, T2W, T3W;
2005
                              TS = ri[WS(is, 6)];
2006
                              TT = ri[WS(is, 38)];
2007
                              T3V = TS - TT;
2008
                              T2V = ii[WS(is, 6)];
2009
                              T2W = ii[WS(is, 38)];
2010
                              T3W = T2V - T2W;
2011
                              TU = TS + TT;
2012
                              T43 = T3W - T3V;
2013
                              T2X = T2V + T2W;
2014
                              T3X = T3V + T3W;
2015
                         }
2016
                    }
2017
                    {
2018
                         E TR, TY, Tax, Tay;
2019
                         TR = TN + TQ;
2020
                         TY = TU + TX;
2021
                         TZ = TR + TY;
2022
                         Tdf = TR - TY;
2023
                         Tax = T2Q - T2T;
2024
                         Tay = TX - TU;
2025
                         Taz = Tax - Tay;
2026
                         Tch = Tay + Tax;
2027
                    }
2028
                    {
2029
                         E TaA, TaB, T2U, T31;
2030
                         TaA = TN - TQ;
2031
                         TaB = T2X - T30;
2032
                         TaC = TaA - TaB;
2033
                         Tci = TaA + TaB;
2034
                         T2U = T2Q + T2T;
2035
                         T31 = T2X + T30;
2036
                         T32 = T2U + T31;
2037
                         Tdg = T2U - T31;
2038
                    }
2039
                    {
2040
                         E T3R, T3Y, T7T, T7U;
2041
                         T3R = T3P + T3Q;
2042
                         T3Y = KP707106781 * (T3U - T3X);
2043
                         T3Z = T3R - T3Y;
2044
                         T6J = T3R + T3Y;
2045
                         T7T = T40 + T41;
2046
                         T7U = KP707106781 * (T3X + T3U);
2047
                         T7V = T7T - T7U;
2048
                         T9n = T7T + T7U;
2049
                    }
2050
                    {
2051
                         E T7W, T7X, T42, T45;
2052
                         T7W = T3Q - T3P;
2053
                         T7X = KP707106781 * (T43 + T44);
2054
                         T7Y = T7W - T7X;
2055
                         T9o = T7W + T7X;
2056
                         T42 = T40 - T41;
2057
                         T45 = KP707106781 * (T43 - T44);
2058
                         T46 = T42 - T45;
2059
                         T6K = T42 + T45;
2060
                    }
2061
               }
2062
               {
2063
                    E T14, T4P, T4d, TaG, T17, T4a, T4S, TaH, T1e, TaZ, T4j, T4V, T1b, TaY, T4o;
2064
                    E T4U;
2065
                    {
2066
                         E T12, T13, T4Q, T4R;
2067
                         T12 = ri[WS(is, 1)];
2068
                         T13 = ri[WS(is, 33)];
2069
                         T14 = T12 + T13;
2070
                         T4P = T12 - T13;
2071
                         {
2072
                              E T4b, T4c, T15, T16;
2073
                              T4b = ii[WS(is, 1)];
2074
                              T4c = ii[WS(is, 33)];
2075
                              T4d = T4b - T4c;
2076
                              TaG = T4b + T4c;
2077
                              T15 = ri[WS(is, 17)];
2078
                              T16 = ri[WS(is, 49)];
2079
                              T17 = T15 + T16;
2080
                              T4a = T15 - T16;
2081
                         }
2082
                         T4Q = ii[WS(is, 17)];
2083
                         T4R = ii[WS(is, 49)];
2084
                         T4S = T4Q - T4R;
2085
                         TaH = T4Q + T4R;
2086
                         {
2087
                              E T1c, T1d, T4f, T4g, T4h, T4i;
2088
                              T1c = ri[WS(is, 57)];
2089
                              T1d = ri[WS(is, 25)];
2090
                              T4f = T1c - T1d;
2091
                              T4g = ii[WS(is, 57)];
2092
                              T4h = ii[WS(is, 25)];
2093
                              T4i = T4g - T4h;
2094
                              T1e = T1c + T1d;
2095
                              TaZ = T4g + T4h;
2096
                              T4j = T4f - T4i;
2097
                              T4V = T4f + T4i;
2098
                         }
2099
                         {
2100
                              E T19, T1a, T4k, T4l, T4m, T4n;
2101
                              T19 = ri[WS(is, 9)];
2102
                              T1a = ri[WS(is, 41)];
2103
                              T4k = T19 - T1a;
2104
                              T4l = ii[WS(is, 9)];
2105
                              T4m = ii[WS(is, 41)];
2106
                              T4n = T4l - T4m;
2107
                              T1b = T19 + T1a;
2108
                              TaY = T4l + T4m;
2109
                              T4o = T4k + T4n;
2110
                              T4U = T4n - T4k;
2111
                         }
2112
                    }
2113
                    {
2114
                         E T18, T1f, TaX, Tb0;
2115
                         T18 = T14 + T17;
2116
                         T1f = T1b + T1e;
2117
                         T1g = T18 + T1f;
2118
                         Tdp = T18 - T1f;
2119
                         TaX = T14 - T17;
2120
                         Tb0 = TaY - TaZ;
2121
                         Tb1 = TaX - Tb0;
2122
                         Tcm = TaX + Tb0;
2123
                    }
2124
                    {
2125
                         E Tdk, Tdl, T4e, T4p;
2126
                         Tdk = TaG + TaH;
2127
                         Tdl = TaY + TaZ;
2128
                         Tdm = Tdk - Tdl;
2129
                         Tej = Tdk + Tdl;
2130
                         T4e = T4a + T4d;
2131
                         T4p = KP707106781 * (T4j - T4o);
2132
                         T4q = T4e - T4p;
2133
                         T6R = T4e + T4p;
2134
                    }
2135
                    {
2136
                         E T4T, T4W, T8d, T8e;
2137
                         T4T = T4P - T4S;
2138
                         T4W = KP707106781 * (T4U - T4V);
2139
                         T4X = T4T - T4W;
2140
                         T6O = T4T + T4W;
2141
                         T8d = T4P + T4S;
2142
                         T8e = KP707106781 * (T4o + T4j);
2143
                         T8f = T8d - T8e;
2144
                         T9s = T8d + T8e;
2145
                    }
2146
                    {
2147
                         E TaI, TaJ, T82, T83;
2148
                         TaI = TaG - TaH;
2149
                         TaJ = T1e - T1b;
2150
                         TaK = TaI - TaJ;
2151
                         Tcp = TaJ + TaI;
2152
                         T82 = T4d - T4a;
2153
                         T83 = KP707106781 * (T4U + T4V);
2154
                         T84 = T82 - T83;
2155
                         T9v = T82 + T83;
2156
                    }
2157
               }
2158
               {
2159
                    E T1j, TaR, T1m, TaS, T4G, T4L, TaT, TaQ, T89, T88, T1q, TaM, T1t, TaN, T4v;
2160
                    E T4A, TaO, TaL, T86, T85;
2161
                    {
2162
                         E T4H, T4F, T4C, T4K;
2163
                         {
2164
                              E T1h, T1i, T4D, T4E;
2165
                              T1h = ri[WS(is, 5)];
2166
                              T1i = ri[WS(is, 37)];
2167
                              T1j = T1h + T1i;
2168
                              T4H = T1h - T1i;
2169
                              T4D = ii[WS(is, 5)];
2170
                              T4E = ii[WS(is, 37)];
2171
                              T4F = T4D - T4E;
2172
                              TaR = T4D + T4E;
2173
                         }
2174
                         {
2175
                              E T1k, T1l, T4I, T4J;
2176
                              T1k = ri[WS(is, 21)];
2177
                              T1l = ri[WS(is, 53)];
2178
                              T1m = T1k + T1l;
2179
                              T4C = T1k - T1l;
2180
                              T4I = ii[WS(is, 21)];
2181
                              T4J = ii[WS(is, 53)];
2182
                              T4K = T4I - T4J;
2183
                              TaS = T4I + T4J;
2184
                         }
2185
                         T4G = T4C + T4F;
2186
                         T4L = T4H - T4K;
2187
                         TaT = TaR - TaS;
2188
                         TaQ = T1j - T1m;
2189
                         T89 = T4H + T4K;
2190
                         T88 = T4F - T4C;
2191
                    }
2192
                    {
2193
                         E T4r, T4z, T4w, T4u;
2194
                         {
2195
                              E T1o, T1p, T4x, T4y;
2196
                              T1o = ri[WS(is, 61)];
2197
                              T1p = ri[WS(is, 29)];
2198
                              T1q = T1o + T1p;
2199
                              T4r = T1o - T1p;
2200
                              T4x = ii[WS(is, 61)];
2201
                              T4y = ii[WS(is, 29)];
2202
                              T4z = T4x - T4y;
2203
                              TaM = T4x + T4y;
2204
                         }
2205
                         {
2206
                              E T1r, T1s, T4s, T4t;
2207
                              T1r = ri[WS(is, 13)];
2208
                              T1s = ri[WS(is, 45)];
2209
                              T1t = T1r + T1s;
2210
                              T4w = T1r - T1s;
2211
                              T4s = ii[WS(is, 13)];
2212
                              T4t = ii[WS(is, 45)];
2213
                              T4u = T4s - T4t;
2214
                              TaN = T4s + T4t;
2215
                         }
2216
                         T4v = T4r - T4u;
2217
                         T4A = T4w + T4z;
2218
                         TaO = TaM - TaN;
2219
                         TaL = T1q - T1t;
2220
                         T86 = T4z - T4w;
2221
                         T85 = T4r + T4u;
2222
                    }
2223
                    {
2224
                         E T1n, T1u, Tb2, Tb3;
2225
                         T1n = T1j + T1m;
2226
                         T1u = T1q + T1t;
2227
                         T1v = T1n + T1u;
2228
                         Tdn = T1u - T1n;
2229
                         Tb2 = TaT - TaQ;
2230
                         Tb3 = TaL + TaO;
2231
                         Tb4 = KP707106781 * (Tb2 - Tb3);
2232
                         Tcq = KP707106781 * (Tb2 + Tb3);
2233
                    }
2234
                    {
2235
                         E Tdq, Tdr, T4B, T4M;
2236
                         Tdq = TaR + TaS;
2237
                         Tdr = TaM + TaN;
2238
                         Tds = Tdq - Tdr;
2239
                         Tek = Tdq + Tdr;
2240
                         T4B = FNMS(KP923879532, T4A, KP382683432 * T4v);
2241
                         T4M = FMA(KP923879532, T4G, KP382683432 * T4L);
2242
                         T4N = T4B - T4M;
2243
                         T6P = T4M + T4B;
2244
                    }
2245
                    {
2246
                         E T4Y, T4Z, T8g, T8h;
2247
                         T4Y = FNMS(KP923879532, T4L, KP382683432 * T4G);
2248
                         T4Z = FMA(KP382683432, T4A, KP923879532 * T4v);
2249
                         T50 = T4Y - T4Z;
2250
                         T6S = T4Y + T4Z;
2251
                         T8g = FNMS(KP382683432, T89, KP923879532 * T88);
2252
                         T8h = FMA(KP923879532, T86, KP382683432 * T85);
2253
                         T8i = T8g - T8h;
2254
                         T9w = T8g + T8h;
2255
                    }
2256
                    {
2257
                         E TaP, TaU, T87, T8a;
2258
                         TaP = TaL - TaO;
2259
                         TaU = TaQ + TaT;
2260
                         TaV = KP707106781 * (TaP - TaU);
2261
                         Tcn = KP707106781 * (TaU + TaP);
2262
                         T87 = FNMS(KP382683432, T86, KP923879532 * T85);
2263
                         T8a = FMA(KP382683432, T88, KP923879532 * T89);
2264
                         T8b = T87 - T8a;
2265
                         T9t = T8a + T87;
2266
                    }
2267
               }
2268
               {
2269
                    E T1O, Tbc, T1R, Tbd, T5o, T5t, Tbf, Tbe, T8p, T8o, T1V, Tbi, T1Y, Tbj, T5z;
2270
                    E T5E, Tbk, Tbh, T8s, T8r;
2271
                    {
2272
                         E T5p, T5n, T5k, T5s;
2273
                         {
2274
                              E T1M, T1N, T5l, T5m;
2275
                              T1M = ri[WS(is, 3)];
2276
                              T1N = ri[WS(is, 35)];
2277
                              T1O = T1M + T1N;
2278
                              T5p = T1M - T1N;
2279
                              T5l = ii[WS(is, 3)];
2280
                              T5m = ii[WS(is, 35)];
2281
                              T5n = T5l - T5m;
2282
                              Tbc = T5l + T5m;
2283
                         }
2284
                         {
2285
                              E T1P, T1Q, T5q, T5r;
2286
                              T1P = ri[WS(is, 19)];
2287
                              T1Q = ri[WS(is, 51)];
2288
                              T1R = T1P + T1Q;
2289
                              T5k = T1P - T1Q;
2290
                              T5q = ii[WS(is, 19)];
2291
                              T5r = ii[WS(is, 51)];
2292
                              T5s = T5q - T5r;
2293
                              Tbd = T5q + T5r;
2294
                         }
2295
                         T5o = T5k + T5n;
2296
                         T5t = T5p - T5s;
2297
                         Tbf = T1O - T1R;
2298
                         Tbe = Tbc - Tbd;
2299
                         T8p = T5p + T5s;
2300
                         T8o = T5n - T5k;
2301
                    }
2302
                    {
2303
                         E T5A, T5y, T5v, T5D;
2304
                         {
2305
                              E T1T, T1U, T5w, T5x;
2306
                              T1T = ri[WS(is, 59)];
2307
                              T1U = ri[WS(is, 27)];
2308
                              T1V = T1T + T1U;
2309
                              T5A = T1T - T1U;
2310
                              T5w = ii[WS(is, 59)];
2311
                              T5x = ii[WS(is, 27)];
2312
                              T5y = T5w - T5x;
2313
                              Tbi = T5w + T5x;
2314
                         }
2315
                         {
2316
                              E T1W, T1X, T5B, T5C;
2317
                              T1W = ri[WS(is, 11)];
2318
                              T1X = ri[WS(is, 43)];
2319
                              T1Y = T1W + T1X;
2320
                              T5v = T1W - T1X;
2321
                              T5B = ii[WS(is, 11)];
2322
                              T5C = ii[WS(is, 43)];
2323
                              T5D = T5B - T5C;
2324
                              Tbj = T5B + T5C;
2325
                         }
2326
                         T5z = T5v + T5y;
2327
                         T5E = T5A - T5D;
2328
                         Tbk = Tbi - Tbj;
2329
                         Tbh = T1V - T1Y;
2330
                         T8s = T5A + T5D;
2331
                         T8r = T5y - T5v;
2332
                    }
2333
                    {
2334
                         E T1S, T1Z, Tbt, Tbu;
2335
                         T1S = T1O + T1R;
2336
                         T1Z = T1V + T1Y;
2337
                         T20 = T1S + T1Z;
2338
                         TdD = T1Z - T1S;
2339
                         Tbt = Tbh - Tbk;
2340
                         Tbu = Tbf + Tbe;
2341
                         Tbv = KP707106781 * (Tbt - Tbu);
2342
                         Tcu = KP707106781 * (Tbu + Tbt);
2343
                    }
2344
                    {
2345
                         E Tdw, Tdx, T5u, T5F;
2346
                         Tdw = Tbc + Tbd;
2347
                         Tdx = Tbi + Tbj;
2348
                         Tdy = Tdw - Tdx;
2349
                         Tep = Tdw + Tdx;
2350
                         T5u = FNMS(KP923879532, T5t, KP382683432 * T5o);
2351
                         T5F = FMA(KP382683432, T5z, KP923879532 * T5E);
2352
                         T5G = T5u - T5F;
2353
                         T6Z = T5u + T5F;
2354
                    }
2355
                    {
2356
                         E T5R, T5S, T8z, T8A;
2357
                         T5R = FNMS(KP923879532, T5z, KP382683432 * T5E);
2358
                         T5S = FMA(KP923879532, T5o, KP382683432 * T5t);
2359
                         T5T = T5R - T5S;
2360
                         T6W = T5S + T5R;
2361
                         T8z = FNMS(KP382683432, T8r, KP923879532 * T8s);
2362
                         T8A = FMA(KP382683432, T8o, KP923879532 * T8p);
2363
                         T8B = T8z - T8A;
2364
                         T9A = T8A + T8z;
2365
                    }
2366
                    {
2367
                         E Tbg, Tbl, T8q, T8t;
2368
                         Tbg = Tbe - Tbf;
2369
                         Tbl = Tbh + Tbk;
2370
                         Tbm = KP707106781 * (Tbg - Tbl);
2371
                         Tcx = KP707106781 * (Tbg + Tbl);
2372
                         T8q = FNMS(KP382683432, T8p, KP923879532 * T8o);
2373
                         T8t = FMA(KP923879532, T8r, KP382683432 * T8s);
2374
                         T8u = T8q - T8t;
2375
                         T9D = T8q + T8t;
2376
                    }
2377
               }
2378
               {
2379
                    E T11, TeD, TeG, TeI, T22, T23, T34, TeH;
2380
                    {
2381
                         E Tv, T10, TeE, TeF;
2382
                         Tv = Tf + Tu;
2383
                         T10 = TK + TZ;
2384
                         T11 = Tv + T10;
2385
                         TeD = Tv - T10;
2386
                         TeE = Tej + Tek;
2387
                         TeF = Teo + Tep;
2388
                         TeG = TeE - TeF;
2389
                         TeI = TeE + TeF;
2390
                    }
2391
                    {
2392
                         E T1w, T21, T2y, T33;
2393
                         T1w = T1g + T1v;
2394
                         T21 = T1L + T20;
2395
                         T22 = T1w + T21;
2396
                         T23 = T21 - T1w;
2397
                         T2y = T2i + T2x;
2398
                         T33 = T2N + T32;
2399
                         T34 = T2y - T33;
2400
                         TeH = T2y + T33;
2401
                    }
2402
                    ro[WS(os, 32)] = T11 - T22;
2403
                    io[WS(os, 32)] = TeH - TeI;
2404
                    ro[0] = T11 + T22;
2405
                    io[0] = TeH + TeI;
2406
                    io[WS(os, 16)] = T23 + T34;
2407
                    ro[WS(os, 16)] = TeD + TeG;
2408
                    io[WS(os, 48)] = T34 - T23;
2409
                    ro[WS(os, 48)] = TeD - TeG;
2410
               }
2411
               {
2412
                    E Teh, Tex, Tev, TeB, Tem, Tey, Ter, Tez;
2413
                    {
2414
                         E Tef, Teg, Tet, Teu;
2415
                         Tef = Tf - Tu;
2416
                         Teg = T2N - T32;
2417
                         Teh = Tef + Teg;
2418
                         Tex = Tef - Teg;
2419
                         Tet = T2i - T2x;
2420
                         Teu = TZ - TK;
2421
                         Tev = Tet - Teu;
2422
                         TeB = Teu + Tet;
2423
                    }
2424
                    {
2425
                         E Tei, Tel, Ten, Teq;
2426
                         Tei = T1g - T1v;
2427
                         Tel = Tej - Tek;
2428
                         Tem = Tei + Tel;
2429
                         Tey = Tel - Tei;
2430
                         Ten = T1L - T20;
2431
                         Teq = Teo - Tep;
2432
                         Ter = Ten - Teq;
2433
                         Tez = Ten + Teq;
2434
                    }
2435
                    {
2436
                         E Tes, TeC, Tew, TeA;
2437
                         Tes = KP707106781 * (Tem + Ter);
2438
                         ro[WS(os, 40)] = Teh - Tes;
2439
                         ro[WS(os, 8)] = Teh + Tes;
2440
                         TeC = KP707106781 * (Tey + Tez);
2441
                         io[WS(os, 40)] = TeB - TeC;
2442
                         io[WS(os, 8)] = TeB + TeC;
2443
                         Tew = KP707106781 * (Ter - Tem);
2444
                         io[WS(os, 56)] = Tev - Tew;
2445
                         io[WS(os, 24)] = Tev + Tew;
2446
                         TeA = KP707106781 * (Tey - Tez);
2447
                         ro[WS(os, 56)] = Tex - TeA;
2448
                         ro[WS(os, 24)] = Tex + TeA;
2449
                    }
2450
               }
2451
               {
2452
                    E Tdb, TdV, Te5, TdJ, Tdi, Te6, Te3, Teb, TdM, TdW, Tdu, TdQ, Te0, Tea, TdF;
2453
                    E TdR;
2454
                    {
2455
                         E Tde, Tdh, Tdo, Tdt;
2456
                         Tdb = Td9 - Tda;
2457
                         TdV = Td9 + Tda;
2458
                         Te5 = TdI + TdH;
2459
                         TdJ = TdH - TdI;
2460
                         Tde = Tdc - Tdd;
2461
                         Tdh = Tdf + Tdg;
2462
                         Tdi = KP707106781 * (Tde - Tdh);
2463
                         Te6 = KP707106781 * (Tde + Tdh);
2464
                         {
2465
                              E Te1, Te2, TdK, TdL;
2466
                              Te1 = Tdv + Tdy;
2467
                              Te2 = TdD + TdC;
2468
                              Te3 = FNMS(KP382683432, Te2, KP923879532 * Te1);
2469
                              Teb = FMA(KP923879532, Te2, KP382683432 * Te1);
2470
                              TdK = Tdf - Tdg;
2471
                              TdL = Tdd + Tdc;
2472
                              TdM = KP707106781 * (TdK - TdL);
2473
                              TdW = KP707106781 * (TdL + TdK);
2474
                         }
2475
                         Tdo = Tdm - Tdn;
2476
                         Tdt = Tdp - Tds;
2477
                         Tdu = FMA(KP923879532, Tdo, KP382683432 * Tdt);
2478
                         TdQ = FNMS(KP923879532, Tdt, KP382683432 * Tdo);
2479
                         {
2480
                              E TdY, TdZ, Tdz, TdE;
2481
                              TdY = Tdn + Tdm;
2482
                              TdZ = Tdp + Tds;
2483
                              Te0 = FMA(KP382683432, TdY, KP923879532 * TdZ);
2484
                              Tea = FNMS(KP382683432, TdZ, KP923879532 * TdY);
2485
                              Tdz = Tdv - Tdy;
2486
                              TdE = TdC - TdD;
2487
                              TdF = FNMS(KP923879532, TdE, KP382683432 * Tdz);
2488
                              TdR = FMA(KP382683432, TdE, KP923879532 * Tdz);
2489
                         }
2490
                    }
2491
                    {
2492
                         E Tdj, TdG, TdT, TdU;
2493
                         Tdj = Tdb + Tdi;
2494
                         TdG = Tdu + TdF;
2495
                         ro[WS(os, 44)] = Tdj - TdG;
2496
                         ro[WS(os, 12)] = Tdj + TdG;
2497
                         TdT = TdJ + TdM;
2498
                         TdU = TdQ + TdR;
2499
                         io[WS(os, 44)] = TdT - TdU;
2500
                         io[WS(os, 12)] = TdT + TdU;
2501
                    }
2502
                    {
2503
                         E TdN, TdO, TdP, TdS;
2504
                         TdN = TdJ - TdM;
2505
                         TdO = TdF - Tdu;
2506
                         io[WS(os, 60)] = TdN - TdO;
2507
                         io[WS(os, 28)] = TdN + TdO;
2508
                         TdP = Tdb - Tdi;
2509
                         TdS = TdQ - TdR;
2510
                         ro[WS(os, 60)] = TdP - TdS;
2511
                         ro[WS(os, 28)] = TdP + TdS;
2512
                    }
2513
                    {
2514
                         E TdX, Te4, Ted, Tee;
2515
                         TdX = TdV + TdW;
2516
                         Te4 = Te0 + Te3;
2517
                         ro[WS(os, 36)] = TdX - Te4;
2518
                         ro[WS(os, 4)] = TdX + Te4;
2519
                         Ted = Te5 + Te6;
2520
                         Tee = Tea + Teb;
2521
                         io[WS(os, 36)] = Ted - Tee;
2522
                         io[WS(os, 4)] = Ted + Tee;
2523
                    }
2524
                    {
2525
                         E Te7, Te8, Te9, Tec;
2526
                         Te7 = Te5 - Te6;
2527
                         Te8 = Te3 - Te0;
2528
                         io[WS(os, 52)] = Te7 - Te8;
2529
                         io[WS(os, 20)] = Te7 + Te8;
2530
                         Te9 = TdV - TdW;
2531
                         Tec = Tea - Teb;
2532
                         ro[WS(os, 52)] = Te9 - Tec;
2533
                         ro[WS(os, 20)] = Te9 + Tec;
2534
                    }
2535
               }
2536
               {
2537
                    E Tcd, TcP, TcD, TcZ, Tck, Td0, TcX, Td5, Tcs, TcK, TcG, TcQ, TcU, Td4, Tcz;
2538
                    E TcL, Tcc, TcC;
2539
                    Tcc = KP707106781 * (TbD + TbC);
2540
                    Tcd = Tcb - Tcc;
2541
                    TcP = Tcb + Tcc;
2542
                    TcC = KP707106781 * (Tak + Tan);
2543
                    TcD = TcB - TcC;
2544
                    TcZ = TcB + TcC;
2545
                    {
2546
                         E Tcg, Tcj, TcV, TcW;
2547
                         Tcg = FNMS(KP382683432, Tcf, KP923879532 * Tce);
2548
                         Tcj = FMA(KP923879532, Tch, KP382683432 * Tci);
2549
                         Tck = Tcg - Tcj;
2550
                         Td0 = Tcg + Tcj;
2551
                         TcV = Tct + Tcu;
2552
                         TcW = Tcw + Tcx;
2553
                         TcX = FNMS(KP195090322, TcW, KP980785280 * TcV);
2554
                         Td5 = FMA(KP195090322, TcV, KP980785280 * TcW);
2555
                    }
2556
                    {
2557
                         E Tco, Tcr, TcE, TcF;
2558
                         Tco = Tcm - Tcn;
2559
                         Tcr = Tcp - Tcq;
2560
                         Tcs = FMA(KP555570233, Tco, KP831469612 * Tcr);
2561
                         TcK = FNMS(KP831469612, Tco, KP555570233 * Tcr);
2562
                         TcE = FNMS(KP382683432, Tch, KP923879532 * Tci);
2563
                         TcF = FMA(KP382683432, Tce, KP923879532 * Tcf);
2564
                         TcG = TcE - TcF;
2565
                         TcQ = TcF + TcE;
2566
                    }
2567
                    {
2568
                         E TcS, TcT, Tcv, Tcy;
2569
                         TcS = Tcm + Tcn;
2570
                         TcT = Tcp + Tcq;
2571
                         TcU = FMA(KP980785280, TcS, KP195090322 * TcT);
2572
                         Td4 = FNMS(KP195090322, TcS, KP980785280 * TcT);
2573
                         Tcv = Tct - Tcu;
2574
                         Tcy = Tcw - Tcx;
2575
                         Tcz = FNMS(KP831469612, Tcy, KP555570233 * Tcv);
2576
                         TcL = FMA(KP831469612, Tcv, KP555570233 * Tcy);
2577
                    }
2578
                    {
2579
                         E Tcl, TcA, TcN, TcO;
2580
                         Tcl = Tcd + Tck;
2581
                         TcA = Tcs + Tcz;
2582
                         ro[WS(os, 42)] = Tcl - TcA;
2583
                         ro[WS(os, 10)] = Tcl + TcA;
2584
                         TcN = TcD + TcG;
2585
                         TcO = TcK + TcL;
2586
                         io[WS(os, 42)] = TcN - TcO;
2587
                         io[WS(os, 10)] = TcN + TcO;
2588
                    }
2589
                    {
2590
                         E TcH, TcI, TcJ, TcM;
2591
                         TcH = TcD - TcG;
2592
                         TcI = Tcz - Tcs;
2593
                         io[WS(os, 58)] = TcH - TcI;
2594
                         io[WS(os, 26)] = TcH + TcI;
2595
                         TcJ = Tcd - Tck;
2596
                         TcM = TcK - TcL;
2597
                         ro[WS(os, 58)] = TcJ - TcM;
2598
                         ro[WS(os, 26)] = TcJ + TcM;
2599
                    }
2600
                    {
2601
                         E TcR, TcY, Td7, Td8;
2602
                         TcR = TcP + TcQ;
2603
                         TcY = TcU + TcX;
2604
                         ro[WS(os, 34)] = TcR - TcY;
2605
                         ro[WS(os, 2)] = TcR + TcY;
2606
                         Td7 = TcZ + Td0;
2607
                         Td8 = Td4 + Td5;
2608
                         io[WS(os, 34)] = Td7 - Td8;
2609
                         io[WS(os, 2)] = Td7 + Td8;
2610
                    }
2611
                    {
2612
                         E Td1, Td2, Td3, Td6;
2613
                         Td1 = TcZ - Td0;
2614
                         Td2 = TcX - TcU;
2615
                         io[WS(os, 50)] = Td1 - Td2;
2616
                         io[WS(os, 18)] = Td1 + Td2;
2617
                         Td3 = TcP - TcQ;
2618
                         Td6 = Td4 - Td5;
2619
                         ro[WS(os, 50)] = Td3 - Td6;
2620
                         ro[WS(os, 18)] = Td3 + Td6;
2621
                    }
2622
               }
2623
               {
2624
                    E Tap, TbR, TbF, Tc1, TaE, Tc2, TbZ, Tc7, Tb6, TbM, TbI, TbS, TbW, Tc6, Tbx;
2625
                    E TbN, Tao, TbE;
2626
                    Tao = KP707106781 * (Tak - Tan);
2627
                    Tap = Tah - Tao;
2628
                    TbR = Tah + Tao;
2629
                    TbE = KP707106781 * (TbC - TbD);
2630
                    TbF = TbB - TbE;
2631
                    Tc1 = TbB + TbE;
2632
                    {
2633
                         E Taw, TaD, TbX, TbY;
2634
                         Taw = FNMS(KP923879532, Tav, KP382683432 * Tas);
2635
                         TaD = FMA(KP382683432, Taz, KP923879532 * TaC);
2636
                         TaE = Taw - TaD;
2637
                         Tc2 = Taw + TaD;
2638
                         TbX = Tbb + Tbm;
2639
                         TbY = Tbs + Tbv;
2640
                         TbZ = FNMS(KP555570233, TbY, KP831469612 * TbX);
2641
                         Tc7 = FMA(KP831469612, TbY, KP555570233 * TbX);
2642
                    }
2643
                    {
2644
                         E TaW, Tb5, TbG, TbH;
2645
                         TaW = TaK - TaV;
2646
                         Tb5 = Tb1 - Tb4;
2647
                         Tb6 = FMA(KP980785280, TaW, KP195090322 * Tb5);
2648
                         TbM = FNMS(KP980785280, Tb5, KP195090322 * TaW);
2649
                         TbG = FNMS(KP923879532, Taz, KP382683432 * TaC);
2650
                         TbH = FMA(KP923879532, Tas, KP382683432 * Tav);
2651
                         TbI = TbG - TbH;
2652
                         TbS = TbH + TbG;
2653
                    }
2654
                    {
2655
                         E TbU, TbV, Tbn, Tbw;
2656
                         TbU = TaK + TaV;
2657
                         TbV = Tb1 + Tb4;
2658
                         TbW = FMA(KP555570233, TbU, KP831469612 * TbV);
2659
                         Tc6 = FNMS(KP555570233, TbV, KP831469612 * TbU);
2660
                         Tbn = Tbb - Tbm;
2661
                         Tbw = Tbs - Tbv;
2662
                         Tbx = FNMS(KP980785280, Tbw, KP195090322 * Tbn);
2663
                         TbN = FMA(KP195090322, Tbw, KP980785280 * Tbn);
2664
                    }
2665
                    {
2666
                         E TaF, Tby, TbP, TbQ;
2667
                         TaF = Tap + TaE;
2668
                         Tby = Tb6 + Tbx;
2669
                         ro[WS(os, 46)] = TaF - Tby;
2670
                         ro[WS(os, 14)] = TaF + Tby;
2671
                         TbP = TbF + TbI;
2672
                         TbQ = TbM + TbN;
2673
                         io[WS(os, 46)] = TbP - TbQ;
2674
                         io[WS(os, 14)] = TbP + TbQ;
2675
                    }
2676
                    {
2677
                         E TbJ, TbK, TbL, TbO;
2678
                         TbJ = TbF - TbI;
2679
                         TbK = Tbx - Tb6;
2680
                         io[WS(os, 62)] = TbJ - TbK;
2681
                         io[WS(os, 30)] = TbJ + TbK;
2682
                         TbL = Tap - TaE;
2683
                         TbO = TbM - TbN;
2684
                         ro[WS(os, 62)] = TbL - TbO;
2685
                         ro[WS(os, 30)] = TbL + TbO;
2686
                    }
2687
                    {
2688
                         E TbT, Tc0, Tc9, Tca;
2689
                         TbT = TbR + TbS;
2690
                         Tc0 = TbW + TbZ;
2691
                         ro[WS(os, 38)] = TbT - Tc0;
2692
                         ro[WS(os, 6)] = TbT + Tc0;
2693
                         Tc9 = Tc1 + Tc2;
2694
                         Tca = Tc6 + Tc7;
2695
                         io[WS(os, 38)] = Tc9 - Tca;
2696
                         io[WS(os, 6)] = Tc9 + Tca;
2697
                    }
2698
                    {
2699
                         E Tc3, Tc4, Tc5, Tc8;
2700
                         Tc3 = Tc1 - Tc2;
2701
                         Tc4 = TbZ - TbW;
2702
                         io[WS(os, 54)] = Tc3 - Tc4;
2703
                         io[WS(os, 22)] = Tc3 + Tc4;
2704
                         Tc5 = TbR - TbS;
2705
                         Tc8 = Tc6 - Tc7;
2706
                         ro[WS(os, 54)] = Tc5 - Tc8;
2707
                         ro[WS(os, 22)] = Tc5 + Tc8;
2708
                    }
2709
               }
2710
               {
2711
                    E T6F, T7h, T7m, T7w, T7p, T7x, T6M, T7s, T6U, T7c, T75, T7r, T78, T7i, T71;
2712
                    E T7d;
2713
                    {
2714
                         E T6D, T6E, T7k, T7l;
2715
                         T6D = T37 + T3e;
2716
                         T6E = T65 + T64;
2717
                         T6F = T6D - T6E;
2718
                         T7h = T6D + T6E;
2719
                         T7k = T6O + T6P;
2720
                         T7l = T6R + T6S;
2721
                         T7m = FMA(KP956940335, T7k, KP290284677 * T7l);
2722
                         T7w = FNMS(KP290284677, T7k, KP956940335 * T7l);
2723
                    }
2724
                    {
2725
                         E T7n, T7o, T6I, T6L;
2726
                         T7n = T6V + T6W;
2727
                         T7o = T6Y + T6Z;
2728
                         T7p = FNMS(KP290284677, T7o, KP956940335 * T7n);
2729
                         T7x = FMA(KP290284677, T7n, KP956940335 * T7o);
2730
                         T6I = FNMS(KP555570233, T6H, KP831469612 * T6G);
2731
                         T6L = FMA(KP831469612, T6J, KP555570233 * T6K);
2732
                         T6M = T6I - T6L;
2733
                         T7s = T6I + T6L;
2734
                    }
2735
                    {
2736
                         E T6Q, T6T, T73, T74;
2737
                         T6Q = T6O - T6P;
2738
                         T6T = T6R - T6S;
2739
                         T6U = FMA(KP471396736, T6Q, KP881921264 * T6T);
2740
                         T7c = FNMS(KP881921264, T6Q, KP471396736 * T6T);
2741
                         T73 = T5Z + T62;
2742
                         T74 = T3m + T3t;
2743
                         T75 = T73 - T74;
2744
                         T7r = T73 + T74;
2745
                    }
2746
                    {
2747
                         E T76, T77, T6X, T70;
2748
                         T76 = FNMS(KP555570233, T6J, KP831469612 * T6K);
2749
                         T77 = FMA(KP555570233, T6G, KP831469612 * T6H);
2750
                         T78 = T76 - T77;
2751
                         T7i = T77 + T76;
2752
                         T6X = T6V - T6W;
2753
                         T70 = T6Y - T6Z;
2754
                         T71 = FNMS(KP881921264, T70, KP471396736 * T6X);
2755
                         T7d = FMA(KP881921264, T6X, KP471396736 * T70);
2756
                    }
2757
                    {
2758
                         E T6N, T72, T7f, T7g;
2759
                         T6N = T6F + T6M;
2760
                         T72 = T6U + T71;
2761
                         ro[WS(os, 43)] = T6N - T72;
2762
                         ro[WS(os, 11)] = T6N + T72;
2763
                         T7f = T75 + T78;
2764
                         T7g = T7c + T7d;
2765
                         io[WS(os, 43)] = T7f - T7g;
2766
                         io[WS(os, 11)] = T7f + T7g;
2767
                    }
2768
                    {
2769
                         E T79, T7a, T7b, T7e;
2770
                         T79 = T75 - T78;
2771
                         T7a = T71 - T6U;
2772
                         io[WS(os, 59)] = T79 - T7a;
2773
                         io[WS(os, 27)] = T79 + T7a;
2774
                         T7b = T6F - T6M;
2775
                         T7e = T7c - T7d;
2776
                         ro[WS(os, 59)] = T7b - T7e;
2777
                         ro[WS(os, 27)] = T7b + T7e;
2778
                    }
2779
                    {
2780
                         E T7j, T7q, T7z, T7A;
2781
                         T7j = T7h + T7i;
2782
                         T7q = T7m + T7p;
2783
                         ro[WS(os, 35)] = T7j - T7q;
2784
                         ro[WS(os, 3)] = T7j + T7q;
2785
                         T7z = T7r + T7s;
2786
                         T7A = T7w + T7x;
2787
                         io[WS(os, 35)] = T7z - T7A;
2788
                         io[WS(os, 3)] = T7z + T7A;
2789
                    }
2790
                    {
2791
                         E T7t, T7u, T7v, T7y;
2792
                         T7t = T7r - T7s;
2793
                         T7u = T7p - T7m;
2794
                         io[WS(os, 51)] = T7t - T7u;
2795
                         io[WS(os, 19)] = T7t + T7u;
2796
                         T7v = T7h - T7i;
2797
                         T7y = T7w - T7x;
2798
                         ro[WS(os, 51)] = T7v - T7y;
2799
                         ro[WS(os, 19)] = T7v + T7y;
2800
                    }
2801
               }
2802
               {
2803
                    E T9j, T9V, Ta0, Taa, Ta3, Tab, T9q, Ta6, T9y, T9Q, T9J, Ta5, T9M, T9W, T9F;
2804
                    E T9R;
2805
                    {
2806
                         E T9h, T9i, T9Y, T9Z;
2807
                         T9h = T7B + T7C;
2808
                         T9i = T8J + T8I;
2809
                         T9j = T9h - T9i;
2810
                         T9V = T9h + T9i;
2811
                         T9Y = T9s + T9t;
2812
                         T9Z = T9v + T9w;
2813
                         Ta0 = FMA(KP995184726, T9Y, KP098017140 * T9Z);
2814
                         Taa = FNMS(KP098017140, T9Y, KP995184726 * T9Z);
2815
                    }
2816
                    {
2817
                         E Ta1, Ta2, T9m, T9p;
2818
                         Ta1 = T9z + T9A;
2819
                         Ta2 = T9C + T9D;
2820
                         Ta3 = FNMS(KP098017140, Ta2, KP995184726 * Ta1);
2821
                         Tab = FMA(KP098017140, Ta1, KP995184726 * Ta2);
2822
                         T9m = FNMS(KP195090322, T9l, KP980785280 * T9k);
2823
                         T9p = FMA(KP195090322, T9n, KP980785280 * T9o);
2824
                         T9q = T9m - T9p;
2825
                         Ta6 = T9m + T9p;
2826
                    }
2827
                    {
2828
                         E T9u, T9x, T9H, T9I;
2829
                         T9u = T9s - T9t;
2830
                         T9x = T9v - T9w;
2831
                         T9y = FMA(KP634393284, T9u, KP773010453 * T9x);
2832
                         T9Q = FNMS(KP773010453, T9u, KP634393284 * T9x);
2833
                         T9H = T8F + T8G;
2834
                         T9I = T7G + T7J;
2835
                         T9J = T9H - T9I;
2836
                         Ta5 = T9H + T9I;
2837
                    }
2838
                    {
2839
                         E T9K, T9L, T9B, T9E;
2840
                         T9K = FNMS(KP195090322, T9o, KP980785280 * T9n);
2841
                         T9L = FMA(KP980785280, T9l, KP195090322 * T9k);
2842
                         T9M = T9K - T9L;
2843
                         T9W = T9L + T9K;
2844
                         T9B = T9z - T9A;
2845
                         T9E = T9C - T9D;
2846
                         T9F = FNMS(KP773010453, T9E, KP634393284 * T9B);
2847
                         T9R = FMA(KP773010453, T9B, KP634393284 * T9E);
2848
                    }
2849
                    {
2850
                         E T9r, T9G, T9T, T9U;
2851
                         T9r = T9j + T9q;
2852
                         T9G = T9y + T9F;
2853
                         ro[WS(os, 41)] = T9r - T9G;
2854
                         ro[WS(os, 9)] = T9r + T9G;
2855
                         T9T = T9J + T9M;
2856
                         T9U = T9Q + T9R;
2857
                         io[WS(os, 41)] = T9T - T9U;
2858
                         io[WS(os, 9)] = T9T + T9U;
2859
                    }
2860
                    {
2861
                         E T9N, T9O, T9P, T9S;
2862
                         T9N = T9J - T9M;
2863
                         T9O = T9F - T9y;
2864
                         io[WS(os, 57)] = T9N - T9O;
2865
                         io[WS(os, 25)] = T9N + T9O;
2866
                         T9P = T9j - T9q;
2867
                         T9S = T9Q - T9R;
2868
                         ro[WS(os, 57)] = T9P - T9S;
2869
                         ro[WS(os, 25)] = T9P + T9S;
2870
                    }
2871
                    {
2872
                         E T9X, Ta4, Tad, Tae;
2873
                         T9X = T9V + T9W;
2874
                         Ta4 = Ta0 + Ta3;
2875
                         ro[WS(os, 33)] = T9X - Ta4;
2876
                         ro[WS(os, 1)] = T9X + Ta4;
2877
                         Tad = Ta5 + Ta6;
2878
                         Tae = Taa + Tab;
2879
                         io[WS(os, 33)] = Tad - Tae;
2880
                         io[WS(os, 1)] = Tad + Tae;
2881
                    }
2882
                    {
2883
                         E Ta7, Ta8, Ta9, Tac;
2884
                         Ta7 = Ta5 - Ta6;
2885
                         Ta8 = Ta3 - Ta0;
2886
                         io[WS(os, 49)] = Ta7 - Ta8;
2887
                         io[WS(os, 17)] = Ta7 + Ta8;
2888
                         Ta9 = T9V - T9W;
2889
                         Tac = Taa - Tab;
2890
                         ro[WS(os, 49)] = Ta9 - Tac;
2891
                         ro[WS(os, 17)] = Ta9 + Tac;
2892
                    }
2893
               }
2894
               {
2895
                    E T3v, T6j, T6o, T6y, T6r, T6z, T48, T6u, T52, T6e, T67, T6t, T6a, T6k, T5V;
2896
                    E T6f;
2897
                    {
2898
                         E T3f, T3u, T6m, T6n;
2899
                         T3f = T37 - T3e;
2900
                         T3u = T3m - T3t;
2901
                         T3v = T3f - T3u;
2902
                         T6j = T3f + T3u;
2903
                         T6m = T4q + T4N;
2904
                         T6n = T4X + T50;
2905
                         T6o = FMA(KP634393284, T6m, KP773010453 * T6n);
2906
                         T6y = FNMS(KP634393284, T6n, KP773010453 * T6m);
2907
                    }
2908
                    {
2909
                         E T6p, T6q, T3O, T47;
2910
                         T6p = T5j + T5G;
2911
                         T6q = T5Q + T5T;
2912
                         T6r = FNMS(KP634393284, T6q, KP773010453 * T6p);
2913
                         T6z = FMA(KP773010453, T6q, KP634393284 * T6p);
2914
                         T3O = FNMS(KP980785280, T3N, KP195090322 * T3G);
2915
                         T47 = FMA(KP195090322, T3Z, KP980785280 * T46);
2916
                         T48 = T3O - T47;
2917
                         T6u = T3O + T47;
2918
                    }
2919
                    {
2920
                         E T4O, T51, T63, T66;
2921
                         T4O = T4q - T4N;
2922
                         T51 = T4X - T50;
2923
                         T52 = FMA(KP995184726, T4O, KP098017140 * T51);
2924
                         T6e = FNMS(KP995184726, T51, KP098017140 * T4O);
2925
                         T63 = T5Z - T62;
2926
                         T66 = T64 - T65;
2927
                         T67 = T63 - T66;
2928
                         T6t = T63 + T66;
2929
                    }
2930
                    {
2931
                         E T68, T69, T5H, T5U;
2932
                         T68 = FNMS(KP980785280, T3Z, KP195090322 * T46);
2933
                         T69 = FMA(KP980785280, T3G, KP195090322 * T3N);
2934
                         T6a = T68 - T69;
2935
                         T6k = T69 + T68;
2936
                         T5H = T5j - T5G;
2937
                         T5U = T5Q - T5T;
2938
                         T5V = FNMS(KP995184726, T5U, KP098017140 * T5H);
2939
                         T6f = FMA(KP098017140, T5U, KP995184726 * T5H);
2940
                    }
2941
                    {
2942
                         E T49, T5W, T6h, T6i;
2943
                         T49 = T3v + T48;
2944
                         T5W = T52 + T5V;
2945
                         ro[WS(os, 47)] = T49 - T5W;
2946
                         ro[WS(os, 15)] = T49 + T5W;
2947
                         T6h = T67 + T6a;
2948
                         T6i = T6e + T6f;
2949
                         io[WS(os, 47)] = T6h - T6i;
2950
                         io[WS(os, 15)] = T6h + T6i;
2951
                    }
2952
                    {
2953
                         E T6b, T6c, T6d, T6g;
2954
                         T6b = T67 - T6a;
2955
                         T6c = T5V - T52;
2956
                         io[WS(os, 63)] = T6b - T6c;
2957
                         io[WS(os, 31)] = T6b + T6c;
2958
                         T6d = T3v - T48;
2959
                         T6g = T6e - T6f;
2960
                         ro[WS(os, 63)] = T6d - T6g;
2961
                         ro[WS(os, 31)] = T6d + T6g;
2962
                    }
2963
                    {
2964
                         E T6l, T6s, T6B, T6C;
2965
                         T6l = T6j + T6k;
2966
                         T6s = T6o + T6r;
2967
                         ro[WS(os, 39)] = T6l - T6s;
2968
                         ro[WS(os, 7)] = T6l + T6s;
2969
                         T6B = T6t + T6u;
2970
                         T6C = T6y + T6z;
2971
                         io[WS(os, 39)] = T6B - T6C;
2972
                         io[WS(os, 7)] = T6B + T6C;
2973
                    }
2974
                    {
2975
                         E T6v, T6w, T6x, T6A;
2976
                         T6v = T6t - T6u;
2977
                         T6w = T6r - T6o;
2978
                         io[WS(os, 55)] = T6v - T6w;
2979
                         io[WS(os, 23)] = T6v + T6w;
2980
                         T6x = T6j - T6k;
2981
                         T6A = T6y - T6z;
2982
                         ro[WS(os, 55)] = T6x - T6A;
2983
                         ro[WS(os, 23)] = T6x + T6A;
2984
                    }
2985
               }
2986
               {
2987
                    E T7L, T8X, T92, T9c, T95, T9d, T80, T98, T8k, T8S, T8L, T97, T8O, T8Y, T8D;
2988
                    E T8T;
2989
                    {
2990
                         E T7D, T7K, T90, T91;
2991
                         T7D = T7B - T7C;
2992
                         T7K = T7G - T7J;
2993
                         T7L = T7D - T7K;
2994
                         T8X = T7D + T7K;
2995
                         T90 = T84 + T8b;
2996
                         T91 = T8f + T8i;
2997
                         T92 = FMA(KP471396736, T90, KP881921264 * T91);
2998
                         T9c = FNMS(KP471396736, T91, KP881921264 * T90);
2999
                    }
3000
                    {
3001
                         E T93, T94, T7S, T7Z;
3002
                         T93 = T8n + T8u;
3003
                         T94 = T8y + T8B;
3004
                         T95 = FNMS(KP471396736, T94, KP881921264 * T93);
3005
                         T9d = FMA(KP881921264, T94, KP471396736 * T93);
3006
                         T7S = FNMS(KP831469612, T7R, KP555570233 * T7O);
3007
                         T7Z = FMA(KP831469612, T7V, KP555570233 * T7Y);
3008
                         T80 = T7S - T7Z;
3009
                         T98 = T7S + T7Z;
3010
                    }
3011
                    {
3012
                         E T8c, T8j, T8H, T8K;
3013
                         T8c = T84 - T8b;
3014
                         T8j = T8f - T8i;
3015
                         T8k = FMA(KP956940335, T8c, KP290284677 * T8j);
3016
                         T8S = FNMS(KP956940335, T8j, KP290284677 * T8c);
3017
                         T8H = T8F - T8G;
3018
                         T8K = T8I - T8J;
3019
                         T8L = T8H - T8K;
3020
                         T97 = T8H + T8K;
3021
                    }
3022
                    {
3023
                         E T8M, T8N, T8v, T8C;
3024
                         T8M = FNMS(KP831469612, T7Y, KP555570233 * T7V);
3025
                         T8N = FMA(KP555570233, T7R, KP831469612 * T7O);
3026
                         T8O = T8M - T8N;
3027
                         T8Y = T8N + T8M;
3028
                         T8v = T8n - T8u;
3029
                         T8C = T8y - T8B;
3030
                         T8D = FNMS(KP956940335, T8C, KP290284677 * T8v);
3031
                         T8T = FMA(KP290284677, T8C, KP956940335 * T8v);
3032
                    }
3033
                    {
3034
                         E T81, T8E, T8V, T8W;
3035
                         T81 = T7L + T80;
3036
                         T8E = T8k + T8D;
3037
                         ro[WS(os, 45)] = T81 - T8E;
3038
                         ro[WS(os, 13)] = T81 + T8E;
3039
                         T8V = T8L + T8O;
3040
                         T8W = T8S + T8T;
3041
                         io[WS(os, 45)] = T8V - T8W;
3042
                         io[WS(os, 13)] = T8V + T8W;
3043
                    }
3044
                    {
3045
                         E T8P, T8Q, T8R, T8U;
3046
                         T8P = T8L - T8O;
3047
                         T8Q = T8D - T8k;
3048
                         io[WS(os, 61)] = T8P - T8Q;
3049
                         io[WS(os, 29)] = T8P + T8Q;
3050
                         T8R = T7L - T80;
3051
                         T8U = T8S - T8T;
3052
                         ro[WS(os, 61)] = T8R - T8U;
3053
                         ro[WS(os, 29)] = T8R + T8U;
3054
                    }
3055
                    {
3056
                         E T8Z, T96, T9f, T9g;
3057
                         T8Z = T8X + T8Y;
3058
                         T96 = T92 + T95;
3059
                         ro[WS(os, 37)] = T8Z - T96;
3060
                         ro[WS(os, 5)] = T8Z + T96;
3061
                         T9f = T97 + T98;
3062
                         T9g = T9c + T9d;
3063
                         io[WS(os, 37)] = T9f - T9g;
3064
                         io[WS(os, 5)] = T9f + T9g;
3065
                    }
3066
                    {
3067
                         E T99, T9a, T9b, T9e;
3068
                         T99 = T97 - T98;
3069
                         T9a = T95 - T92;
3070
                         io[WS(os, 53)] = T99 - T9a;
3071
                         io[WS(os, 21)] = T99 + T9a;
3072
                         T9b = T8X - T8Y;
3073
                         T9e = T9c - T9d;
3074
                         ro[WS(os, 53)] = T9b - T9e;
3075
                         ro[WS(os, 21)] = T9b + T9e;
3076
                    }
3077
               }
3078
          }
3079
     }
3080
}
3081

    
3082
/*
 * Descriptor that is passed, together with the n1_64 kernel, to
 * X(kdft_register) below.  The leading 64 and the string "n1_64" match the
 * "-n 64 -name n1_64" options of the generator command recorded near the
 * top of this file.
 * NOTE(review): {808, 144, 104, 0} is presumably the operation-count record
 * for this (non-FMA) variant of the codelet; the meaning of it and of the
 * trailing zero fields is fixed by the kdft_desc declaration, which is not
 * visible here -- confirm against dft/codelet-dft.h.
 */
static const kdft_desc desc = { 64, "n1_64", {808, 144, 104, 0}, &GENUS, 0, 0, 0, 0 };
3083

    
3084
/*
 * Registration entry point for this codelet: forwards the caller's planner
 * pointer `p`, the n1_64 kernel function and the address of `desc` to
 * X(kdft_register).  Returns nothing.
 * The file header marks this code as automatically generated (DO NOT EDIT),
 * so changes belong in the generator invocation rather than here.
 * NOTE(review): the bare numbers interleaved with the statements below look
 * like listing line numbers left over from a web export, not C tokens --
 * confirm against the pristine source before compiling.
 */
void X(codelet_n1_64) (planner *p) {
3085
     X(kdft_register) (p, n1_64, &desc);
3086
}
3087

    
3088
#endif