annotate src/fftw-3.3.8/dft/scalar/codelets/n1_6.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents d0c2a83c1364
children
rev   line source
Chris@82 1 /*
Chris@82 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
Chris@82 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
Chris@82 4 *
Chris@82 5 * This program is free software; you can redistribute it and/or modify
Chris@82 6 * it under the terms of the GNU General Public License as published by
Chris@82 7 * the Free Software Foundation; either version 2 of the License, or
Chris@82 8 * (at your option) any later version.
Chris@82 9 *
Chris@82 10 * This program is distributed in the hope that it will be useful,
Chris@82 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@82 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@82 13 * GNU General Public License for more details.
Chris@82 14 *
Chris@82 15 * You should have received a copy of the GNU General Public License
Chris@82 16 * along with this program; if not, write to the Free Software
Chris@82 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@82 18 *
Chris@82 19 */
Chris@82 20
Chris@82 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@82 22 /* Generated on Thu May 24 08:04:10 EDT 2018 */
Chris@82 23
Chris@82 24 #include "dft/codelet-dft.h"
Chris@82 25
Chris@82 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
Chris@82 27
Chris@82 28 /* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 6 -name n1_6 -include dft/scalar/n.h */
Chris@82 29
Chris@82 30 /*
Chris@82 31 * This function contains 36 FP additions, 12 FP multiplications,
Chris@82 32 * (or, 24 additions, 0 multiplications, 12 fused multiply/add),
Chris@82 33 * 23 stack variables, 2 constants, and 24 memory accesses
Chris@82 34 */
Chris@82 35 #include "dft/scalar/n.h"
Chris@82 36
/*
 * n1_6 (FMA variant): generated straight-line kernel; the generator command
 * line recorded above this function shows gen_notw with "-n 6" and "-fma".
 *
 * Each loop iteration reads six values from ri and six from ii (index 0 and
 * WS(is, 1..5)) and writes six values to ro and six to io (index 0 and
 * WS(os, 1..5)).  The loop runs v times; after each iteration ri/ii advance
 * by ivs and ro/io advance by ovs.
 *
 * NOTE(review): ri/ii and ro/io are presumably the real and imaginary parts
 * of the input and output -- confirm against the codelet headers.
 * R, E, stride, INT, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined
 * in the included headers, not in this file.
 */
Chris@82 37 static void n1_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 38 {
/* The two constants used by this kernel: 0.866025403... and 0.5. */
Chris@82 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 41 {
Chris@82 42 INT i;
/* Counts i down from v; the pointer advances are part of the loop step. */
Chris@82 43 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) {
Chris@82 44 E T3, Tb, Tp, Tx, T6, Tc, T9, Td, Ta, Te, Ti, Tu, Tl, Tv, Tq;
Chris@82 45 E Ty;
Chris@82 46 {
/* Inputs 0 and 3: difference and sum, for ri (T3, Tb) and ii (Tp, Tx). */
Chris@82 47 E T1, T2, Tn, To;
Chris@82 48 T1 = ri[0];
Chris@82 49 T2 = ri[WS(is, 3)];
Chris@82 50 T3 = T1 - T2;
Chris@82 51 Tb = T1 + T2;
Chris@82 52 Tn = ii[0];
Chris@82 53 To = ii[WS(is, 3)];
Chris@82 54 Tp = Tn - To;
Chris@82 55 Tx = Tn + To;
Chris@82 56 }
Chris@82 57 {
/* ri inputs paired as (2, 5) and (4, 1): differences T6, T9 and sums Tc, Td. */
Chris@82 58 E T4, T5, T7, T8;
Chris@82 59 T4 = ri[WS(is, 2)];
Chris@82 60 T5 = ri[WS(is, 5)];
Chris@82 61 T6 = T4 - T5;
Chris@82 62 Tc = T4 + T5;
Chris@82 63 T7 = ri[WS(is, 4)];
Chris@82 64 T8 = ri[WS(is, 1)];
Chris@82 65 T9 = T7 - T8;
Chris@82 66 Td = T7 + T8;
Chris@82 67 }
/* Ta combines the two ri pair differences, Te the two ri pair sums. */
Chris@82 68 Ta = T6 + T9;
Chris@82 69 Te = Tc + Td;
Chris@82 70 {
/* ii inputs paired as (2, 5) and (4, 1): differences Ti, Tl and sums Tu, Tv. */
Chris@82 71 E Tg, Th, Tj, Tk;
Chris@82 72 Tg = ii[WS(is, 2)];
Chris@82 73 Th = ii[WS(is, 5)];
Chris@82 74 Ti = Tg - Th;
Chris@82 75 Tu = Tg + Th;
Chris@82 76 Tj = ii[WS(is, 4)];
Chris@82 77 Tk = ii[WS(is, 1)];
Chris@82 78 Tl = Tj - Tk;
Chris@82 79 Tv = Tj + Tk;
Chris@82 80 }
/* Tq combines the two ii pair differences, Ty the two ii pair sums. */
Chris@82 81 Tq = Ti + Tl;
Chris@82 82 Ty = Tu + Tv;
/* Outputs 3 and 0 are plain sums of the intermediates. */
Chris@82 83 ro[WS(os, 3)] = T3 + Ta;
Chris@82 84 io[WS(os, 3)] = Tp + Tq;
Chris@82 85 ro[0] = Tb + Te;
Chris@82 86 io[0] = Tx + Ty;
Chris@82 87 {
/*
 * Outputs 1 and 5, built from the difference terms.
 * NOTE(review): presumably FMA(a, b, c) = a*b + c and
 * FNMS(a, b, c) = c - a*b -- confirm against the macro definitions.
 */
Chris@82 88 E Tf, Tm, Tr, Ts;
Chris@82 89 Tf = FNMS(KP500000000, Ta, T3);
Chris@82 90 Tm = Ti - Tl;
Chris@82 91 ro[WS(os, 5)] = FNMS(KP866025403, Tm, Tf);
Chris@82 92 ro[WS(os, 1)] = FMA(KP866025403, Tm, Tf);
Chris@82 93 Tr = FNMS(KP500000000, Tq, Tp);
Chris@82 94 Ts = T9 - T6;
Chris@82 95 io[WS(os, 1)] = FMA(KP866025403, Ts, Tr);
Chris@82 96 io[WS(os, 5)] = FNMS(KP866025403, Ts, Tr);
Chris@82 97 }
Chris@82 98 {
/* Outputs 2 and 4, built from the sum terms. */
Chris@82 99 E Tt, Tw, Tz, TA;
Chris@82 100 Tt = FNMS(KP500000000, Te, Tb);
Chris@82 101 Tw = Tu - Tv;
Chris@82 102 ro[WS(os, 2)] = FNMS(KP866025403, Tw, Tt);
Chris@82 103 ro[WS(os, 4)] = FMA(KP866025403, Tw, Tt);
Chris@82 104 Tz = FNMS(KP500000000, Ty, Tx);
Chris@82 105 TA = Td - Tc;
Chris@82 106 io[WS(os, 2)] = FNMS(KP866025403, TA, Tz);
Chris@82 107 io[WS(os, 4)] = FMA(KP866025403, TA, Tz);
Chris@82 108 }
Chris@82 109 }
Chris@82 110 }
Chris@82 111 }
Chris@82 112
/*
 * Descriptor passed to X(kdft_register) for the FMA variant: 6, the name
 * "n1_6", and the counts {24, 0, 12, 0}.  The first three counts match the
 * generated header comment for this variant (24 additions, 0 multiplications,
 * 12 fused multiply/add).  The meaning of the fourth count, of GENUS and of
 * the trailing zero fields is not visible in this file.
 */
Chris@82 113 static const kdft_desc desc = { 6, "n1_6", {24, 0, 12, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 114
/*
 * Entry point for this codelet (FMA variant): passes the n1_6 kernel and its
 * descriptor to X(kdft_register) together with the planner p.
 */
Chris@82 115 void X(codelet_n1_6) (planner *p) {
Chris@82 116 X(kdft_register) (p, n1_6, &desc);
Chris@82 117 }
Chris@82 118
Chris@82 119 #else
Chris@82 120
Chris@82 121 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 6 -name n1_6 -include dft/scalar/n.h */
Chris@82 122
Chris@82 123 /*
Chris@82 124 * This function contains 36 FP additions, 8 FP multiplications,
Chris@82 125 * (or, 32 additions, 4 multiplications, 4 fused multiply/add),
Chris@82 126 * 23 stack variables, 2 constants, and 24 memory accesses
Chris@82 127 */
Chris@82 128 #include "dft/scalar/n.h"
Chris@82 129
/*
 * n1_6 (non-FMA variant): generated straight-line kernel; the generator
 * command line recorded above this function shows gen_notw with "-n 6" and
 * no "-fma" flag.
 *
 * Each loop iteration reads six values from ri and six from ii (index 0 and
 * WS(is, 1..5)) and writes six values to ro and six to io (index 0 and
 * WS(os, 1..5)).  The loop runs v times; after each iteration ri/ii advance
 * by ivs and ro/io advance by ovs.
 *
 * NOTE(review): ri/ii and ro/io are presumably the real and imaginary parts
 * of the input and output -- confirm against the codelet headers.
 * R, E, stride, INT, DK, WS, FNMS and MAKE_VOLATILE_STRIDE are defined in the
 * included headers, not in this file.
 */
Chris@82 130 static void n1_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@82 131 {
/* The two constants used by this kernel: 0.866025403... and 0.5. */
Chris@82 132 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@82 133 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@82 134 {
Chris@82 135 INT i;
/* Counts i down from v; the pointer advances are part of the loop step. */
Chris@82 136 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) {
Chris@82 137 E T3, Tb, Tq, Tx, T6, Tc, T9, Td, Ta, Te, Ti, Tu, Tl, Tv, Tr;
Chris@82 138 E Ty;
Chris@82 139 {
/* Inputs 0 and 3: difference and sum, for ri (T3, Tb) and ii (Tq, Tx). */
Chris@82 140 E T1, T2, To, Tp;
Chris@82 141 T1 = ri[0];
Chris@82 142 T2 = ri[WS(is, 3)];
Chris@82 143 T3 = T1 - T2;
Chris@82 144 Tb = T1 + T2;
Chris@82 145 To = ii[0];
Chris@82 146 Tp = ii[WS(is, 3)];
Chris@82 147 Tq = To - Tp;
Chris@82 148 Tx = To + Tp;
Chris@82 149 }
Chris@82 150 {
/* ri inputs paired as (2, 5) and (4, 1): differences T6, T9 and sums Tc, Td. */
Chris@82 151 E T4, T5, T7, T8;
Chris@82 152 T4 = ri[WS(is, 2)];
Chris@82 153 T5 = ri[WS(is, 5)];
Chris@82 154 T6 = T4 - T5;
Chris@82 155 Tc = T4 + T5;
Chris@82 156 T7 = ri[WS(is, 4)];
Chris@82 157 T8 = ri[WS(is, 1)];
Chris@82 158 T9 = T7 - T8;
Chris@82 159 Td = T7 + T8;
Chris@82 160 }
/* Ta combines the two ri pair differences, Te the two ri pair sums. */
Chris@82 161 Ta = T6 + T9;
Chris@82 162 Te = Tc + Td;
Chris@82 163 {
/* ii inputs paired as (2, 5) and (4, 1): differences Ti, Tl and sums Tu, Tv. */
Chris@82 164 E Tg, Th, Tj, Tk;
Chris@82 165 Tg = ii[WS(is, 2)];
Chris@82 166 Th = ii[WS(is, 5)];
Chris@82 167 Ti = Tg - Th;
Chris@82 168 Tu = Tg + Th;
Chris@82 169 Tj = ii[WS(is, 4)];
Chris@82 170 Tk = ii[WS(is, 1)];
Chris@82 171 Tl = Tj - Tk;
Chris@82 172 Tv = Tj + Tk;
Chris@82 173 }
/* Tr combines the two ii pair differences, Ty the two ii pair sums. */
Chris@82 174 Tr = Ti + Tl;
Chris@82 175 Ty = Tu + Tv;
/* Outputs 3 and 0 are plain sums of the intermediates. */
Chris@82 176 ro[WS(os, 3)] = T3 + Ta;
Chris@82 177 io[WS(os, 3)] = Tq + Tr;
Chris@82 178 ro[0] = Tb + Te;
Chris@82 179 io[0] = Tx + Ty;
Chris@82 180 {
/*
 * Outputs 1 and 5, built from the difference terms.  The 0.866... factor is
 * applied with an explicit multiply; the 0.5 factor goes through FNMS.
 * NOTE(review): presumably FNMS(a, b, c) = c - a*b -- confirm against the
 * macro definition.
 */
Chris@82 181 E Tf, Tm, Tn, Ts;
Chris@82 182 Tf = FNMS(KP500000000, Ta, T3);
Chris@82 183 Tm = KP866025403 * (Ti - Tl);
Chris@82 184 ro[WS(os, 5)] = Tf - Tm;
Chris@82 185 ro[WS(os, 1)] = Tf + Tm;
Chris@82 186 Tn = KP866025403 * (T9 - T6);
Chris@82 187 Ts = FNMS(KP500000000, Tr, Tq);
Chris@82 188 io[WS(os, 1)] = Tn + Ts;
Chris@82 189 io[WS(os, 5)] = Ts - Tn;
Chris@82 190 }
Chris@82 191 {
/* Outputs 2 and 4, built from the sum terms. */
Chris@82 192 E Tt, Tw, Tz, TA;
Chris@82 193 Tt = FNMS(KP500000000, Te, Tb);
Chris@82 194 Tw = KP866025403 * (Tu - Tv);
Chris@82 195 ro[WS(os, 2)] = Tt - Tw;
Chris@82 196 ro[WS(os, 4)] = Tt + Tw;
Chris@82 197 Tz = FNMS(KP500000000, Ty, Tx);
Chris@82 198 TA = KP866025403 * (Td - Tc);
Chris@82 199 io[WS(os, 2)] = Tz - TA;
Chris@82 200 io[WS(os, 4)] = TA + Tz;
Chris@82 201 }
Chris@82 202 }
Chris@82 203 }
Chris@82 204 }
Chris@82 205
/*
 * Descriptor passed to X(kdft_register) for the non-FMA variant: 6, the name
 * "n1_6", and the counts {32, 4, 4, 0}.  The first three counts match the
 * generated header comment for this variant (32 additions, 4 multiplications,
 * 4 fused multiply/add).  The meaning of the fourth count, of GENUS and of
 * the trailing zero fields is not visible in this file.
 */
Chris@82 206 static const kdft_desc desc = { 6, "n1_6", {32, 4, 4, 0}, &GENUS, 0, 0, 0, 0 };
Chris@82 207
/*
 * Entry point for this codelet (non-FMA variant): passes the n1_6 kernel and
 * its descriptor to X(kdft_register) together with the planner p.
 */
Chris@82 208 void X(codelet_n1_6) (planner *p) {
Chris@82 209 X(kdft_register) (p, n1_6, &desc);
Chris@82 210 }
Chris@82 211
Chris@82 212 #endif