annotate src/fftw-3.3.8/dft/scalar/codelets/n1_6.c @ 168:ceec0dd9ec9c

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 07 Feb 2020 11:51:13 +0000
parents bd3cc4d1df30
children
rev   line source
cannam@167 1 /*
cannam@167 2 * Copyright (c) 2003, 2007-14 Matteo Frigo
cannam@167 3 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
cannam@167 4 *
cannam@167 5 * This program is free software; you can redistribute it and/or modify
cannam@167 6 * it under the terms of the GNU General Public License as published by
cannam@167 7 * the Free Software Foundation; either version 2 of the License, or
cannam@167 8 * (at your option) any later version.
cannam@167 9 *
cannam@167 10 * This program is distributed in the hope that it will be useful,
cannam@167 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cannam@167 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cannam@167 13 * GNU General Public License for more details.
cannam@167 14 *
cannam@167 15 * You should have received a copy of the GNU General Public License
cannam@167 16 * along with this program; if not, write to the Free Software
cannam@167 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
cannam@167 18 *
cannam@167 19 */
cannam@167 20
cannam@167 21 /* This file was automatically generated --- DO NOT EDIT */
cannam@167 22 /* Generated on Thu May 24 08:04:10 EDT 2018 */
cannam@167 23
cannam@167 24 #include "dft/codelet-dft.h"
cannam@167 25
cannam@167 26 #if defined(ARCH_PREFERS_FMA) || defined(ISA_EXTENSION_PREFERS_FMA)
cannam@167 27
cannam@167 28 /* Generated by: ../../../genfft/gen_notw.native -fma -compact -variables 4 -pipeline-latency 4 -n 6 -name n1_6 -include dft/scalar/n.h */
cannam@167 29
cannam@167 30 /*
cannam@167 31 * This function contains 36 FP additions, 12 FP multiplications,
cannam@167 32 * (or, 24 additions, 0 multiplications, 12 fused multiply/add),
cannam@167 33 * 23 stack variables, 2 constants, and 24 memory accesses
cannam@167 34 */
cannam@167 35 #include "dft/scalar/n.h"
cannam@167 36
/*
 * n1_6, FMA variant: generated kernel (gen_notw, -n 6) for a 6-point complex
 * DFT on split real/imaginary arrays.
 *
 * ri, ii   real and imaginary input arrays (read only)
 * ro, io   real and imaginary output arrays (written)
 * is, os   input/output strides, always used through WS(stride, k), k = 0..5
 * v        number of transforms to perform (loop trip count)
 * ivs, ovs amount added to the input/output pointers after each transform
 *
 * Each pass reads ri/ii at WS(is, 0..5) and writes ro/io at WS(os, 0..5).
 * Writing the inputs as x[n] = ri[n] + i*ii[n], the arithmetic below evaluates
 * X[k] = sum_n x[n] * exp(-2*pi*i*n*k/6), i.e. the exponent sign is -1.
 *
 * NOTE(review): E, DK, WS, FMA, FNMS and MAKE_VOLATILE_STRIDE are defined
 * outside this file. FMA(a, b, c) is presumably a*b + c and FNMS(a, b, c)
 * presumably c - a*b, which agrees with the non-FMA variant of this kernel
 * later in the file -- confirm against the headers.
 */
cannam@167 37 static void n1_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
cannam@167 38 {
cannam@167 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
cannam@167 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
cannam@167 41 {
cannam@167 42 INT i;
/* One transform per pass; all four base pointers advance at the end of each pass. */
cannam@167 43 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) {
cannam@167 44 E T3, Tb, Tp, Tx, T6, Tc, T9, Td, Ta, Te, Ti, Tu, Tl, Tv, Tq;
cannam@167 45 E Ty;
/* Inputs 0 and 3: difference (T3, Tp) and sum (Tb, Tx) of real and imaginary parts. */
cannam@167 46 {
cannam@167 47 E T1, T2, Tn, To;
cannam@167 48 T1 = ri[0];
cannam@167 49 T2 = ri[WS(is, 3)];
cannam@167 50 T3 = T1 - T2;
cannam@167 51 Tb = T1 + T2;
cannam@167 52 Tn = ii[0];
cannam@167 53 To = ii[WS(is, 3)];
cannam@167 54 Tp = Tn - To;
cannam@167 55 Tx = Tn + To;
cannam@167 56 }
/* Real parts of the input pairs (2, 5) and (4, 1): differences T6, T9 and sums Tc, Td. */
cannam@167 57 {
cannam@167 58 E T4, T5, T7, T8;
cannam@167 59 T4 = ri[WS(is, 2)];
cannam@167 60 T5 = ri[WS(is, 5)];
cannam@167 61 T6 = T4 - T5;
cannam@167 62 Tc = T4 + T5;
cannam@167 63 T7 = ri[WS(is, 4)];
cannam@167 64 T8 = ri[WS(is, 1)];
cannam@167 65 T9 = T7 - T8;
cannam@167 66 Td = T7 + T8;
cannam@167 67 }
cannam@167 68 Ta = T6 + T9;
cannam@167 69 Te = Tc + Td;
/* Imaginary parts of the same pairs: differences Ti, Tl and sums Tu, Tv. */
cannam@167 70 {
cannam@167 71 E Tg, Th, Tj, Tk;
cannam@167 72 Tg = ii[WS(is, 2)];
cannam@167 73 Th = ii[WS(is, 5)];
cannam@167 74 Ti = Tg - Th;
cannam@167 75 Tu = Tg + Th;
cannam@167 76 Tj = ii[WS(is, 4)];
cannam@167 77 Tk = ii[WS(is, 1)];
cannam@167 78 Tl = Tj - Tk;
cannam@167 79 Tv = Tj + Tk;
cannam@167 80 }
cannam@167 81 Tq = Ti + Tl;
cannam@167 82 Ty = Tu + Tv;
/* Output 3 is the alternating-sign sum of the inputs; output 0 is the plain sum. */
cannam@167 83 ro[WS(os, 3)] = T3 + Ta;
cannam@167 84 io[WS(os, 3)] = Tp + Tq;
cannam@167 85 ro[0] = Tb + Te;
cannam@167 86 io[0] = Tx + Ty;
/* Outputs 1 and 5, built from the difference terms (T3, Ta, Tp, Tq). */
cannam@167 87 {
cannam@167 88 E Tf, Tm, Tr, Ts;
cannam@167 89 Tf = FNMS(KP500000000, Ta, T3);
cannam@167 90 Tm = Ti - Tl;
cannam@167 91 ro[WS(os, 5)] = FNMS(KP866025403, Tm, Tf);
cannam@167 92 ro[WS(os, 1)] = FMA(KP866025403, Tm, Tf);
cannam@167 93 Tr = FNMS(KP500000000, Tq, Tp);
cannam@167 94 Ts = T9 - T6;
cannam@167 95 io[WS(os, 1)] = FMA(KP866025403, Ts, Tr);
cannam@167 96 io[WS(os, 5)] = FNMS(KP866025403, Ts, Tr);
cannam@167 97 }
/* Outputs 2 and 4, built from the sum terms (Tb, Te, Tx, Ty). */
cannam@167 98 {
cannam@167 99 E Tt, Tw, Tz, TA;
cannam@167 100 Tt = FNMS(KP500000000, Te, Tb);
cannam@167 101 Tw = Tu - Tv;
cannam@167 102 ro[WS(os, 2)] = FNMS(KP866025403, Tw, Tt);
cannam@167 103 ro[WS(os, 4)] = FMA(KP866025403, Tw, Tt);
cannam@167 104 Tz = FNMS(KP500000000, Ty, Tx);
cannam@167 105 TA = Td - Tc;
cannam@167 106 io[WS(os, 2)] = FNMS(KP866025403, TA, Tz);
cannam@167 107 io[WS(os, 4)] = FMA(KP866025403, TA, Tz);
cannam@167 108 }
cannam@167 109 }
cannam@167 110 }
cannam@167 111 }
cannam@167 112
/*
 * Descriptor passed to the planner together with n1_6 (FMA variant).
 * 6 and "n1_6" are the transform size and codelet name; the first three
 * entries of {24, 0, 12, 0} equal the addition / multiplication / fused
 * multiply-add counts quoted in the header comment of this variant.
 * NOTE(review): the meaning of the fourth count, of GENUS and of the four
 * trailing zeros is set by kdft_desc, which is declared elsewhere -- confirm.
 */
cannam@167 113 static const kdft_desc desc = { 6, "n1_6", {24, 0, 12, 0}, &GENUS, 0, 0, 0, 0 };
cannam@167 114
/*
 * Registration entry point (FMA variant): hands the n1_6 kernel and its
 * descriptor to the planner p through X(kdft_register).
 * NOTE(review): X(...) is a macro defined elsewhere, presumably a
 * name-mangling prefix -- confirm.
 */
cannam@167 115 void X(codelet_n1_6) (planner *p) {
cannam@167 116 X(kdft_register) (p, n1_6, &desc);
cannam@167 117 }
cannam@167 118
cannam@167 119 #else
cannam@167 120
cannam@167 121 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 6 -name n1_6 -include dft/scalar/n.h */
cannam@167 122
cannam@167 123 /*
cannam@167 124 * This function contains 36 FP additions, 8 FP multiplications,
cannam@167 125 * (or, 32 additions, 4 multiplications, 4 fused multiply/add),
cannam@167 126 * 23 stack variables, 2 constants, and 24 memory accesses
cannam@167 127 */
cannam@167 128 #include "dft/scalar/n.h"
cannam@167 129
/*
 * n1_6, non-FMA variant: generated kernel (gen_notw, -n 6, without -fma) for a
 * 6-point complex DFT on split real/imaginary arrays. It is compiled only when
 * neither ARCH_PREFERS_FMA nor ISA_EXTENSION_PREFERS_FMA is defined.
 *
 * ri, ii   real and imaginary input arrays (read only)
 * ro, io   real and imaginary output arrays (written)
 * is, os   input/output strides, always used through WS(stride, k), k = 0..5
 * v        number of transforms to perform (loop trip count)
 * ivs, ovs amount added to the input/output pointers after each transform
 *
 * Each pass reads ri/ii at WS(is, 0..5) and writes ro/io at WS(os, 0..5).
 * Writing the inputs as x[n] = ri[n] + i*ii[n], the arithmetic below evaluates
 * X[k] = sum_n x[n] * exp(-2*pi*i*n*k/6), i.e. the exponent sign is -1.
 * Unlike the FMA variant, the sqrt(3)/2 products are formed as explicit
 * multiplications and then added or subtracted.
 *
 * NOTE(review): E, DK, WS, FNMS and MAKE_VOLATILE_STRIDE are defined outside
 * this file. FNMS(a, b, c) is presumably c - a*b -- confirm against the
 * headers.
 */
cannam@167 130 static void n1_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
cannam@167 131 {
cannam@167 132 DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
cannam@167 133 DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
cannam@167 134 {
cannam@167 135 INT i;
/* One transform per pass; all four base pointers advance at the end of each pass. */
cannam@167 136 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) {
cannam@167 137 E T3, Tb, Tq, Tx, T6, Tc, T9, Td, Ta, Te, Ti, Tu, Tl, Tv, Tr;
cannam@167 138 E Ty;
/* Inputs 0 and 3: difference (T3, Tq) and sum (Tb, Tx) of real and imaginary parts. */
cannam@167 139 {
cannam@167 140 E T1, T2, To, Tp;
cannam@167 141 T1 = ri[0];
cannam@167 142 T2 = ri[WS(is, 3)];
cannam@167 143 T3 = T1 - T2;
cannam@167 144 Tb = T1 + T2;
cannam@167 145 To = ii[0];
cannam@167 146 Tp = ii[WS(is, 3)];
cannam@167 147 Tq = To - Tp;
cannam@167 148 Tx = To + Tp;
cannam@167 149 }
/* Real parts of the input pairs (2, 5) and (4, 1): differences T6, T9 and sums Tc, Td. */
cannam@167 150 {
cannam@167 151 E T4, T5, T7, T8;
cannam@167 152 T4 = ri[WS(is, 2)];
cannam@167 153 T5 = ri[WS(is, 5)];
cannam@167 154 T6 = T4 - T5;
cannam@167 155 Tc = T4 + T5;
cannam@167 156 T7 = ri[WS(is, 4)];
cannam@167 157 T8 = ri[WS(is, 1)];
cannam@167 158 T9 = T7 - T8;
cannam@167 159 Td = T7 + T8;
cannam@167 160 }
cannam@167 161 Ta = T6 + T9;
cannam@167 162 Te = Tc + Td;
/* Imaginary parts of the same pairs: differences Ti, Tl and sums Tu, Tv. */
cannam@167 163 {
cannam@167 164 E Tg, Th, Tj, Tk;
cannam@167 165 Tg = ii[WS(is, 2)];
cannam@167 166 Th = ii[WS(is, 5)];
cannam@167 167 Ti = Tg - Th;
cannam@167 168 Tu = Tg + Th;
cannam@167 169 Tj = ii[WS(is, 4)];
cannam@167 170 Tk = ii[WS(is, 1)];
cannam@167 171 Tl = Tj - Tk;
cannam@167 172 Tv = Tj + Tk;
cannam@167 173 }
cannam@167 174 Tr = Ti + Tl;
cannam@167 175 Ty = Tu + Tv;
/* Output 3 is the alternating-sign sum of the inputs; output 0 is the plain sum. */
cannam@167 176 ro[WS(os, 3)] = T3 + Ta;
cannam@167 177 io[WS(os, 3)] = Tq + Tr;
cannam@167 178 ro[0] = Tb + Te;
cannam@167 179 io[0] = Tx + Ty;
/* Outputs 1 and 5, built from the difference terms (T3, Ta, Tq, Tr). */
cannam@167 180 {
cannam@167 181 E Tf, Tm, Tn, Ts;
cannam@167 182 Tf = FNMS(KP500000000, Ta, T3);
cannam@167 183 Tm = KP866025403 * (Ti - Tl);
cannam@167 184 ro[WS(os, 5)] = Tf - Tm;
cannam@167 185 ro[WS(os, 1)] = Tf + Tm;
cannam@167 186 Tn = KP866025403 * (T9 - T6);
cannam@167 187 Ts = FNMS(KP500000000, Tr, Tq);
cannam@167 188 io[WS(os, 1)] = Tn + Ts;
cannam@167 189 io[WS(os, 5)] = Ts - Tn;
cannam@167 190 }
/* Outputs 2 and 4, built from the sum terms (Tb, Te, Tx, Ty). */
cannam@167 191 {
cannam@167 192 E Tt, Tw, Tz, TA;
cannam@167 193 Tt = FNMS(KP500000000, Te, Tb);
cannam@167 194 Tw = KP866025403 * (Tu - Tv);
cannam@167 195 ro[WS(os, 2)] = Tt - Tw;
cannam@167 196 ro[WS(os, 4)] = Tt + Tw;
cannam@167 197 Tz = FNMS(KP500000000, Ty, Tx);
cannam@167 198 TA = KP866025403 * (Td - Tc);
cannam@167 199 io[WS(os, 2)] = Tz - TA;
cannam@167 200 io[WS(os, 4)] = TA + Tz;
cannam@167 201 }
cannam@167 202 }
cannam@167 203 }
cannam@167 204 }
cannam@167 205
/*
 * Descriptor passed to the planner together with n1_6 (non-FMA variant).
 * 6 and "n1_6" are the transform size and codelet name; the first three
 * entries of {32, 4, 4, 0} equal the addition / multiplication / fused
 * multiply-add counts quoted in the header comment of this variant.
 * NOTE(review): the meaning of the fourth count, of GENUS and of the four
 * trailing zeros is set by kdft_desc, which is declared elsewhere -- confirm.
 */
cannam@167 206 static const kdft_desc desc = { 6, "n1_6", {32, 4, 4, 0}, &GENUS, 0, 0, 0, 0 };
cannam@167 207
/*
 * Registration entry point (non-FMA variant): hands the n1_6 kernel and its
 * descriptor to the planner p through X(kdft_register).
 * NOTE(review): X(...) is a macro defined elsewhere, presumably a
 * name-mangling prefix -- confirm.
 */
cannam@167 208 void X(codelet_n1_6) (planner *p) {
cannam@167 209 X(kdft_register) (p, n1_6, &desc);
cannam@167 210 }
cannam@167 211
cannam@167 212 #endif