annotate src/fftw-3.3.3/dft/scalar/codelets/n1_6.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents 37bf6b4a2645
children
rev   line source
Chris@10 1 /*
Chris@10 2 * Copyright (c) 2003, 2007-11 Matteo Frigo
Chris@10 3 * Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
Chris@10 4 *
Chris@10 5 * This program is free software; you can redistribute it and/or modify
Chris@10 6 * it under the terms of the GNU General Public License as published by
Chris@10 7 * the Free Software Foundation; either version 2 of the License, or
Chris@10 8 * (at your option) any later version.
Chris@10 9 *
Chris@10 10 * This program is distributed in the hope that it will be useful,
Chris@10 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@10 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@10 13 * GNU General Public License for more details.
Chris@10 14 *
Chris@10 15 * You should have received a copy of the GNU General Public License
Chris@10 16 * along with this program; if not, write to the Free Software
Chris@10 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Chris@10 18 *
Chris@10 19 */
Chris@10 20
Chris@10 21 /* This file was automatically generated --- DO NOT EDIT */
Chris@10 22 /* Generated on Sun Nov 25 07:35:42 EST 2012 */
Chris@10 23
Chris@10 24 #include "codelet-dft.h"
Chris@10 25
Chris@10 26 #ifdef HAVE_FMA
Chris@10 27
Chris@10 28 /* Generated by: ../../../genfft/gen_notw.native -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 6 -name n1_6 -include n.h */
Chris@10 29
Chris@10 30 /*
Chris@10 31 * This function contains 36 FP additions, 12 FP multiplications,
Chris@10 32 * (or, 24 additions, 0 multiplications, 12 fused multiply/add),
Chris@10 33 * 30 stack variables, 2 constants, and 24 memory accesses
Chris@10 34 */
Chris@10 35 #include "n.h"
Chris@10 36
/*
 * n1_6 (HAVE_FMA variant): generated kernel for a transform of size 6
 * (the generator was run with "-n 6" and the descriptor in this file
 * registers the kernel with size 6).
 *
 * ri, ii  - input arrays; each iteration reads offsets 0 and WS(is, 1..5)
 *           from both of them.
 * ro, io  - output arrays; each iteration writes offsets 0 and WS(os, 1..5)
 *           in both of them.
 * is, os  - strides handed to WS() to form the input/output offsets.
 * v       - iteration count; the loop body runs while the counter is > 0.
 * ivs,ovs - per-iteration pointer increments for (ri, ii) and (ro, io).
 *
 * NOTE(review): ri/ii and ro/io are presumably the real and imaginary parts
 * of the complex input and output -- confirm against codelet-dft.h.
 * NOTE(review): FMA/FNMS are macros defined outside this file; presumably
 * FMA(a, b, c) = a * b + c and FNMS(a, b, c) = c - a * b -- confirm.
 */
Chris@10 37 static void n1_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@10 38 {
/* The two constants used by the kernel: numerically sqrt(3)/2 and 1/2. */
Chris@10 39 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@10 40 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@10 41 {
Chris@10 42 INT i;
/* One pass per iteration: i counts down from v while the four pointers
 * advance by ivs/ovs.  MAKE_VOLATILE_STRIDE is a macro defined elsewhere
 * (NOTE(review): presumably a compiler-optimization workaround -- confirm). */
Chris@10 43 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) {
Chris@10 44 E TA, Tz;
Chris@10 45 {
Chris@10 46 E Tb, T3, Tx, Tp, Tj, Te, Ts, Ta, Tu, Ti, Tk;
Chris@10 47 {
Chris@10 48 E T1, T2, Tn, To;
/* Inputs 0 and 3 are combined into sums (Tb, Tx) and differences (T3, Tp). */
Chris@10 49 T1 = ri[0];
Chris@10 50 T2 = ri[WS(is, 3)];
Chris@10 51 Tn = ii[0];
Chris@10 52 To = ii[WS(is, 3)];
Chris@10 53 {
Chris@10 54 E T4, T5, T7, T8;
Chris@10 55 T4 = ri[WS(is, 2)];
Chris@10 56 Tb = T1 + T2;
Chris@10 57 T3 = T1 - T2;
Chris@10 58 Tx = Tn + To;
Chris@10 59 Tp = Tn - To;
Chris@10 60 T5 = ri[WS(is, 5)];
Chris@10 61 T7 = ri[WS(is, 4)];
Chris@10 62 T8 = ri[WS(is, 1)];
Chris@10 63 {
Chris@10 64 E Tg, Tc, T6, Td, T9, Th;
/* Inputs 2/5 and 4/1 are paired the same way; loads are interleaved with
 * the arithmetic (the generator was run with -schedule-for-pipeline). */
Chris@10 65 Tg = ii[WS(is, 2)];
Chris@10 66 Tc = T4 + T5;
Chris@10 67 T6 = T4 - T5;
Chris@10 68 Td = T7 + T8;
Chris@10 69 T9 = T7 - T8;
Chris@10 70 Th = ii[WS(is, 5)];
Chris@10 71 Tj = ii[WS(is, 4)];
Chris@10 72 Te = Tc + Td;
Chris@10 73 TA = Td - Tc;
Chris@10 74 Ts = T9 - T6;
Chris@10 75 Ta = T6 + T9;
Chris@10 76 Tu = Tg + Th;
Chris@10 77 Ti = Tg - Th;
Chris@10 78 Tk = ii[WS(is, 1)];
Chris@10 79 }
Chris@10 80 }
Chris@10 81 }
/* ro[3] and ro[0] are plain sums; no constant multiplication is involved. */
Chris@10 82 ro[WS(os, 3)] = T3 + Ta;
Chris@10 83 ro[0] = Tb + Te;
Chris@10 84 {
Chris@10 85 E Tf, Tv, Tl, Ty, Tr;
Chris@10 86 Tf = FNMS(KP500000000, Ta, T3);
Chris@10 87 Tv = Tj + Tk;
Chris@10 88 Tl = Tj - Tk;
Chris@10 89 {
Chris@10 90 E Tt, Tw, Tq, Tm;
Chris@10 91 Tt = FNMS(KP500000000, Te, Tb);
Chris@10 92 Ty = Tu + Tv;
Chris@10 93 Tw = Tu - Tv;
Chris@10 94 Tq = Ti + Tl;
Chris@10 95 Tm = Ti - Tl;
Chris@10 96 io[0] = Tx + Ty;
/* The remaining outputs come in FMA/FNMS pairs sharing the same operands. */
Chris@10 97 ro[WS(os, 1)] = FMA(KP866025403, Tm, Tf);
Chris@10 98 ro[WS(os, 5)] = FNMS(KP866025403, Tm, Tf);
Chris@10 99 Tr = FNMS(KP500000000, Tq, Tp);
Chris@10 100 io[WS(os, 3)] = Tp + Tq;
Chris@10 101 ro[WS(os, 2)] = FNMS(KP866025403, Tw, Tt);
Chris@10 102 ro[WS(os, 4)] = FMA(KP866025403, Tw, Tt);
Chris@10 103 }
Chris@10 104 io[WS(os, 5)] = FNMS(KP866025403, Ts, Tr);
Chris@10 105 io[WS(os, 1)] = FMA(KP866025403, Ts, Tr);
Chris@10 106 Tz = FNMS(KP500000000, Ty, Tx);
Chris@10 107 }
Chris@10 108 }
/* TA and Tz are declared at loop-body scope so they are still live here. */
Chris@10 109 io[WS(os, 4)] = FMA(KP866025403, TA, Tz);
Chris@10 110 io[WS(os, 2)] = FNMS(KP866025403, TA, Tz);
Chris@10 111 }
Chris@10 112 }
Chris@10 113 }
Chris@10 114
/* Descriptor handed to X(kdft_register) for the HAVE_FMA kernel: size 6,
 * name "n1_6".  The {24, 0, 12, 0} entries match the operation counts in the
 * generated comment for this variant (24 additions, 0 multiplications,
 * 12 fused multiply/add).  The meaning of the remaining fields is set by
 * kdft_desc, which is declared outside this file. */
Chris@10 115 static const kdft_desc desc = { 6, "n1_6", {24, 0, 12, 0}, &GENUS, 0, 0, 0, 0 };
Chris@10 116
/* Registration entry point (HAVE_FMA build): passes the n1_6 kernel and its
 * descriptor to X(kdft_register) for planner p.  X() is a macro defined
 * outside this file (NOTE(review): presumably a symbol-name prefix -- confirm). */
Chris@10 117 void X(codelet_n1_6) (planner *p) {
Chris@10 118 X(kdft_register) (p, n1_6, &desc);
Chris@10 119 }
Chris@10 120
Chris@10 121 #else /* HAVE_FMA */
Chris@10 122
Chris@10 123 /* Generated by: ../../../genfft/gen_notw.native -compact -variables 4 -pipeline-latency 4 -n 6 -name n1_6 -include n.h */
Chris@10 124
Chris@10 125 /*
Chris@10 126 * This function contains 36 FP additions, 8 FP multiplications,
Chris@10 127 * (or, 32 additions, 4 multiplications, 4 fused multiply/add),
Chris@10 128 * 23 stack variables, 2 constants, and 24 memory accesses
Chris@10 129 */
Chris@10 130 #include "n.h"
Chris@10 131
/*
 * n1_6 (variant built when HAVE_FMA is not defined): generated kernel for a
 * transform of size 6 (the generator was run with "-n 6" and the descriptor
 * in this file registers the kernel with size 6).
 *
 * ri, ii  - input arrays; each iteration reads offsets 0 and WS(is, 1..5)
 *           from both of them.
 * ro, io  - output arrays; each iteration writes offsets 0 and WS(os, 1..5)
 *           in both of them.
 * is, os  - strides handed to WS() to form the input/output offsets.
 * v       - iteration count; the loop body runs while the counter is > 0.
 * ivs,ovs - per-iteration pointer increments for (ri, ii) and (ro, io).
 *
 * NOTE(review): ri/ii and ro/io are presumably the real and imaginary parts
 * of the complex input and output -- confirm against codelet-dft.h.
 * NOTE(review): FNMS is a macro defined outside this file; presumably
 * FNMS(a, b, c) = c - a * b -- confirm.
 */
Chris@10 132 static void n1_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
Chris@10 133 {
/* The two constants used by the kernel: numerically sqrt(3)/2 and 1/2. */
Chris@10 134 DK(KP866025403, +0.866025403784438646763723170752936183471402627);
Chris@10 135 DK(KP500000000, +0.500000000000000000000000000000000000000000000);
Chris@10 136 {
Chris@10 137 INT i;
/* One pass per iteration: i counts down from v while the four pointers
 * advance by ivs/ovs.  MAKE_VOLATILE_STRIDE is a macro defined elsewhere
 * (NOTE(review): presumably a compiler-optimization workaround -- confirm). */
Chris@10 138 for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) {
Chris@10 139 E T3, Tb, Tq, Tx, T6, Tc, T9, Td, Ta, Te, Ti, Tu, Tl, Tv, Tr;
Chris@10 140 E Ty;
Chris@10 141 {
Chris@10 142 E T1, T2, To, Tp;
/* Inputs 0 and 3: differences (T3, Tq) and sums (Tb, Tx). */
Chris@10 143 T1 = ri[0];
Chris@10 144 T2 = ri[WS(is, 3)];
Chris@10 145 T3 = T1 - T2;
Chris@10 146 Tb = T1 + T2;
Chris@10 147 To = ii[0];
Chris@10 148 Tp = ii[WS(is, 3)];
Chris@10 149 Tq = To - Tp;
Chris@10 150 Tx = To + Tp;
Chris@10 151 }
Chris@10 152 {
Chris@10 153 E T4, T5, T7, T8;
/* ri inputs 2/5 and 4/1: differences (T6, T9) and sums (Tc, Td). */
Chris@10 154 T4 = ri[WS(is, 2)];
Chris@10 155 T5 = ri[WS(is, 5)];
Chris@10 156 T6 = T4 - T5;
Chris@10 157 Tc = T4 + T5;
Chris@10 158 T7 = ri[WS(is, 4)];
Chris@10 159 T8 = ri[WS(is, 1)];
Chris@10 160 T9 = T7 - T8;
Chris@10 161 Td = T7 + T8;
Chris@10 162 }
Chris@10 163 Ta = T6 + T9;
Chris@10 164 Te = Tc + Td;
Chris@10 165 {
Chris@10 166 E Tg, Th, Tj, Tk;
/* ii inputs 2/5 and 4/1: differences (Ti, Tl) and sums (Tu, Tv). */
Chris@10 167 Tg = ii[WS(is, 2)];
Chris@10 168 Th = ii[WS(is, 5)];
Chris@10 169 Ti = Tg - Th;
Chris@10 170 Tu = Tg + Th;
Chris@10 171 Tj = ii[WS(is, 4)];
Chris@10 172 Tk = ii[WS(is, 1)];
Chris@10 173 Tl = Tj - Tk;
Chris@10 174 Tv = Tj + Tk;
Chris@10 175 }
Chris@10 176 Tr = Ti + Tl;
Chris@10 177 Ty = Tu + Tv;
/* Outputs 3 and 0 are plain sums; no constant multiplication is involved. */
Chris@10 178 ro[WS(os, 3)] = T3 + Ta;
Chris@10 179 io[WS(os, 3)] = Tq + Tr;
Chris@10 180 ro[0] = Tb + Te;
Chris@10 181 io[0] = Tx + Ty;
Chris@10 182 {
Chris@10 183 E Tf, Tm, Tn, Ts;
/* Outputs 1 and 5: a shared term (Tf or Ts) plus/minus a KP866025403 product. */
Chris@10 184 Tf = FNMS(KP500000000, Ta, T3);
Chris@10 185 Tm = KP866025403 * (Ti - Tl);
Chris@10 186 ro[WS(os, 5)] = Tf - Tm;
Chris@10 187 ro[WS(os, 1)] = Tf + Tm;
Chris@10 188 Tn = KP866025403 * (T9 - T6);
Chris@10 189 Ts = FNMS(KP500000000, Tr, Tq);
Chris@10 190 io[WS(os, 1)] = Tn + Ts;
Chris@10 191 io[WS(os, 5)] = Ts - Tn;
Chris@10 192 }
Chris@10 193 {
Chris@10 194 E Tt, Tw, Tz, TA;
/* Outputs 2 and 4: a shared term (Tt or Tz) plus/minus a KP866025403 product. */
Chris@10 195 Tt = FNMS(KP500000000, Te, Tb);
Chris@10 196 Tw = KP866025403 * (Tu - Tv);
Chris@10 197 ro[WS(os, 2)] = Tt - Tw;
Chris@10 198 ro[WS(os, 4)] = Tt + Tw;
Chris@10 199 Tz = FNMS(KP500000000, Ty, Tx);
Chris@10 200 TA = KP866025403 * (Td - Tc);
Chris@10 201 io[WS(os, 2)] = Tz - TA;
Chris@10 202 io[WS(os, 4)] = TA + Tz;
Chris@10 203 }
Chris@10 204 }
Chris@10 205 }
Chris@10 206 }
Chris@10 207
/* Descriptor handed to X(kdft_register) for the non-FMA kernel: size 6,
 * name "n1_6".  The {32, 4, 4, 0} entries match the operation counts in the
 * generated comment for this variant (32 additions, 4 multiplications,
 * 4 fused multiply/add).  The meaning of the remaining fields is set by
 * kdft_desc, which is declared outside this file. */
Chris@10 208 static const kdft_desc desc = { 6, "n1_6", {32, 4, 4, 0}, &GENUS, 0, 0, 0, 0 };
Chris@10 209
/* Registration entry point (non-FMA build): passes the n1_6 kernel and its
 * descriptor to X(kdft_register) for planner p.  X() is a macro defined
 * outside this file (NOTE(review): presumably a symbol-name prefix -- confirm). */
Chris@10 210 void X(codelet_n1_6) (planner *p) {
Chris@10 211 X(kdft_register) (p, n1_6, &desc);
Chris@10 212 }
Chris@10 213
Chris@10 214 #endif /* HAVE_FMA */