Mercurial > hg > batch-feature-extraction-tool
view Lib/fftw-3.2.1/reodft/reodft00e-splitradix.c @ 1:e86e9c111b29
Updates stuff that potentially fixes the memory leak and also makes it work on Windows and Linux (Need to test). Still have to fix fftw include for linux in Jucer.
author | David Ronan <d.m.ronan@qmul.ac.uk> |
---|---|
date | Thu, 09 Jul 2015 15:01:32 +0100 |
parents | 25bf17994ef1 |
children |
line wrap: on
line source
/* * Copyright (c) 2005 Matteo Frigo * Copyright (c) 2005 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ /* Do an R{E,O}DFT00 problem (of an odd length n) recursively via an R{E,O}DFT00 problem and an RDFT problem of half the length. This works by "logically" expanding the array to a real-even/odd DFT of length 2n-/+2 and then applying the split-radix algorithm. In this way, we can avoid having to pad to twice the length (ala redft00-r2hc-pad), saving a factor of ~2 for n=2^m+/-1, but don't incur the accuracy loss that the "ordinary" algorithm sacrifices (ala redft00-r2hc.c). */ #include "reodft.h" typedef struct { solver super; } S; typedef struct { plan_rdft super; plan *clde, *cldo; twid *td; INT is, os; INT n; INT vl; INT ivs, ovs; } P; /* redft00 */ static void apply_e(const plan *ego_, R *I, R *O) { const P *ego = (const P *) ego_; INT is = ego->is, os = ego->os; INT i, j, n = ego->n + 1, n2 = (n-1)/2; INT iv, vl = ego->vl; INT ivs = ego->ivs, ovs = ego->ovs; R *W = ego->td->W - 2; R *buf; buf = (R *) MALLOC(sizeof(R) * n2, BUFFERS); for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { /* do size (n-1)/2 r2hc transform of odd-indexed elements with stride 4, "wrapping around" end of array with even boundary conditions */ for (j = 0, i = 1; i < n; i += 4) buf[j++] = I[is * i]; for (i = 2*n-2-i; i > 0; i -= 4) buf[j++] = I[is * i]; { plan_rdft *cld = (plan_rdft *) ego->cldo; cld->apply((plan *) cld, buf, buf); } /* do size (n+1)/2 redft00 of the even-indexed elements, writing to O: */ { plan_rdft *cld = (plan_rdft *) ego->clde; cld->apply((plan *) cld, I, O); } /* combine the results with the twiddle factors to get output */ { /* DC element */ E b20 = O[0], b0 = K(2.0) * buf[0]; O[0] = b20 + b0; O[2*(n2*os)] = b20 - b0; /* O[n2*os] = O[n2*os]; */ } for (i = 1; i < n2 - i; ++i) { E ap, am, br, bi, wr, wi, wbr, wbi; br = buf[i]; bi = buf[n2 - i]; wr = W[2*i]; wi = W[2*i+1]; #if FFT_SIGN == -1 wbr = K(2.0) * (wr*br + wi*bi); wbi = K(2.0) * (wr*bi - wi*br); #else wbr = K(2.0) * (wr*br - wi*bi); wbi = K(2.0) * (wr*bi + wi*br); #endif ap = O[i*os]; O[i*os] = ap + wbr; O[(2*n2 - i)*os] = ap - wbr; am = O[(n2 - i)*os]; #if FFT_SIGN == -1 O[(n2 - i)*os] = am - wbi; O[(n2 + i)*os] = am + wbi; #else O[(n2 - i)*os] = am + wbi; O[(n2 + i)*os] = am - wbi; #endif } if (i == n2 - i) { /* Nyquist element */ E ap, wbr; wbr = K(2.0) * (W[2*i] * buf[i]); ap = O[i*os]; O[i*os] = ap + wbr; O[(2*n2 - i)*os] = ap - wbr; } } X(ifree)(buf); } /* rodft00 */ static void apply_o(const plan *ego_, R *I, R *O) { const P *ego = (const P *) ego_; INT is = ego->is, os = ego->os; INT i, j, n = ego->n - 1, n2 = (n+1)/2; INT iv, vl = ego->vl; INT ivs = ego->ivs, ovs = ego->ovs; R *W = ego->td->W - 2; R *buf; buf = (R *) MALLOC(sizeof(R) * n2, BUFFERS); for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { /* do size (n+1)/2 r2hc transform of even-indexed elements with stride 4, "wrapping around" end of array with odd boundary conditions */ for (j = 0, i = 0; i < n; i += 4) buf[j++] = I[is * i]; for (i = 2*n-i; i > 0; i -= 4) buf[j++] = -I[is * i]; { plan_rdft *cld = (plan_rdft *) ego->cldo; cld->apply((plan *) cld, buf, buf); } /* do size (n-1)/2 rodft00 of the odd-indexed elements, writing to O: */ { plan_rdft *cld = (plan_rdft *) ego->clde; if (I == O) { /* can't use I+is and I, subplan would lose in-placeness */ cld->apply((plan *) cld, I + is, I + is); /* we could maybe avoid this copy by modifying the twiddle loop, but currently I can't be bothered. */ A(is >= os); for (i = 0; i < n2-1; ++i) O[os*i] = I[is*(i+1)]; } else cld->apply((plan *) cld, I + is, O); } /* combine the results with the twiddle factors to get output */ O[(n2-1)*os] = K(2.0) * buf[0]; for (i = 1; i < n2 - i; ++i) { E ap, am, br, bi, wr, wi, wbr, wbi; br = buf[i]; bi = buf[n2 - i]; wr = W[2*i]; wi = W[2*i+1]; #if FFT_SIGN == -1 wbr = K(2.0) * (wr*br + wi*bi); wbi = K(2.0) * (wi*br - wr*bi); #else wbr = K(2.0) * (wr*br - wi*bi); wbi = K(2.0) * (wr*bi + wi*br); #endif ap = O[(i-1)*os]; O[(i-1)*os] = wbi + ap; O[(2*n2-1 - i)*os] = wbi - ap; am = O[(n2-1 - i)*os]; #if FFT_SIGN == -1 O[(n2-1 - i)*os] = wbr + am; O[(n2-1 + i)*os] = wbr - am; #else O[(n2-1 - i)*os] = wbr + am; O[(n2-1 + i)*os] = wbr - am; #endif } if (i == n2 - i) { /* Nyquist element */ E ap, wbi; wbi = K(2.0) * (W[2*i+1] * buf[i]); ap = O[(i-1)*os]; O[(i-1)*os] = wbi + ap; O[(2*n2-1 - i)*os] = wbi - ap; } } X(ifree)(buf); } static void awake(plan *ego_, enum wakefulness wakefulness) { P *ego = (P *) ego_; static const tw_instr reodft00e_tw[] = { { TW_COS, 1, 1 }, { TW_SIN, 1, 1 }, { TW_NEXT, 1, 0 } }; X(plan_awake)(ego->clde, wakefulness); X(plan_awake)(ego->cldo, wakefulness); X(twiddle_awake)(wakefulness, &ego->td, reodft00e_tw, 2*ego->n, 1, ego->n/4); } static void destroy(plan *ego_) { P *ego = (P *) ego_; X(plan_destroy_internal)(ego->cldo); X(plan_destroy_internal)(ego->clde); } static void print(const plan *ego_, printer *p) { const P *ego = (const P *) ego_; if (ego->super.apply == apply_e) p->print(p, "(redft00e-splitradix-%D%v%(%p%)%(%p%))", ego->n + 1, ego->vl, ego->clde, ego->cldo); else p->print(p, "(rodft00e-splitradix-%D%v%(%p%)%(%p%))", ego->n - 1, ego->vl, ego->clde, ego->cldo); } static int applicable0(const solver *ego_, const problem *p_) { const problem_rdft *p = (const problem_rdft *) p_; UNUSED(ego_); return (1 && p->sz->rnk == 1 && p->vecsz->rnk <= 1 && (p->kind[0] == REDFT00 || p->kind[0] == RODFT00) && p->sz->dims[0].n > 1 /* don't create size-0 sub-plans */ && p->sz->dims[0].n % 2 /* odd: 4 divides "logical" DFT */ && (p->I != p->O || p->vecsz->rnk == 0 || p->vecsz->dims[0].is == p->vecsz->dims[0].os) && (p->kind[0] != RODFT00 || p->I != p->O || p->sz->dims[0].is >= p->sz->dims[0].os) /* laziness */ ); } static int applicable(const solver *ego, const problem *p, const planner *plnr) { return (!NO_SLOWP(plnr) && applicable0(ego, p)); } static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) { P *pln; const problem_rdft *p; plan *clde, *cldo; R *buf; INT n, n0; opcnt ops; int inplace_odd; static const plan_adt padt = { X(rdft_solve), awake, print, destroy }; if (!applicable(ego_, p_, plnr)) return (plan *)0; p = (const problem_rdft *) p_; n = (n0 = p->sz->dims[0].n) + (p->kind[0] == REDFT00 ? (INT)-1 : (INT)1); A(n > 0 && n % 2 == 0); buf = (R *) MALLOC(sizeof(R) * (n/2), BUFFERS); inplace_odd = p->kind[0]==RODFT00 && p->I == p->O; clde = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)( X(mktensor_1d)(n0-n/2, 2*p->sz->dims[0].is, inplace_odd ? p->sz->dims[0].is : p->sz->dims[0].os), X(mktensor_0d)(), TAINT(p->I + p->sz->dims[0].is * (p->kind[0]==RODFT00), p->vecsz->rnk ? p->vecsz->dims[0].is : 0), TAINT(p->O + p->sz->dims[0].is * inplace_odd, p->vecsz->rnk ? p->vecsz->dims[0].os : 0), p->kind[0])); if (!clde) { X(ifree)(buf); return (plan *)0; } cldo = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)( X(mktensor_1d)(n/2, 1, 1), X(mktensor_0d)(), buf, buf, R2HC)); X(ifree)(buf); if (!cldo) return (plan *)0; pln = MKPLAN_RDFT(P, &padt, p->kind[0] == REDFT00 ? apply_e : apply_o); pln->n = n; pln->is = p->sz->dims[0].is; pln->os = p->sz->dims[0].os; pln->clde = clde; pln->cldo = cldo; pln->td = 0; X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); X(ops_zero)(&ops); ops.other = n/2; ops.add = (p->kind[0]==REDFT00 ? (INT)2 : (INT)0) + (n/2-1)/2 * 6 + ((n/2)%2==0) * 2; ops.mul = 1 + (n/2-1)/2 * 6 + ((n/2)%2==0) * 2; /* tweak ops.other so that r2hc-pad is used for small sizes, which seems to be a lot faster on my machine: */ ops.other += 256; X(ops_zero)(&pln->super.super.ops); X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops); X(ops_madd2)(pln->vl, &clde->ops, &pln->super.super.ops); X(ops_madd2)(pln->vl, &cldo->ops, &pln->super.super.ops); return &(pln->super.super); } /* constructor */ static solver *mksolver(void) { static const solver_adt sadt = { PROBLEM_RDFT, mkplan, 0 }; S *slv = MKSOLVER(S, &sadt); return &(slv->super); } void X(reodft00e_splitradix_register)(planner *p) { REGISTER_SOLVER(p, mksolver()); }