view Lib/fftw-3.2.1/dft/simd/t.c @ 3:005e311b5e62

Fixed memory leak. :) Need to fix Debug FFTW now though.
author Geogaddi\David <d.m.ronan@qmul.ac.uk>
date Fri, 10 Jul 2015 00:33:15 +0100
parents 25bf17994ef1
children
line wrap: on
line source
/*
 * Copyright (c) 2003, 2007-8 Matteo Frigo
 * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include "codelet-dft.h"

#if HAVE_SIMD

#include "simd.h"

static int okp_common(const ct_desc *d,
		      const R *rio, const R *iio, 
		      INT rs, INT vs, INT m, INT mb, INT me, INT ms,
		      const planner *plnr)
{
     UNUSED(rio); UNUSED(iio);
     return (RIGHT_CPU()
	     && !NO_SIMDP(plnr)
	     && SIMD_STRIDE_OKA(rs)
	     && SIMD_VSTRIDE_OKA(ms)
             && (m % VL) == 0
             && (mb % VL) == 0
             && (me % VL) == 0
	     && (!d->rs || (d->rs == rs))
	     && (!d->vs || (d->vs == vs))
	     && (!d->ms || (d->ms == ms))
	  );
}

static int okp_commonu(const ct_desc *d,
		      const R *rio, const R *iio, 
		      INT rs, INT vs, INT m, INT mb, INT me, INT ms,
		      const planner *plnr)
{
     UNUSED(rio); UNUSED(iio); UNUSED(m);
     return (RIGHT_CPU()
	     && !NO_SIMDP(plnr)
	     && SIMD_STRIDE_OK(rs)
	     && SIMD_VSTRIDE_OK(ms)
             && (mb % VL) == 0
             && (me % VL) == 0
	     && (!d->rs || (d->rs == rs))
	     && (!d->vs || (d->vs == vs))
	     && (!d->ms || (d->ms == ms))
	  );
}

static int okp_t1f(const ct_desc *d,
		   const R *rio, const R *iio, 
		   INT rs, INT vs, INT m, INT mb, INT me, INT ms,
		   const planner *plnr)
{
     return  okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
	  && iio == rio + 1
	  && ALIGNEDA(rio);
}

extern const ct_genus X(dft_t1fsimd_genus);
const ct_genus X(dft_t1fsimd_genus) = { okp_t1f, VL };

static int okp_t1fu(const ct_desc *d,
		    const R *rio, const R *iio, 
		    INT rs, INT vs, INT m, INT mb, INT me, INT ms,
		    const planner *plnr)
{
     return  okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
	  && iio == rio + 1
	  && ALIGNED(rio);
}

extern const ct_genus X(dft_t1fusimd_genus);
const ct_genus X(dft_t1fusimd_genus) = { okp_t1fu, VL };

static int okp_t1b(const ct_desc *d,
		   const R *rio, const R *iio, 
		   INT rs, INT vs, INT m, INT mb, INT me, INT ms,
		   const planner *plnr)
{
     return  okp_common(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
	  && rio == iio + 1
	  && ALIGNEDA(iio);
}

extern const ct_genus X(dft_t1bsimd_genus);
const ct_genus X(dft_t1bsimd_genus) = { okp_t1b, VL };

static int okp_t1bu(const ct_desc *d,					\
		   const R *rio, const R *iio, 				\
		   INT rs, INT vs, INT m, INT mb, INT me, INT ms,	\
		   const planner *plnr)					\
{									\
     return  okp_commonu(d, rio, iio, rs, vs, m, mb, me, ms, plnr)	\
	  && rio == iio + 1						\
	  && ALIGNED(iio);						\
}

extern const ct_genus X(dft_t1busimd_genus);
const ct_genus X(dft_t1busimd_genus) = { okp_t1bu, VL };

/* use t2* codelets only when n = m*radix is small, because
   t2* codelets use ~2n twiddle factors (instead of ~n) */
static int small_enough(const ct_desc *d, INT m)
{
     return m * d->radix <= 16384;
}

static int okp_t2f(const ct_desc *d,
		   const R *rio, const R *iio, 
		   INT rs, INT vs, INT m, INT mb, INT me, INT ms,
		   const planner *plnr)
{
     return  okp_t1f(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
	  && small_enough(d, m);
}

extern const ct_genus X(dft_t2fsimd_genus);
const ct_genus X(dft_t2fsimd_genus) = { okp_t2f, VL };

static int okp_t2b(const ct_desc *d,
		   const R *rio, const R *iio, 
		   INT rs, INT vs, INT m, INT mb, INT me, INT ms,
		   const planner *plnr)
{
     return  okp_t1b(d, rio, iio, rs, vs, m, mb, me, ms, plnr)
	  && small_enough(d, m);
}

extern const ct_genus X(dft_t2bsimd_genus);
const ct_genus X(dft_t2bsimd_genus) = { okp_t2b, VL };

#endif /* HAVE_SIMD */