view Lib/fftw-3.2.1/cell/spu/.svn/text-base/planner.spuc.svn-base @ 1:e86e9c111b29

Updates stuff that potentially fixes the memory leak and also makes it work on Windows and Linux (Need to test). Still have to fix fftw include for linux in Jucer.
author David Ronan <d.m.ronan@qmul.ac.uk>
date Thu, 09 Jul 2015 15:01:32 +0100
parents 25bf17994ef1
children
line wrap: on
line source
/* -*- C -*- */
/*
 * Copyright (c) 2007 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include <math.h>
#include <stdio.h>
#include <string.h>
#include <spu_intrinsics.h>
#include "fftw-spu.h"
#include "../fftw-cell.h"

#undef DEBUG

const double twopi = 6.2831853071795864769252867665590057683943388;

R *fill_twiddle(R *W, int r, int m)
{
     int i, j, v, n = r * m;

     for (j = 0; j < m; j += VL) {
	  for (i = 1; i < r; ++i) {
	       for (v = 0; v < VL; ++v) {
		    *W++ = cos(twopi * i * (j+v) / n);
		    *W++ = sin(twopi * i * (j+v) / n);
	       }
	  }
     }
     return W;
}

R A[2*MAX_N] __attribute__ ((aligned (16)));
R B[2*MAX_N] __attribute__ ((aligned (16)));
R W[2*MAX_N] __attribute__ ((aligned (16)));

void fill_plan(struct spu_plan *p)
{
     p->W = W;     
     while (p->r > 0) {
	  p[1].W = fill_twiddle(p->W, p->r, p->m);
	  ++p;
     }
}

void print_plan(struct spu_plan *p)
{
     while (p->r > 0) {
	  printf("[r:%d m:%d] ", p->r, p->m);
	  ++p;
     }
     printf("\n");
}

unsigned decrementer(void)
{
     return spu_readch(SPU_RdDec);
}

void check(int n)
{
     double maxerr = 0;
     int i, j;

     for (i = 0; i < n; ++i) {
	  double xr = 0, xi = 0;
	  for (j = 0; j < n; ++j) {
	       double c = cos(twopi * i * j / n);
	       double s = -sin(twopi * i * j / n);
	       xr += A[2*j] * c - A[2*j+1] * s;
	       xi += A[2*j] * s + A[2*j+1] * c;
	  }

	  xr -= B[2*i]; if (xr < 0) xr = -xr;
	  xi -= B[2*i+1]; if (xi < 0) xi = -xi;
	  if (xr > maxerr) maxerr = xr;
	  if (xi > maxerr) maxerr = xi;
     }
     printf("%4d %16.10g\n", n, maxerr);
}

struct spu_plan the_plan[15];
struct spu_plan best_plan[15];
int best_time = 10000000;

void doit(void)
{
     int i;
     int b, e;

     fill_plan(the_plan);

     b = decrementer();
     for (i = 0; i < 100; ++i)
	  X(spu_execute_plan)(the_plan, A, B);
     e = decrementer();

     b -= e;

     if (b <= best_time) {

	  memcpy(best_plan, the_plan, sizeof(the_plan));
	  best_time = b;
     }
}

void genall(struct spu_plan *p, int n, int v)
{
     int r;

     if (n < sizeof(X(n2fvtab))  / sizeof(X(n2fvtab)[0])
	 && X(n2fvtab)[n] && (v % VL) == 0) {
	  p->r = -n;
	  doit();
     }

     for (r = 2; r < sizeof(X(t1fvtab))  / sizeof(X(t1fvtab)[0]); ++r)
	  if (n % r == 0 && X(t1fvtab)[r] && ((n / r) % VL) == 0) {
	       p->r = r;
	       p->m = n / r;
	       genall(p+1, p->m, v > 1 ? v : p->r);
	  }
}

#ifdef DEBUG
int main( int argc, char *argv[])
{  
     int n;
     int i;

     spu_writech(SPU_WrDec, -1);

     for (n = 1; n <= MAX_N; ++n) {
	  best_time = 1000000000;
	  best_plan[0].r = 0; /* invalid plan */

	  genall(the_plan, n, 1);

	  if (best_plan[0].r) {
	       printf("%5d: %8d   ", n, best_time);
	       print_plan(best_plan);

	       for (i = 0; i < 2 * n; ++i)
		    A[i] = ((i*i)%n+1) / (double)n;

	       fill_plan(best_plan);
	       X(spu_execute_plan)(best_plan, A, B);	  
	  
	       check(n);
	  }
     }
}

#else
/* output a C file containing a compact representation of all plans
   from 1 to MAX_N */
int main( int argc, char *argv[])
{  
     int n;
     struct spu_plan *p;

     spu_writech(SPU_WrDec, -1);
     printf("/* automatically generated by spu/planner; DO NOT EDIT */\n\n");
     printf("#include \"ifftw.h\"\n\n");
     printf("#ifdef HAVE_CELL\n\n");
     printf("#include \"fftw-cell.h\"\n\n");
     printf("const struct spu_radices X(spu_radices)[%d] = {\n", 
	    (MAX_N/REQUIRE_N_MULTIPLE_OF) + 1);
     for (n = 0; n <= MAX_N; n += REQUIRE_N_MULTIPLE_OF) {
	  best_time = 1000000000;
	  best_plan[0].r = 0; /* invalid plan */

	  if (n > 0)
	       genall(the_plan, n, 1);

	  printf("  /* [%4d] */ {{", n);

	  if (best_plan[0].r) {
	       p = best_plan;
	       for (p = best_plan; ; ++p) {
		    printf(" %d", p->r);
		    if (p->r < 0) break;
		    printf(",");
	       } 
	  } else {
	       printf(" 0");
	  }
	  printf(" }},\n");
     }
     printf("};\n\n");
     printf("#endif /* HAVE_CELL */\n\n");
     return 0;
}
#endif