view Lib/fftw-3.2.1/cell/spu/.svn/text-base/copy.spuc.svn-base @ 7:c6f38cba266d

Cleaned up redundant code
author Geogaddi\David <d.m.ronan@qmul.ac.uk>
date Wed, 22 Jul 2015 15:14:58 +0100
parents 25bf17994ef1
children
line wrap: on
line source
/* -*- C -*- */
/*
 * Copyright (c) 2007 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include "fftw-spu.h"
#include "../fftw-cell.h"

/*
 * Tile edge, in elements, for 2D copies.  The staging buffer below is
 * sized as BLOCKSZ * BLOCKSZ * 2 values of type R (plus ALIGNMENT bytes).
 */
#define BLOCKSZ 64

/*
 * Copy the region described by *c from c->I to c->O, one tile at a time,
 * staging every tile through a single buffer obtained from X(spu_alloc).
 *
 * The region is n elements long in one dimension (byte strides is_bytes
 * on input, os_bytes on output) and v elements long in the other (byte
 * strides ivs_bytes / ovs_bytes).  It is cut into tiles of at most
 * BLOCKSZ x BLOCKSZ elements; when only one element remains along the
 * v dimension the tile is instead a 1D run of up to BLOCKSZ*BLOCKSZ/8
 * elements.
 *
 * Tiles are numbered in visiting order and dealt out round-robin: this
 * call only transfers the tiles whose number is congruent to c->my_id
 * modulo c->nspe, and skips all others.
 *
 * NOTE(review): c->nspe is used as a modulus and is presumably >= 1,
 * with 0 <= c->my_id < c->nspe -- confirm against the caller.
 */
void X(spu_do_copy)(const struct copy_context *c)
{
     int n = c->n, v = c->v, nspe = c->nspe, my_id = c->my_id;
     int is_bytes = c->is_bytes, os_bytes = c->os_bytes;
     int ivs_bytes = c->ivs_bytes, ovs_bytes = c->ovs_bytes;
     int nblock = 0;	/* running tile number, counts skipped tiles too */
     int nv = BLOCKSZ;	/* tile extent along v; shrinks on the last pass */
     int in, iv, nn;
     R *A;

     /*
      * One staging buffer serves every tile.  ALIGNMENT extra bytes are
      * requested, but the buffer is used from its base address.
      * NOTE(review): presumably X(spu_alloc) already returns suitably
      * aligned storage -- confirm; the slack is kept so that the
      * allocator sees the same request as before.
      */
     X(spu_alloc_reset)();
     A = X(spu_alloc)(BLOCKSZ * BLOCKSZ * 2 * sizeof(R) + ALIGNMENT);

     for (iv = 0; iv < v; iv += nv) {
	  /* clamp the final, partial tile along v */
	  if (nv > v - iv) nv = v - iv;

	  /* nn is reset for every iv because the inner loop clamps it */
	  if (nv == 1)
	       nn = BLOCKSZ * BLOCKSZ / 8; /* large 1D copy, heuristic */
	  else
	       nn = BLOCKSZ; /* 2D copy */

	  for (in = 0; in < n; in += nn) {
	       /* clamp the final, partial tile along n */
	       if (nn > n - in) nn = n - in;

	       /* the counter advances for every tile, ours or not */
	       if ((nblock++ % nspe) != my_id)
		    continue; /* block is not ours */

	       /*
		* Fetch the tile into A, then store it at the matching
		* output location using the output strides.
		* NOTE(review): this relies on the get having completed
		* before the put starts; presumably X(spu_dma2d) waits
		* for its transfer -- confirm.
		*/
	       X(spu_dma2d)(A, c->I + in * is_bytes + iv * ivs_bytes,
			    nn, is_bytes, nv, ivs_bytes,
			    MFC_GET_CMD);
	       X(spu_dma2d)(A, c->O + in * os_bytes + iv * ovs_bytes,
			    nn, os_bytes, nv, ovs_bytes,
			    MFC_PUT_CMD);
	  }
     }
}