changeset 53:cfbb6c84d009

(none)
author idamnjanovic
date Mon, 14 Mar 2011 16:52:54 +0000
parents a8a32e130893
children 4fef33632323
files Problems/private/ompcore.c
diffstat 1 files changed, 409 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Problems/private/ompcore.c	Mon Mar 14 16:52:54 2011 +0000
@@ -0,0 +1,409 @@
+/**************************************************************************
+ *
+ * File name: ompcore.c
+ *
+ * Ron Rubinstein
+ * Computer Science Department
+ * Technion, Haifa 32000 Israel
+ * ronrubin@cs
+ *
+ * Last Updated: 25.8.2009
+ *
+ *************************************************************************/
+
+
+#include "ompcore.h"
+#include "omputils.h"
+#include "ompprof.h"
+#include "myblas.h"
+#include <math.h>
+#include <string.h>
+
+
+
+/******************************************************************************
+ *                                                                            *
+ *                           Batch-OMP Implementation                         *
+ *                                                                            *
+ ******************************************************************************/  
+
+mxArray* ompcore(double D[], double x[], double DtX[], double XtX[], double G[], mwSize n, mwSize m, mwSize L,
+                 int T, double eps, int gamma_mode, int profile, double msg_delta, int erroromp)
+{
+  
+  profdata pd;
+  mxArray *Gamma;
+  mwIndex i, j, signum, pos, *ind, *gammaIr, *gammaJc, gamma_count;
+  mwSize allocated_coefs, allocated_cols;
+  int DtX_specified, XtX_specified, batchomp, standardomp, *selected_atoms;
+  double *alpha, *r, *Lchol, *c, *Gsub, *Dsub, sum, *gammaPr, *tempvec1, *tempvec2; 
+  double eps2, resnorm, delta, deltaprev, secs_remain;
+  int mins_remain, hrs_remain;
+  clock_t lastprint_time, starttime;
+ 
+  
+  
+  /*** status flags ***/
+  
+  DtX_specified = (DtX!=0);   /* indicates whether D'*x was provided */
+  XtX_specified = (XtX!=0);   /* indicates whether sum(x.*x) was provided */
+  
+  standardomp = (G==0);       /* batch-omp or standard omp are selected depending on availability of G */
+  batchomp = !standardomp;
+  
+  
+  
+  /*** allocate output matrix ***/
+  
+  
+  if (gamma_mode == FULL_GAMMA) {
+    
+    /* allocate full matrix of size m X L */
+    
+    Gamma = mxCreateDoubleMatrix(m, L, mxREAL);
+    gammaPr = mxGetPr(Gamma);
+    gammaIr = 0;
+    gammaJc = 0;
+  }
+  else {
+    
+    /* allocate sparse matrix with room for allocated_coefs nonzeros */
+    
+    /* for error-omp, begin with L*sqrt(n)/2 allocated nonzeros, otherwise allocate L*T nonzeros */
+    allocated_coefs = erroromp ? (mwSize)(ceil(L*sqrt((double)n)/2.0) + 1.01) : L*T;
+    Gamma = mxCreateSparse(m, L, allocated_coefs, mxREAL);
+    gammaPr = mxGetPr(Gamma);
+    gammaIr = mxGetIr(Gamma);
+    gammaJc = mxGetJc(Gamma);
+    gamma_count = 0;
+    gammaJc[0] = 0;
+  }
+  
+  
+  /*** helper arrays ***/
+  
+  alpha = (double*)mxMalloc(m*sizeof(double));        /* contains D'*residual */
+  ind = (mwIndex*)mxMalloc(n*sizeof(mwIndex));        /* indices of selected atoms */
+  selected_atoms = (int*)mxMalloc(m*sizeof(int));     /* binary array with 1's for selected atoms */
+  c = (double*)mxMalloc(n*sizeof(double));            /* orthogonal projection result */
+  
+  /* current number of columns in Dsub / Gsub / Lchol */
+  allocated_cols = erroromp ? (mwSize)(ceil(sqrt((double)n)/2.0) + 1.01) : T;
+  
+  /* Cholesky decomposition of D_I'*D_I */
+  Lchol = (double*)mxMalloc(n*allocated_cols*sizeof(double));
+
+  /* temporary vectors for various computations */
+  tempvec1 = (double*)mxMalloc(m*sizeof(double));
+  tempvec2 = (double*)mxMalloc(m*sizeof(double));
+  
+  if (batchomp) {
+    /* matrix containing G(:,ind) - the columns of G corresponding to the selected atoms, in order of selection */
+    Gsub = (double*)mxMalloc(m*allocated_cols*sizeof(double));
+  }
+  else {
+    /* matrix containing D(:,ind) - the selected atoms from D, in order of selection */
+    Dsub = (double*)mxMalloc(n*allocated_cols*sizeof(double));
+    
+    /* stores the residual */
+    r = (double*)mxMalloc(n*sizeof(double));        
+  }
+  
+  if (!DtX_specified) {
+    /* contains D'*x for the current signal */
+    DtX = (double*)mxMalloc(m*sizeof(double));  
+  }
+  
+  
+  
+  /*** initializations for error omp ***/
+  
+  if (erroromp) {
+    eps2 = eps*eps;        /* compute eps^2 */
+    if (T<0 || T>n) {      /* unspecified max atom num - set max atoms to n */
+      T = n;
+    }
+  }
+  
+  
+  
+  /*** initialize timers ***/
+  
+  initprofdata(&pd);             /* initialize profiling counters */
+  starttime = clock();           /* record starting time for eta computations */
+  lastprint_time = starttime;    /* time of last status display */
+  
+  
+  
+  /**********************   perform omp for each signal   **********************/
+  
+  
+  
+  for (signum=0; signum<L; ++signum) {
+    
+    
+    /* initialize residual norm and deltaprev for error-omp */
+    
+    if (erroromp) {
+      if (XtX_specified) {
+        resnorm = XtX[signum];
+      }
+      else {
+        resnorm = dotprod(x+n*signum, x+n*signum, n);
+        addproftime(&pd, XtX_TIME);
+      }
+      deltaprev = 0;     /* delta tracks the value of gamma'*G*gamma */
+    }
+    else {
+      /* ignore residual norm stopping criterion */
+      eps2 = 0;
+      resnorm = 1;
+    }
+    
+    
+    if (resnorm>eps2 && T>0) {
+      
+      /* compute DtX */
+      
+      if (!DtX_specified) {
+        matT_vec(1, D, x+n*signum, DtX, n, m);
+        addproftime(&pd, DtX_TIME);
+      }
+      
+      
+      /* initialize alpha := DtX */
+      
+      memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double));
+      
+      
+      /* mark all atoms as unselected */
+      
+      for (i=0; i<m; ++i) {
+        selected_atoms[i] = 0;
+      }
+      
+    }
+    
+
+    /* main loop */
+    
+    i=0;
+    while (resnorm>eps2 && i<T) {
+
+      /* index of next atom */
+      
+      pos = maxabs(alpha, m);
+      addproftime(&pd, MAXABS_TIME);
+      
+      
+      /* stop criterion: selected same atom twice, or inner product too small */
+      
+      if (selected_atoms[pos] || alpha[pos]*alpha[pos]<1e-14) {
+        break;
+      }
+      
+      
+      /* mark selected atom */
+      
+      ind[i] = pos;
+      selected_atoms[pos] = 1;
+      
+      
+      /* matrix reallocation */
+      
+      if (erroromp && i>=allocated_cols) {
+        
+        allocated_cols = (mwSize)(ceil(allocated_cols*MAT_INC_FACTOR) + 1.01);
+        
+        Lchol = (double*)mxRealloc(Lchol,n*allocated_cols*sizeof(double));
+        
+        batchomp ? (Gsub = (double*)mxRealloc(Gsub,m*allocated_cols*sizeof(double))) :
+                   (Dsub = (double*)mxRealloc(Dsub,n*allocated_cols*sizeof(double))) ;
+      }
+      
+      
+      /* append column to Gsub or Dsub */
+      
+      if (batchomp) {
+        memcpy(Gsub+i*m, G+pos*m, m*sizeof(double));
+      }
+      else {
+        memcpy(Dsub+i*n, D+pos*n, n*sizeof(double));
+      }
+      
+      
+      /*** Cholesky update ***/
+      
+      if (i==0) {
+        *Lchol = 1;
+      }
+      else {
+        
+        /* incremental Cholesky decomposition: compute next row of Lchol */
+        
+        if (standardomp) {
+          matT_vec(1, Dsub, D+n*pos, tempvec1, n, i);      /* compute tempvec1 := Dsub'*d where d is new atom */
+          addproftime(&pd, DtD_TIME);
+        }
+        else {
+          vec_assign(tempvec1, Gsub+i*m, ind, i);          /* extract tempvec1 := Gsub(ind,i) */
+        }
+        backsubst('L', Lchol, tempvec1, tempvec2, n, i);   /* compute tempvec2 = Lchol \ tempvec1 */
+        for (j=0; j<i; ++j) {                              /* write tempvec2 to end of Lchol */
+          Lchol[j*n+i] = tempvec2[j];
+        }
+        
+        /* compute Lchol(i,i) */
+        sum = 0;
+        for (j=0; j<i; ++j) {         /* compute sum of squares of last row without Lchol(i,i) */
+          sum += SQR(Lchol[j*n+i]);
+        }
+        if ( (1-sum) <= 1e-14 ) {     /* Lchol(i,i) is zero => selected atoms are dependent */
+          break;
+        }
+        Lchol[i*n+i] = sqrt(1-sum);
+      }
+      
+      addproftime(&pd, LCHOL_TIME);
+
+      i++;
+      
+      
+      /* perform orthogonal projection and compute sparse coefficients */
+      
+      vec_assign(tempvec1, DtX + m*signum*DtX_specified, ind, i);   /* extract tempvec1 = DtX(ind) */
+      cholsolve('L', Lchol, tempvec1, c, n, i);                     /* solve LL'c = tempvec1 for c */
+      addproftime(&pd, COMPCOEF_TIME);
+      
+
+      /* update alpha = D'*residual */
+      
+      if (standardomp) {
+        mat_vec(-1, Dsub, c, r, n, i);             /* compute r := -Dsub*c */
+        vec_sum(1, x+n*signum, r, n);              /* compute r := x+r */
+        
+        
+        /*memcpy(r, x+n*signum, n*sizeof(double));   /* assign r := x */
+        /*mat_vec1(-1, Dsub, c, 1, r, n, i);         /* compute r := r-Dsub*c */
+        
+        addproftime(&pd, COMPRES_TIME);
+        matT_vec(1, D, r, alpha, n, m);            /* compute alpha := D'*r */
+        addproftime(&pd, DtR_TIME);
+        
+        /* update residual norm */
+        if (erroromp) {
+          resnorm = dotprod(r, r, n);
+          addproftime(&pd, UPDATE_RESNORM_TIME);
+        }
+      }
+      else {
+        mat_vec(1, Gsub, c, tempvec1, m, i);                              /* compute tempvec1 := Gsub*c */
+        memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double));    /* set alpha = D'*x */
+        vec_sum(-1, tempvec1, alpha, m);                                  /* compute alpha := alpha - tempvec1 */
+        addproftime(&pd, UPDATE_DtR_TIME);
+        
+        /* update residual norm */
+        if (erroromp) {
+          vec_assign(tempvec2, tempvec1, ind, i);      /* assign tempvec2 := tempvec1(ind) */
+          delta = dotprod(c,tempvec2,i);               /* compute c'*tempvec2 */
+          resnorm = resnorm - delta + deltaprev;       /* residual norm update */
+          deltaprev = delta;
+          addproftime(&pd, UPDATE_RESNORM_TIME);
+        }
+      }
+    }
+    
+    
+    /*** generate output vector gamma ***/
+
+    if (gamma_mode == FULL_GAMMA) {    /* write the coefs in c to their correct positions in gamma */
+      for (j=0; j<i; ++j) {
+        gammaPr[m*signum + ind[j]] = c[j];
+      }
+    }
+    else {
+      /* sort the coefs by index before writing them to gamma */
+      quicksort(ind,c,i);
+      addproftime(&pd, INDEXSORT_TIME);
+      
+      /* gamma is full - reallocate */
+      if (gamma_count+i >= allocated_coefs) {
+        
+        while(gamma_count+i >= allocated_coefs) {
+          allocated_coefs = (mwSize)(ceil(GAMMA_INC_FACTOR*allocated_coefs) + 1.01);
+        }
+        
+        mxSetNzmax(Gamma, allocated_coefs);
+        mxSetPr(Gamma, mxRealloc(gammaPr, allocated_coefs*sizeof(double)));
+        mxSetIr(Gamma, mxRealloc(gammaIr, allocated_coefs*sizeof(mwIndex)));
+        
+        gammaPr = mxGetPr(Gamma);
+        gammaIr = mxGetIr(Gamma);
+      }
+      
+      /* append coefs to gamma and update the indices */
+      for (j=0; j<i; ++j) {
+        gammaPr[gamma_count] = c[j];
+        gammaIr[gamma_count] = ind[j];
+        gamma_count++;
+      }
+      gammaJc[signum+1] = gammaJc[signum] + i;
+    }
+    
+    
+    
+    /*** display status messages ***/
+    
+    if (msg_delta>0 && (clock()-lastprint_time)/(double)CLOCKS_PER_SEC >= msg_delta)
+    {
+      lastprint_time = clock();
+      
+      /* estimated remainig time */
+      secs2hms( ((L-signum-1)/(double)(signum+1)) * ((lastprint_time-starttime)/(double)CLOCKS_PER_SEC) ,
+        &hrs_remain, &mins_remain, &secs_remain);
+      
+      mexPrintf("omp: signal %d / %d, estimated remaining time: %02d:%02d:%05.2f\n",        
+        signum+1, L, hrs_remain, mins_remain, secs_remain);
+      mexEvalString("drawnow;");
+    }
+    
+  }
+  
+  /* end omp */
+  
+  
+  
+  /*** print final messages ***/
+  
+  if (msg_delta>0) {
+    mexPrintf("omp: signal %d / %d\n", signum, L);
+  }
+  
+  if (profile) {
+    printprofinfo(&pd, erroromp, batchomp, L);
+  }
+  
+  
+  
+  /* free memory */
+  
+  if (!DtX_specified) {
+    mxFree(DtX);
+  }
+  if (standardomp) {
+    mxFree(r);
+    mxFree(Dsub);
+  }
+  else {
+    mxFree(Gsub);
+  }  
+  mxFree(tempvec2);
+  mxFree(tempvec1);
+  mxFree(Lchol);
+  mxFree(c);
+  mxFree(selected_atoms);
+  mxFree(ind);
+  mxFree(alpha);
+  
+  return Gamma;
+}