idamnjanovic@60: /************************************************************************** idamnjanovic@60: * idamnjanovic@60: * File name: ompcore.c idamnjanovic@60: * idamnjanovic@60: * Ron Rubinstein idamnjanovic@60: * Computer Science Department idamnjanovic@60: * Technion, Haifa 32000 Israel idamnjanovic@60: * ronrubin@cs idamnjanovic@60: * idamnjanovic@60: * Last Updated: 25.8.2009 idamnjanovic@60: * idamnjanovic@60: *************************************************************************/ idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: #include "ompcore.h" idamnjanovic@60: #include "omputils.h" idamnjanovic@60: #include "ompprof.h" idamnjanovic@60: #include "myblas.h" idamnjanovic@60: #include idamnjanovic@60: #include idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /****************************************************************************** idamnjanovic@60: * * idamnjanovic@60: * Batch-OMP Implementation * idamnjanovic@60: * * idamnjanovic@60: ******************************************************************************/ idamnjanovic@60: idamnjanovic@60: mxArray* ompcore(double D[], double x[], double DtX[], double XtX[], double G[], mwSize n, mwSize m, mwSize L, idamnjanovic@60: int T, double eps, int gamma_mode, int profile, double msg_delta, int erroromp) idamnjanovic@60: { idamnjanovic@60: idamnjanovic@60: profdata pd; idamnjanovic@60: mxArray *Gamma; idamnjanovic@60: mwIndex i, j, signum, pos, *ind, *gammaIr, *gammaJc, gamma_count; idamnjanovic@60: mwSize allocated_coefs, allocated_cols; idamnjanovic@60: int DtX_specified, XtX_specified, batchomp, standardomp, *selected_atoms; idamnjanovic@60: double *alpha, *r, *Lchol, *c, *Gsub, *Dsub, sum, *gammaPr, *tempvec1, *tempvec2; idamnjanovic@60: double eps2, resnorm, delta, deltaprev, secs_remain; idamnjanovic@60: int mins_remain, hrs_remain; idamnjanovic@60: clock_t lastprint_time, starttime; idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /*** status flags ***/ idamnjanovic@60: idamnjanovic@60: DtX_specified = (DtX!=0); /* indicates whether D'*x was provided */ idamnjanovic@60: XtX_specified = (XtX!=0); /* indicates whether sum(x.*x) was provided */ idamnjanovic@60: idamnjanovic@60: standardomp = (G==0); /* batch-omp or standard omp are selected depending on availability of G */ idamnjanovic@60: batchomp = !standardomp; idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /*** allocate output matrix ***/ idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: if (gamma_mode == FULL_GAMMA) { idamnjanovic@60: idamnjanovic@60: /* allocate full matrix of size m X L */ idamnjanovic@60: idamnjanovic@60: Gamma = mxCreateDoubleMatrix(m, L, mxREAL); idamnjanovic@60: gammaPr = mxGetPr(Gamma); idamnjanovic@60: gammaIr = 0; idamnjanovic@60: gammaJc = 0; idamnjanovic@60: } idamnjanovic@60: else { idamnjanovic@60: idamnjanovic@60: /* allocate sparse matrix with room for allocated_coefs nonzeros */ idamnjanovic@60: idamnjanovic@60: /* for error-omp, begin with L*sqrt(n)/2 allocated nonzeros, otherwise allocate L*T nonzeros */ idamnjanovic@60: allocated_coefs = erroromp ? (mwSize)(ceil(L*sqrt((double)n)/2.0) + 1.01) : L*T; idamnjanovic@60: Gamma = mxCreateSparse(m, L, allocated_coefs, mxREAL); idamnjanovic@60: gammaPr = mxGetPr(Gamma); idamnjanovic@60: gammaIr = mxGetIr(Gamma); idamnjanovic@60: gammaJc = mxGetJc(Gamma); idamnjanovic@60: gamma_count = 0; idamnjanovic@60: gammaJc[0] = 0; idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /*** helper arrays ***/ idamnjanovic@60: idamnjanovic@60: alpha = (double*)mxMalloc(m*sizeof(double)); /* contains D'*residual */ idamnjanovic@60: ind = (mwIndex*)mxMalloc(n*sizeof(mwIndex)); /* indices of selected atoms */ idamnjanovic@60: selected_atoms = (int*)mxMalloc(m*sizeof(int)); /* binary array with 1's for selected atoms */ idamnjanovic@60: c = (double*)mxMalloc(n*sizeof(double)); /* orthogonal projection result */ idamnjanovic@60: idamnjanovic@60: /* current number of columns in Dsub / Gsub / Lchol */ idamnjanovic@60: allocated_cols = erroromp ? (mwSize)(ceil(sqrt((double)n)/2.0) + 1.01) : T; idamnjanovic@60: idamnjanovic@60: /* Cholesky decomposition of D_I'*D_I */ idamnjanovic@60: Lchol = (double*)mxMalloc(n*allocated_cols*sizeof(double)); idamnjanovic@60: idamnjanovic@60: /* temporary vectors for various computations */ idamnjanovic@60: tempvec1 = (double*)mxMalloc(m*sizeof(double)); idamnjanovic@60: tempvec2 = (double*)mxMalloc(m*sizeof(double)); idamnjanovic@60: idamnjanovic@60: if (batchomp) { idamnjanovic@60: /* matrix containing G(:,ind) - the columns of G corresponding to the selected atoms, in order of selection */ idamnjanovic@60: Gsub = (double*)mxMalloc(m*allocated_cols*sizeof(double)); idamnjanovic@60: } idamnjanovic@60: else { idamnjanovic@60: /* matrix containing D(:,ind) - the selected atoms from D, in order of selection */ idamnjanovic@60: Dsub = (double*)mxMalloc(n*allocated_cols*sizeof(double)); idamnjanovic@60: idamnjanovic@60: /* stores the residual */ idamnjanovic@60: r = (double*)mxMalloc(n*sizeof(double)); idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: if (!DtX_specified) { idamnjanovic@60: /* contains D'*x for the current signal */ idamnjanovic@60: DtX = (double*)mxMalloc(m*sizeof(double)); idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /*** initializations for error omp ***/ idamnjanovic@60: idamnjanovic@60: if (erroromp) { idamnjanovic@60: eps2 = eps*eps; /* compute eps^2 */ idamnjanovic@60: if (T<0 || T>n) { /* unspecified max atom num - set max atoms to n */ idamnjanovic@60: T = n; idamnjanovic@60: } idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /*** initialize timers ***/ idamnjanovic@60: idamnjanovic@60: initprofdata(&pd); /* initialize profiling counters */ idamnjanovic@60: starttime = clock(); /* record starting time for eta computations */ idamnjanovic@60: lastprint_time = starttime; /* time of last status display */ idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /********************** perform omp for each signal **********************/ idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: for (signum=0; signumeps2 && T>0) { idamnjanovic@60: idamnjanovic@60: /* compute DtX */ idamnjanovic@60: idamnjanovic@60: if (!DtX_specified) { idamnjanovic@60: matT_vec(1, D, x+n*signum, DtX, n, m); idamnjanovic@60: addproftime(&pd, DtX_TIME); idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /* initialize alpha := DtX */ idamnjanovic@60: idamnjanovic@60: memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double)); idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /* mark all atoms as unselected */ idamnjanovic@60: idamnjanovic@60: for (i=0; ieps2 && i=allocated_cols) { idamnjanovic@60: idamnjanovic@60: allocated_cols = (mwSize)(ceil(allocated_cols*MAT_INC_FACTOR) + 1.01); idamnjanovic@60: idamnjanovic@60: Lchol = (double*)mxRealloc(Lchol,n*allocated_cols*sizeof(double)); idamnjanovic@60: idamnjanovic@60: batchomp ? (Gsub = (double*)mxRealloc(Gsub,m*allocated_cols*sizeof(double))) : idamnjanovic@60: (Dsub = (double*)mxRealloc(Dsub,n*allocated_cols*sizeof(double))) ; idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /* append column to Gsub or Dsub */ idamnjanovic@60: idamnjanovic@60: if (batchomp) { idamnjanovic@60: memcpy(Gsub+i*m, G+pos*m, m*sizeof(double)); idamnjanovic@60: } idamnjanovic@60: else { idamnjanovic@60: memcpy(Dsub+i*n, D+pos*n, n*sizeof(double)); idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /*** Cholesky update ***/ idamnjanovic@60: idamnjanovic@60: if (i==0) { idamnjanovic@60: *Lchol = 1; idamnjanovic@60: } idamnjanovic@60: else { idamnjanovic@60: idamnjanovic@60: /* incremental Cholesky decomposition: compute next row of Lchol */ idamnjanovic@60: idamnjanovic@60: if (standardomp) { idamnjanovic@60: matT_vec(1, Dsub, D+n*pos, tempvec1, n, i); /* compute tempvec1 := Dsub'*d where d is new atom */ idamnjanovic@60: addproftime(&pd, DtD_TIME); idamnjanovic@60: } idamnjanovic@60: else { idamnjanovic@60: vec_assign(tempvec1, Gsub+i*m, ind, i); /* extract tempvec1 := Gsub(ind,i) */ idamnjanovic@60: } idamnjanovic@60: backsubst('L', Lchol, tempvec1, tempvec2, n, i); /* compute tempvec2 = Lchol \ tempvec1 */ idamnjanovic@60: for (j=0; j selected atoms are dependent */ idamnjanovic@60: break; idamnjanovic@60: } idamnjanovic@60: Lchol[i*n+i] = sqrt(1-sum); idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: addproftime(&pd, LCHOL_TIME); idamnjanovic@60: idamnjanovic@60: i++; idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /* perform orthogonal projection and compute sparse coefficients */ idamnjanovic@60: idamnjanovic@60: vec_assign(tempvec1, DtX + m*signum*DtX_specified, ind, i); /* extract tempvec1 = DtX(ind) */ idamnjanovic@60: cholsolve('L', Lchol, tempvec1, c, n, i); /* solve LL'c = tempvec1 for c */ idamnjanovic@60: addproftime(&pd, COMPCOEF_TIME); idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /* update alpha = D'*residual */ idamnjanovic@60: idamnjanovic@60: if (standardomp) { idamnjanovic@60: mat_vec(-1, Dsub, c, r, n, i); /* compute r := -Dsub*c */ idamnjanovic@60: vec_sum(1, x+n*signum, r, n); /* compute r := x+r */ idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /*memcpy(r, x+n*signum, n*sizeof(double)); /* assign r := x */ idamnjanovic@60: /*mat_vec1(-1, Dsub, c, 1, r, n, i); /* compute r := r-Dsub*c */ idamnjanovic@60: idamnjanovic@60: addproftime(&pd, COMPRES_TIME); idamnjanovic@60: matT_vec(1, D, r, alpha, n, m); /* compute alpha := D'*r */ idamnjanovic@60: addproftime(&pd, DtR_TIME); idamnjanovic@60: idamnjanovic@60: /* update residual norm */ idamnjanovic@60: if (erroromp) { idamnjanovic@60: resnorm = dotprod(r, r, n); idamnjanovic@60: addproftime(&pd, UPDATE_RESNORM_TIME); idamnjanovic@60: } idamnjanovic@60: } idamnjanovic@60: else { idamnjanovic@60: mat_vec(1, Gsub, c, tempvec1, m, i); /* compute tempvec1 := Gsub*c */ idamnjanovic@60: memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double)); /* set alpha = D'*x */ idamnjanovic@60: vec_sum(-1, tempvec1, alpha, m); /* compute alpha := alpha - tempvec1 */ idamnjanovic@60: addproftime(&pd, UPDATE_DtR_TIME); idamnjanovic@60: idamnjanovic@60: /* update residual norm */ idamnjanovic@60: if (erroromp) { idamnjanovic@60: vec_assign(tempvec2, tempvec1, ind, i); /* assign tempvec2 := tempvec1(ind) */ idamnjanovic@60: delta = dotprod(c,tempvec2,i); /* compute c'*tempvec2 */ idamnjanovic@60: resnorm = resnorm - delta + deltaprev; /* residual norm update */ idamnjanovic@60: deltaprev = delta; idamnjanovic@60: addproftime(&pd, UPDATE_RESNORM_TIME); idamnjanovic@60: } idamnjanovic@60: } idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /*** generate output vector gamma ***/ idamnjanovic@60: idamnjanovic@60: if (gamma_mode == FULL_GAMMA) { /* write the coefs in c to their correct positions in gamma */ idamnjanovic@60: for (j=0; j= allocated_coefs) { idamnjanovic@60: idamnjanovic@60: while(gamma_count+i >= allocated_coefs) { idamnjanovic@60: allocated_coefs = (mwSize)(ceil(GAMMA_INC_FACTOR*allocated_coefs) + 1.01); idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: mxSetNzmax(Gamma, allocated_coefs); idamnjanovic@60: mxSetPr(Gamma, mxRealloc(gammaPr, allocated_coefs*sizeof(double))); idamnjanovic@60: mxSetIr(Gamma, mxRealloc(gammaIr, allocated_coefs*sizeof(mwIndex))); idamnjanovic@60: idamnjanovic@60: gammaPr = mxGetPr(Gamma); idamnjanovic@60: gammaIr = mxGetIr(Gamma); idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: /* append coefs to gamma and update the indices */ idamnjanovic@60: for (j=0; j0 && (clock()-lastprint_time)/(double)CLOCKS_PER_SEC >= msg_delta) idamnjanovic@60: { idamnjanovic@60: lastprint_time = clock(); idamnjanovic@60: idamnjanovic@60: /* estimated remainig time */ idamnjanovic@60: secs2hms( ((L-signum-1)/(double)(signum+1)) * ((lastprint_time-starttime)/(double)CLOCKS_PER_SEC) , idamnjanovic@60: &hrs_remain, &mins_remain, &secs_remain); idamnjanovic@60: idamnjanovic@60: mexPrintf("omp: signal %d / %d, estimated remaining time: %02d:%02d:%05.2f\n", idamnjanovic@60: signum+1, L, hrs_remain, mins_remain, secs_remain); idamnjanovic@60: mexEvalString("drawnow;"); idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: /* end omp */ idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /*** print final messages ***/ idamnjanovic@60: idamnjanovic@60: if (msg_delta>0) { idamnjanovic@60: mexPrintf("omp: signal %d / %d\n", signum, L); idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: if (profile) { idamnjanovic@60: printprofinfo(&pd, erroromp, batchomp, L); idamnjanovic@60: } idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: idamnjanovic@60: /* free memory */ idamnjanovic@60: idamnjanovic@60: if (!DtX_specified) { idamnjanovic@60: mxFree(DtX); idamnjanovic@60: } idamnjanovic@60: if (standardomp) { idamnjanovic@60: mxFree(r); idamnjanovic@60: mxFree(Dsub); idamnjanovic@60: } idamnjanovic@60: else { idamnjanovic@60: mxFree(Gsub); idamnjanovic@60: } idamnjanovic@60: mxFree(tempvec2); idamnjanovic@60: mxFree(tempvec1); idamnjanovic@60: mxFree(Lchol); idamnjanovic@60: mxFree(c); idamnjanovic@60: mxFree(selected_atoms); idamnjanovic@60: mxFree(ind); idamnjanovic@60: mxFree(alpha); idamnjanovic@60: idamnjanovic@60: return Gamma; idamnjanovic@60: }