annotate DL/RLS-DLA/private/ompcore.c @ 60:ad36f80e2ccf

(none)
author idamnjanovic
date Tue, 15 Mar 2011 12:20:59 +0000
parents
children
rev   line source
idamnjanovic@60 1 /**************************************************************************
idamnjanovic@60 2 *
idamnjanovic@60 3 * File name: ompcore.c
idamnjanovic@60 4 *
idamnjanovic@60 5 * Ron Rubinstein
idamnjanovic@60 6 * Computer Science Department
idamnjanovic@60 7 * Technion, Haifa 32000 Israel
idamnjanovic@60 8 * ronrubin@cs
idamnjanovic@60 9 *
idamnjanovic@60 10 * Last Updated: 25.8.2009
idamnjanovic@60 11 *
idamnjanovic@60 12 *************************************************************************/
idamnjanovic@60 13
idamnjanovic@60 14
idamnjanovic@60 15 #include "ompcore.h"
idamnjanovic@60 16 #include "omputils.h"
idamnjanovic@60 17 #include "ompprof.h"
idamnjanovic@60 18 #include "myblas.h"
idamnjanovic@60 19 #include <math.h>
idamnjanovic@60 20 #include <string.h>
idamnjanovic@60 21
idamnjanovic@60 22
idamnjanovic@60 23
idamnjanovic@60 24 /******************************************************************************
idamnjanovic@60 25 * *
idamnjanovic@60 26 * Batch-OMP Implementation *
idamnjanovic@60 27 * *
idamnjanovic@60 28 ******************************************************************************/
idamnjanovic@60 29
idamnjanovic@60 30 mxArray* ompcore(double D[], double x[], double DtX[], double XtX[], double G[], mwSize n, mwSize m, mwSize L,
idamnjanovic@60 31 int T, double eps, int gamma_mode, int profile, double msg_delta, int erroromp)
idamnjanovic@60 32 {
idamnjanovic@60 33
idamnjanovic@60 34 profdata pd;
idamnjanovic@60 35 mxArray *Gamma;
idamnjanovic@60 36 mwIndex i, j, signum, pos, *ind, *gammaIr, *gammaJc, gamma_count;
idamnjanovic@60 37 mwSize allocated_coefs, allocated_cols;
idamnjanovic@60 38 int DtX_specified, XtX_specified, batchomp, standardomp, *selected_atoms;
idamnjanovic@60 39 double *alpha, *r, *Lchol, *c, *Gsub, *Dsub, sum, *gammaPr, *tempvec1, *tempvec2;
idamnjanovic@60 40 double eps2, resnorm, delta, deltaprev, secs_remain;
idamnjanovic@60 41 int mins_remain, hrs_remain;
idamnjanovic@60 42 clock_t lastprint_time, starttime;
idamnjanovic@60 43
idamnjanovic@60 44
idamnjanovic@60 45
idamnjanovic@60 46 /*** status flags ***/
idamnjanovic@60 47
idamnjanovic@60 48 DtX_specified = (DtX!=0); /* indicates whether D'*x was provided */
idamnjanovic@60 49 XtX_specified = (XtX!=0); /* indicates whether sum(x.*x) was provided */
idamnjanovic@60 50
idamnjanovic@60 51 standardomp = (G==0); /* batch-omp or standard omp are selected depending on availability of G */
idamnjanovic@60 52 batchomp = !standardomp;
idamnjanovic@60 53
idamnjanovic@60 54
idamnjanovic@60 55
idamnjanovic@60 56 /*** allocate output matrix ***/
idamnjanovic@60 57
idamnjanovic@60 58
idamnjanovic@60 59 if (gamma_mode == FULL_GAMMA) {
idamnjanovic@60 60
idamnjanovic@60 61 /* allocate full matrix of size m X L */
idamnjanovic@60 62
idamnjanovic@60 63 Gamma = mxCreateDoubleMatrix(m, L, mxREAL);
idamnjanovic@60 64 gammaPr = mxGetPr(Gamma);
idamnjanovic@60 65 gammaIr = 0;
idamnjanovic@60 66 gammaJc = 0;
idamnjanovic@60 67 }
idamnjanovic@60 68 else {
idamnjanovic@60 69
idamnjanovic@60 70 /* allocate sparse matrix with room for allocated_coefs nonzeros */
idamnjanovic@60 71
idamnjanovic@60 72 /* for error-omp, begin with L*sqrt(n)/2 allocated nonzeros, otherwise allocate L*T nonzeros */
idamnjanovic@60 73 allocated_coefs = erroromp ? (mwSize)(ceil(L*sqrt((double)n)/2.0) + 1.01) : L*T;
idamnjanovic@60 74 Gamma = mxCreateSparse(m, L, allocated_coefs, mxREAL);
idamnjanovic@60 75 gammaPr = mxGetPr(Gamma);
idamnjanovic@60 76 gammaIr = mxGetIr(Gamma);
idamnjanovic@60 77 gammaJc = mxGetJc(Gamma);
idamnjanovic@60 78 gamma_count = 0;
idamnjanovic@60 79 gammaJc[0] = 0;
idamnjanovic@60 80 }
idamnjanovic@60 81
idamnjanovic@60 82
idamnjanovic@60 83 /*** helper arrays ***/
idamnjanovic@60 84
idamnjanovic@60 85 alpha = (double*)mxMalloc(m*sizeof(double)); /* contains D'*residual */
idamnjanovic@60 86 ind = (mwIndex*)mxMalloc(n*sizeof(mwIndex)); /* indices of selected atoms */
idamnjanovic@60 87 selected_atoms = (int*)mxMalloc(m*sizeof(int)); /* binary array with 1's for selected atoms */
idamnjanovic@60 88 c = (double*)mxMalloc(n*sizeof(double)); /* orthogonal projection result */
idamnjanovic@60 89
idamnjanovic@60 90 /* current number of columns in Dsub / Gsub / Lchol */
idamnjanovic@60 91 allocated_cols = erroromp ? (mwSize)(ceil(sqrt((double)n)/2.0) + 1.01) : T;
idamnjanovic@60 92
idamnjanovic@60 93 /* Cholesky decomposition of D_I'*D_I */
idamnjanovic@60 94 Lchol = (double*)mxMalloc(n*allocated_cols*sizeof(double));
idamnjanovic@60 95
idamnjanovic@60 96 /* temporary vectors for various computations */
idamnjanovic@60 97 tempvec1 = (double*)mxMalloc(m*sizeof(double));
idamnjanovic@60 98 tempvec2 = (double*)mxMalloc(m*sizeof(double));
idamnjanovic@60 99
idamnjanovic@60 100 if (batchomp) {
idamnjanovic@60 101 /* matrix containing G(:,ind) - the columns of G corresponding to the selected atoms, in order of selection */
idamnjanovic@60 102 Gsub = (double*)mxMalloc(m*allocated_cols*sizeof(double));
idamnjanovic@60 103 }
idamnjanovic@60 104 else {
idamnjanovic@60 105 /* matrix containing D(:,ind) - the selected atoms from D, in order of selection */
idamnjanovic@60 106 Dsub = (double*)mxMalloc(n*allocated_cols*sizeof(double));
idamnjanovic@60 107
idamnjanovic@60 108 /* stores the residual */
idamnjanovic@60 109 r = (double*)mxMalloc(n*sizeof(double));
idamnjanovic@60 110 }
idamnjanovic@60 111
idamnjanovic@60 112 if (!DtX_specified) {
idamnjanovic@60 113 /* contains D'*x for the current signal */
idamnjanovic@60 114 DtX = (double*)mxMalloc(m*sizeof(double));
idamnjanovic@60 115 }
idamnjanovic@60 116
idamnjanovic@60 117
idamnjanovic@60 118
idamnjanovic@60 119 /*** initializations for error omp ***/
idamnjanovic@60 120
idamnjanovic@60 121 if (erroromp) {
idamnjanovic@60 122 eps2 = eps*eps; /* compute eps^2 */
idamnjanovic@60 123 if (T<0 || T>n) { /* unspecified max atom num - set max atoms to n */
idamnjanovic@60 124 T = n;
idamnjanovic@60 125 }
idamnjanovic@60 126 }
idamnjanovic@60 127
idamnjanovic@60 128
idamnjanovic@60 129
idamnjanovic@60 130 /*** initialize timers ***/
idamnjanovic@60 131
idamnjanovic@60 132 initprofdata(&pd); /* initialize profiling counters */
idamnjanovic@60 133 starttime = clock(); /* record starting time for eta computations */
idamnjanovic@60 134 lastprint_time = starttime; /* time of last status display */
idamnjanovic@60 135
idamnjanovic@60 136
idamnjanovic@60 137
idamnjanovic@60 138 /********************** perform omp for each signal **********************/
idamnjanovic@60 139
idamnjanovic@60 140
idamnjanovic@60 141
idamnjanovic@60 142 for (signum=0; signum<L; ++signum) {
idamnjanovic@60 143
idamnjanovic@60 144
idamnjanovic@60 145 /* initialize residual norm and deltaprev for error-omp */
idamnjanovic@60 146
idamnjanovic@60 147 if (erroromp) {
idamnjanovic@60 148 if (XtX_specified) {
idamnjanovic@60 149 resnorm = XtX[signum];
idamnjanovic@60 150 }
idamnjanovic@60 151 else {
idamnjanovic@60 152 resnorm = dotprod(x+n*signum, x+n*signum, n);
idamnjanovic@60 153 addproftime(&pd, XtX_TIME);
idamnjanovic@60 154 }
idamnjanovic@60 155 deltaprev = 0; /* delta tracks the value of gamma'*G*gamma */
idamnjanovic@60 156 }
idamnjanovic@60 157 else {
idamnjanovic@60 158 /* ignore residual norm stopping criterion */
idamnjanovic@60 159 eps2 = 0;
idamnjanovic@60 160 resnorm = 1;
idamnjanovic@60 161 }
idamnjanovic@60 162
idamnjanovic@60 163
idamnjanovic@60 164 if (resnorm>eps2 && T>0) {
idamnjanovic@60 165
idamnjanovic@60 166 /* compute DtX */
idamnjanovic@60 167
idamnjanovic@60 168 if (!DtX_specified) {
idamnjanovic@60 169 matT_vec(1, D, x+n*signum, DtX, n, m);
idamnjanovic@60 170 addproftime(&pd, DtX_TIME);
idamnjanovic@60 171 }
idamnjanovic@60 172
idamnjanovic@60 173
idamnjanovic@60 174 /* initialize alpha := DtX */
idamnjanovic@60 175
idamnjanovic@60 176 memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double));
idamnjanovic@60 177
idamnjanovic@60 178
idamnjanovic@60 179 /* mark all atoms as unselected */
idamnjanovic@60 180
idamnjanovic@60 181 for (i=0; i<m; ++i) {
idamnjanovic@60 182 selected_atoms[i] = 0;
idamnjanovic@60 183 }
idamnjanovic@60 184
idamnjanovic@60 185 }
idamnjanovic@60 186
idamnjanovic@60 187
idamnjanovic@60 188 /* main loop */
idamnjanovic@60 189
idamnjanovic@60 190 i=0;
idamnjanovic@60 191 while (resnorm>eps2 && i<T) {
idamnjanovic@60 192
idamnjanovic@60 193 /* index of next atom */
idamnjanovic@60 194
idamnjanovic@60 195 pos = maxabs(alpha, m);
idamnjanovic@60 196 addproftime(&pd, MAXABS_TIME);
idamnjanovic@60 197
idamnjanovic@60 198
idamnjanovic@60 199 /* stop criterion: selected same atom twice, or inner product too small */
idamnjanovic@60 200
idamnjanovic@60 201 if (selected_atoms[pos] || alpha[pos]*alpha[pos]<1e-14) {
idamnjanovic@60 202 break;
idamnjanovic@60 203 }
idamnjanovic@60 204
idamnjanovic@60 205
idamnjanovic@60 206 /* mark selected atom */
idamnjanovic@60 207
idamnjanovic@60 208 ind[i] = pos;
idamnjanovic@60 209 selected_atoms[pos] = 1;
idamnjanovic@60 210
idamnjanovic@60 211
idamnjanovic@60 212 /* matrix reallocation */
idamnjanovic@60 213
idamnjanovic@60 214 if (erroromp && i>=allocated_cols) {
idamnjanovic@60 215
idamnjanovic@60 216 allocated_cols = (mwSize)(ceil(allocated_cols*MAT_INC_FACTOR) + 1.01);
idamnjanovic@60 217
idamnjanovic@60 218 Lchol = (double*)mxRealloc(Lchol,n*allocated_cols*sizeof(double));
idamnjanovic@60 219
idamnjanovic@60 220 batchomp ? (Gsub = (double*)mxRealloc(Gsub,m*allocated_cols*sizeof(double))) :
idamnjanovic@60 221 (Dsub = (double*)mxRealloc(Dsub,n*allocated_cols*sizeof(double))) ;
idamnjanovic@60 222 }
idamnjanovic@60 223
idamnjanovic@60 224
idamnjanovic@60 225 /* append column to Gsub or Dsub */
idamnjanovic@60 226
idamnjanovic@60 227 if (batchomp) {
idamnjanovic@60 228 memcpy(Gsub+i*m, G+pos*m, m*sizeof(double));
idamnjanovic@60 229 }
idamnjanovic@60 230 else {
idamnjanovic@60 231 memcpy(Dsub+i*n, D+pos*n, n*sizeof(double));
idamnjanovic@60 232 }
idamnjanovic@60 233
idamnjanovic@60 234
idamnjanovic@60 235 /*** Cholesky update ***/
idamnjanovic@60 236
idamnjanovic@60 237 if (i==0) {
idamnjanovic@60 238 *Lchol = 1;
idamnjanovic@60 239 }
idamnjanovic@60 240 else {
idamnjanovic@60 241
idamnjanovic@60 242 /* incremental Cholesky decomposition: compute next row of Lchol */
idamnjanovic@60 243
idamnjanovic@60 244 if (standardomp) {
idamnjanovic@60 245 matT_vec(1, Dsub, D+n*pos, tempvec1, n, i); /* compute tempvec1 := Dsub'*d where d is new atom */
idamnjanovic@60 246 addproftime(&pd, DtD_TIME);
idamnjanovic@60 247 }
idamnjanovic@60 248 else {
idamnjanovic@60 249 vec_assign(tempvec1, Gsub+i*m, ind, i); /* extract tempvec1 := Gsub(ind,i) */
idamnjanovic@60 250 }
idamnjanovic@60 251 backsubst('L', Lchol, tempvec1, tempvec2, n, i); /* compute tempvec2 = Lchol \ tempvec1 */
idamnjanovic@60 252 for (j=0; j<i; ++j) { /* write tempvec2 to end of Lchol */
idamnjanovic@60 253 Lchol[j*n+i] = tempvec2[j];
idamnjanovic@60 254 }
idamnjanovic@60 255
idamnjanovic@60 256 /* compute Lchol(i,i) */
idamnjanovic@60 257 sum = 0;
idamnjanovic@60 258 for (j=0; j<i; ++j) { /* compute sum of squares of last row without Lchol(i,i) */
idamnjanovic@60 259 sum += SQR(Lchol[j*n+i]);
idamnjanovic@60 260 }
idamnjanovic@60 261 if ( (1-sum) <= 1e-14 ) { /* Lchol(i,i) is zero => selected atoms are dependent */
idamnjanovic@60 262 break;
idamnjanovic@60 263 }
idamnjanovic@60 264 Lchol[i*n+i] = sqrt(1-sum);
idamnjanovic@60 265 }
idamnjanovic@60 266
idamnjanovic@60 267 addproftime(&pd, LCHOL_TIME);
idamnjanovic@60 268
idamnjanovic@60 269 i++;
idamnjanovic@60 270
idamnjanovic@60 271
idamnjanovic@60 272 /* perform orthogonal projection and compute sparse coefficients */
idamnjanovic@60 273
idamnjanovic@60 274 vec_assign(tempvec1, DtX + m*signum*DtX_specified, ind, i); /* extract tempvec1 = DtX(ind) */
idamnjanovic@60 275 cholsolve('L', Lchol, tempvec1, c, n, i); /* solve LL'c = tempvec1 for c */
idamnjanovic@60 276 addproftime(&pd, COMPCOEF_TIME);
idamnjanovic@60 277
idamnjanovic@60 278
idamnjanovic@60 279 /* update alpha = D'*residual */
idamnjanovic@60 280
idamnjanovic@60 281 if (standardomp) {
idamnjanovic@60 282 mat_vec(-1, Dsub, c, r, n, i); /* compute r := -Dsub*c */
idamnjanovic@60 283 vec_sum(1, x+n*signum, r, n); /* compute r := x+r */
idamnjanovic@60 284
idamnjanovic@60 285
idamnjanovic@60 286 /*memcpy(r, x+n*signum, n*sizeof(double)); /* assign r := x */
idamnjanovic@60 287 /*mat_vec1(-1, Dsub, c, 1, r, n, i); /* compute r := r-Dsub*c */
idamnjanovic@60 288
idamnjanovic@60 289 addproftime(&pd, COMPRES_TIME);
idamnjanovic@60 290 matT_vec(1, D, r, alpha, n, m); /* compute alpha := D'*r */
idamnjanovic@60 291 addproftime(&pd, DtR_TIME);
idamnjanovic@60 292
idamnjanovic@60 293 /* update residual norm */
idamnjanovic@60 294 if (erroromp) {
idamnjanovic@60 295 resnorm = dotprod(r, r, n);
idamnjanovic@60 296 addproftime(&pd, UPDATE_RESNORM_TIME);
idamnjanovic@60 297 }
idamnjanovic@60 298 }
idamnjanovic@60 299 else {
idamnjanovic@60 300 mat_vec(1, Gsub, c, tempvec1, m, i); /* compute tempvec1 := Gsub*c */
idamnjanovic@60 301 memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double)); /* set alpha = D'*x */
idamnjanovic@60 302 vec_sum(-1, tempvec1, alpha, m); /* compute alpha := alpha - tempvec1 */
idamnjanovic@60 303 addproftime(&pd, UPDATE_DtR_TIME);
idamnjanovic@60 304
idamnjanovic@60 305 /* update residual norm */
idamnjanovic@60 306 if (erroromp) {
idamnjanovic@60 307 vec_assign(tempvec2, tempvec1, ind, i); /* assign tempvec2 := tempvec1(ind) */
idamnjanovic@60 308 delta = dotprod(c,tempvec2,i); /* compute c'*tempvec2 */
idamnjanovic@60 309 resnorm = resnorm - delta + deltaprev; /* residual norm update */
idamnjanovic@60 310 deltaprev = delta;
idamnjanovic@60 311 addproftime(&pd, UPDATE_RESNORM_TIME);
idamnjanovic@60 312 }
idamnjanovic@60 313 }
idamnjanovic@60 314 }
idamnjanovic@60 315
idamnjanovic@60 316
idamnjanovic@60 317 /*** generate output vector gamma ***/
idamnjanovic@60 318
idamnjanovic@60 319 if (gamma_mode == FULL_GAMMA) { /* write the coefs in c to their correct positions in gamma */
idamnjanovic@60 320 for (j=0; j<i; ++j) {
idamnjanovic@60 321 gammaPr[m*signum + ind[j]] = c[j];
idamnjanovic@60 322 }
idamnjanovic@60 323 }
idamnjanovic@60 324 else {
idamnjanovic@60 325 /* sort the coefs by index before writing them to gamma */
idamnjanovic@60 326 quicksort(ind,c,i);
idamnjanovic@60 327 addproftime(&pd, INDEXSORT_TIME);
idamnjanovic@60 328
idamnjanovic@60 329 /* gamma is full - reallocate */
idamnjanovic@60 330 if (gamma_count+i >= allocated_coefs) {
idamnjanovic@60 331
idamnjanovic@60 332 while(gamma_count+i >= allocated_coefs) {
idamnjanovic@60 333 allocated_coefs = (mwSize)(ceil(GAMMA_INC_FACTOR*allocated_coefs) + 1.01);
idamnjanovic@60 334 }
idamnjanovic@60 335
idamnjanovic@60 336 mxSetNzmax(Gamma, allocated_coefs);
idamnjanovic@60 337 mxSetPr(Gamma, mxRealloc(gammaPr, allocated_coefs*sizeof(double)));
idamnjanovic@60 338 mxSetIr(Gamma, mxRealloc(gammaIr, allocated_coefs*sizeof(mwIndex)));
idamnjanovic@60 339
idamnjanovic@60 340 gammaPr = mxGetPr(Gamma);
idamnjanovic@60 341 gammaIr = mxGetIr(Gamma);
idamnjanovic@60 342 }
idamnjanovic@60 343
idamnjanovic@60 344 /* append coefs to gamma and update the indices */
idamnjanovic@60 345 for (j=0; j<i; ++j) {
idamnjanovic@60 346 gammaPr[gamma_count] = c[j];
idamnjanovic@60 347 gammaIr[gamma_count] = ind[j];
idamnjanovic@60 348 gamma_count++;
idamnjanovic@60 349 }
idamnjanovic@60 350 gammaJc[signum+1] = gammaJc[signum] + i;
idamnjanovic@60 351 }
idamnjanovic@60 352
idamnjanovic@60 353
idamnjanovic@60 354
idamnjanovic@60 355 /*** display status messages ***/
idamnjanovic@60 356
idamnjanovic@60 357 if (msg_delta>0 && (clock()-lastprint_time)/(double)CLOCKS_PER_SEC >= msg_delta)
idamnjanovic@60 358 {
idamnjanovic@60 359 lastprint_time = clock();
idamnjanovic@60 360
idamnjanovic@60 361 /* estimated remainig time */
idamnjanovic@60 362 secs2hms( ((L-signum-1)/(double)(signum+1)) * ((lastprint_time-starttime)/(double)CLOCKS_PER_SEC) ,
idamnjanovic@60 363 &hrs_remain, &mins_remain, &secs_remain);
idamnjanovic@60 364
idamnjanovic@60 365 mexPrintf("omp: signal %d / %d, estimated remaining time: %02d:%02d:%05.2f\n",
idamnjanovic@60 366 signum+1, L, hrs_remain, mins_remain, secs_remain);
idamnjanovic@60 367 mexEvalString("drawnow;");
idamnjanovic@60 368 }
idamnjanovic@60 369
idamnjanovic@60 370 }
idamnjanovic@60 371
idamnjanovic@60 372 /* end omp */
idamnjanovic@60 373
idamnjanovic@60 374
idamnjanovic@60 375
idamnjanovic@60 376 /*** print final messages ***/
idamnjanovic@60 377
idamnjanovic@60 378 if (msg_delta>0) {
idamnjanovic@60 379 mexPrintf("omp: signal %d / %d\n", signum, L);
idamnjanovic@60 380 }
idamnjanovic@60 381
idamnjanovic@60 382 if (profile) {
idamnjanovic@60 383 printprofinfo(&pd, erroromp, batchomp, L);
idamnjanovic@60 384 }
idamnjanovic@60 385
idamnjanovic@60 386
idamnjanovic@60 387
idamnjanovic@60 388 /* free memory */
idamnjanovic@60 389
idamnjanovic@60 390 if (!DtX_specified) {
idamnjanovic@60 391 mxFree(DtX);
idamnjanovic@60 392 }
idamnjanovic@60 393 if (standardomp) {
idamnjanovic@60 394 mxFree(r);
idamnjanovic@60 395 mxFree(Dsub);
idamnjanovic@60 396 }
idamnjanovic@60 397 else {
idamnjanovic@60 398 mxFree(Gsub);
idamnjanovic@60 399 }
idamnjanovic@60 400 mxFree(tempvec2);
idamnjanovic@60 401 mxFree(tempvec1);
idamnjanovic@60 402 mxFree(Lchol);
idamnjanovic@60 403 mxFree(c);
idamnjanovic@60 404 mxFree(selected_atoms);
idamnjanovic@60 405 mxFree(ind);
idamnjanovic@60 406 mxFree(alpha);
idamnjanovic@60 407
idamnjanovic@60 408 return Gamma;
idamnjanovic@60 409 }