ivan@140
|
1 /**************************************************************************
|
ivan@140
|
2 *
|
ivan@140
|
3 * File name: ompcore.c
|
ivan@140
|
4 *
|
ivan@140
|
5 * Ron Rubinstein
|
ivan@140
|
6 * Computer Science Department
|
ivan@140
|
7 * Technion, Haifa 32000 Israel
|
ivan@140
|
8 * ronrubin@cs
|
ivan@140
|
9 *
|
ivan@140
|
10 * Last Updated: 25.8.2009
|
ivan@140
|
11 *
|
ivan@140
|
12 *************************************************************************/
|
ivan@140
|
13
|
ivan@140
|
14
|
ivan@140
|
15 #include "ompcore.h"
|
ivan@140
|
16 #include "omputils.h"
|
ivan@140
|
17 #include "ompprof.h"
|
ivan@140
|
18 #include "myblas.h"
|
ivan@140
|
19 #include <math.h>
|
ivan@140
|
20 #include <string.h>
|
ivan@140
|
21
|
ivan@140
|
22
|
ivan@140
|
23
|
ivan@140
|
24 /******************************************************************************
|
ivan@140
|
25 * *
|
ivan@140
|
26 * Batch-OMP Implementation *
|
ivan@140
|
27 * *
|
ivan@140
|
28 ******************************************************************************/
|
ivan@140
|
29
|
ivan@140
|
30 mxArray* ompcore(double D[], double x[], double DtX[], double XtX[], double G[], mwSize n, mwSize m, mwSize L,
|
ivan@140
|
31 int T, double eps, int gamma_mode, int profile, double msg_delta, int erroromp)
|
ivan@140
|
32 {
|
ivan@140
|
33
|
ivan@140
|
34 profdata pd;
|
ivan@140
|
35 mxArray *Gamma;
|
ivan@140
|
36 mwIndex i, j, signum, pos, *ind, *gammaIr, *gammaJc, gamma_count;
|
ivan@140
|
37 mwSize allocated_coefs, allocated_cols;
|
ivan@140
|
38 int DtX_specified, XtX_specified, batchomp, standardomp, *selected_atoms;
|
ivan@140
|
39 double *alpha, *r, *Lchol, *c, *Gsub, *Dsub, sum, *gammaPr, *tempvec1, *tempvec2;
|
ivan@140
|
40 double eps2, resnorm, delta, deltaprev, secs_remain;
|
ivan@140
|
41 int mins_remain, hrs_remain;
|
ivan@140
|
42 clock_t lastprint_time, starttime;
|
ivan@140
|
43
|
ivan@140
|
44
|
ivan@140
|
45
|
ivan@140
|
46 /*** status flags ***/
|
ivan@140
|
47
|
ivan@140
|
48 DtX_specified = (DtX!=0); /* indicates whether D'*x was provided */
|
ivan@140
|
49 XtX_specified = (XtX!=0); /* indicates whether sum(x.*x) was provided */
|
ivan@140
|
50
|
ivan@140
|
51 standardomp = (G==0); /* batch-omp or standard omp are selected depending on availability of G */
|
ivan@140
|
52 batchomp = !standardomp;
|
ivan@140
|
53
|
ivan@140
|
54
|
ivan@140
|
55
|
ivan@140
|
56 /*** allocate output matrix ***/
|
ivan@140
|
57
|
ivan@140
|
58
|
ivan@140
|
59 if (gamma_mode == FULL_GAMMA) {
|
ivan@140
|
60
|
ivan@140
|
61 /* allocate full matrix of size m X L */
|
ivan@140
|
62
|
ivan@140
|
63 Gamma = mxCreateDoubleMatrix(m, L, mxREAL);
|
ivan@140
|
64 gammaPr = mxGetPr(Gamma);
|
ivan@140
|
65 gammaIr = 0;
|
ivan@140
|
66 gammaJc = 0;
|
ivan@140
|
67 }
|
ivan@140
|
68 else {
|
ivan@140
|
69
|
ivan@140
|
70 /* allocate sparse matrix with room for allocated_coefs nonzeros */
|
ivan@140
|
71
|
ivan@140
|
72 /* for error-omp, begin with L*sqrt(n)/2 allocated nonzeros, otherwise allocate L*T nonzeros */
|
ivan@140
|
73 allocated_coefs = erroromp ? (mwSize)(ceil(L*sqrt((double)n)/2.0) + 1.01) : L*T;
|
ivan@140
|
74 Gamma = mxCreateSparse(m, L, allocated_coefs, mxREAL);
|
ivan@140
|
75 gammaPr = mxGetPr(Gamma);
|
ivan@140
|
76 gammaIr = mxGetIr(Gamma);
|
ivan@140
|
77 gammaJc = mxGetJc(Gamma);
|
ivan@140
|
78 gamma_count = 0;
|
ivan@140
|
79 gammaJc[0] = 0;
|
ivan@140
|
80 }
|
ivan@140
|
81
|
ivan@140
|
82
|
ivan@140
|
83 /*** helper arrays ***/
|
ivan@140
|
84
|
ivan@140
|
85 alpha = (double*)mxMalloc(m*sizeof(double)); /* contains D'*residual */
|
ivan@140
|
86 ind = (mwIndex*)mxMalloc(n*sizeof(mwIndex)); /* indices of selected atoms */
|
ivan@140
|
87 selected_atoms = (int*)mxMalloc(m*sizeof(int)); /* binary array with 1's for selected atoms */
|
ivan@140
|
88 c = (double*)mxMalloc(n*sizeof(double)); /* orthogonal projection result */
|
ivan@140
|
89
|
ivan@140
|
90 /* current number of columns in Dsub / Gsub / Lchol */
|
ivan@140
|
91 allocated_cols = erroromp ? (mwSize)(ceil(sqrt((double)n)/2.0) + 1.01) : T;
|
ivan@140
|
92
|
ivan@140
|
93 /* Cholesky decomposition of D_I'*D_I */
|
ivan@140
|
94 Lchol = (double*)mxMalloc(n*allocated_cols*sizeof(double));
|
ivan@140
|
95
|
ivan@140
|
96 /* temporary vectors for various computations */
|
ivan@140
|
97 tempvec1 = (double*)mxMalloc(m*sizeof(double));
|
ivan@140
|
98 tempvec2 = (double*)mxMalloc(m*sizeof(double));
|
ivan@140
|
99
|
ivan@140
|
100 if (batchomp) {
|
ivan@140
|
101 /* matrix containing G(:,ind) - the columns of G corresponding to the selected atoms, in order of selection */
|
ivan@140
|
102 Gsub = (double*)mxMalloc(m*allocated_cols*sizeof(double));
|
ivan@140
|
103 }
|
ivan@140
|
104 else {
|
ivan@140
|
105 /* matrix containing D(:,ind) - the selected atoms from D, in order of selection */
|
ivan@140
|
106 Dsub = (double*)mxMalloc(n*allocated_cols*sizeof(double));
|
ivan@140
|
107
|
ivan@140
|
108 /* stores the residual */
|
ivan@140
|
109 r = (double*)mxMalloc(n*sizeof(double));
|
ivan@140
|
110 }
|
ivan@140
|
111
|
ivan@140
|
112 if (!DtX_specified) {
|
ivan@140
|
113 /* contains D'*x for the current signal */
|
ivan@140
|
114 DtX = (double*)mxMalloc(m*sizeof(double));
|
ivan@140
|
115 }
|
ivan@140
|
116
|
ivan@140
|
117
|
ivan@140
|
118
|
ivan@140
|
119 /*** initializations for error omp ***/
|
ivan@140
|
120
|
ivan@140
|
121 if (erroromp) {
|
ivan@140
|
122 eps2 = eps*eps; /* compute eps^2 */
|
ivan@140
|
123 if (T<0 || T>n) { /* unspecified max atom num - set max atoms to n */
|
ivan@140
|
124 T = n;
|
ivan@140
|
125 }
|
ivan@140
|
126 }
|
ivan@140
|
127
|
ivan@140
|
128
|
ivan@140
|
129
|
ivan@140
|
130 /*** initialize timers ***/
|
ivan@140
|
131
|
ivan@140
|
132 initprofdata(&pd); /* initialize profiling counters */
|
ivan@140
|
133 starttime = clock(); /* record starting time for eta computations */
|
ivan@140
|
134 lastprint_time = starttime; /* time of last status display */
|
ivan@140
|
135
|
ivan@140
|
136
|
ivan@140
|
137
|
ivan@140
|
138 /********************** perform omp for each signal **********************/
|
ivan@140
|
139
|
ivan@140
|
140
|
ivan@140
|
141
|
ivan@140
|
142 for (signum=0; signum<L; ++signum) {
|
ivan@140
|
143
|
ivan@140
|
144
|
ivan@140
|
145 /* initialize residual norm and deltaprev for error-omp */
|
ivan@140
|
146
|
ivan@140
|
147 if (erroromp) {
|
ivan@140
|
148 if (XtX_specified) {
|
ivan@140
|
149 resnorm = XtX[signum];
|
ivan@140
|
150 }
|
ivan@140
|
151 else {
|
ivan@140
|
152 resnorm = dotprod(x+n*signum, x+n*signum, n);
|
ivan@140
|
153 addproftime(&pd, XtX_TIME);
|
ivan@140
|
154 }
|
ivan@140
|
155 deltaprev = 0; /* delta tracks the value of gamma'*G*gamma */
|
ivan@140
|
156 }
|
ivan@140
|
157 else {
|
ivan@140
|
158 /* ignore residual norm stopping criterion */
|
ivan@140
|
159 eps2 = 0;
|
ivan@140
|
160 resnorm = 1;
|
ivan@140
|
161 }
|
ivan@140
|
162
|
ivan@140
|
163
|
ivan@140
|
164 if (resnorm>eps2 && T>0) {
|
ivan@140
|
165
|
ivan@140
|
166 /* compute DtX */
|
ivan@140
|
167
|
ivan@140
|
168 if (!DtX_specified) {
|
ivan@140
|
169 matT_vec(1, D, x+n*signum, DtX, n, m);
|
ivan@140
|
170 addproftime(&pd, DtX_TIME);
|
ivan@140
|
171 }
|
ivan@140
|
172
|
ivan@140
|
173
|
ivan@140
|
174 /* initialize alpha := DtX */
|
ivan@140
|
175
|
ivan@140
|
176 memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double));
|
ivan@140
|
177
|
ivan@140
|
178
|
ivan@140
|
179 /* mark all atoms as unselected */
|
ivan@140
|
180
|
ivan@140
|
181 for (i=0; i<m; ++i) {
|
ivan@140
|
182 selected_atoms[i] = 0;
|
ivan@140
|
183 }
|
ivan@140
|
184
|
ivan@140
|
185 }
|
ivan@140
|
186
|
ivan@140
|
187
|
ivan@140
|
188 /* main loop */
|
ivan@140
|
189
|
ivan@140
|
190 i=0;
|
ivan@140
|
191 while (resnorm>eps2 && i<T) {
|
ivan@140
|
192
|
ivan@140
|
193 /* index of next atom */
|
ivan@140
|
194
|
ivan@140
|
195 pos = maxabs(alpha, m);
|
ivan@140
|
196 addproftime(&pd, MAXABS_TIME);
|
ivan@140
|
197
|
ivan@140
|
198
|
ivan@140
|
199 /* stop criterion: selected same atom twice, or inner product too small */
|
ivan@140
|
200
|
ivan@140
|
201 if (selected_atoms[pos] || alpha[pos]*alpha[pos]<1e-14) {
|
ivan@140
|
202 break;
|
ivan@140
|
203 }
|
ivan@140
|
204
|
ivan@140
|
205
|
ivan@140
|
206 /* mark selected atom */
|
ivan@140
|
207
|
ivan@140
|
208 ind[i] = pos;
|
ivan@140
|
209 selected_atoms[pos] = 1;
|
ivan@140
|
210
|
ivan@140
|
211
|
ivan@140
|
212 /* matrix reallocation */
|
ivan@140
|
213
|
ivan@140
|
214 if (erroromp && i>=allocated_cols) {
|
ivan@140
|
215
|
ivan@140
|
216 allocated_cols = (mwSize)(ceil(allocated_cols*MAT_INC_FACTOR) + 1.01);
|
ivan@140
|
217
|
ivan@140
|
218 Lchol = (double*)mxRealloc(Lchol,n*allocated_cols*sizeof(double));
|
ivan@140
|
219
|
ivan@140
|
220 batchomp ? (Gsub = (double*)mxRealloc(Gsub,m*allocated_cols*sizeof(double))) :
|
ivan@140
|
221 (Dsub = (double*)mxRealloc(Dsub,n*allocated_cols*sizeof(double))) ;
|
ivan@140
|
222 }
|
ivan@140
|
223
|
ivan@140
|
224
|
ivan@140
|
225 /* append column to Gsub or Dsub */
|
ivan@140
|
226
|
ivan@140
|
227 if (batchomp) {
|
ivan@140
|
228 memcpy(Gsub+i*m, G+pos*m, m*sizeof(double));
|
ivan@140
|
229 }
|
ivan@140
|
230 else {
|
ivan@140
|
231 memcpy(Dsub+i*n, D+pos*n, n*sizeof(double));
|
ivan@140
|
232 }
|
ivan@140
|
233
|
ivan@140
|
234
|
ivan@140
|
235 /*** Cholesky update ***/
|
ivan@140
|
236
|
ivan@140
|
237 if (i==0) {
|
ivan@140
|
238 *Lchol = 1;
|
ivan@140
|
239 }
|
ivan@140
|
240 else {
|
ivan@140
|
241
|
ivan@140
|
242 /* incremental Cholesky decomposition: compute next row of Lchol */
|
ivan@140
|
243
|
ivan@140
|
244 if (standardomp) {
|
ivan@140
|
245 matT_vec(1, Dsub, D+n*pos, tempvec1, n, i); /* compute tempvec1 := Dsub'*d where d is new atom */
|
ivan@140
|
246 addproftime(&pd, DtD_TIME);
|
ivan@140
|
247 }
|
ivan@140
|
248 else {
|
ivan@140
|
249 vec_assign(tempvec1, Gsub+i*m, ind, i); /* extract tempvec1 := Gsub(ind,i) */
|
ivan@140
|
250 }
|
ivan@140
|
251 backsubst('L', Lchol, tempvec1, tempvec2, n, i); /* compute tempvec2 = Lchol \ tempvec1 */
|
ivan@140
|
252 for (j=0; j<i; ++j) { /* write tempvec2 to end of Lchol */
|
ivan@140
|
253 Lchol[j*n+i] = tempvec2[j];
|
ivan@140
|
254 }
|
ivan@140
|
255
|
ivan@140
|
256 /* compute Lchol(i,i) */
|
ivan@140
|
257 sum = 0;
|
ivan@140
|
258 for (j=0; j<i; ++j) { /* compute sum of squares of last row without Lchol(i,i) */
|
ivan@140
|
259 sum += SQR(Lchol[j*n+i]);
|
ivan@140
|
260 }
|
ivan@140
|
261 if ( (1-sum) <= 1e-14 ) { /* Lchol(i,i) is zero => selected atoms are dependent */
|
ivan@140
|
262 break;
|
ivan@140
|
263 }
|
ivan@140
|
264 Lchol[i*n+i] = sqrt(1-sum);
|
ivan@140
|
265 }
|
ivan@140
|
266
|
ivan@140
|
267 addproftime(&pd, LCHOL_TIME);
|
ivan@140
|
268
|
ivan@140
|
269 i++;
|
ivan@140
|
270
|
ivan@140
|
271
|
ivan@140
|
272 /* perform orthogonal projection and compute sparse coefficients */
|
ivan@140
|
273
|
ivan@140
|
274 vec_assign(tempvec1, DtX + m*signum*DtX_specified, ind, i); /* extract tempvec1 = DtX(ind) */
|
ivan@140
|
275 cholsolve('L', Lchol, tempvec1, c, n, i); /* solve LL'c = tempvec1 for c */
|
ivan@140
|
276 addproftime(&pd, COMPCOEF_TIME);
|
ivan@140
|
277
|
ivan@140
|
278
|
ivan@140
|
279 /* update alpha = D'*residual */
|
ivan@140
|
280
|
ivan@140
|
281 if (standardomp) {
|
ivan@140
|
282 mat_vec(-1, Dsub, c, r, n, i); /* compute r := -Dsub*c */
|
ivan@140
|
283 vec_sum(1, x+n*signum, r, n); /* compute r := x+r */
|
ivan@140
|
284
|
ivan@140
|
285
|
ivan@140
|
286 /*memcpy(r, x+n*signum, n*sizeof(double)); /* assign r := x */
|
ivan@140
|
287 /*mat_vec1(-1, Dsub, c, 1, r, n, i); /* compute r := r-Dsub*c */
|
ivan@140
|
288
|
ivan@140
|
289 addproftime(&pd, COMPRES_TIME);
|
ivan@140
|
290 matT_vec(1, D, r, alpha, n, m); /* compute alpha := D'*r */
|
ivan@140
|
291 addproftime(&pd, DtR_TIME);
|
ivan@140
|
292
|
ivan@140
|
293 /* update residual norm */
|
ivan@140
|
294 if (erroromp) {
|
ivan@140
|
295 resnorm = dotprod(r, r, n);
|
ivan@140
|
296 addproftime(&pd, UPDATE_RESNORM_TIME);
|
ivan@140
|
297 }
|
ivan@140
|
298 }
|
ivan@140
|
299 else {
|
ivan@140
|
300 mat_vec(1, Gsub, c, tempvec1, m, i); /* compute tempvec1 := Gsub*c */
|
ivan@140
|
301 memcpy(alpha, DtX + m*signum*DtX_specified, m*sizeof(double)); /* set alpha = D'*x */
|
ivan@140
|
302 vec_sum(-1, tempvec1, alpha, m); /* compute alpha := alpha - tempvec1 */
|
ivan@140
|
303 addproftime(&pd, UPDATE_DtR_TIME);
|
ivan@140
|
304
|
ivan@140
|
305 /* update residual norm */
|
ivan@140
|
306 if (erroromp) {
|
ivan@140
|
307 vec_assign(tempvec2, tempvec1, ind, i); /* assign tempvec2 := tempvec1(ind) */
|
ivan@140
|
308 delta = dotprod(c,tempvec2,i); /* compute c'*tempvec2 */
|
ivan@140
|
309 resnorm = resnorm - delta + deltaprev; /* residual norm update */
|
ivan@140
|
310 deltaprev = delta;
|
ivan@140
|
311 addproftime(&pd, UPDATE_RESNORM_TIME);
|
ivan@140
|
312 }
|
ivan@140
|
313 }
|
ivan@140
|
314 }
|
ivan@140
|
315
|
ivan@140
|
316
|
ivan@140
|
317 /*** generate output vector gamma ***/
|
ivan@140
|
318
|
ivan@140
|
319 if (gamma_mode == FULL_GAMMA) { /* write the coefs in c to their correct positions in gamma */
|
ivan@140
|
320 for (j=0; j<i; ++j) {
|
ivan@140
|
321 gammaPr[m*signum + ind[j]] = c[j];
|
ivan@140
|
322 }
|
ivan@140
|
323 }
|
ivan@140
|
324 else {
|
ivan@140
|
325 /* sort the coefs by index before writing them to gamma */
|
ivan@140
|
326 quicksort(ind,c,i);
|
ivan@140
|
327 addproftime(&pd, INDEXSORT_TIME);
|
ivan@140
|
328
|
ivan@140
|
329 /* gamma is full - reallocate */
|
ivan@140
|
330 if (gamma_count+i >= allocated_coefs) {
|
ivan@140
|
331
|
ivan@140
|
332 while(gamma_count+i >= allocated_coefs) {
|
ivan@140
|
333 allocated_coefs = (mwSize)(ceil(GAMMA_INC_FACTOR*allocated_coefs) + 1.01);
|
ivan@140
|
334 }
|
ivan@140
|
335
|
ivan@140
|
336 mxSetNzmax(Gamma, allocated_coefs);
|
ivan@140
|
337 mxSetPr(Gamma, mxRealloc(gammaPr, allocated_coefs*sizeof(double)));
|
ivan@140
|
338 mxSetIr(Gamma, mxRealloc(gammaIr, allocated_coefs*sizeof(mwIndex)));
|
ivan@140
|
339
|
ivan@140
|
340 gammaPr = mxGetPr(Gamma);
|
ivan@140
|
341 gammaIr = mxGetIr(Gamma);
|
ivan@140
|
342 }
|
ivan@140
|
343
|
ivan@140
|
344 /* append coefs to gamma and update the indices */
|
ivan@140
|
345 for (j=0; j<i; ++j) {
|
ivan@140
|
346 gammaPr[gamma_count] = c[j];
|
ivan@140
|
347 gammaIr[gamma_count] = ind[j];
|
ivan@140
|
348 gamma_count++;
|
ivan@140
|
349 }
|
ivan@140
|
350 gammaJc[signum+1] = gammaJc[signum] + i;
|
ivan@140
|
351 }
|
ivan@140
|
352
|
ivan@140
|
353
|
ivan@140
|
354
|
ivan@140
|
355 /*** display status messages ***/
|
ivan@140
|
356
|
ivan@140
|
357 if (msg_delta>0 && (clock()-lastprint_time)/(double)CLOCKS_PER_SEC >= msg_delta)
|
ivan@140
|
358 {
|
ivan@140
|
359 lastprint_time = clock();
|
ivan@140
|
360
|
ivan@140
|
361 /* estimated remainig time */
|
ivan@140
|
362 secs2hms( ((L-signum-1)/(double)(signum+1)) * ((lastprint_time-starttime)/(double)CLOCKS_PER_SEC) ,
|
ivan@140
|
363 &hrs_remain, &mins_remain, &secs_remain);
|
ivan@140
|
364
|
ivan@140
|
365 mexPrintf("omp: signal %d / %d, estimated remaining time: %02d:%02d:%05.2f\n",
|
ivan@140
|
366 signum+1, L, hrs_remain, mins_remain, secs_remain);
|
ivan@140
|
367 mexEvalString("drawnow;");
|
ivan@140
|
368 }
|
ivan@140
|
369
|
ivan@140
|
370 }
|
ivan@140
|
371
|
ivan@140
|
372 /* end omp */
|
ivan@140
|
373
|
ivan@140
|
374
|
ivan@140
|
375
|
ivan@140
|
376 /*** print final messages ***/
|
ivan@140
|
377
|
ivan@140
|
378 if (msg_delta>0) {
|
ivan@140
|
379 mexPrintf("omp: signal %d / %d\n", signum, L);
|
ivan@140
|
380 }
|
ivan@140
|
381
|
ivan@140
|
382 if (profile) {
|
ivan@140
|
383 printprofinfo(&pd, erroromp, batchomp, L);
|
ivan@140
|
384 }
|
ivan@140
|
385
|
ivan@140
|
386
|
ivan@140
|
387
|
ivan@140
|
388 /* free memory */
|
ivan@140
|
389
|
ivan@140
|
390 if (!DtX_specified) {
|
ivan@140
|
391 mxFree(DtX);
|
ivan@140
|
392 }
|
ivan@140
|
393 if (standardomp) {
|
ivan@140
|
394 mxFree(r);
|
ivan@140
|
395 mxFree(Dsub);
|
ivan@140
|
396 }
|
ivan@140
|
397 else {
|
ivan@140
|
398 mxFree(Gsub);
|
ivan@140
|
399 }
|
ivan@140
|
400 mxFree(tempvec2);
|
ivan@140
|
401 mxFree(tempvec1);
|
ivan@140
|
402 mxFree(Lchol);
|
ivan@140
|
403 mxFree(c);
|
ivan@140
|
404 mxFree(selected_atoms);
|
ivan@140
|
405 mxFree(ind);
|
ivan@140
|
406 mxFree(alpha);
|
ivan@140
|
407
|
ivan@140
|
408 return Gamma;
|
ivan@140
|
409 }
|