Mercurial > hg > smallbox
comparison solvers/SMALL_ompGabor/ompcoreGabor.c @ 144:19e0af570914 release_1.5
Merge from branch "ivand_dev"
author | Ivan <ivan.damnjanovic@eecs.qmul.ac.uk> |
---|---|
date | Tue, 26 Jul 2011 15:14:15 +0100 |
parents | 31d2864dfdd4 |
children |
comparison
equal
deleted
inserted
replaced
143:8d866d96f006 | 144:19e0af570914 |
---|---|
1 /************************************************************************** | |
2 * | |
3 * File name: ompcoreGabor.c | |
4 * | |
5 * Ron Rubinstein | |
6 * Computer Science Department | |
7 * Technion, Haifa 32000 Israel | |
8 * ronrubin@cs | |
9 * | |
10 * Last Updated: 25.8.2009 | |
11 * | |
12 * Modified by Ivan damnjanovic July 2011 | |
13 * Takes to atoms per iteration. It should be used for Gabor dictionaries | |
14 * as specified in | |
15 * "Audio Inpainting" Amir Adler, Valentin Emiya, Maria G. Jafari, | |
16 * Michael Elad, Remi Gribonval and Mark D. Plumbley | |
17 * Draft version: March 6, 2011 | |
18 * | |
19 *************************************************************************/ | |
20 | |
21 | |
22 #include "ompcoreGabor.h" | |
23 #include "omputils.h" | |
24 #include "ompprof.h" | |
25 #include "myblas.h" | |
26 #include <math.h> | |
27 #include <string.h> | |
28 | |
29 | |
30 | |
31 /****************************************************************************** | |
32 * * | |
33 * Batch-OMP Implementation * | |
34 * * | |
35 ******************************************************************************/ | |
36 | |
37 mxArray* ompcoreGabor(double D[], double x[], double DtX[], double XtX[], double G[], mwSize n, mwSize m, mwSize L, | |
38 int T, double eps, int gamma_mode, int profile, double msg_delta, int erroromp) | |
39 { | |
40 | |
41 profdata pd; | |
42 mxArray *Gamma; | |
43 mwIndex i, j, k, signum, pos, *ind, *gammaIr, *gammaJc, gamma_count; | |
44 mwSize allocated_coefs, allocated_cols; | |
45 int DtX_specified, XtX_specified, batchomp, standardomp, *selected_atoms; | |
46 double *proj, *proj1, *proj2, *D1, *D2, *D1D2, *n12, *alpha, *beta, *error; | |
47 double *r, *Lchol, *c, *Gsub, *Dsub, sum, *gammaPr, *tempvec1, *tempvec2; | |
48 double eps2, resnorm, delta, deltaprev, secs_remain; | |
49 int mins_remain, hrs_remain; | |
50 clock_t lastprint_time, starttime; | |
51 | |
52 | |
53 | |
54 /*** status flags ***/ | |
55 | |
56 DtX_specified = (DtX!=0); /* indicates whether D'*x was provided */ | |
57 XtX_specified = (XtX!=0); /* indicates whether sum(x.*x) was provided */ | |
58 | |
59 standardomp = (G==0); /* batch-omp or standard omp are selected depending on availability of G */ | |
60 batchomp = !standardomp; | |
61 | |
62 | |
63 | |
64 /*** allocate output matrix ***/ | |
65 | |
66 | |
67 if (gamma_mode == FULL_GAMMA) { | |
68 | |
69 /* allocate full matrix of size m X L */ | |
70 | |
71 Gamma = mxCreateDoubleMatrix(m, L, mxREAL); | |
72 gammaPr = mxGetPr(Gamma); | |
73 gammaIr = 0; | |
74 gammaJc = 0; | |
75 } | |
76 else { | |
77 | |
78 /* allocate sparse matrix with room for allocated_coefs nonzeros */ | |
79 | |
80 /* for error-omp, begin with L*sqrt(n)/2 allocated nonzeros, otherwise allocate L*T nonzeros */ | |
81 allocated_coefs = erroromp ? (mwSize)(ceil(L*sqrt((double)n)/2.0) + 1.01) : L*T; | |
82 Gamma = mxCreateSparse(m, L, allocated_coefs, mxREAL); | |
83 gammaPr = mxGetPr(Gamma); | |
84 gammaIr = mxGetIr(Gamma); | |
85 gammaJc = mxGetJc(Gamma); | |
86 gamma_count = 0; | |
87 gammaJc[0] = 0; | |
88 } | |
89 | |
90 | |
91 /*** helper arrays ***/ | |
92 /* Ivan Damnjanovic July 2011*/ | |
93 proj = (double*)mxMalloc(m*sizeof(double)); | |
94 proj1 = (double*)mxMalloc(m/2*sizeof(double)); | |
95 proj2 = (double*)mxMalloc(m/2*sizeof(double)); | |
96 D1 = (double*)mxMalloc(n*m/2*sizeof(double)); | |
97 D2 = (double*)mxMalloc(n*m/2*sizeof(double)); | |
98 memcpy(D1, D , n*m/2*sizeof(double)); | |
99 memcpy(D2, D+n*m/2, n*m/2*sizeof(double)); | |
100 | |
101 D1D2 = (double*)mxMalloc(m/2*sizeof(double)); | |
102 n12 = (double*)mxMalloc(m/2*sizeof(double)); | |
103 | |
104 vec_smult(1,D2, D1, n*m/2); | |
105 | |
106 for (i=0; i<m/2; i++) { | |
107 D1D2[i]=0; | |
108 n12[i]=0; | |
109 for (j=0; j<n; j++) { | |
110 D1D2[i] += D1[i*n+j]; | |
111 } | |
112 n12[i]=1/(1-D1D2[i]*D1D2[i]); | |
113 } | |
114 | |
115 memcpy(D1, D , n*m/2*sizeof(double)); | |
116 | |
117 alpha = (double*)mxMalloc(m/2*sizeof(double)); /* contains D'*residual */ | |
118 beta = (double*)mxMalloc(m/2*sizeof(double)); | |
119 error = (double*)mxMalloc(m/2*sizeof(double)); | |
120 | |
121 ind = (mwIndex*)mxMalloc(m*sizeof(mwIndex)); /* indices of selected atoms */ | |
122 selected_atoms = (int*)mxMalloc(m*sizeof(int)); /* binary array with 1's for selected atoms */ | |
123 c = (double*)mxMalloc(n*sizeof(double)); /* orthogonal projection result */ | |
124 | |
125 /* current number of columns in Dsub / Gsub / Lchol */ | |
126 allocated_cols = erroromp ? (mwSize)(ceil(sqrt((double)n)/2.0) + 1.01) : T; | |
127 | |
128 /* Cholesky decomposition of D_I'*D_I */ | |
129 Lchol = (double*)mxMalloc(n*allocated_cols*sizeof(double)); | |
130 | |
131 /* temporary vectors for various computations */ | |
132 tempvec1 = (double*)mxMalloc(m*sizeof(double)); | |
133 tempvec2 = (double*)mxMalloc(m*sizeof(double)); | |
134 | |
135 if (batchomp) { | |
136 /* matrix containing G(:,ind) - the columns of G corresponding to the selected atoms, in order of selection */ | |
137 Gsub = (double*)mxMalloc(m*allocated_cols*sizeof(double)); | |
138 } | |
139 else { | |
140 /* matrix containing D(:,ind) - the selected atoms from D, in order of selection */ | |
141 Dsub = (double*)mxMalloc(n*allocated_cols*sizeof(double)); | |
142 | |
143 /* stores the residual */ | |
144 r = (double*)mxMalloc(n*sizeof(double)); | |
145 } | |
146 | |
147 if (!DtX_specified) { | |
148 /* contains D'*x for the current signal */ | |
149 DtX = (double*)mxMalloc(m*sizeof(double)); | |
150 } | |
151 | |
152 | |
153 | |
154 /*** initializations for error omp ***/ | |
155 | |
156 if (erroromp) { | |
157 eps2 = eps*eps; /* compute eps^2 */ | |
158 if (T<0 || T>n) { /* unspecified max atom num - set max atoms to n */ | |
159 T = n; | |
160 } | |
161 } | |
162 | |
163 | |
164 | |
165 /*** initialize timers ***/ | |
166 | |
167 initprofdata(&pd); /* initialize profiling counters */ | |
168 starttime = clock(); /* record starting time for eta computations */ | |
169 lastprint_time = starttime; /* time of last status display */ | |
170 | |
171 | |
172 | |
173 /********************** perform omp for each signal **********************/ | |
174 | |
175 | |
176 | |
177 for (signum=0; signum<L; ++signum) { | |
178 | |
179 | |
180 /* initialize residual norm and deltaprev for error-omp */ | |
181 | |
182 if (erroromp) { | |
183 if (XtX_specified) { | |
184 resnorm = XtX[signum]; | |
185 } | |
186 else { | |
187 resnorm = dotprod(x+n*signum, x+n*signum, n); | |
188 addproftime(&pd, XtX_TIME); | |
189 } | |
190 deltaprev = 0; /* delta tracks the value of gamma'*G*gamma */ | |
191 } | |
192 else { | |
193 /* ignore residual norm stopping criterion */ | |
194 eps2 = 0; | |
195 resnorm = 1; | |
196 } | |
197 | |
198 | |
199 if (resnorm>eps2 && T>0) { | |
200 | |
201 /* compute DtX */ | |
202 | |
203 if (!DtX_specified) { | |
204 matT_vec(1, D, x+n*signum, DtX, n, m); | |
205 addproftime(&pd, DtX_TIME); | |
206 memcpy(r , x+n*signum, n*sizeof(double)); | |
207 } | |
208 | |
209 | |
210 /* initialize projections to D1 and D2 := DtX */ | |
211 | |
212 memcpy(proj, DtX + m*signum*DtX_specified, m*sizeof(double)); | |
213 | |
214 | |
215 /* mark all atoms as unselected */ | |
216 | |
217 for (i=0; i<m; ++i) { | |
218 selected_atoms[i] = 0; | |
219 } | |
220 | |
221 } | |
222 | |
223 | |
224 /* main loop */ | |
225 | |
226 i=0; | |
227 while (resnorm>eps2 && i<T) { | |
228 | |
229 /* index of next atom */ | |
230 memcpy(proj1, proj, m/2*sizeof(double)); | |
231 memcpy(proj2, proj + m/2, m/2*sizeof(double)); | |
232 for (k=0; k<m/2; k++){ | |
233 alpha[k] = (proj1[k] - D1D2[k]*proj2[k])*n12[k]; | |
234 beta[k] = (proj2[k] - D1D2[k]*proj1[k])*n12[k]; | |
235 } | |
236 for (k=0; k<m/2; k++){ | |
237 error[k]=0; | |
238 for (j=0; j<n; j++){ | |
239 error[k]+= (abs(r[j])-D1[k*n+j]*alpha[k]-D2[k*n+j]*beta[k])*(abs(r[j])-D1[k*n+j]*alpha[k]-D2[k*n+j]*beta[k]); | |
240 } | |
241 } | |
242 pos = maxabs(error, m/2); | |
243 addproftime(&pd, MAXABS_TIME); | |
244 | |
245 | |
246 /* stop criterion: selected same atom twice, or inner product too small */ | |
247 | |
248 if (selected_atoms[pos] || alpha[pos]*alpha[pos]<1e-14) { | |
249 break; | |
250 } | |
251 | |
252 for (k=0;k<2;k++){ | |
253 /* mark selected atom */ | |
254 | |
255 ind[i] = pos+k*m/2; | |
256 selected_atoms[pos+k*m/2] = 1; | |
257 | |
258 | |
259 /* matrix reallocation */ | |
260 | |
261 if (erroromp && i>=allocated_cols) { | |
262 | |
263 allocated_cols = (mwSize)(ceil(allocated_cols*MAT_INC_FACTOR) + 1.01); | |
264 | |
265 Lchol = (double*)mxRealloc(Lchol,n*allocated_cols*sizeof(double)); | |
266 | |
267 batchomp ? (Gsub = (double*)mxRealloc(Gsub,m*allocated_cols*sizeof(double))) : | |
268 (Dsub = (double*)mxRealloc(Dsub,n*allocated_cols*sizeof(double))) ; | |
269 } | |
270 | |
271 | |
272 /* append column to Gsub or Dsub */ | |
273 | |
274 if (batchomp) { | |
275 memcpy(Gsub+i*m, G+(pos+k*m/2)*m, m*sizeof(double)); | |
276 } | |
277 else { | |
278 memcpy(Dsub+(i)*n, D+(pos+k*m/2)*n, n*sizeof(double)); | |
279 } | |
280 | |
281 | |
282 /*** Cholesky update ***/ | |
283 | |
284 if (i==0) { | |
285 *Lchol = 1; | |
286 } | |
287 else { | |
288 | |
289 /* incremental Cholesky decomposition: compute next row of Lchol */ | |
290 | |
291 if (standardomp) { | |
292 matT_vec(1, Dsub, D+n*(pos+k*m/2), tempvec1, n, i); /* compute tempvec1 := Dsub'*d where d is new atom */ | |
293 addproftime(&pd, DtD_TIME); | |
294 } | |
295 else { | |
296 vec_assign(tempvec1, Gsub+i*m, ind, i); /* extract tempvec1 := Gsub(ind,i) */ | |
297 } | |
298 backsubst('L', Lchol, tempvec1, tempvec2, n, i); /* compute tempvec2 = Lchol \ tempvec1 */ | |
299 for (j=0; j<i; ++j) { /* write tempvec2 to end of Lchol */ | |
300 Lchol[j*n+i] = tempvec2[j]; | |
301 } | |
302 | |
303 /* compute Lchol(i,i) */ | |
304 sum = 0; | |
305 for (j=0; j<i; ++j) { /* compute sum of squares of last row without Lchol(i,i) */ | |
306 sum += SQR(Lchol[j*n+i]); | |
307 } | |
308 if ( (1-sum) <= 1e-14 ) { /* Lchol(i,i) is zero => selected atoms are dependent */ | |
309 break; | |
310 } | |
311 Lchol[i*n+i] = sqrt(1-sum); | |
312 } | |
313 | |
314 addproftime(&pd, LCHOL_TIME); | |
315 | |
316 i++; | |
317 | |
318 } | |
319 /* perform orthogonal projection and compute sparse coefficients */ | |
320 | |
321 vec_assign(tempvec1, DtX + m*signum*DtX_specified, ind, i); /* extract tempvec1 = DtX(ind) */ | |
322 cholsolve('L', Lchol, tempvec1, c, n, i); /* solve LL'c = tempvec1 for c */ | |
323 addproftime(&pd, COMPCOEF_TIME); | |
324 | |
325 | |
326 /* update alpha = D'*residual */ | |
327 | |
328 if (standardomp) { | |
329 mat_vec(-1, Dsub, c, r, n, i); /* compute r := -Dsub*c */ | |
330 vec_sum(1, x+n*signum, r, n); /* compute r := x+r */ | |
331 | |
332 | |
333 /*memcpy(r, x+n*signum, n*sizeof(double)); /* assign r := x */ | |
334 /*mat_vec1(-1, Dsub, c, 1, r, n, i); /* compute r := r-Dsub*c */ | |
335 | |
336 addproftime(&pd, COMPRES_TIME); | |
337 matT_vec(1, D, r, proj, n, m); /* compute proj := D'*r */ | |
338 addproftime(&pd, DtR_TIME); | |
339 | |
340 /* update residual norm */ | |
341 if (erroromp) { | |
342 resnorm = dotprod(r, r, n); | |
343 addproftime(&pd, UPDATE_RESNORM_TIME); | |
344 } | |
345 } | |
346 else { | |
347 mat_vec(1, Gsub, c, tempvec1, m, i); /* compute tempvec1 := Gsub*c */ | |
348 memcpy(proj, DtX + m*signum*DtX_specified, m*sizeof(double)); /* set proj = D'*x */ | |
349 vec_sum(-1, tempvec1, proj, m); /* compute proj := proj - tempvec1 */ | |
350 addproftime(&pd, UPDATE_DtR_TIME); | |
351 | |
352 /* update residual norm */ | |
353 if (erroromp) { | |
354 vec_assign(tempvec2, tempvec1, ind, i); /* assign tempvec2 := tempvec1(ind) */ | |
355 delta = dotprod(c,tempvec2,i); /* compute c'*tempvec2 */ | |
356 resnorm = resnorm - delta + deltaprev; /* residual norm update */ | |
357 deltaprev = delta; | |
358 addproftime(&pd, UPDATE_RESNORM_TIME); | |
359 } | |
360 } | |
361 } | |
362 | |
363 | |
364 /*** generate output vector gamma ***/ | |
365 | |
366 if (gamma_mode == FULL_GAMMA) { /* write the coefs in c to their correct positions in gamma */ | |
367 for (j=0; j<i; ++j) { | |
368 gammaPr[m*signum + ind[j]] = c[j]; | |
369 } | |
370 } | |
371 else { | |
372 /* sort the coefs by index before writing them to gamma */ | |
373 quicksort(ind,c,i); | |
374 addproftime(&pd, INDEXSORT_TIME); | |
375 | |
376 /* gamma is full - reallocate */ | |
377 if (gamma_count+i >= allocated_coefs) { | |
378 | |
379 while(gamma_count+i >= allocated_coefs) { | |
380 allocated_coefs = (mwSize)(ceil(GAMMA_INC_FACTOR*allocated_coefs) + 1.01); | |
381 } | |
382 | |
383 mxSetNzmax(Gamma, allocated_coefs); | |
384 mxSetPr(Gamma, mxRealloc(gammaPr, allocated_coefs*sizeof(double))); | |
385 mxSetIr(Gamma, mxRealloc(gammaIr, allocated_coefs*sizeof(mwIndex))); | |
386 | |
387 gammaPr = mxGetPr(Gamma); | |
388 gammaIr = mxGetIr(Gamma); | |
389 } | |
390 | |
391 /* append coefs to gamma and update the indices */ | |
392 for (j=0; j<i; ++j) { | |
393 gammaPr[gamma_count] = c[j]; | |
394 gammaIr[gamma_count] = ind[j]; | |
395 gamma_count++; | |
396 } | |
397 gammaJc[signum+1] = gammaJc[signum] + i; | |
398 } | |
399 | |
400 | |
401 | |
402 /*** display status messages ***/ | |
403 | |
404 if (msg_delta>0 && (clock()-lastprint_time)/(double)CLOCKS_PER_SEC >= msg_delta) | |
405 { | |
406 lastprint_time = clock(); | |
407 | |
408 /* estimated remainig time */ | |
409 secs2hms( ((L-signum-1)/(double)(signum+1)) * ((lastprint_time-starttime)/(double)CLOCKS_PER_SEC) , | |
410 &hrs_remain, &mins_remain, &secs_remain); | |
411 | |
412 mexPrintf("omp: signal %d / %d, estimated remaining time: %02d:%02d:%05.2f\n", | |
413 signum+1, L, hrs_remain, mins_remain, secs_remain); | |
414 mexEvalString("drawnow;"); | |
415 } | |
416 | |
417 } | |
418 | |
419 /* end omp */ | |
420 | |
421 | |
422 | |
423 /*** print final messages ***/ | |
424 | |
425 if (msg_delta>0) { | |
426 mexPrintf("omp: signal %d / %d\n", signum, L); | |
427 } | |
428 | |
429 if (profile) { | |
430 printprofinfo(&pd, erroromp, batchomp, L); | |
431 } | |
432 | |
433 | |
434 | |
435 /* free memory */ | |
436 | |
437 if (!DtX_specified) { | |
438 mxFree(DtX); | |
439 } | |
440 if (standardomp) { | |
441 mxFree(r); | |
442 mxFree(Dsub); | |
443 } | |
444 else { | |
445 mxFree(Gsub); | |
446 } | |
447 mxFree(tempvec2); | |
448 mxFree(tempvec1); | |
449 mxFree(Lchol); | |
450 mxFree(c); | |
451 mxFree(selected_atoms); | |
452 mxFree(ind); | |
453 mxFree(proj); | |
454 mxFree(proj1); | |
455 mxFree(proj2); | |
456 mxFree(D1); | |
457 mxFree(D2); | |
458 mxFree(D1D2); | |
459 mxFree(n12); | |
460 mxFree(alpha); | |
461 mxFree(beta); | |
462 mxFree(error); | |
463 | |
464 return Gamma; | |
465 } |