comparison util/ksvd utils/ompbox utils/myblas.c @ 137:9207d56c5547 ivand_dev

New ompbox in utils for testing purposes
author Ivan Damnjanovic lnx <ivan.damnjanovic@eecs.qmul.ac.uk>
date Thu, 21 Jul 2011 14:07:41 +0100
parents
children
comparison
equal deleted inserted replaced
136:1334d2302dd9 137:9207d56c5547
1 /**************************************************************************
2 *
3 * File name: myblas.c
4 *
5 * Ron Rubinstein
6 * Computer Science Department
7 * Technion, Haifa 32000 Israel
8 * ronrubin@cs
9 *
10 * Version: 1.1
11 * Last updated: 13.8.2009
12 *
13 *************************************************************************/
14
15
16 #include "myblas.h"
17 #include <ctype.h>
18
19
20 /* find maximum of absolute values */
21
22 mwIndex maxabs(double c[], mwSize m)
23 {
24 mwIndex maxid=0, k;
25 double absval, maxval = SQR(*c); /* use square which is quicker than absolute value */
26
27 for (k=1; k<m; ++k) {
28 absval = SQR(c[k]);
29 if (absval > maxval) {
30 maxval = absval;
31 maxid = k;
32 }
33 }
34 return maxid;
35 }
36
37
38 /* compute y := alpha*x + y */
39
40 void vec_sum(double alpha, double x[], double y[], mwSize n)
41 {
42 mwIndex i;
43
44 for (i=0; i<n; ++i) {
45 y[i] += alpha*x[i];
46 }
47 }
48
49 /* compute y := alpha*x .* y */
50
51 void vec_smult(double alpha, double x[], double y[], mwSize n)
52 {
53 mwIndex i;
54
55 for (i=0; i<n; ++i) {
56 y[i] *= alpha*x[i];
57 }
58 }
59
60 /* compute y := alpha*A*x */
61
62 void mat_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m)
63 {
64 mwIndex i, j, i_n;
65 double *Ax;
66
67 Ax = mxCalloc(n,sizeof(double));
68
69 for (i=0; i<m; ++i) {
70 i_n = i*n;
71 for (j=0; j<n; ++j) {
72 Ax[j] += A[i_n+j] * x[i];
73 }
74 }
75
76 for (j=0; j<n; ++j) {
77 y[j] = alpha*Ax[j];
78 }
79
80 mxFree(Ax);
81 }
82
83
84 /* compute y := alpha*A'*x */
85
86 void matT_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m)
87 {
88 mwIndex i, j, n_i;
89 double sum0, sum1, sum2, sum3;
90
91 for (j=0; j<m; ++j) {
92 y[j] = 0;
93 }
94
95 /* use loop unrolling to accelerate computation */
96
97 for (i=0; i<m; ++i) {
98 n_i = n*i;
99 sum0 = sum1 = sum2 = sum3 = 0;
100 for (j=0; j+4<n; j+=4) {
101 sum0 += A[n_i+j]*x[j];
102 sum1 += A[n_i+j+1]*x[j+1];
103 sum2 += A[n_i+j+2]*x[j+2];
104 sum3 += A[n_i+j+3]*x[j+3];
105 }
106 y[i] += alpha * ((sum0 + sum1) + (sum2 + sum3));
107 while (j<n) {
108 y[i] += alpha*A[n_i+j]*x[j];
109 j++;
110 }
111 }
112 }
113
114
115 /* compute y := alpha*A*x */
116
117 void mat_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m)
118 {
119
120 mwIndex i, j, j1, j2;
121
122 for (i=0; i<n; ++i) {
123 y[i] = 0;
124 }
125
126 j2 = jc[0];
127 for (i=0; i<m; ++i) {
128 j1 = j2; j2 = jc[i+1];
129 for (j=j1; j<j2; ++j) {
130 y[ir[j]] += alpha * pr[j] * x[i];
131 }
132 }
133
134 }
135
136
137 /* compute y := alpha*A'*x */
138
139 void matT_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m)
140 {
141
142 mwIndex i, j, j1, j2;
143
144 for (i=0; i<m; ++i) {
145 y[i] = 0;
146 }
147
148 j2 = jc[0];
149 for (i=0; i<m; ++i) {
150 j1 = j2; j2 = jc[i+1];
151 for (j=j1; j<j2; ++j) {
152 y[i] += alpha * pr[j] * x[ir[j]];
153 }
154 }
155
156 }
157
158
159 /* compute y := alpha*A*x */
160
161 void mat_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m)
162 {
163
164 mwIndex i, j, j_n, k, kend;
165
166 for (i=0; i<n; ++i) {
167 y[i] = 0;
168 }
169
170 kend = jc[1];
171 if (kend==0) { /* x is empty */
172 return;
173 }
174
175 for (k=0; k<kend; ++k) {
176 j = ir[k];
177 j_n = j*n;
178 for (i=0; i<n; ++i) {
179 y[i] += alpha * A[i+j_n] * pr[k];
180 }
181 }
182
183 }
184
185
186 /* compute y := alpha*A'*x */
187
188 void matT_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m)
189 {
190
191 mwIndex i, j, j_n, k, kend;
192
193 for (i=0; i<m; ++i) {
194 y[i] = 0;
195 }
196
197 kend = jc[1];
198 if (kend==0) { /* x is empty */
199 return;
200 }
201
202 for (j=0; j<m; ++j) {
203 j_n = j*n;
204 for (k=0; k<kend; ++k) {
205 i = ir[k];
206 y[j] += alpha * A[i+j_n] * pr[k];
207 }
208 }
209
210 }
211
212
213 /* compute y := alpha*A*x */
214
215 void mat_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m)
216 {
217
218 mwIndex i, j, k, kend, j1, j2;
219
220 for (i=0; i<n; ++i) {
221 y[i] = 0;
222 }
223
224 kend = jcx[1];
225 if (kend==0) { /* x is empty */
226 return;
227 }
228
229 for (k=0; k<kend; ++k) {
230 i = irx[k];
231 j1 = jc[i]; j2 = jc[i+1];
232 for (j=j1; j<j2; ++j) {
233 y[ir[j]] += alpha * pr[j] * prx[k];
234 }
235 }
236
237 }
238
239
240 /* compute y := alpha*A'*x */
241
242 void matT_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m)
243 {
244
245 mwIndex i, j, k, jend, kend, jadd, kadd, delta;
246
247 for (i=0; i<m; ++i) {
248 y[i] = 0;
249 }
250
251 kend = jcx[1];
252 if (kend==0) { /* x is empty */
253 return;
254 }
255
256 for (i=0; i<m; ++i) {
257 j = jc[i];
258 jend = jc[i+1];
259 k = 0;
260 while (j<jend && k<kend) {
261
262 delta = ir[j] - irx[k];
263
264 if (delta) { /* if indices differ - increment the smaller one */
265 jadd = delta<0;
266 kadd = 1-jadd;
267 j += jadd;
268 k += kadd;
269 }
270
271 else { /* indices are equal - add to result and increment both */
272 y[i] += alpha * pr[j] * prx[k];
273 j++; k++;
274 }
275 }
276 }
277
278 }
279
280
281 /* matrix-matrix multiplication */
282
283 void mat_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
284 {
285 mwIndex i1, i2, i3, iX, iA, i2_n;
286 double b;
287
288 for (i1=0; i1<n*k; i1++) {
289 X[i1] = 0;
290 }
291
292 for (i2=0; i2<m; ++i2) {
293 i2_n = i2*n;
294 iX = 0;
295 for (i3=0; i3<k; ++i3) {
296 iA = i2_n;
297 b = B[i2+i3*m];
298 for (i1=0; i1<n; ++i1) {
299 X[iX++] += A[iA++]*b;
300 }
301 }
302 }
303
304 for (i1=0; i1<n*k; i1++) {
305 X[i1] *= alpha;
306 }
307 }
308
309
310 /* matrix-transpose-matrix multiplication */
311
312 void matT_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
313 {
314 mwIndex i1, i2, i3, iX, iA, i2_n;
315 double *x, sum0, sum1, sum2, sum3;
316
317 for (i2=0; i2<m; ++i2) {
318 for (i3=0; i3<k; ++i3) {
319 sum0 = sum1 = sum2 = sum3 = 0;
320 for (i1=0; i1+4<n; i1+=4) {
321 sum0 += A[i1+0+i2*n]*B[i1+0+i3*n];
322 sum1 += A[i1+1+i2*n]*B[i1+1+i3*n];
323 sum2 += A[i1+2+i2*n]*B[i1+2+i3*n];
324 sum3 += A[i1+3+i2*n]*B[i1+3+i3*n];
325 }
326 X[i2+i3*m] = (sum0+sum1) + (sum2+sum3);
327 while(i1<n) {
328 X[i2+i3*m] += A[i1+i2*n]*B[i1+i3*n];
329 i1++;
330 }
331 }
332 }
333
334 for (i1=0; i1<m*k; i1++) {
335 X[i1] *= alpha;
336 }
337 }
338
339
340 /* tensor-matrix product */
341
342 void tens_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l)
343 {
344 mwIndex i1, i2, i3, i4, i2_n, nml;
345 double b;
346
347 nml = n*m*l;
348 for (i1=0; i1<nml; ++i1) {
349 X[i1] = 0;
350 }
351
352 for (i2=0; i2<m; ++i2) {
353 i2_n = i2*n;
354 for (i3=0; i3<k; ++i3) {
355 for (i4=0; i4<l; ++i4) {
356 b = B[i4+i3*l];
357 for (i1=0; i1<n; ++i1) {
358 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b;
359 }
360 }
361 }
362 }
363
364 for (i1=0; i1<nml; ++i1) {
365 X[i1] *= alpha;
366 }
367 }
368
369
370 /* tensor-matrix-transpose product */
371
372 void tens_matT(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l)
373 {
374 mwIndex i1, i2, i3, i4, i2_n, nml;
375 double b;
376
377 nml = n*m*l;
378 for (i1=0; i1<nml; ++i1) {
379 X[i1] = 0;
380 }
381
382 for (i2=0; i2<m; ++i2) {
383 i2_n = i2*n;
384 for (i4=0; i4<l; ++i4) {
385 for (i3=0; i3<k; ++i3) {
386 b = B[i3+i4*k];
387 for (i1=0; i1<n; ++i1) {
388 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b;
389 }
390 }
391 }
392 }
393
394 for (i1=0; i1<nml; ++i1) {
395 X[i1] *= alpha;
396 }
397 }
398
399
400 /* dot product */
401
402 double dotprod(double a[], double b[], mwSize n)
403 {
404 double sum = 0;
405 mwIndex i;
406 for (i=0; i<n; ++i)
407 sum += a[i]*b[i];
408 return sum;
409 }
410
411
412 /* find maximum of vector */
413
414 mwIndex maxpos(double c[], mwSize m)
415 {
416 mwIndex maxid=0, k;
417 double val, maxval = *c;
418
419 for (k=1; k<m; ++k) {
420 val = c[k];
421 if (val > maxval) {
422 maxval = val;
423 maxid = k;
424 }
425 }
426 return maxid;
427 }
428
429
430 /* solve L*x = b */
431
432 void backsubst_L(double L[], double b[], double x[], mwSize n, mwSize k)
433 {
434 mwIndex i, j;
435 double rhs;
436
437 for (i=0; i<k; ++i) {
438 rhs = b[i];
439 for (j=0; j<i; ++j) {
440 rhs -= L[j*n+i]*x[j];
441 }
442 x[i] = rhs/L[i*n+i];
443 }
444 }
445
446
447 /* solve L'*x = b */
448
449 void backsubst_Lt(double L[], double b[], double x[], mwSize n, mwSize k)
450 {
451 mwIndex i, j;
452 double rhs;
453
454 for (i=k; i>=1; --i) {
455 rhs = b[i-1];
456 for (j=i; j<k; ++j) {
457 rhs -= L[(i-1)*n+j]*x[j];
458 }
459 x[i-1] = rhs/L[(i-1)*n+i-1];
460 }
461 }
462
463
464 /* solve U*x = b */
465
466 void backsubst_U(double U[], double b[], double x[], mwSize n, mwSize k)
467 {
468 mwIndex i, j;
469 double rhs;
470
471 for (i=k; i>=1; --i) {
472 rhs = b[i-1];
473 for (j=i; j<k; ++j) {
474 rhs -= U[j*n+i-1]*x[j];
475 }
476 x[i-1] = rhs/U[(i-1)*n+i-1];
477 }
478 }
479
480
481 /* solve U'*x = b */
482
483 void backsubst_Ut(double U[], double b[], double x[], mwSize n, mwSize k)
484 {
485 mwIndex i, j;
486 double rhs;
487
488 for (i=0; i<k; ++i) {
489 rhs = b[i];
490 for (j=0; j<i; ++j) {
491 rhs -= U[i*n+j]*x[j];
492 }
493 x[i] = rhs/U[i*n+i];
494 }
495 }
496
497
498 /* back substitution solver */
499
500 void backsubst(char ul, double A[], double b[], double x[], mwSize n, mwSize k)
501 {
502 if (tolower(ul) == 'u') {
503 backsubst_U(A, b, x, n, k);
504 }
505 else if (tolower(ul) == 'l') {
506 backsubst_L(A, b, x, n, k);
507 }
508 else {
509 mexErrMsgTxt("Invalid triangular matrix type: must be ''U'' or ''L''");
510 }
511 }
512
513
514 /* solve equation set using cholesky decomposition */
515
516 void cholsolve(char ul, double A[], double b[], double x[], mwSize n, mwSize k)
517 {
518 double *tmp;
519
520 tmp = mxMalloc(k*sizeof(double));
521
522 if (tolower(ul) == 'l') {
523 backsubst_L(A, b, tmp, n, k);
524 backsubst_Lt(A, tmp, x, n, k);
525 }
526 else if (tolower(ul) == 'u') {
527 backsubst_Ut(A, b, tmp, n, k);
528 backsubst_U(A, tmp, x, n, k);
529 }
530 else {
531 mexErrMsgTxt("Invalid triangular matrix type: must be either ''U'' or ''L''");
532 }
533
534 mxFree(tmp);
535 }
536
537
538 /* perform a permutation assignment y := x(ind(1:k)) */
539
540 void vec_assign(double y[], double x[], mwIndex ind[], mwSize k)
541 {
542 mwIndex i;
543
544 for (i=0; i<k; ++i)
545 y[i] = x[ind[i]];
546 }
547
548
549 /* matrix transpose */
550
551 void transpose(double X[], double Y[], mwSize n, mwSize m)
552 {
553 mwIndex i, j, i_m, j_n;
554
555 if (n<m) {
556 for (j=0; j<m; ++j) {
557 j_n = j*n;
558 for (i=0; i<n; ++i) {
559 Y[j+i*m] = X[i+j_n];
560 }
561 }
562 }
563 else {
564 for (i=0; i<n; ++i) {
565 i_m = i*m;
566 for (j=0; j<m; ++j) {
567 Y[j+i_m] = X[i+j*n];
568 }
569 }
570 }
571 }
572
573
574 /* print contents of matrix */
575
576 void printmat(double A[], int n, int m, char* matname)
577 {
578 int i, j;
579 mexPrintf("\n%s = \n\n", matname);
580
581 if (n*m==0) {
582 mexPrintf(" Empty matrix: %d-by-%d\n\n", n, m);
583 return;
584 }
585
586 for (i=0; i<n; ++i) {
587 for (j=0; j<m; ++j)
588 mexPrintf(" %lf", A[j*n+i]);
589 mexPrintf("\n");
590 }
591 mexPrintf("\n");
592 }
593
594
595 /* print contents of sparse matrix */
596
597 void printspmat(mxArray *a, char* matname)
598 {
599 mwIndex *aJc = mxGetJc(a);
600 mwIndex *aIr = mxGetIr(a);
601 double *aPr = mxGetPr(a);
602
603 int i;
604
605 mexPrintf("\n%s = \n\n", matname);
606
607 for (i=0; i<aJc[1]; ++i)
608 printf(" (%d,1) = %lf\n", aIr[i]+1,aPr[i]);
609
610 mexPrintf("\n");
611 }
612
613
614
615 /* matrix multiplication using Winograd's algorithm */
616
617 /*
618 void mat_mat2(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
619 {
620
621 mwIndex i1, i2, i3, iX, iA, i2_n;
622 double b, *AA, *BB;
623
624 AA = mxCalloc(n,sizeof(double));
625 BB = mxCalloc(k,sizeof(double));
626
627 for (i1=0; i1<n*k; i1++) {
628 X[i1] = 0;
629 }
630
631 for (i1=0; i1<n; ++i1) {
632 for (i2=0; i2<m/2; ++i2) {
633 AA[i1] += A[i1+2*i2*n]*A[i1+(2*i2+1)*n];
634 }
635 }
636
637 for (i2=0; i2<k; ++i2) {
638 for (i1=0; i1<m/2; ++i1) {
639 BB[i2] += B[2*i1+i2*m]*B[2*i1+1+i2*m];
640 }
641 }
642
643 for (i2=0; i2<k; ++i2) {
644 for (i3=0; i3<m/2; ++i3) {
645 for (i1=0; i1<n; ++i1) {
646 X[i1+i2*n] += (A[i1+(2*i3)*n]+B[2*i3+1+i2*m])*(A[i1+(2*i3+1)*n]+B[2*i3+i2*m]);
647 }
648 }
649 }
650
651 if (m%2) {
652 for (i2=0; i2<k; ++i2) {
653 for (i1=0; i1<n; ++i1) {
654 X[i1+i2*n] += A[i1+(m-1)*n]*B[m-1+i2*m];
655 }
656 }
657 }
658
659 for (i2=0; i2<k; ++i2) {
660 for (i1=0; i1<n; ++i1) {
661 X[i1+i2*n] -= (AA[i1] + BB[i2]);
662 X[i1+i2*n] *= alpha;
663 }
664 }
665
666 mxFree(AA);
667 mxFree(BB);
668 }
669 */
670
671
672
673