Mercurial > hg > smallbox
comparison util/ksvd utils/ompbox utils/myblas.c @ 137:9207d56c5547 ivand_dev
New ompbox in utils for testing purposes
author | Ivan Damnjanovic lnx <ivan.damnjanovic@eecs.qmul.ac.uk> |
---|---|
date | Thu, 21 Jul 2011 14:07:41 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
136:1334d2302dd9 | 137:9207d56c5547 |
---|---|
1 /************************************************************************** | |
2 * | |
3 * File name: myblas.c | |
4 * | |
5 * Ron Rubinstein | |
6 * Computer Science Department | |
7 * Technion, Haifa 32000 Israel | |
8 * ronrubin@cs | |
9 * | |
10 * Version: 1.1 | |
11 * Last updated: 13.8.2009 | |
12 * | |
13 *************************************************************************/ | |
14 | |
15 | |
16 #include "myblas.h" | |
17 #include <ctype.h> | |
18 | |
19 | |
20 /* find maximum of absolute values */ | |
21 | |
22 mwIndex maxabs(double c[], mwSize m) | |
23 { | |
24 mwIndex maxid=0, k; | |
25 double absval, maxval = SQR(*c); /* use square which is quicker than absolute value */ | |
26 | |
27 for (k=1; k<m; ++k) { | |
28 absval = SQR(c[k]); | |
29 if (absval > maxval) { | |
30 maxval = absval; | |
31 maxid = k; | |
32 } | |
33 } | |
34 return maxid; | |
35 } | |
36 | |
37 | |
38 /* compute y := alpha*x + y */ | |
39 | |
40 void vec_sum(double alpha, double x[], double y[], mwSize n) | |
41 { | |
42 mwIndex i; | |
43 | |
44 for (i=0; i<n; ++i) { | |
45 y[i] += alpha*x[i]; | |
46 } | |
47 } | |
48 | |
49 /* compute y := alpha*x .* y */ | |
50 | |
51 void vec_smult(double alpha, double x[], double y[], mwSize n) | |
52 { | |
53 mwIndex i; | |
54 | |
55 for (i=0; i<n; ++i) { | |
56 y[i] *= alpha*x[i]; | |
57 } | |
58 } | |
59 | |
60 /* compute y := alpha*A*x */ | |
61 | |
62 void mat_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m) | |
63 { | |
64 mwIndex i, j, i_n; | |
65 double *Ax; | |
66 | |
67 Ax = mxCalloc(n,sizeof(double)); | |
68 | |
69 for (i=0; i<m; ++i) { | |
70 i_n = i*n; | |
71 for (j=0; j<n; ++j) { | |
72 Ax[j] += A[i_n+j] * x[i]; | |
73 } | |
74 } | |
75 | |
76 for (j=0; j<n; ++j) { | |
77 y[j] = alpha*Ax[j]; | |
78 } | |
79 | |
80 mxFree(Ax); | |
81 } | |
82 | |
83 | |
84 /* compute y := alpha*A'*x */ | |
85 | |
86 void matT_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m) | |
87 { | |
88 mwIndex i, j, n_i; | |
89 double sum0, sum1, sum2, sum3; | |
90 | |
91 for (j=0; j<m; ++j) { | |
92 y[j] = 0; | |
93 } | |
94 | |
95 /* use loop unrolling to accelerate computation */ | |
96 | |
97 for (i=0; i<m; ++i) { | |
98 n_i = n*i; | |
99 sum0 = sum1 = sum2 = sum3 = 0; | |
100 for (j=0; j+4<n; j+=4) { | |
101 sum0 += A[n_i+j]*x[j]; | |
102 sum1 += A[n_i+j+1]*x[j+1]; | |
103 sum2 += A[n_i+j+2]*x[j+2]; | |
104 sum3 += A[n_i+j+3]*x[j+3]; | |
105 } | |
106 y[i] += alpha * ((sum0 + sum1) + (sum2 + sum3)); | |
107 while (j<n) { | |
108 y[i] += alpha*A[n_i+j]*x[j]; | |
109 j++; | |
110 } | |
111 } | |
112 } | |
113 | |
114 | |
115 /* compute y := alpha*A*x */ | |
116 | |
117 void mat_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m) | |
118 { | |
119 | |
120 mwIndex i, j, j1, j2; | |
121 | |
122 for (i=0; i<n; ++i) { | |
123 y[i] = 0; | |
124 } | |
125 | |
126 j2 = jc[0]; | |
127 for (i=0; i<m; ++i) { | |
128 j1 = j2; j2 = jc[i+1]; | |
129 for (j=j1; j<j2; ++j) { | |
130 y[ir[j]] += alpha * pr[j] * x[i]; | |
131 } | |
132 } | |
133 | |
134 } | |
135 | |
136 | |
137 /* compute y := alpha*A'*x */ | |
138 | |
139 void matT_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m) | |
140 { | |
141 | |
142 mwIndex i, j, j1, j2; | |
143 | |
144 for (i=0; i<m; ++i) { | |
145 y[i] = 0; | |
146 } | |
147 | |
148 j2 = jc[0]; | |
149 for (i=0; i<m; ++i) { | |
150 j1 = j2; j2 = jc[i+1]; | |
151 for (j=j1; j<j2; ++j) { | |
152 y[i] += alpha * pr[j] * x[ir[j]]; | |
153 } | |
154 } | |
155 | |
156 } | |
157 | |
158 | |
159 /* compute y := alpha*A*x */ | |
160 | |
161 void mat_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m) | |
162 { | |
163 | |
164 mwIndex i, j, j_n, k, kend; | |
165 | |
166 for (i=0; i<n; ++i) { | |
167 y[i] = 0; | |
168 } | |
169 | |
170 kend = jc[1]; | |
171 if (kend==0) { /* x is empty */ | |
172 return; | |
173 } | |
174 | |
175 for (k=0; k<kend; ++k) { | |
176 j = ir[k]; | |
177 j_n = j*n; | |
178 for (i=0; i<n; ++i) { | |
179 y[i] += alpha * A[i+j_n] * pr[k]; | |
180 } | |
181 } | |
182 | |
183 } | |
184 | |
185 | |
186 /* compute y := alpha*A'*x */ | |
187 | |
188 void matT_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m) | |
189 { | |
190 | |
191 mwIndex i, j, j_n, k, kend; | |
192 | |
193 for (i=0; i<m; ++i) { | |
194 y[i] = 0; | |
195 } | |
196 | |
197 kend = jc[1]; | |
198 if (kend==0) { /* x is empty */ | |
199 return; | |
200 } | |
201 | |
202 for (j=0; j<m; ++j) { | |
203 j_n = j*n; | |
204 for (k=0; k<kend; ++k) { | |
205 i = ir[k]; | |
206 y[j] += alpha * A[i+j_n] * pr[k]; | |
207 } | |
208 } | |
209 | |
210 } | |
211 | |
212 | |
213 /* compute y := alpha*A*x */ | |
214 | |
215 void mat_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m) | |
216 { | |
217 | |
218 mwIndex i, j, k, kend, j1, j2; | |
219 | |
220 for (i=0; i<n; ++i) { | |
221 y[i] = 0; | |
222 } | |
223 | |
224 kend = jcx[1]; | |
225 if (kend==0) { /* x is empty */ | |
226 return; | |
227 } | |
228 | |
229 for (k=0; k<kend; ++k) { | |
230 i = irx[k]; | |
231 j1 = jc[i]; j2 = jc[i+1]; | |
232 for (j=j1; j<j2; ++j) { | |
233 y[ir[j]] += alpha * pr[j] * prx[k]; | |
234 } | |
235 } | |
236 | |
237 } | |
238 | |
239 | |
240 /* compute y := alpha*A'*x */ | |
241 | |
242 void matT_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m) | |
243 { | |
244 | |
245 mwIndex i, j, k, jend, kend, jadd, kadd, delta; | |
246 | |
247 for (i=0; i<m; ++i) { | |
248 y[i] = 0; | |
249 } | |
250 | |
251 kend = jcx[1]; | |
252 if (kend==0) { /* x is empty */ | |
253 return; | |
254 } | |
255 | |
256 for (i=0; i<m; ++i) { | |
257 j = jc[i]; | |
258 jend = jc[i+1]; | |
259 k = 0; | |
260 while (j<jend && k<kend) { | |
261 | |
262 delta = ir[j] - irx[k]; | |
263 | |
264 if (delta) { /* if indices differ - increment the smaller one */ | |
265 jadd = delta<0; | |
266 kadd = 1-jadd; | |
267 j += jadd; | |
268 k += kadd; | |
269 } | |
270 | |
271 else { /* indices are equal - add to result and increment both */ | |
272 y[i] += alpha * pr[j] * prx[k]; | |
273 j++; k++; | |
274 } | |
275 } | |
276 } | |
277 | |
278 } | |
279 | |
280 | |
281 /* matrix-matrix multiplication */ | |
282 | |
283 void mat_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k) | |
284 { | |
285 mwIndex i1, i2, i3, iX, iA, i2_n; | |
286 double b; | |
287 | |
288 for (i1=0; i1<n*k; i1++) { | |
289 X[i1] = 0; | |
290 } | |
291 | |
292 for (i2=0; i2<m; ++i2) { | |
293 i2_n = i2*n; | |
294 iX = 0; | |
295 for (i3=0; i3<k; ++i3) { | |
296 iA = i2_n; | |
297 b = B[i2+i3*m]; | |
298 for (i1=0; i1<n; ++i1) { | |
299 X[iX++] += A[iA++]*b; | |
300 } | |
301 } | |
302 } | |
303 | |
304 for (i1=0; i1<n*k; i1++) { | |
305 X[i1] *= alpha; | |
306 } | |
307 } | |
308 | |
309 | |
310 /* matrix-transpose-matrix multiplication */ | |
311 | |
312 void matT_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k) | |
313 { | |
314 mwIndex i1, i2, i3, iX, iA, i2_n; | |
315 double *x, sum0, sum1, sum2, sum3; | |
316 | |
317 for (i2=0; i2<m; ++i2) { | |
318 for (i3=0; i3<k; ++i3) { | |
319 sum0 = sum1 = sum2 = sum3 = 0; | |
320 for (i1=0; i1+4<n; i1+=4) { | |
321 sum0 += A[i1+0+i2*n]*B[i1+0+i3*n]; | |
322 sum1 += A[i1+1+i2*n]*B[i1+1+i3*n]; | |
323 sum2 += A[i1+2+i2*n]*B[i1+2+i3*n]; | |
324 sum3 += A[i1+3+i2*n]*B[i1+3+i3*n]; | |
325 } | |
326 X[i2+i3*m] = (sum0+sum1) + (sum2+sum3); | |
327 while(i1<n) { | |
328 X[i2+i3*m] += A[i1+i2*n]*B[i1+i3*n]; | |
329 i1++; | |
330 } | |
331 } | |
332 } | |
333 | |
334 for (i1=0; i1<m*k; i1++) { | |
335 X[i1] *= alpha; | |
336 } | |
337 } | |
338 | |
339 | |
340 /* tensor-matrix product */ | |
341 | |
342 void tens_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l) | |
343 { | |
344 mwIndex i1, i2, i3, i4, i2_n, nml; | |
345 double b; | |
346 | |
347 nml = n*m*l; | |
348 for (i1=0; i1<nml; ++i1) { | |
349 X[i1] = 0; | |
350 } | |
351 | |
352 for (i2=0; i2<m; ++i2) { | |
353 i2_n = i2*n; | |
354 for (i3=0; i3<k; ++i3) { | |
355 for (i4=0; i4<l; ++i4) { | |
356 b = B[i4+i3*l]; | |
357 for (i1=0; i1<n; ++i1) { | |
358 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b; | |
359 } | |
360 } | |
361 } | |
362 } | |
363 | |
364 for (i1=0; i1<nml; ++i1) { | |
365 X[i1] *= alpha; | |
366 } | |
367 } | |
368 | |
369 | |
370 /* tensor-matrix-transpose product */ | |
371 | |
372 void tens_matT(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l) | |
373 { | |
374 mwIndex i1, i2, i3, i4, i2_n, nml; | |
375 double b; | |
376 | |
377 nml = n*m*l; | |
378 for (i1=0; i1<nml; ++i1) { | |
379 X[i1] = 0; | |
380 } | |
381 | |
382 for (i2=0; i2<m; ++i2) { | |
383 i2_n = i2*n; | |
384 for (i4=0; i4<l; ++i4) { | |
385 for (i3=0; i3<k; ++i3) { | |
386 b = B[i3+i4*k]; | |
387 for (i1=0; i1<n; ++i1) { | |
388 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b; | |
389 } | |
390 } | |
391 } | |
392 } | |
393 | |
394 for (i1=0; i1<nml; ++i1) { | |
395 X[i1] *= alpha; | |
396 } | |
397 } | |
398 | |
399 | |
400 /* dot product */ | |
401 | |
402 double dotprod(double a[], double b[], mwSize n) | |
403 { | |
404 double sum = 0; | |
405 mwIndex i; | |
406 for (i=0; i<n; ++i) | |
407 sum += a[i]*b[i]; | |
408 return sum; | |
409 } | |
410 | |
411 | |
412 /* find maximum of vector */ | |
413 | |
414 mwIndex maxpos(double c[], mwSize m) | |
415 { | |
416 mwIndex maxid=0, k; | |
417 double val, maxval = *c; | |
418 | |
419 for (k=1; k<m; ++k) { | |
420 val = c[k]; | |
421 if (val > maxval) { | |
422 maxval = val; | |
423 maxid = k; | |
424 } | |
425 } | |
426 return maxid; | |
427 } | |
428 | |
429 | |
430 /* solve L*x = b */ | |
431 | |
432 void backsubst_L(double L[], double b[], double x[], mwSize n, mwSize k) | |
433 { | |
434 mwIndex i, j; | |
435 double rhs; | |
436 | |
437 for (i=0; i<k; ++i) { | |
438 rhs = b[i]; | |
439 for (j=0; j<i; ++j) { | |
440 rhs -= L[j*n+i]*x[j]; | |
441 } | |
442 x[i] = rhs/L[i*n+i]; | |
443 } | |
444 } | |
445 | |
446 | |
447 /* solve L'*x = b */ | |
448 | |
449 void backsubst_Lt(double L[], double b[], double x[], mwSize n, mwSize k) | |
450 { | |
451 mwIndex i, j; | |
452 double rhs; | |
453 | |
454 for (i=k; i>=1; --i) { | |
455 rhs = b[i-1]; | |
456 for (j=i; j<k; ++j) { | |
457 rhs -= L[(i-1)*n+j]*x[j]; | |
458 } | |
459 x[i-1] = rhs/L[(i-1)*n+i-1]; | |
460 } | |
461 } | |
462 | |
463 | |
464 /* solve U*x = b */ | |
465 | |
466 void backsubst_U(double U[], double b[], double x[], mwSize n, mwSize k) | |
467 { | |
468 mwIndex i, j; | |
469 double rhs; | |
470 | |
471 for (i=k; i>=1; --i) { | |
472 rhs = b[i-1]; | |
473 for (j=i; j<k; ++j) { | |
474 rhs -= U[j*n+i-1]*x[j]; | |
475 } | |
476 x[i-1] = rhs/U[(i-1)*n+i-1]; | |
477 } | |
478 } | |
479 | |
480 | |
481 /* solve U'*x = b */ | |
482 | |
483 void backsubst_Ut(double U[], double b[], double x[], mwSize n, mwSize k) | |
484 { | |
485 mwIndex i, j; | |
486 double rhs; | |
487 | |
488 for (i=0; i<k; ++i) { | |
489 rhs = b[i]; | |
490 for (j=0; j<i; ++j) { | |
491 rhs -= U[i*n+j]*x[j]; | |
492 } | |
493 x[i] = rhs/U[i*n+i]; | |
494 } | |
495 } | |
496 | |
497 | |
498 /* back substitution solver */ | |
499 | |
500 void backsubst(char ul, double A[], double b[], double x[], mwSize n, mwSize k) | |
501 { | |
502 if (tolower(ul) == 'u') { | |
503 backsubst_U(A, b, x, n, k); | |
504 } | |
505 else if (tolower(ul) == 'l') { | |
506 backsubst_L(A, b, x, n, k); | |
507 } | |
508 else { | |
509 mexErrMsgTxt("Invalid triangular matrix type: must be ''U'' or ''L''"); | |
510 } | |
511 } | |
512 | |
513 | |
514 /* solve equation set using cholesky decomposition */ | |
515 | |
516 void cholsolve(char ul, double A[], double b[], double x[], mwSize n, mwSize k) | |
517 { | |
518 double *tmp; | |
519 | |
520 tmp = mxMalloc(k*sizeof(double)); | |
521 | |
522 if (tolower(ul) == 'l') { | |
523 backsubst_L(A, b, tmp, n, k); | |
524 backsubst_Lt(A, tmp, x, n, k); | |
525 } | |
526 else if (tolower(ul) == 'u') { | |
527 backsubst_Ut(A, b, tmp, n, k); | |
528 backsubst_U(A, tmp, x, n, k); | |
529 } | |
530 else { | |
531 mexErrMsgTxt("Invalid triangular matrix type: must be either ''U'' or ''L''"); | |
532 } | |
533 | |
534 mxFree(tmp); | |
535 } | |
536 | |
537 | |
538 /* perform a permutation assignment y := x(ind(1:k)) */ | |
539 | |
540 void vec_assign(double y[], double x[], mwIndex ind[], mwSize k) | |
541 { | |
542 mwIndex i; | |
543 | |
544 for (i=0; i<k; ++i) | |
545 y[i] = x[ind[i]]; | |
546 } | |
547 | |
548 | |
549 /* matrix transpose */ | |
550 | |
551 void transpose(double X[], double Y[], mwSize n, mwSize m) | |
552 { | |
553 mwIndex i, j, i_m, j_n; | |
554 | |
555 if (n<m) { | |
556 for (j=0; j<m; ++j) { | |
557 j_n = j*n; | |
558 for (i=0; i<n; ++i) { | |
559 Y[j+i*m] = X[i+j_n]; | |
560 } | |
561 } | |
562 } | |
563 else { | |
564 for (i=0; i<n; ++i) { | |
565 i_m = i*m; | |
566 for (j=0; j<m; ++j) { | |
567 Y[j+i_m] = X[i+j*n]; | |
568 } | |
569 } | |
570 } | |
571 } | |
572 | |
573 | |
574 /* print contents of matrix */ | |
575 | |
576 void printmat(double A[], int n, int m, char* matname) | |
577 { | |
578 int i, j; | |
579 mexPrintf("\n%s = \n\n", matname); | |
580 | |
581 if (n*m==0) { | |
582 mexPrintf(" Empty matrix: %d-by-%d\n\n", n, m); | |
583 return; | |
584 } | |
585 | |
586 for (i=0; i<n; ++i) { | |
587 for (j=0; j<m; ++j) | |
588 mexPrintf(" %lf", A[j*n+i]); | |
589 mexPrintf("\n"); | |
590 } | |
591 mexPrintf("\n"); | |
592 } | |
593 | |
594 | |
595 /* print contents of sparse matrix */ | |
596 | |
597 void printspmat(mxArray *a, char* matname) | |
598 { | |
599 mwIndex *aJc = mxGetJc(a); | |
600 mwIndex *aIr = mxGetIr(a); | |
601 double *aPr = mxGetPr(a); | |
602 | |
603 int i; | |
604 | |
605 mexPrintf("\n%s = \n\n", matname); | |
606 | |
607 for (i=0; i<aJc[1]; ++i) | |
608 printf(" (%d,1) = %lf\n", aIr[i]+1,aPr[i]); | |
609 | |
610 mexPrintf("\n"); | |
611 } | |
612 | |
613 | |
614 | |
615 /* matrix multiplication using Winograd's algorithm */ | |
616 | |
617 /* | |
618 void mat_mat2(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k) | |
619 { | |
620 | |
621 mwIndex i1, i2, i3, iX, iA, i2_n; | |
622 double b, *AA, *BB; | |
623 | |
624 AA = mxCalloc(n,sizeof(double)); | |
625 BB = mxCalloc(k,sizeof(double)); | |
626 | |
627 for (i1=0; i1<n*k; i1++) { | |
628 X[i1] = 0; | |
629 } | |
630 | |
631 for (i1=0; i1<n; ++i1) { | |
632 for (i2=0; i2<m/2; ++i2) { | |
633 AA[i1] += A[i1+2*i2*n]*A[i1+(2*i2+1)*n]; | |
634 } | |
635 } | |
636 | |
637 for (i2=0; i2<k; ++i2) { | |
638 for (i1=0; i1<m/2; ++i1) { | |
639 BB[i2] += B[2*i1+i2*m]*B[2*i1+1+i2*m]; | |
640 } | |
641 } | |
642 | |
643 for (i2=0; i2<k; ++i2) { | |
644 for (i3=0; i3<m/2; ++i3) { | |
645 for (i1=0; i1<n; ++i1) { | |
646 X[i1+i2*n] += (A[i1+(2*i3)*n]+B[2*i3+1+i2*m])*(A[i1+(2*i3+1)*n]+B[2*i3+i2*m]); | |
647 } | |
648 } | |
649 } | |
650 | |
651 if (m%2) { | |
652 for (i2=0; i2<k; ++i2) { | |
653 for (i1=0; i1<n; ++i1) { | |
654 X[i1+i2*n] += A[i1+(m-1)*n]*B[m-1+i2*m]; | |
655 } | |
656 } | |
657 } | |
658 | |
659 for (i2=0; i2<k; ++i2) { | |
660 for (i1=0; i1<n; ++i1) { | |
661 X[i1+i2*n] -= (AA[i1] + BB[i2]); | |
662 X[i1+i2*n] *= alpha; | |
663 } | |
664 } | |
665 | |
666 mxFree(AA); | |
667 mxFree(BB); | |
668 } | |
669 */ | |
670 | |
671 | |
672 | |
673 |