comparison DL/RLS-DLA/private/myblas.c @ 60:ad36f80e2ccf

(none)
author idamnjanovic
date Tue, 15 Mar 2011 12:20:59 +0000
parents
children
comparison
equal deleted inserted replaced
59:23f9dd7b9d78 60:ad36f80e2ccf
1 /**************************************************************************
2 *
3 * File name: myblas.c
4 *
5 * Ron Rubinstein
6 * Computer Science Department
7 * Technion, Haifa 32000 Israel
8 * ronrubin@cs
9 *
10 * Version: 1.1
11 * Last updated: 13.8.2009
12 *
13 *************************************************************************/
14
15
16 #include "myblas.h"
17 #include <ctype.h>
18
19
20 /* find maximum of absolute values */
21
22 mwIndex maxabs(double c[], mwSize m)
23 {
24 mwIndex maxid=0, k;
25 double absval, maxval = SQR(*c); /* use square which is quicker than absolute value */
26
27 for (k=1; k<m; ++k) {
28 absval = SQR(c[k]);
29 if (absval > maxval) {
30 maxval = absval;
31 maxid = k;
32 }
33 }
34 return maxid;
35 }
36
37
38 /* compute y := alpha*x + y */
39
40 void vec_sum(double alpha, double x[], double y[], mwSize n)
41 {
42 mwIndex i;
43
44 for (i=0; i<n; ++i) {
45 y[i] += alpha*x[i];
46 }
47 }
48
49
50 /* compute y := alpha*A*x */
51
52 void mat_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m)
53 {
54 mwIndex i, j, i_n;
55 double *Ax;
56
57 Ax = mxCalloc(n,sizeof(double));
58
59 for (i=0; i<m; ++i) {
60 i_n = i*n;
61 for (j=0; j<n; ++j) {
62 Ax[j] += A[i_n+j] * x[i];
63 }
64 }
65
66 for (j=0; j<n; ++j) {
67 y[j] = alpha*Ax[j];
68 }
69
70 mxFree(Ax);
71 }
72
73
74 /* compute y := alpha*A'*x */
75
76 void matT_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m)
77 {
78 mwIndex i, j, n_i;
79 double sum0, sum1, sum2, sum3;
80
81 for (j=0; j<m; ++j) {
82 y[j] = 0;
83 }
84
85 /* use loop unrolling to accelerate computation */
86
87 for (i=0; i<m; ++i) {
88 n_i = n*i;
89 sum0 = sum1 = sum2 = sum3 = 0;
90 for (j=0; j+4<n; j+=4) {
91 sum0 += A[n_i+j]*x[j];
92 sum1 += A[n_i+j+1]*x[j+1];
93 sum2 += A[n_i+j+2]*x[j+2];
94 sum3 += A[n_i+j+3]*x[j+3];
95 }
96 y[i] += alpha * ((sum0 + sum1) + (sum2 + sum3));
97 while (j<n) {
98 y[i] += alpha*A[n_i+j]*x[j];
99 j++;
100 }
101 }
102 }
103
104
105 /* compute y := alpha*A*x */
106
107 void mat_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m)
108 {
109
110 mwIndex i, j, j1, j2;
111
112 for (i=0; i<n; ++i) {
113 y[i] = 0;
114 }
115
116 j2 = jc[0];
117 for (i=0; i<m; ++i) {
118 j1 = j2; j2 = jc[i+1];
119 for (j=j1; j<j2; ++j) {
120 y[ir[j]] += alpha * pr[j] * x[i];
121 }
122 }
123
124 }
125
126
127 /* compute y := alpha*A'*x */
128
129 void matT_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m)
130 {
131
132 mwIndex i, j, j1, j2;
133
134 for (i=0; i<m; ++i) {
135 y[i] = 0;
136 }
137
138 j2 = jc[0];
139 for (i=0; i<m; ++i) {
140 j1 = j2; j2 = jc[i+1];
141 for (j=j1; j<j2; ++j) {
142 y[i] += alpha * pr[j] * x[ir[j]];
143 }
144 }
145
146 }
147
148
149 /* compute y := alpha*A*x */
150
151 void mat_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m)
152 {
153
154 mwIndex i, j, j_n, k, kend;
155
156 for (i=0; i<n; ++i) {
157 y[i] = 0;
158 }
159
160 kend = jc[1];
161 if (kend==0) { /* x is empty */
162 return;
163 }
164
165 for (k=0; k<kend; ++k) {
166 j = ir[k];
167 j_n = j*n;
168 for (i=0; i<n; ++i) {
169 y[i] += alpha * A[i+j_n] * pr[k];
170 }
171 }
172
173 }
174
175
176 /* compute y := alpha*A'*x */
177
178 void matT_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m)
179 {
180
181 mwIndex i, j, j_n, k, kend;
182
183 for (i=0; i<m; ++i) {
184 y[i] = 0;
185 }
186
187 kend = jc[1];
188 if (kend==0) { /* x is empty */
189 return;
190 }
191
192 for (j=0; j<m; ++j) {
193 j_n = j*n;
194 for (k=0; k<kend; ++k) {
195 i = ir[k];
196 y[j] += alpha * A[i+j_n] * pr[k];
197 }
198 }
199
200 }
201
202
203 /* compute y := alpha*A*x */
204
205 void mat_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m)
206 {
207
208 mwIndex i, j, k, kend, j1, j2;
209
210 for (i=0; i<n; ++i) {
211 y[i] = 0;
212 }
213
214 kend = jcx[1];
215 if (kend==0) { /* x is empty */
216 return;
217 }
218
219 for (k=0; k<kend; ++k) {
220 i = irx[k];
221 j1 = jc[i]; j2 = jc[i+1];
222 for (j=j1; j<j2; ++j) {
223 y[ir[j]] += alpha * pr[j] * prx[k];
224 }
225 }
226
227 }
228
229
230 /* compute y := alpha*A'*x */
231
232 void matT_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m)
233 {
234
235 mwIndex i, j, k, jend, kend, jadd, kadd, delta;
236
237 for (i=0; i<m; ++i) {
238 y[i] = 0;
239 }
240
241 kend = jcx[1];
242 if (kend==0) { /* x is empty */
243 return;
244 }
245
246 for (i=0; i<m; ++i) {
247 j = jc[i];
248 jend = jc[i+1];
249 k = 0;
250 while (j<jend && k<kend) {
251
252 delta = ir[j] - irx[k];
253
254 if (delta) { /* if indices differ - increment the smaller one */
255 jadd = delta<0;
256 kadd = 1-jadd;
257 j += jadd;
258 k += kadd;
259 }
260
261 else { /* indices are equal - add to result and increment both */
262 y[i] += alpha * pr[j] * prx[k];
263 j++; k++;
264 }
265 }
266 }
267
268 }
269
270
271 /* matrix-matrix multiplication */
272
273 void mat_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
274 {
275 mwIndex i1, i2, i3, iX, iA, i2_n;
276 double b;
277
278 for (i1=0; i1<n*k; i1++) {
279 X[i1] = 0;
280 }
281
282 for (i2=0; i2<m; ++i2) {
283 i2_n = i2*n;
284 iX = 0;
285 for (i3=0; i3<k; ++i3) {
286 iA = i2_n;
287 b = B[i2+i3*m];
288 for (i1=0; i1<n; ++i1) {
289 X[iX++] += A[iA++]*b;
290 }
291 }
292 }
293
294 for (i1=0; i1<n*k; i1++) {
295 X[i1] *= alpha;
296 }
297 }
298
299
300 /* matrix-transpose-matrix multiplication */
301
302 void matT_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
303 {
304 mwIndex i1, i2, i3, iX, iA, i2_n;
305 double *x, sum0, sum1, sum2, sum3;
306
307 for (i2=0; i2<m; ++i2) {
308 for (i3=0; i3<k; ++i3) {
309 sum0 = sum1 = sum2 = sum3 = 0;
310 for (i1=0; i1+4<n; i1+=4) {
311 sum0 += A[i1+0+i2*n]*B[i1+0+i3*n];
312 sum1 += A[i1+1+i2*n]*B[i1+1+i3*n];
313 sum2 += A[i1+2+i2*n]*B[i1+2+i3*n];
314 sum3 += A[i1+3+i2*n]*B[i1+3+i3*n];
315 }
316 X[i2+i3*m] = (sum0+sum1) + (sum2+sum3);
317 while(i1<n) {
318 X[i2+i3*m] += A[i1+i2*n]*B[i1+i3*n];
319 i1++;
320 }
321 }
322 }
323
324 for (i1=0; i1<m*k; i1++) {
325 X[i1] *= alpha;
326 }
327 }
328
329
330 /* tensor-matrix product */
331
332 void tens_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l)
333 {
334 mwIndex i1, i2, i3, i4, i2_n, nml;
335 double b;
336
337 nml = n*m*l;
338 for (i1=0; i1<nml; ++i1) {
339 X[i1] = 0;
340 }
341
342 for (i2=0; i2<m; ++i2) {
343 i2_n = i2*n;
344 for (i3=0; i3<k; ++i3) {
345 for (i4=0; i4<l; ++i4) {
346 b = B[i4+i3*l];
347 for (i1=0; i1<n; ++i1) {
348 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b;
349 }
350 }
351 }
352 }
353
354 for (i1=0; i1<nml; ++i1) {
355 X[i1] *= alpha;
356 }
357 }
358
359
360 /* tensor-matrix-transpose product */
361
362 void tens_matT(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l)
363 {
364 mwIndex i1, i2, i3, i4, i2_n, nml;
365 double b;
366
367 nml = n*m*l;
368 for (i1=0; i1<nml; ++i1) {
369 X[i1] = 0;
370 }
371
372 for (i2=0; i2<m; ++i2) {
373 i2_n = i2*n;
374 for (i4=0; i4<l; ++i4) {
375 for (i3=0; i3<k; ++i3) {
376 b = B[i3+i4*k];
377 for (i1=0; i1<n; ++i1) {
378 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b;
379 }
380 }
381 }
382 }
383
384 for (i1=0; i1<nml; ++i1) {
385 X[i1] *= alpha;
386 }
387 }
388
389
390 /* dot product */
391
392 double dotprod(double a[], double b[], mwSize n)
393 {
394 double sum = 0;
395 mwIndex i;
396 for (i=0; i<n; ++i)
397 sum += a[i]*b[i];
398 return sum;
399 }
400
401
402 /* find maximum of vector */
403
404 mwIndex maxpos(double c[], mwSize m)
405 {
406 mwIndex maxid=0, k;
407 double val, maxval = *c;
408
409 for (k=1; k<m; ++k) {
410 val = c[k];
411 if (val > maxval) {
412 maxval = val;
413 maxid = k;
414 }
415 }
416 return maxid;
417 }
418
419
420 /* solve L*x = b */
421
422 void backsubst_L(double L[], double b[], double x[], mwSize n, mwSize k)
423 {
424 mwIndex i, j;
425 double rhs;
426
427 for (i=0; i<k; ++i) {
428 rhs = b[i];
429 for (j=0; j<i; ++j) {
430 rhs -= L[j*n+i]*x[j];
431 }
432 x[i] = rhs/L[i*n+i];
433 }
434 }
435
436
437 /* solve L'*x = b */
438
439 void backsubst_Lt(double L[], double b[], double x[], mwSize n, mwSize k)
440 {
441 mwIndex i, j;
442 double rhs;
443
444 for (i=k; i>=1; --i) {
445 rhs = b[i-1];
446 for (j=i; j<k; ++j) {
447 rhs -= L[(i-1)*n+j]*x[j];
448 }
449 x[i-1] = rhs/L[(i-1)*n+i-1];
450 }
451 }
452
453
454 /* solve U*x = b */
455
456 void backsubst_U(double U[], double b[], double x[], mwSize n, mwSize k)
457 {
458 mwIndex i, j;
459 double rhs;
460
461 for (i=k; i>=1; --i) {
462 rhs = b[i-1];
463 for (j=i; j<k; ++j) {
464 rhs -= U[j*n+i-1]*x[j];
465 }
466 x[i-1] = rhs/U[(i-1)*n+i-1];
467 }
468 }
469
470
471 /* solve U'*x = b */
472
473 void backsubst_Ut(double U[], double b[], double x[], mwSize n, mwSize k)
474 {
475 mwIndex i, j;
476 double rhs;
477
478 for (i=0; i<k; ++i) {
479 rhs = b[i];
480 for (j=0; j<i; ++j) {
481 rhs -= U[i*n+j]*x[j];
482 }
483 x[i] = rhs/U[i*n+i];
484 }
485 }
486
487
488 /* back substitution solver */
489
490 void backsubst(char ul, double A[], double b[], double x[], mwSize n, mwSize k)
491 {
492 if (tolower(ul) == 'u') {
493 backsubst_U(A, b, x, n, k);
494 }
495 else if (tolower(ul) == 'l') {
496 backsubst_L(A, b, x, n, k);
497 }
498 else {
499 mexErrMsgTxt("Invalid triangular matrix type: must be ''U'' or ''L''");
500 }
501 }
502
503
504 /* solve equation set using cholesky decomposition */
505
506 void cholsolve(char ul, double A[], double b[], double x[], mwSize n, mwSize k)
507 {
508 double *tmp;
509
510 tmp = mxMalloc(k*sizeof(double));
511
512 if (tolower(ul) == 'l') {
513 backsubst_L(A, b, tmp, n, k);
514 backsubst_Lt(A, tmp, x, n, k);
515 }
516 else if (tolower(ul) == 'u') {
517 backsubst_Ut(A, b, tmp, n, k);
518 backsubst_U(A, tmp, x, n, k);
519 }
520 else {
521 mexErrMsgTxt("Invalid triangular matrix type: must be either ''U'' or ''L''");
522 }
523
524 mxFree(tmp);
525 }
526
527
528 /* perform a permutation assignment y := x(ind(1:k)) */
529
530 void vec_assign(double y[], double x[], mwIndex ind[], mwSize k)
531 {
532 mwIndex i;
533
534 for (i=0; i<k; ++i)
535 y[i] = x[ind[i]];
536 }
537
538
539 /* matrix transpose */
540
541 void transpose(double X[], double Y[], mwSize n, mwSize m)
542 {
543 mwIndex i, j, i_m, j_n;
544
545 if (n<m) {
546 for (j=0; j<m; ++j) {
547 j_n = j*n;
548 for (i=0; i<n; ++i) {
549 Y[j+i*m] = X[i+j_n];
550 }
551 }
552 }
553 else {
554 for (i=0; i<n; ++i) {
555 i_m = i*m;
556 for (j=0; j<m; ++j) {
557 Y[j+i_m] = X[i+j*n];
558 }
559 }
560 }
561 }
562
563
/*
 * printmat - print a dense matrix through mexPrintf.
 *
 *   A        n rows, m columns, element (r,c) stored at A[c*n + r]
 *   n, m     dimensions
 *   matname  label printed before the values
 *
 * Output is one text line per matrix row.  When n*m is 0 a short
 * "Empty matrix" notice with the dimensions is printed instead.
 */
void printmat(double A[], int n, int m, char* matname)
{
  int row, col;

  mexPrintf("\n%s = \n\n", matname);

  if (n*m != 0) {
    for (row = 0; row < n; row++) {
      for (col = 0; col < m; col++) {
        mexPrintf(" %lf", A[col*n + row]);
      }
      mexPrintf("\n");
    }
    mexPrintf("\n");
    return;
  }

  mexPrintf(" Empty matrix: %d-by-%d\n\n", n, m);
}
583
584
585 /* print contents of sparse matrix */
586
587 void printspmat(mxArray *a, char* matname)
588 {
589 mwIndex *aJc = mxGetJc(a);
590 mwIndex *aIr = mxGetIr(a);
591 double *aPr = mxGetPr(a);
592
593 int i;
594
595 mexPrintf("\n%s = \n\n", matname);
596
597 for (i=0; i<aJc[1]; ++i)
598 printf(" (%d,1) = %lf\n", aIr[i]+1,aPr[i]);
599
600 mexPrintf("\n");
601 }
602
603
604
605 /* matrix multiplication using Winograd's algorithm */
606
607 /*
608 void mat_mat2(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
609 {
610
611 mwIndex i1, i2, i3, iX, iA, i2_n;
612 double b, *AA, *BB;
613
614 AA = mxCalloc(n,sizeof(double));
615 BB = mxCalloc(k,sizeof(double));
616
617 for (i1=0; i1<n*k; i1++) {
618 X[i1] = 0;
619 }
620
621 for (i1=0; i1<n; ++i1) {
622 for (i2=0; i2<m/2; ++i2) {
623 AA[i1] += A[i1+2*i2*n]*A[i1+(2*i2+1)*n];
624 }
625 }
626
627 for (i2=0; i2<k; ++i2) {
628 for (i1=0; i1<m/2; ++i1) {
629 BB[i2] += B[2*i1+i2*m]*B[2*i1+1+i2*m];
630 }
631 }
632
633 for (i2=0; i2<k; ++i2) {
634 for (i3=0; i3<m/2; ++i3) {
635 for (i1=0; i1<n; ++i1) {
636 X[i1+i2*n] += (A[i1+(2*i3)*n]+B[2*i3+1+i2*m])*(A[i1+(2*i3+1)*n]+B[2*i3+i2*m]);
637 }
638 }
639 }
640
641 if (m%2) {
642 for (i2=0; i2<k; ++i2) {
643 for (i1=0; i1<n; ++i1) {
644 X[i1+i2*n] += A[i1+(m-1)*n]*B[m-1+i2*m];
645 }
646 }
647 }
648
649 for (i2=0; i2<k; ++i2) {
650 for (i1=0; i1<n; ++i1) {
651 X[i1+i2*n] -= (AA[i1] + BB[i2]);
652 X[i1+i2*n] *= alpha;
653 }
654 }
655
656 mxFree(AA);
657 mxFree(BB);
658 }
659 */
660
661
662
663