Mercurial > hg > smallbox
comparison DL/RLS-DLA/private/myblas.c @ 60:ad36f80e2ccf
(none)
author | idamnjanovic |
---|---|
date | Tue, 15 Mar 2011 12:20:59 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
59:23f9dd7b9d78 | 60:ad36f80e2ccf |
---|---|
1 /************************************************************************** | |
2 * | |
3 * File name: myblas.c | |
4 * | |
5 * Ron Rubinstein | |
6 * Computer Science Department | |
7 * Technion, Haifa 32000 Israel | |
8 * ronrubin@cs | |
9 * | |
10 * Version: 1.1 | |
11 * Last updated: 13.8.2009 | |
12 * | |
13 *************************************************************************/ | |
14 | |
15 | |
16 #include "myblas.h" | |
17 #include <ctype.h> | |
18 | |
19 | |
20 /* find maximum of absolute values */ | |
21 | |
22 mwIndex maxabs(double c[], mwSize m) | |
23 { | |
24 mwIndex maxid=0, k; | |
25 double absval, maxval = SQR(*c); /* use square which is quicker than absolute value */ | |
26 | |
27 for (k=1; k<m; ++k) { | |
28 absval = SQR(c[k]); | |
29 if (absval > maxval) { | |
30 maxval = absval; | |
31 maxid = k; | |
32 } | |
33 } | |
34 return maxid; | |
35 } | |
36 | |
37 | |
38 /* compute y := alpha*x + y */ | |
39 | |
40 void vec_sum(double alpha, double x[], double y[], mwSize n) | |
41 { | |
42 mwIndex i; | |
43 | |
44 for (i=0; i<n; ++i) { | |
45 y[i] += alpha*x[i]; | |
46 } | |
47 } | |
48 | |
49 | |
50 /* compute y := alpha*A*x */ | |
51 | |
52 void mat_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m) | |
53 { | |
54 mwIndex i, j, i_n; | |
55 double *Ax; | |
56 | |
57 Ax = mxCalloc(n,sizeof(double)); | |
58 | |
59 for (i=0; i<m; ++i) { | |
60 i_n = i*n; | |
61 for (j=0; j<n; ++j) { | |
62 Ax[j] += A[i_n+j] * x[i]; | |
63 } | |
64 } | |
65 | |
66 for (j=0; j<n; ++j) { | |
67 y[j] = alpha*Ax[j]; | |
68 } | |
69 | |
70 mxFree(Ax); | |
71 } | |
72 | |
73 | |
74 /* compute y := alpha*A'*x */ | |
75 | |
76 void matT_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m) | |
77 { | |
78 mwIndex i, j, n_i; | |
79 double sum0, sum1, sum2, sum3; | |
80 | |
81 for (j=0; j<m; ++j) { | |
82 y[j] = 0; | |
83 } | |
84 | |
85 /* use loop unrolling to accelerate computation */ | |
86 | |
87 for (i=0; i<m; ++i) { | |
88 n_i = n*i; | |
89 sum0 = sum1 = sum2 = sum3 = 0; | |
90 for (j=0; j+4<n; j+=4) { | |
91 sum0 += A[n_i+j]*x[j]; | |
92 sum1 += A[n_i+j+1]*x[j+1]; | |
93 sum2 += A[n_i+j+2]*x[j+2]; | |
94 sum3 += A[n_i+j+3]*x[j+3]; | |
95 } | |
96 y[i] += alpha * ((sum0 + sum1) + (sum2 + sum3)); | |
97 while (j<n) { | |
98 y[i] += alpha*A[n_i+j]*x[j]; | |
99 j++; | |
100 } | |
101 } | |
102 } | |
103 | |
104 | |
105 /* compute y := alpha*A*x */ | |
106 | |
107 void mat_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m) | |
108 { | |
109 | |
110 mwIndex i, j, j1, j2; | |
111 | |
112 for (i=0; i<n; ++i) { | |
113 y[i] = 0; | |
114 } | |
115 | |
116 j2 = jc[0]; | |
117 for (i=0; i<m; ++i) { | |
118 j1 = j2; j2 = jc[i+1]; | |
119 for (j=j1; j<j2; ++j) { | |
120 y[ir[j]] += alpha * pr[j] * x[i]; | |
121 } | |
122 } | |
123 | |
124 } | |
125 | |
126 | |
127 /* compute y := alpha*A'*x */ | |
128 | |
129 void matT_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m) | |
130 { | |
131 | |
132 mwIndex i, j, j1, j2; | |
133 | |
134 for (i=0; i<m; ++i) { | |
135 y[i] = 0; | |
136 } | |
137 | |
138 j2 = jc[0]; | |
139 for (i=0; i<m; ++i) { | |
140 j1 = j2; j2 = jc[i+1]; | |
141 for (j=j1; j<j2; ++j) { | |
142 y[i] += alpha * pr[j] * x[ir[j]]; | |
143 } | |
144 } | |
145 | |
146 } | |
147 | |
148 | |
149 /* compute y := alpha*A*x */ | |
150 | |
151 void mat_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m) | |
152 { | |
153 | |
154 mwIndex i, j, j_n, k, kend; | |
155 | |
156 for (i=0; i<n; ++i) { | |
157 y[i] = 0; | |
158 } | |
159 | |
160 kend = jc[1]; | |
161 if (kend==0) { /* x is empty */ | |
162 return; | |
163 } | |
164 | |
165 for (k=0; k<kend; ++k) { | |
166 j = ir[k]; | |
167 j_n = j*n; | |
168 for (i=0; i<n; ++i) { | |
169 y[i] += alpha * A[i+j_n] * pr[k]; | |
170 } | |
171 } | |
172 | |
173 } | |
174 | |
175 | |
176 /* compute y := alpha*A'*x */ | |
177 | |
178 void matT_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m) | |
179 { | |
180 | |
181 mwIndex i, j, j_n, k, kend; | |
182 | |
183 for (i=0; i<m; ++i) { | |
184 y[i] = 0; | |
185 } | |
186 | |
187 kend = jc[1]; | |
188 if (kend==0) { /* x is empty */ | |
189 return; | |
190 } | |
191 | |
192 for (j=0; j<m; ++j) { | |
193 j_n = j*n; | |
194 for (k=0; k<kend; ++k) { | |
195 i = ir[k]; | |
196 y[j] += alpha * A[i+j_n] * pr[k]; | |
197 } | |
198 } | |
199 | |
200 } | |
201 | |
202 | |
203 /* compute y := alpha*A*x */ | |
204 | |
205 void mat_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m) | |
206 { | |
207 | |
208 mwIndex i, j, k, kend, j1, j2; | |
209 | |
210 for (i=0; i<n; ++i) { | |
211 y[i] = 0; | |
212 } | |
213 | |
214 kend = jcx[1]; | |
215 if (kend==0) { /* x is empty */ | |
216 return; | |
217 } | |
218 | |
219 for (k=0; k<kend; ++k) { | |
220 i = irx[k]; | |
221 j1 = jc[i]; j2 = jc[i+1]; | |
222 for (j=j1; j<j2; ++j) { | |
223 y[ir[j]] += alpha * pr[j] * prx[k]; | |
224 } | |
225 } | |
226 | |
227 } | |
228 | |
229 | |
230 /* compute y := alpha*A'*x */ | |
231 | |
232 void matT_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m) | |
233 { | |
234 | |
235 mwIndex i, j, k, jend, kend, jadd, kadd, delta; | |
236 | |
237 for (i=0; i<m; ++i) { | |
238 y[i] = 0; | |
239 } | |
240 | |
241 kend = jcx[1]; | |
242 if (kend==0) { /* x is empty */ | |
243 return; | |
244 } | |
245 | |
246 for (i=0; i<m; ++i) { | |
247 j = jc[i]; | |
248 jend = jc[i+1]; | |
249 k = 0; | |
250 while (j<jend && k<kend) { | |
251 | |
252 delta = ir[j] - irx[k]; | |
253 | |
254 if (delta) { /* if indices differ - increment the smaller one */ | |
255 jadd = delta<0; | |
256 kadd = 1-jadd; | |
257 j += jadd; | |
258 k += kadd; | |
259 } | |
260 | |
261 else { /* indices are equal - add to result and increment both */ | |
262 y[i] += alpha * pr[j] * prx[k]; | |
263 j++; k++; | |
264 } | |
265 } | |
266 } | |
267 | |
268 } | |
269 | |
270 | |
271 /* matrix-matrix multiplication */ | |
272 | |
273 void mat_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k) | |
274 { | |
275 mwIndex i1, i2, i3, iX, iA, i2_n; | |
276 double b; | |
277 | |
278 for (i1=0; i1<n*k; i1++) { | |
279 X[i1] = 0; | |
280 } | |
281 | |
282 for (i2=0; i2<m; ++i2) { | |
283 i2_n = i2*n; | |
284 iX = 0; | |
285 for (i3=0; i3<k; ++i3) { | |
286 iA = i2_n; | |
287 b = B[i2+i3*m]; | |
288 for (i1=0; i1<n; ++i1) { | |
289 X[iX++] += A[iA++]*b; | |
290 } | |
291 } | |
292 } | |
293 | |
294 for (i1=0; i1<n*k; i1++) { | |
295 X[i1] *= alpha; | |
296 } | |
297 } | |
298 | |
299 | |
300 /* matrix-transpose-matrix multiplication */ | |
301 | |
302 void matT_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k) | |
303 { | |
304 mwIndex i1, i2, i3, iX, iA, i2_n; | |
305 double *x, sum0, sum1, sum2, sum3; | |
306 | |
307 for (i2=0; i2<m; ++i2) { | |
308 for (i3=0; i3<k; ++i3) { | |
309 sum0 = sum1 = sum2 = sum3 = 0; | |
310 for (i1=0; i1+4<n; i1+=4) { | |
311 sum0 += A[i1+0+i2*n]*B[i1+0+i3*n]; | |
312 sum1 += A[i1+1+i2*n]*B[i1+1+i3*n]; | |
313 sum2 += A[i1+2+i2*n]*B[i1+2+i3*n]; | |
314 sum3 += A[i1+3+i2*n]*B[i1+3+i3*n]; | |
315 } | |
316 X[i2+i3*m] = (sum0+sum1) + (sum2+sum3); | |
317 while(i1<n) { | |
318 X[i2+i3*m] += A[i1+i2*n]*B[i1+i3*n]; | |
319 i1++; | |
320 } | |
321 } | |
322 } | |
323 | |
324 for (i1=0; i1<m*k; i1++) { | |
325 X[i1] *= alpha; | |
326 } | |
327 } | |
328 | |
329 | |
330 /* tensor-matrix product */ | |
331 | |
332 void tens_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l) | |
333 { | |
334 mwIndex i1, i2, i3, i4, i2_n, nml; | |
335 double b; | |
336 | |
337 nml = n*m*l; | |
338 for (i1=0; i1<nml; ++i1) { | |
339 X[i1] = 0; | |
340 } | |
341 | |
342 for (i2=0; i2<m; ++i2) { | |
343 i2_n = i2*n; | |
344 for (i3=0; i3<k; ++i3) { | |
345 for (i4=0; i4<l; ++i4) { | |
346 b = B[i4+i3*l]; | |
347 for (i1=0; i1<n; ++i1) { | |
348 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b; | |
349 } | |
350 } | |
351 } | |
352 } | |
353 | |
354 for (i1=0; i1<nml; ++i1) { | |
355 X[i1] *= alpha; | |
356 } | |
357 } | |
358 | |
359 | |
360 /* tensor-matrix-transpose product */ | |
361 | |
362 void tens_matT(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l) | |
363 { | |
364 mwIndex i1, i2, i3, i4, i2_n, nml; | |
365 double b; | |
366 | |
367 nml = n*m*l; | |
368 for (i1=0; i1<nml; ++i1) { | |
369 X[i1] = 0; | |
370 } | |
371 | |
372 for (i2=0; i2<m; ++i2) { | |
373 i2_n = i2*n; | |
374 for (i4=0; i4<l; ++i4) { | |
375 for (i3=0; i3<k; ++i3) { | |
376 b = B[i3+i4*k]; | |
377 for (i1=0; i1<n; ++i1) { | |
378 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b; | |
379 } | |
380 } | |
381 } | |
382 } | |
383 | |
384 for (i1=0; i1<nml; ++i1) { | |
385 X[i1] *= alpha; | |
386 } | |
387 } | |
388 | |
389 | |
390 /* dot product */ | |
391 | |
392 double dotprod(double a[], double b[], mwSize n) | |
393 { | |
394 double sum = 0; | |
395 mwIndex i; | |
396 for (i=0; i<n; ++i) | |
397 sum += a[i]*b[i]; | |
398 return sum; | |
399 } | |
400 | |
401 | |
402 /* find maximum of vector */ | |
403 | |
404 mwIndex maxpos(double c[], mwSize m) | |
405 { | |
406 mwIndex maxid=0, k; | |
407 double val, maxval = *c; | |
408 | |
409 for (k=1; k<m; ++k) { | |
410 val = c[k]; | |
411 if (val > maxval) { | |
412 maxval = val; | |
413 maxid = k; | |
414 } | |
415 } | |
416 return maxid; | |
417 } | |
418 | |
419 | |
420 /* solve L*x = b */ | |
421 | |
422 void backsubst_L(double L[], double b[], double x[], mwSize n, mwSize k) | |
423 { | |
424 mwIndex i, j; | |
425 double rhs; | |
426 | |
427 for (i=0; i<k; ++i) { | |
428 rhs = b[i]; | |
429 for (j=0; j<i; ++j) { | |
430 rhs -= L[j*n+i]*x[j]; | |
431 } | |
432 x[i] = rhs/L[i*n+i]; | |
433 } | |
434 } | |
435 | |
436 | |
437 /* solve L'*x = b */ | |
438 | |
439 void backsubst_Lt(double L[], double b[], double x[], mwSize n, mwSize k) | |
440 { | |
441 mwIndex i, j; | |
442 double rhs; | |
443 | |
444 for (i=k; i>=1; --i) { | |
445 rhs = b[i-1]; | |
446 for (j=i; j<k; ++j) { | |
447 rhs -= L[(i-1)*n+j]*x[j]; | |
448 } | |
449 x[i-1] = rhs/L[(i-1)*n+i-1]; | |
450 } | |
451 } | |
452 | |
453 | |
454 /* solve U*x = b */ | |
455 | |
456 void backsubst_U(double U[], double b[], double x[], mwSize n, mwSize k) | |
457 { | |
458 mwIndex i, j; | |
459 double rhs; | |
460 | |
461 for (i=k; i>=1; --i) { | |
462 rhs = b[i-1]; | |
463 for (j=i; j<k; ++j) { | |
464 rhs -= U[j*n+i-1]*x[j]; | |
465 } | |
466 x[i-1] = rhs/U[(i-1)*n+i-1]; | |
467 } | |
468 } | |
469 | |
470 | |
471 /* solve U'*x = b */ | |
472 | |
473 void backsubst_Ut(double U[], double b[], double x[], mwSize n, mwSize k) | |
474 { | |
475 mwIndex i, j; | |
476 double rhs; | |
477 | |
478 for (i=0; i<k; ++i) { | |
479 rhs = b[i]; | |
480 for (j=0; j<i; ++j) { | |
481 rhs -= U[i*n+j]*x[j]; | |
482 } | |
483 x[i] = rhs/U[i*n+i]; | |
484 } | |
485 } | |
486 | |
487 | |
488 /* back substitution solver */ | |
489 | |
490 void backsubst(char ul, double A[], double b[], double x[], mwSize n, mwSize k) | |
491 { | |
492 if (tolower(ul) == 'u') { | |
493 backsubst_U(A, b, x, n, k); | |
494 } | |
495 else if (tolower(ul) == 'l') { | |
496 backsubst_L(A, b, x, n, k); | |
497 } | |
498 else { | |
499 mexErrMsgTxt("Invalid triangular matrix type: must be ''U'' or ''L''"); | |
500 } | |
501 } | |
502 | |
503 | |
504 /* solve equation set using cholesky decomposition */ | |
505 | |
506 void cholsolve(char ul, double A[], double b[], double x[], mwSize n, mwSize k) | |
507 { | |
508 double *tmp; | |
509 | |
510 tmp = mxMalloc(k*sizeof(double)); | |
511 | |
512 if (tolower(ul) == 'l') { | |
513 backsubst_L(A, b, tmp, n, k); | |
514 backsubst_Lt(A, tmp, x, n, k); | |
515 } | |
516 else if (tolower(ul) == 'u') { | |
517 backsubst_Ut(A, b, tmp, n, k); | |
518 backsubst_U(A, tmp, x, n, k); | |
519 } | |
520 else { | |
521 mexErrMsgTxt("Invalid triangular matrix type: must be either ''U'' or ''L''"); | |
522 } | |
523 | |
524 mxFree(tmp); | |
525 } | |
526 | |
527 | |
528 /* perform a permutation assignment y := x(ind(1:k)) */ | |
529 | |
530 void vec_assign(double y[], double x[], mwIndex ind[], mwSize k) | |
531 { | |
532 mwIndex i; | |
533 | |
534 for (i=0; i<k; ++i) | |
535 y[i] = x[ind[i]]; | |
536 } | |
537 | |
538 | |
539 /* matrix transpose */ | |
540 | |
541 void transpose(double X[], double Y[], mwSize n, mwSize m) | |
542 { | |
543 mwIndex i, j, i_m, j_n; | |
544 | |
545 if (n<m) { | |
546 for (j=0; j<m; ++j) { | |
547 j_n = j*n; | |
548 for (i=0; i<n; ++i) { | |
549 Y[j+i*m] = X[i+j_n]; | |
550 } | |
551 } | |
552 } | |
553 else { | |
554 for (i=0; i<n; ++i) { | |
555 i_m = i*m; | |
556 for (j=0; j<m; ++j) { | |
557 Y[j+i_m] = X[i+j*n]; | |
558 } | |
559 } | |
560 } | |
561 } | |
562 | |
563 | |
564 /* print contents of matrix */ | |
565 | |
/*
 * printmat - print a dense matrix through mexPrintf.
 *
 *   A        matrix entries; entry (r,c) is read from A[c*n + r]
 *   n, m     number of rows and columns
 *   matname  label printed before the contents
 *
 * Prints the label, then one text line per row. When n*m is zero only an
 * "Empty matrix" notice with the two dimensions is printed.
 */
void printmat(double A[], int n, int m, char* matname)
{
  int row, col;

  mexPrintf("\n%s = \n\n", matname);

  if (n*m != 0) {
    for (row = 0; row < n; ++row) {
      for (col = 0; col < m; ++col) {
        mexPrintf(" %lf", A[col*n+row]);
      }
      mexPrintf("\n");
    }
    mexPrintf("\n");
  }
  else {
    mexPrintf(" Empty matrix: %d-by-%d\n\n", n, m);
  }
}
583 | |
584 | |
585 /* print contents of sparse matrix */ | |
586 | |
587 void printspmat(mxArray *a, char* matname) | |
588 { | |
589 mwIndex *aJc = mxGetJc(a); | |
590 mwIndex *aIr = mxGetIr(a); | |
591 double *aPr = mxGetPr(a); | |
592 | |
593 int i; | |
594 | |
595 mexPrintf("\n%s = \n\n", matname); | |
596 | |
597 for (i=0; i<aJc[1]; ++i) | |
598 printf(" (%d,1) = %lf\n", aIr[i]+1,aPr[i]); | |
599 | |
600 mexPrintf("\n"); | |
601 } | |
602 | |
603 | |
604 | |
605 /* matrix multiplication using Winograd's algorithm */ | |
606 | |
607 /* | |
608 void mat_mat2(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k) | |
609 { | |
610 | |
611 mwIndex i1, i2, i3, iX, iA, i2_n; | |
612 double b, *AA, *BB; | |
613 | |
614 AA = mxCalloc(n,sizeof(double)); | |
615 BB = mxCalloc(k,sizeof(double)); | |
616 | |
617 for (i1=0; i1<n*k; i1++) { | |
618 X[i1] = 0; | |
619 } | |
620 | |
621 for (i1=0; i1<n; ++i1) { | |
622 for (i2=0; i2<m/2; ++i2) { | |
623 AA[i1] += A[i1+2*i2*n]*A[i1+(2*i2+1)*n]; | |
624 } | |
625 } | |
626 | |
627 for (i2=0; i2<k; ++i2) { | |
628 for (i1=0; i1<m/2; ++i1) { | |
629 BB[i2] += B[2*i1+i2*m]*B[2*i1+1+i2*m]; | |
630 } | |
631 } | |
632 | |
633 for (i2=0; i2<k; ++i2) { | |
634 for (i3=0; i3<m/2; ++i3) { | |
635 for (i1=0; i1<n; ++i1) { | |
636 X[i1+i2*n] += (A[i1+(2*i3)*n]+B[2*i3+1+i2*m])*(A[i1+(2*i3+1)*n]+B[2*i3+i2*m]); | |
637 } | |
638 } | |
639 } | |
640 | |
641 if (m%2) { | |
642 for (i2=0; i2<k; ++i2) { | |
643 for (i1=0; i1<n; ++i1) { | |
644 X[i1+i2*n] += A[i1+(m-1)*n]*B[m-1+i2*m]; | |
645 } | |
646 } | |
647 } | |
648 | |
649 for (i2=0; i2<k; ++i2) { | |
650 for (i1=0; i1<n; ++i1) { | |
651 X[i1+i2*n] -= (AA[i1] + BB[i2]); | |
652 X[i1+i2*n] *= alpha; | |
653 } | |
654 } | |
655 | |
656 mxFree(AA); | |
657 mxFree(BB); | |
658 } | |
659 */ | |
660 | |
661 | |
662 | |
663 |