ivan@140
|
1 /**************************************************************************
|
ivan@140
|
2 *
|
ivan@140
|
3 * File name: myblas.c
|
ivan@140
|
4 *
|
ivan@140
|
5 * Ron Rubinstein
|
ivan@140
|
6 * Computer Science Department
|
ivan@140
|
7 * Technion, Haifa 32000 Israel
|
ivan@140
|
8 * ronrubin@cs
|
ivan@140
|
9 *
|
ivan@140
|
10 * Version: 1.1
|
ivan@140
|
11 * Last updated: 13.8.2009
|
ivan@140
|
12 *
|
ivan@140
|
13 *************************************************************************/
|
ivan@140
|
14
|
ivan@140
|
15
|
ivan@140
|
16 #include "myblas.h"
|
ivan@140
|
17 #include <ctype.h>
|
ivan@140
|
18
|
ivan@140
|
19
|
ivan@140
|
20 /* find maximum of absolute values */
|
ivan@140
|
21
|
ivan@140
|
22 mwIndex maxabs(double c[], mwSize m)
|
ivan@140
|
23 {
|
ivan@140
|
24 mwIndex maxid=0, k;
|
ivan@140
|
25 double absval, maxval = SQR(*c); /* use square which is quicker than absolute value */
|
ivan@140
|
26
|
ivan@140
|
27 for (k=1; k<m; ++k) {
|
ivan@140
|
28 absval = SQR(c[k]);
|
ivan@140
|
29 if (absval > maxval) {
|
ivan@140
|
30 maxval = absval;
|
ivan@140
|
31 maxid = k;
|
ivan@140
|
32 }
|
ivan@140
|
33 }
|
ivan@140
|
34 return maxid;
|
ivan@140
|
35 }
|
ivan@140
|
36
|
ivan@140
|
37
|
ivan@140
|
38 /* compute y := alpha*x + y */
|
ivan@140
|
39
|
ivan@140
|
40 void vec_sum(double alpha, double x[], double y[], mwSize n)
|
ivan@140
|
41 {
|
ivan@140
|
42 mwIndex i;
|
ivan@140
|
43
|
ivan@140
|
44 for (i=0; i<n; ++i) {
|
ivan@140
|
45 y[i] += alpha*x[i];
|
ivan@140
|
46 }
|
ivan@140
|
47 }
|
ivan@140
|
48
|
ivan@140
|
49 /* compute y := alpha*x .* y */
|
ivan@140
|
50
|
ivan@140
|
51 void vec_smult(double alpha, double x[], double y[], mwSize n)
|
ivan@140
|
52 {
|
ivan@140
|
53 mwIndex i;
|
ivan@140
|
54
|
ivan@140
|
55 for (i=0; i<n; ++i) {
|
ivan@140
|
56 y[i] *= alpha*x[i];
|
ivan@140
|
57 }
|
ivan@140
|
58 }
|
ivan@140
|
59
|
ivan@140
|
60 /* compute y := alpha*A*x */
|
ivan@140
|
61
|
ivan@140
|
62 void mat_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m)
|
ivan@140
|
63 {
|
ivan@140
|
64 mwIndex i, j, i_n;
|
ivan@140
|
65 double *Ax;
|
ivan@140
|
66
|
ivan@140
|
67 Ax = mxCalloc(n,sizeof(double));
|
ivan@140
|
68
|
ivan@140
|
69 for (i=0; i<m; ++i) {
|
ivan@140
|
70 i_n = i*n;
|
ivan@140
|
71 for (j=0; j<n; ++j) {
|
ivan@140
|
72 Ax[j] += A[i_n+j] * x[i];
|
ivan@140
|
73 }
|
ivan@140
|
74 }
|
ivan@140
|
75
|
ivan@140
|
76 for (j=0; j<n; ++j) {
|
ivan@140
|
77 y[j] = alpha*Ax[j];
|
ivan@140
|
78 }
|
ivan@140
|
79
|
ivan@140
|
80 mxFree(Ax);
|
ivan@140
|
81 }
|
ivan@140
|
82
|
ivan@140
|
83
|
ivan@140
|
84 /* compute y := alpha*A'*x */
|
ivan@140
|
85
|
ivan@140
|
86 void matT_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m)
|
ivan@140
|
87 {
|
ivan@140
|
88 mwIndex i, j, n_i;
|
ivan@140
|
89 double sum0, sum1, sum2, sum3;
|
ivan@140
|
90
|
ivan@140
|
91 for (j=0; j<m; ++j) {
|
ivan@140
|
92 y[j] = 0;
|
ivan@140
|
93 }
|
ivan@140
|
94
|
ivan@140
|
95 /* use loop unrolling to accelerate computation */
|
ivan@140
|
96
|
ivan@140
|
97 for (i=0; i<m; ++i) {
|
ivan@140
|
98 n_i = n*i;
|
ivan@140
|
99 sum0 = sum1 = sum2 = sum3 = 0;
|
ivan@140
|
100 for (j=0; j+4<n; j+=4) {
|
ivan@140
|
101 sum0 += A[n_i+j]*x[j];
|
ivan@140
|
102 sum1 += A[n_i+j+1]*x[j+1];
|
ivan@140
|
103 sum2 += A[n_i+j+2]*x[j+2];
|
ivan@140
|
104 sum3 += A[n_i+j+3]*x[j+3];
|
ivan@140
|
105 }
|
ivan@140
|
106 y[i] += alpha * ((sum0 + sum1) + (sum2 + sum3));
|
ivan@140
|
107 while (j<n) {
|
ivan@140
|
108 y[i] += alpha*A[n_i+j]*x[j];
|
ivan@140
|
109 j++;
|
ivan@140
|
110 }
|
ivan@140
|
111 }
|
ivan@140
|
112 }
|
ivan@140
|
113
|
ivan@140
|
114
|
ivan@140
|
115 /* compute y := alpha*A*x */
|
ivan@140
|
116
|
ivan@140
|
117 void mat_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m)
|
ivan@140
|
118 {
|
ivan@140
|
119
|
ivan@140
|
120 mwIndex i, j, j1, j2;
|
ivan@140
|
121
|
ivan@140
|
122 for (i=0; i<n; ++i) {
|
ivan@140
|
123 y[i] = 0;
|
ivan@140
|
124 }
|
ivan@140
|
125
|
ivan@140
|
126 j2 = jc[0];
|
ivan@140
|
127 for (i=0; i<m; ++i) {
|
ivan@140
|
128 j1 = j2; j2 = jc[i+1];
|
ivan@140
|
129 for (j=j1; j<j2; ++j) {
|
ivan@140
|
130 y[ir[j]] += alpha * pr[j] * x[i];
|
ivan@140
|
131 }
|
ivan@140
|
132 }
|
ivan@140
|
133
|
ivan@140
|
134 }
|
ivan@140
|
135
|
ivan@140
|
136
|
ivan@140
|
137 /* compute y := alpha*A'*x */
|
ivan@140
|
138
|
ivan@140
|
139 void matT_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m)
|
ivan@140
|
140 {
|
ivan@140
|
141
|
ivan@140
|
142 mwIndex i, j, j1, j2;
|
ivan@140
|
143
|
ivan@140
|
144 for (i=0; i<m; ++i) {
|
ivan@140
|
145 y[i] = 0;
|
ivan@140
|
146 }
|
ivan@140
|
147
|
ivan@140
|
148 j2 = jc[0];
|
ivan@140
|
149 for (i=0; i<m; ++i) {
|
ivan@140
|
150 j1 = j2; j2 = jc[i+1];
|
ivan@140
|
151 for (j=j1; j<j2; ++j) {
|
ivan@140
|
152 y[i] += alpha * pr[j] * x[ir[j]];
|
ivan@140
|
153 }
|
ivan@140
|
154 }
|
ivan@140
|
155
|
ivan@140
|
156 }
|
ivan@140
|
157
|
ivan@140
|
158
|
ivan@140
|
159 /* compute y := alpha*A*x */
|
ivan@140
|
160
|
ivan@140
|
161 void mat_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m)
|
ivan@140
|
162 {
|
ivan@140
|
163
|
ivan@140
|
164 mwIndex i, j, j_n, k, kend;
|
ivan@140
|
165
|
ivan@140
|
166 for (i=0; i<n; ++i) {
|
ivan@140
|
167 y[i] = 0;
|
ivan@140
|
168 }
|
ivan@140
|
169
|
ivan@140
|
170 kend = jc[1];
|
ivan@140
|
171 if (kend==0) { /* x is empty */
|
ivan@140
|
172 return;
|
ivan@140
|
173 }
|
ivan@140
|
174
|
ivan@140
|
175 for (k=0; k<kend; ++k) {
|
ivan@140
|
176 j = ir[k];
|
ivan@140
|
177 j_n = j*n;
|
ivan@140
|
178 for (i=0; i<n; ++i) {
|
ivan@140
|
179 y[i] += alpha * A[i+j_n] * pr[k];
|
ivan@140
|
180 }
|
ivan@140
|
181 }
|
ivan@140
|
182
|
ivan@140
|
183 }
|
ivan@140
|
184
|
ivan@140
|
185
|
ivan@140
|
186 /* compute y := alpha*A'*x */
|
ivan@140
|
187
|
ivan@140
|
188 void matT_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m)
|
ivan@140
|
189 {
|
ivan@140
|
190
|
ivan@140
|
191 mwIndex i, j, j_n, k, kend;
|
ivan@140
|
192
|
ivan@140
|
193 for (i=0; i<m; ++i) {
|
ivan@140
|
194 y[i] = 0;
|
ivan@140
|
195 }
|
ivan@140
|
196
|
ivan@140
|
197 kend = jc[1];
|
ivan@140
|
198 if (kend==0) { /* x is empty */
|
ivan@140
|
199 return;
|
ivan@140
|
200 }
|
ivan@140
|
201
|
ivan@140
|
202 for (j=0; j<m; ++j) {
|
ivan@140
|
203 j_n = j*n;
|
ivan@140
|
204 for (k=0; k<kend; ++k) {
|
ivan@140
|
205 i = ir[k];
|
ivan@140
|
206 y[j] += alpha * A[i+j_n] * pr[k];
|
ivan@140
|
207 }
|
ivan@140
|
208 }
|
ivan@140
|
209
|
ivan@140
|
210 }
|
ivan@140
|
211
|
ivan@140
|
212
|
ivan@140
|
213 /* compute y := alpha*A*x */
|
ivan@140
|
214
|
ivan@140
|
215 void mat_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m)
|
ivan@140
|
216 {
|
ivan@140
|
217
|
ivan@140
|
218 mwIndex i, j, k, kend, j1, j2;
|
ivan@140
|
219
|
ivan@140
|
220 for (i=0; i<n; ++i) {
|
ivan@140
|
221 y[i] = 0;
|
ivan@140
|
222 }
|
ivan@140
|
223
|
ivan@140
|
224 kend = jcx[1];
|
ivan@140
|
225 if (kend==0) { /* x is empty */
|
ivan@140
|
226 return;
|
ivan@140
|
227 }
|
ivan@140
|
228
|
ivan@140
|
229 for (k=0; k<kend; ++k) {
|
ivan@140
|
230 i = irx[k];
|
ivan@140
|
231 j1 = jc[i]; j2 = jc[i+1];
|
ivan@140
|
232 for (j=j1; j<j2; ++j) {
|
ivan@140
|
233 y[ir[j]] += alpha * pr[j] * prx[k];
|
ivan@140
|
234 }
|
ivan@140
|
235 }
|
ivan@140
|
236
|
ivan@140
|
237 }
|
ivan@140
|
238
|
ivan@140
|
239
|
ivan@140
|
240 /* compute y := alpha*A'*x */
|
ivan@140
|
241
|
ivan@140
|
242 void matT_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m)
|
ivan@140
|
243 {
|
ivan@140
|
244
|
ivan@140
|
245 mwIndex i, j, k, jend, kend, jadd, kadd, delta;
|
ivan@140
|
246
|
ivan@140
|
247 for (i=0; i<m; ++i) {
|
ivan@140
|
248 y[i] = 0;
|
ivan@140
|
249 }
|
ivan@140
|
250
|
ivan@140
|
251 kend = jcx[1];
|
ivan@140
|
252 if (kend==0) { /* x is empty */
|
ivan@140
|
253 return;
|
ivan@140
|
254 }
|
ivan@140
|
255
|
ivan@140
|
256 for (i=0; i<m; ++i) {
|
ivan@140
|
257 j = jc[i];
|
ivan@140
|
258 jend = jc[i+1];
|
ivan@140
|
259 k = 0;
|
ivan@140
|
260 while (j<jend && k<kend) {
|
ivan@140
|
261
|
ivan@140
|
262 delta = ir[j] - irx[k];
|
ivan@140
|
263
|
ivan@140
|
264 if (delta) { /* if indices differ - increment the smaller one */
|
ivan@140
|
265 jadd = delta<0;
|
ivan@140
|
266 kadd = 1-jadd;
|
ivan@140
|
267 j += jadd;
|
ivan@140
|
268 k += kadd;
|
ivan@140
|
269 }
|
ivan@140
|
270
|
ivan@140
|
271 else { /* indices are equal - add to result and increment both */
|
ivan@140
|
272 y[i] += alpha * pr[j] * prx[k];
|
ivan@140
|
273 j++; k++;
|
ivan@140
|
274 }
|
ivan@140
|
275 }
|
ivan@140
|
276 }
|
ivan@140
|
277
|
ivan@140
|
278 }
|
ivan@140
|
279
|
ivan@140
|
280
|
ivan@140
|
281 /* matrix-matrix multiplication */
|
ivan@140
|
282
|
ivan@140
|
283 void mat_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
|
ivan@140
|
284 {
|
ivan@140
|
285 mwIndex i1, i2, i3, iX, iA, i2_n;
|
ivan@140
|
286 double b;
|
ivan@140
|
287
|
ivan@140
|
288 for (i1=0; i1<n*k; i1++) {
|
ivan@140
|
289 X[i1] = 0;
|
ivan@140
|
290 }
|
ivan@140
|
291
|
ivan@140
|
292 for (i2=0; i2<m; ++i2) {
|
ivan@140
|
293 i2_n = i2*n;
|
ivan@140
|
294 iX = 0;
|
ivan@140
|
295 for (i3=0; i3<k; ++i3) {
|
ivan@140
|
296 iA = i2_n;
|
ivan@140
|
297 b = B[i2+i3*m];
|
ivan@140
|
298 for (i1=0; i1<n; ++i1) {
|
ivan@140
|
299 X[iX++] += A[iA++]*b;
|
ivan@140
|
300 }
|
ivan@140
|
301 }
|
ivan@140
|
302 }
|
ivan@140
|
303
|
ivan@140
|
304 for (i1=0; i1<n*k; i1++) {
|
ivan@140
|
305 X[i1] *= alpha;
|
ivan@140
|
306 }
|
ivan@140
|
307 }
|
ivan@140
|
308
|
ivan@140
|
309
|
ivan@140
|
310 /* matrix-transpose-matrix multiplication */
|
ivan@140
|
311
|
ivan@140
|
312 void matT_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
|
ivan@140
|
313 {
|
ivan@140
|
314 mwIndex i1, i2, i3, iX, iA, i2_n;
|
ivan@140
|
315 double *x, sum0, sum1, sum2, sum3;
|
ivan@140
|
316
|
ivan@140
|
317 for (i2=0; i2<m; ++i2) {
|
ivan@140
|
318 for (i3=0; i3<k; ++i3) {
|
ivan@140
|
319 sum0 = sum1 = sum2 = sum3 = 0;
|
ivan@140
|
320 for (i1=0; i1+4<n; i1+=4) {
|
ivan@140
|
321 sum0 += A[i1+0+i2*n]*B[i1+0+i3*n];
|
ivan@140
|
322 sum1 += A[i1+1+i2*n]*B[i1+1+i3*n];
|
ivan@140
|
323 sum2 += A[i1+2+i2*n]*B[i1+2+i3*n];
|
ivan@140
|
324 sum3 += A[i1+3+i2*n]*B[i1+3+i3*n];
|
ivan@140
|
325 }
|
ivan@140
|
326 X[i2+i3*m] = (sum0+sum1) + (sum2+sum3);
|
ivan@140
|
327 while(i1<n) {
|
ivan@140
|
328 X[i2+i3*m] += A[i1+i2*n]*B[i1+i3*n];
|
ivan@140
|
329 i1++;
|
ivan@140
|
330 }
|
ivan@140
|
331 }
|
ivan@140
|
332 }
|
ivan@140
|
333
|
ivan@140
|
334 for (i1=0; i1<m*k; i1++) {
|
ivan@140
|
335 X[i1] *= alpha;
|
ivan@140
|
336 }
|
ivan@140
|
337 }
|
ivan@140
|
338
|
ivan@140
|
339
|
ivan@140
|
340 /* tensor-matrix product */
|
ivan@140
|
341
|
ivan@140
|
342 void tens_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l)
|
ivan@140
|
343 {
|
ivan@140
|
344 mwIndex i1, i2, i3, i4, i2_n, nml;
|
ivan@140
|
345 double b;
|
ivan@140
|
346
|
ivan@140
|
347 nml = n*m*l;
|
ivan@140
|
348 for (i1=0; i1<nml; ++i1) {
|
ivan@140
|
349 X[i1] = 0;
|
ivan@140
|
350 }
|
ivan@140
|
351
|
ivan@140
|
352 for (i2=0; i2<m; ++i2) {
|
ivan@140
|
353 i2_n = i2*n;
|
ivan@140
|
354 for (i3=0; i3<k; ++i3) {
|
ivan@140
|
355 for (i4=0; i4<l; ++i4) {
|
ivan@140
|
356 b = B[i4+i3*l];
|
ivan@140
|
357 for (i1=0; i1<n; ++i1) {
|
ivan@140
|
358 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b;
|
ivan@140
|
359 }
|
ivan@140
|
360 }
|
ivan@140
|
361 }
|
ivan@140
|
362 }
|
ivan@140
|
363
|
ivan@140
|
364 for (i1=0; i1<nml; ++i1) {
|
ivan@140
|
365 X[i1] *= alpha;
|
ivan@140
|
366 }
|
ivan@140
|
367 }
|
ivan@140
|
368
|
ivan@140
|
369
|
ivan@140
|
370 /* tensor-matrix-transpose product */
|
ivan@140
|
371
|
ivan@140
|
372 void tens_matT(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l)
|
ivan@140
|
373 {
|
ivan@140
|
374 mwIndex i1, i2, i3, i4, i2_n, nml;
|
ivan@140
|
375 double b;
|
ivan@140
|
376
|
ivan@140
|
377 nml = n*m*l;
|
ivan@140
|
378 for (i1=0; i1<nml; ++i1) {
|
ivan@140
|
379 X[i1] = 0;
|
ivan@140
|
380 }
|
ivan@140
|
381
|
ivan@140
|
382 for (i2=0; i2<m; ++i2) {
|
ivan@140
|
383 i2_n = i2*n;
|
ivan@140
|
384 for (i4=0; i4<l; ++i4) {
|
ivan@140
|
385 for (i3=0; i3<k; ++i3) {
|
ivan@140
|
386 b = B[i3+i4*k];
|
ivan@140
|
387 for (i1=0; i1<n; ++i1) {
|
ivan@140
|
388 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b;
|
ivan@140
|
389 }
|
ivan@140
|
390 }
|
ivan@140
|
391 }
|
ivan@140
|
392 }
|
ivan@140
|
393
|
ivan@140
|
394 for (i1=0; i1<nml; ++i1) {
|
ivan@140
|
395 X[i1] *= alpha;
|
ivan@140
|
396 }
|
ivan@140
|
397 }
|
ivan@140
|
398
|
ivan@140
|
399
|
ivan@140
|
400 /* dot product */
|
ivan@140
|
401
|
ivan@140
|
402 double dotprod(double a[], double b[], mwSize n)
|
ivan@140
|
403 {
|
ivan@140
|
404 double sum = 0;
|
ivan@140
|
405 mwIndex i;
|
ivan@140
|
406 for (i=0; i<n; ++i)
|
ivan@140
|
407 sum += a[i]*b[i];
|
ivan@140
|
408 return sum;
|
ivan@140
|
409 }
|
ivan@140
|
410
|
ivan@140
|
411
|
ivan@140
|
412 /* find maximum of vector */
|
ivan@140
|
413
|
ivan@140
|
414 mwIndex maxpos(double c[], mwSize m)
|
ivan@140
|
415 {
|
ivan@140
|
416 mwIndex maxid=0, k;
|
ivan@140
|
417 double val, maxval = *c;
|
ivan@140
|
418
|
ivan@140
|
419 for (k=1; k<m; ++k) {
|
ivan@140
|
420 val = c[k];
|
ivan@140
|
421 if (val > maxval) {
|
ivan@140
|
422 maxval = val;
|
ivan@140
|
423 maxid = k;
|
ivan@140
|
424 }
|
ivan@140
|
425 }
|
ivan@140
|
426 return maxid;
|
ivan@140
|
427 }
|
ivan@140
|
428
|
ivan@140
|
429
|
ivan@140
|
430 /* solve L*x = b */
|
ivan@140
|
431
|
ivan@140
|
432 void backsubst_L(double L[], double b[], double x[], mwSize n, mwSize k)
|
ivan@140
|
433 {
|
ivan@140
|
434 mwIndex i, j;
|
ivan@140
|
435 double rhs;
|
ivan@140
|
436
|
ivan@140
|
437 for (i=0; i<k; ++i) {
|
ivan@140
|
438 rhs = b[i];
|
ivan@140
|
439 for (j=0; j<i; ++j) {
|
ivan@140
|
440 rhs -= L[j*n+i]*x[j];
|
ivan@140
|
441 }
|
ivan@140
|
442 x[i] = rhs/L[i*n+i];
|
ivan@140
|
443 }
|
ivan@140
|
444 }
|
ivan@140
|
445
|
ivan@140
|
446
|
ivan@140
|
447 /* solve L'*x = b */
|
ivan@140
|
448
|
ivan@140
|
449 void backsubst_Lt(double L[], double b[], double x[], mwSize n, mwSize k)
|
ivan@140
|
450 {
|
ivan@140
|
451 mwIndex i, j;
|
ivan@140
|
452 double rhs;
|
ivan@140
|
453
|
ivan@140
|
454 for (i=k; i>=1; --i) {
|
ivan@140
|
455 rhs = b[i-1];
|
ivan@140
|
456 for (j=i; j<k; ++j) {
|
ivan@140
|
457 rhs -= L[(i-1)*n+j]*x[j];
|
ivan@140
|
458 }
|
ivan@140
|
459 x[i-1] = rhs/L[(i-1)*n+i-1];
|
ivan@140
|
460 }
|
ivan@140
|
461 }
|
ivan@140
|
462
|
ivan@140
|
463
|
ivan@140
|
464 /* solve U*x = b */
|
ivan@140
|
465
|
ivan@140
|
466 void backsubst_U(double U[], double b[], double x[], mwSize n, mwSize k)
|
ivan@140
|
467 {
|
ivan@140
|
468 mwIndex i, j;
|
ivan@140
|
469 double rhs;
|
ivan@140
|
470
|
ivan@140
|
471 for (i=k; i>=1; --i) {
|
ivan@140
|
472 rhs = b[i-1];
|
ivan@140
|
473 for (j=i; j<k; ++j) {
|
ivan@140
|
474 rhs -= U[j*n+i-1]*x[j];
|
ivan@140
|
475 }
|
ivan@140
|
476 x[i-1] = rhs/U[(i-1)*n+i-1];
|
ivan@140
|
477 }
|
ivan@140
|
478 }
|
ivan@140
|
479
|
ivan@140
|
480
|
ivan@140
|
481 /* solve U'*x = b */
|
ivan@140
|
482
|
ivan@140
|
483 void backsubst_Ut(double U[], double b[], double x[], mwSize n, mwSize k)
|
ivan@140
|
484 {
|
ivan@140
|
485 mwIndex i, j;
|
ivan@140
|
486 double rhs;
|
ivan@140
|
487
|
ivan@140
|
488 for (i=0; i<k; ++i) {
|
ivan@140
|
489 rhs = b[i];
|
ivan@140
|
490 for (j=0; j<i; ++j) {
|
ivan@140
|
491 rhs -= U[i*n+j]*x[j];
|
ivan@140
|
492 }
|
ivan@140
|
493 x[i] = rhs/U[i*n+i];
|
ivan@140
|
494 }
|
ivan@140
|
495 }
|
ivan@140
|
496
|
ivan@140
|
497
|
ivan@140
|
498 /* back substitution solver */
|
ivan@140
|
499
|
ivan@140
|
500 void backsubst(char ul, double A[], double b[], double x[], mwSize n, mwSize k)
|
ivan@140
|
501 {
|
ivan@140
|
502 if (tolower(ul) == 'u') {
|
ivan@140
|
503 backsubst_U(A, b, x, n, k);
|
ivan@140
|
504 }
|
ivan@140
|
505 else if (tolower(ul) == 'l') {
|
ivan@140
|
506 backsubst_L(A, b, x, n, k);
|
ivan@140
|
507 }
|
ivan@140
|
508 else {
|
ivan@140
|
509 mexErrMsgTxt("Invalid triangular matrix type: must be ''U'' or ''L''");
|
ivan@140
|
510 }
|
ivan@140
|
511 }
|
ivan@140
|
512
|
ivan@140
|
513
|
ivan@140
|
514 /* solve equation set using cholesky decomposition */
|
ivan@140
|
515
|
ivan@140
|
516 void cholsolve(char ul, double A[], double b[], double x[], mwSize n, mwSize k)
|
ivan@140
|
517 {
|
ivan@140
|
518 double *tmp;
|
ivan@140
|
519
|
ivan@140
|
520 tmp = mxMalloc(k*sizeof(double));
|
ivan@140
|
521
|
ivan@140
|
522 if (tolower(ul) == 'l') {
|
ivan@140
|
523 backsubst_L(A, b, tmp, n, k);
|
ivan@140
|
524 backsubst_Lt(A, tmp, x, n, k);
|
ivan@140
|
525 }
|
ivan@140
|
526 else if (tolower(ul) == 'u') {
|
ivan@140
|
527 backsubst_Ut(A, b, tmp, n, k);
|
ivan@140
|
528 backsubst_U(A, tmp, x, n, k);
|
ivan@140
|
529 }
|
ivan@140
|
530 else {
|
ivan@140
|
531 mexErrMsgTxt("Invalid triangular matrix type: must be either ''U'' or ''L''");
|
ivan@140
|
532 }
|
ivan@140
|
533
|
ivan@140
|
534 mxFree(tmp);
|
ivan@140
|
535 }
|
ivan@140
|
536
|
ivan@140
|
537
|
ivan@140
|
538 /* perform a permutation assignment y := x(ind(1:k)) */
|
ivan@140
|
539
|
ivan@140
|
540 void vec_assign(double y[], double x[], mwIndex ind[], mwSize k)
|
ivan@140
|
541 {
|
ivan@140
|
542 mwIndex i;
|
ivan@140
|
543
|
ivan@140
|
544 for (i=0; i<k; ++i)
|
ivan@140
|
545 y[i] = x[ind[i]];
|
ivan@140
|
546 }
|
ivan@140
|
547
|
ivan@140
|
548
|
ivan@140
|
549 /* matrix transpose */
|
ivan@140
|
550
|
ivan@140
|
551 void transpose(double X[], double Y[], mwSize n, mwSize m)
|
ivan@140
|
552 {
|
ivan@140
|
553 mwIndex i, j, i_m, j_n;
|
ivan@140
|
554
|
ivan@140
|
555 if (n<m) {
|
ivan@140
|
556 for (j=0; j<m; ++j) {
|
ivan@140
|
557 j_n = j*n;
|
ivan@140
|
558 for (i=0; i<n; ++i) {
|
ivan@140
|
559 Y[j+i*m] = X[i+j_n];
|
ivan@140
|
560 }
|
ivan@140
|
561 }
|
ivan@140
|
562 }
|
ivan@140
|
563 else {
|
ivan@140
|
564 for (i=0; i<n; ++i) {
|
ivan@140
|
565 i_m = i*m;
|
ivan@140
|
566 for (j=0; j<m; ++j) {
|
ivan@140
|
567 Y[j+i_m] = X[i+j*n];
|
ivan@140
|
568 }
|
ivan@140
|
569 }
|
ivan@140
|
570 }
|
ivan@140
|
571 }
|
ivan@140
|
572
|
ivan@140
|
573
|
ivan@140
|
574 /* print contents of matrix */
|
ivan@140
|
575
|
ivan@140
|
576 void printmat(double A[], int n, int m, char* matname)
|
ivan@140
|
577 {
|
ivan@140
|
578 int i, j;
|
ivan@140
|
579 mexPrintf("\n%s = \n\n", matname);
|
ivan@140
|
580
|
ivan@140
|
581 if (n*m==0) {
|
ivan@140
|
582 mexPrintf(" Empty matrix: %d-by-%d\n\n", n, m);
|
ivan@140
|
583 return;
|
ivan@140
|
584 }
|
ivan@140
|
585
|
ivan@140
|
586 for (i=0; i<n; ++i) {
|
ivan@140
|
587 for (j=0; j<m; ++j)
|
ivan@140
|
588 mexPrintf(" %lf", A[j*n+i]);
|
ivan@140
|
589 mexPrintf("\n");
|
ivan@140
|
590 }
|
ivan@140
|
591 mexPrintf("\n");
|
ivan@140
|
592 }
|
ivan@140
|
593
|
ivan@140
|
594
|
ivan@140
|
595 /* print contents of sparse matrix */
|
ivan@140
|
596
|
ivan@140
|
597 void printspmat(mxArray *a, char* matname)
|
ivan@140
|
598 {
|
ivan@140
|
599 mwIndex *aJc = mxGetJc(a);
|
ivan@140
|
600 mwIndex *aIr = mxGetIr(a);
|
ivan@140
|
601 double *aPr = mxGetPr(a);
|
ivan@140
|
602
|
ivan@140
|
603 int i;
|
ivan@140
|
604
|
ivan@140
|
605 mexPrintf("\n%s = \n\n", matname);
|
ivan@140
|
606
|
ivan@140
|
607 for (i=0; i<aJc[1]; ++i)
|
ivan@140
|
608 printf(" (%d,1) = %lf\n", aIr[i]+1,aPr[i]);
|
ivan@140
|
609
|
ivan@140
|
610 mexPrintf("\n");
|
ivan@140
|
611 }
|
ivan@140
|
612
|
ivan@140
|
613
|
ivan@140
|
614
|
ivan@140
|
615 /* matrix multiplication using Winograd's algorithm */
|
ivan@140
|
616
|
ivan@140
|
617 /*
|
ivan@140
|
618 void mat_mat2(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
|
ivan@140
|
619 {
|
ivan@140
|
620
|
ivan@140
|
621 mwIndex i1, i2, i3, iX, iA, i2_n;
|
ivan@140
|
622 double b, *AA, *BB;
|
ivan@140
|
623
|
ivan@140
|
624 AA = mxCalloc(n,sizeof(double));
|
ivan@140
|
625 BB = mxCalloc(k,sizeof(double));
|
ivan@140
|
626
|
ivan@140
|
627 for (i1=0; i1<n*k; i1++) {
|
ivan@140
|
628 X[i1] = 0;
|
ivan@140
|
629 }
|
ivan@140
|
630
|
ivan@140
|
631 for (i1=0; i1<n; ++i1) {
|
ivan@140
|
632 for (i2=0; i2<m/2; ++i2) {
|
ivan@140
|
633 AA[i1] += A[i1+2*i2*n]*A[i1+(2*i2+1)*n];
|
ivan@140
|
634 }
|
ivan@140
|
635 }
|
ivan@140
|
636
|
ivan@140
|
637 for (i2=0; i2<k; ++i2) {
|
ivan@140
|
638 for (i1=0; i1<m/2; ++i1) {
|
ivan@140
|
639 BB[i2] += B[2*i1+i2*m]*B[2*i1+1+i2*m];
|
ivan@140
|
640 }
|
ivan@140
|
641 }
|
ivan@140
|
642
|
ivan@140
|
643 for (i2=0; i2<k; ++i2) {
|
ivan@140
|
644 for (i3=0; i3<m/2; ++i3) {
|
ivan@140
|
645 for (i1=0; i1<n; ++i1) {
|
ivan@140
|
646 X[i1+i2*n] += (A[i1+(2*i3)*n]+B[2*i3+1+i2*m])*(A[i1+(2*i3+1)*n]+B[2*i3+i2*m]);
|
ivan@140
|
647 }
|
ivan@140
|
648 }
|
ivan@140
|
649 }
|
ivan@140
|
650
|
ivan@140
|
651 if (m%2) {
|
ivan@140
|
652 for (i2=0; i2<k; ++i2) {
|
ivan@140
|
653 for (i1=0; i1<n; ++i1) {
|
ivan@140
|
654 X[i1+i2*n] += A[i1+(m-1)*n]*B[m-1+i2*m];
|
ivan@140
|
655 }
|
ivan@140
|
656 }
|
ivan@140
|
657 }
|
ivan@140
|
658
|
ivan@140
|
659 for (i2=0; i2<k; ++i2) {
|
ivan@140
|
660 for (i1=0; i1<n; ++i1) {
|
ivan@140
|
661 X[i1+i2*n] -= (AA[i1] + BB[i2]);
|
ivan@140
|
662 X[i1+i2*n] *= alpha;
|
ivan@140
|
663 }
|
ivan@140
|
664 }
|
ivan@140
|
665
|
ivan@140
|
666 mxFree(AA);
|
ivan@140
|
667 mxFree(BB);
|
ivan@140
|
668 }
|
ivan@140
|
669 */
|
ivan@140
|
670
|
ivan@140
|
671
|
ivan@140
|
672
|
ivan@140
|
673
|