idamnjanovic@51
|
1 /**************************************************************************
|
idamnjanovic@51
|
2 *
|
idamnjanovic@51
|
3 * File name: myblas.c
|
idamnjanovic@51
|
4 *
|
idamnjanovic@51
|
5 * Ron Rubinstein
|
idamnjanovic@51
|
6 * Computer Science Department
|
idamnjanovic@51
|
7 * Technion, Haifa 32000 Israel
|
idamnjanovic@51
|
8 * ronrubin@cs
|
idamnjanovic@51
|
9 *
|
idamnjanovic@51
|
10 * Version: 1.1
|
idamnjanovic@51
|
11 * Last updated: 13.8.2009
|
idamnjanovic@51
|
12 *
|
idamnjanovic@51
|
13 *************************************************************************/
|
idamnjanovic@51
|
14
|
idamnjanovic@51
|
15
|
idamnjanovic@51
|
16 #include "myblas.h"
|
idamnjanovic@51
|
17 #include <ctype.h>
|
idamnjanovic@51
|
18
|
idamnjanovic@51
|
19
|
idamnjanovic@51
|
20 /* find maximum of absolute values */
|
idamnjanovic@51
|
21
|
idamnjanovic@51
|
22 mwIndex maxabs(double c[], mwSize m)
|
idamnjanovic@51
|
23 {
|
idamnjanovic@51
|
24 mwIndex maxid=0, k;
|
idamnjanovic@51
|
25 double absval, maxval = SQR(*c); /* use square which is quicker than absolute value */
|
idamnjanovic@51
|
26
|
idamnjanovic@51
|
27 for (k=1; k<m; ++k) {
|
idamnjanovic@51
|
28 absval = SQR(c[k]);
|
idamnjanovic@51
|
29 if (absval > maxval) {
|
idamnjanovic@51
|
30 maxval = absval;
|
idamnjanovic@51
|
31 maxid = k;
|
idamnjanovic@51
|
32 }
|
idamnjanovic@51
|
33 }
|
idamnjanovic@51
|
34 return maxid;
|
idamnjanovic@51
|
35 }
|
idamnjanovic@51
|
36
|
idamnjanovic@51
|
37
|
idamnjanovic@51
|
38 /* compute y := alpha*x + y */
|
idamnjanovic@51
|
39
|
idamnjanovic@51
|
40 void vec_sum(double alpha, double x[], double y[], mwSize n)
|
idamnjanovic@51
|
41 {
|
idamnjanovic@51
|
42 mwIndex i;
|
idamnjanovic@51
|
43
|
idamnjanovic@51
|
44 for (i=0; i<n; ++i) {
|
idamnjanovic@51
|
45 y[i] += alpha*x[i];
|
idamnjanovic@51
|
46 }
|
idamnjanovic@51
|
47 }
|
idamnjanovic@51
|
48
|
idamnjanovic@51
|
49
|
idamnjanovic@51
|
50 /* compute y := alpha*A*x */
|
idamnjanovic@51
|
51
|
idamnjanovic@51
|
52 void mat_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m)
|
idamnjanovic@51
|
53 {
|
idamnjanovic@51
|
54 mwIndex i, j, i_n;
|
idamnjanovic@51
|
55 double *Ax;
|
idamnjanovic@51
|
56
|
idamnjanovic@51
|
57 Ax = mxCalloc(n,sizeof(double));
|
idamnjanovic@51
|
58
|
idamnjanovic@51
|
59 for (i=0; i<m; ++i) {
|
idamnjanovic@51
|
60 i_n = i*n;
|
idamnjanovic@51
|
61 for (j=0; j<n; ++j) {
|
idamnjanovic@51
|
62 Ax[j] += A[i_n+j] * x[i];
|
idamnjanovic@51
|
63 }
|
idamnjanovic@51
|
64 }
|
idamnjanovic@51
|
65
|
idamnjanovic@51
|
66 for (j=0; j<n; ++j) {
|
idamnjanovic@51
|
67 y[j] = alpha*Ax[j];
|
idamnjanovic@51
|
68 }
|
idamnjanovic@51
|
69
|
idamnjanovic@51
|
70 mxFree(Ax);
|
idamnjanovic@51
|
71 }
|
idamnjanovic@51
|
72
|
idamnjanovic@51
|
73
|
idamnjanovic@51
|
74 /* compute y := alpha*A'*x */
|
idamnjanovic@51
|
75
|
idamnjanovic@51
|
76 void matT_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m)
|
idamnjanovic@51
|
77 {
|
idamnjanovic@51
|
78 mwIndex i, j, n_i;
|
idamnjanovic@51
|
79 double sum0, sum1, sum2, sum3;
|
idamnjanovic@51
|
80
|
idamnjanovic@51
|
81 for (j=0; j<m; ++j) {
|
idamnjanovic@51
|
82 y[j] = 0;
|
idamnjanovic@51
|
83 }
|
idamnjanovic@51
|
84
|
idamnjanovic@51
|
85 /* use loop unrolling to accelerate computation */
|
idamnjanovic@51
|
86
|
idamnjanovic@51
|
87 for (i=0; i<m; ++i) {
|
idamnjanovic@51
|
88 n_i = n*i;
|
idamnjanovic@51
|
89 sum0 = sum1 = sum2 = sum3 = 0;
|
idamnjanovic@51
|
90 for (j=0; j+4<n; j+=4) {
|
idamnjanovic@51
|
91 sum0 += A[n_i+j]*x[j];
|
idamnjanovic@51
|
92 sum1 += A[n_i+j+1]*x[j+1];
|
idamnjanovic@51
|
93 sum2 += A[n_i+j+2]*x[j+2];
|
idamnjanovic@51
|
94 sum3 += A[n_i+j+3]*x[j+3];
|
idamnjanovic@51
|
95 }
|
idamnjanovic@51
|
96 y[i] += alpha * ((sum0 + sum1) + (sum2 + sum3));
|
idamnjanovic@51
|
97 while (j<n) {
|
idamnjanovic@51
|
98 y[i] += alpha*A[n_i+j]*x[j];
|
idamnjanovic@51
|
99 j++;
|
idamnjanovic@51
|
100 }
|
idamnjanovic@51
|
101 }
|
idamnjanovic@51
|
102 }
|
idamnjanovic@51
|
103
|
idamnjanovic@51
|
104
|
idamnjanovic@51
|
105 /* compute y := alpha*A*x */
|
idamnjanovic@51
|
106
|
idamnjanovic@51
|
107 void mat_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m)
|
idamnjanovic@51
|
108 {
|
idamnjanovic@51
|
109
|
idamnjanovic@51
|
110 mwIndex i, j, j1, j2;
|
idamnjanovic@51
|
111
|
idamnjanovic@51
|
112 for (i=0; i<n; ++i) {
|
idamnjanovic@51
|
113 y[i] = 0;
|
idamnjanovic@51
|
114 }
|
idamnjanovic@51
|
115
|
idamnjanovic@51
|
116 j2 = jc[0];
|
idamnjanovic@51
|
117 for (i=0; i<m; ++i) {
|
idamnjanovic@51
|
118 j1 = j2; j2 = jc[i+1];
|
idamnjanovic@51
|
119 for (j=j1; j<j2; ++j) {
|
idamnjanovic@51
|
120 y[ir[j]] += alpha * pr[j] * x[i];
|
idamnjanovic@51
|
121 }
|
idamnjanovic@51
|
122 }
|
idamnjanovic@51
|
123
|
idamnjanovic@51
|
124 }
|
idamnjanovic@51
|
125
|
idamnjanovic@51
|
126
|
idamnjanovic@51
|
127 /* compute y := alpha*A'*x */
|
idamnjanovic@51
|
128
|
idamnjanovic@51
|
129 void matT_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m)
|
idamnjanovic@51
|
130 {
|
idamnjanovic@51
|
131
|
idamnjanovic@51
|
132 mwIndex i, j, j1, j2;
|
idamnjanovic@51
|
133
|
idamnjanovic@51
|
134 for (i=0; i<m; ++i) {
|
idamnjanovic@51
|
135 y[i] = 0;
|
idamnjanovic@51
|
136 }
|
idamnjanovic@51
|
137
|
idamnjanovic@51
|
138 j2 = jc[0];
|
idamnjanovic@51
|
139 for (i=0; i<m; ++i) {
|
idamnjanovic@51
|
140 j1 = j2; j2 = jc[i+1];
|
idamnjanovic@51
|
141 for (j=j1; j<j2; ++j) {
|
idamnjanovic@51
|
142 y[i] += alpha * pr[j] * x[ir[j]];
|
idamnjanovic@51
|
143 }
|
idamnjanovic@51
|
144 }
|
idamnjanovic@51
|
145
|
idamnjanovic@51
|
146 }
|
idamnjanovic@51
|
147
|
idamnjanovic@51
|
148
|
idamnjanovic@51
|
149 /* compute y := alpha*A*x */
|
idamnjanovic@51
|
150
|
idamnjanovic@51
|
151 void mat_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m)
|
idamnjanovic@51
|
152 {
|
idamnjanovic@51
|
153
|
idamnjanovic@51
|
154 mwIndex i, j, j_n, k, kend;
|
idamnjanovic@51
|
155
|
idamnjanovic@51
|
156 for (i=0; i<n; ++i) {
|
idamnjanovic@51
|
157 y[i] = 0;
|
idamnjanovic@51
|
158 }
|
idamnjanovic@51
|
159
|
idamnjanovic@51
|
160 kend = jc[1];
|
idamnjanovic@51
|
161 if (kend==0) { /* x is empty */
|
idamnjanovic@51
|
162 return;
|
idamnjanovic@51
|
163 }
|
idamnjanovic@51
|
164
|
idamnjanovic@51
|
165 for (k=0; k<kend; ++k) {
|
idamnjanovic@51
|
166 j = ir[k];
|
idamnjanovic@51
|
167 j_n = j*n;
|
idamnjanovic@51
|
168 for (i=0; i<n; ++i) {
|
idamnjanovic@51
|
169 y[i] += alpha * A[i+j_n] * pr[k];
|
idamnjanovic@51
|
170 }
|
idamnjanovic@51
|
171 }
|
idamnjanovic@51
|
172
|
idamnjanovic@51
|
173 }
|
idamnjanovic@51
|
174
|
idamnjanovic@51
|
175
|
idamnjanovic@51
|
176 /* compute y := alpha*A'*x */
|
idamnjanovic@51
|
177
|
idamnjanovic@51
|
178 void matT_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m)
|
idamnjanovic@51
|
179 {
|
idamnjanovic@51
|
180
|
idamnjanovic@51
|
181 mwIndex i, j, j_n, k, kend;
|
idamnjanovic@51
|
182
|
idamnjanovic@51
|
183 for (i=0; i<m; ++i) {
|
idamnjanovic@51
|
184 y[i] = 0;
|
idamnjanovic@51
|
185 }
|
idamnjanovic@51
|
186
|
idamnjanovic@51
|
187 kend = jc[1];
|
idamnjanovic@51
|
188 if (kend==0) { /* x is empty */
|
idamnjanovic@51
|
189 return;
|
idamnjanovic@51
|
190 }
|
idamnjanovic@51
|
191
|
idamnjanovic@51
|
192 for (j=0; j<m; ++j) {
|
idamnjanovic@51
|
193 j_n = j*n;
|
idamnjanovic@51
|
194 for (k=0; k<kend; ++k) {
|
idamnjanovic@51
|
195 i = ir[k];
|
idamnjanovic@51
|
196 y[j] += alpha * A[i+j_n] * pr[k];
|
idamnjanovic@51
|
197 }
|
idamnjanovic@51
|
198 }
|
idamnjanovic@51
|
199
|
idamnjanovic@51
|
200 }
|
idamnjanovic@51
|
201
|
idamnjanovic@51
|
202
|
idamnjanovic@51
|
203 /* compute y := alpha*A*x */
|
idamnjanovic@51
|
204
|
idamnjanovic@51
|
205 void mat_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m)
|
idamnjanovic@51
|
206 {
|
idamnjanovic@51
|
207
|
idamnjanovic@51
|
208 mwIndex i, j, k, kend, j1, j2;
|
idamnjanovic@51
|
209
|
idamnjanovic@51
|
210 for (i=0; i<n; ++i) {
|
idamnjanovic@51
|
211 y[i] = 0;
|
idamnjanovic@51
|
212 }
|
idamnjanovic@51
|
213
|
idamnjanovic@51
|
214 kend = jcx[1];
|
idamnjanovic@51
|
215 if (kend==0) { /* x is empty */
|
idamnjanovic@51
|
216 return;
|
idamnjanovic@51
|
217 }
|
idamnjanovic@51
|
218
|
idamnjanovic@51
|
219 for (k=0; k<kend; ++k) {
|
idamnjanovic@51
|
220 i = irx[k];
|
idamnjanovic@51
|
221 j1 = jc[i]; j2 = jc[i+1];
|
idamnjanovic@51
|
222 for (j=j1; j<j2; ++j) {
|
idamnjanovic@51
|
223 y[ir[j]] += alpha * pr[j] * prx[k];
|
idamnjanovic@51
|
224 }
|
idamnjanovic@51
|
225 }
|
idamnjanovic@51
|
226
|
idamnjanovic@51
|
227 }
|
idamnjanovic@51
|
228
|
idamnjanovic@51
|
229
|
idamnjanovic@51
|
230 /* compute y := alpha*A'*x */
|
idamnjanovic@51
|
231
|
idamnjanovic@51
|
232 void matT_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m)
|
idamnjanovic@51
|
233 {
|
idamnjanovic@51
|
234
|
idamnjanovic@51
|
235 mwIndex i, j, k, jend, kend, jadd, kadd, delta;
|
idamnjanovic@51
|
236
|
idamnjanovic@51
|
237 for (i=0; i<m; ++i) {
|
idamnjanovic@51
|
238 y[i] = 0;
|
idamnjanovic@51
|
239 }
|
idamnjanovic@51
|
240
|
idamnjanovic@51
|
241 kend = jcx[1];
|
idamnjanovic@51
|
242 if (kend==0) { /* x is empty */
|
idamnjanovic@51
|
243 return;
|
idamnjanovic@51
|
244 }
|
idamnjanovic@51
|
245
|
idamnjanovic@51
|
246 for (i=0; i<m; ++i) {
|
idamnjanovic@51
|
247 j = jc[i];
|
idamnjanovic@51
|
248 jend = jc[i+1];
|
idamnjanovic@51
|
249 k = 0;
|
idamnjanovic@51
|
250 while (j<jend && k<kend) {
|
idamnjanovic@51
|
251
|
idamnjanovic@51
|
252 delta = ir[j] - irx[k];
|
idamnjanovic@51
|
253
|
idamnjanovic@51
|
254 if (delta) { /* if indices differ - increment the smaller one */
|
idamnjanovic@51
|
255 jadd = delta<0;
|
idamnjanovic@51
|
256 kadd = 1-jadd;
|
idamnjanovic@51
|
257 j += jadd;
|
idamnjanovic@51
|
258 k += kadd;
|
idamnjanovic@51
|
259 }
|
idamnjanovic@51
|
260
|
idamnjanovic@51
|
261 else { /* indices are equal - add to result and increment both */
|
idamnjanovic@51
|
262 y[i] += alpha * pr[j] * prx[k];
|
idamnjanovic@51
|
263 j++; k++;
|
idamnjanovic@51
|
264 }
|
idamnjanovic@51
|
265 }
|
idamnjanovic@51
|
266 }
|
idamnjanovic@51
|
267
|
idamnjanovic@51
|
268 }
|
idamnjanovic@51
|
269
|
idamnjanovic@51
|
270
|
idamnjanovic@51
|
271 /* matrix-matrix multiplication */
|
idamnjanovic@51
|
272
|
idamnjanovic@51
|
273 void mat_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
|
idamnjanovic@51
|
274 {
|
idamnjanovic@51
|
275 mwIndex i1, i2, i3, iX, iA, i2_n;
|
idamnjanovic@51
|
276 double b;
|
idamnjanovic@51
|
277
|
idamnjanovic@51
|
278 for (i1=0; i1<n*k; i1++) {
|
idamnjanovic@51
|
279 X[i1] = 0;
|
idamnjanovic@51
|
280 }
|
idamnjanovic@51
|
281
|
idamnjanovic@51
|
282 for (i2=0; i2<m; ++i2) {
|
idamnjanovic@51
|
283 i2_n = i2*n;
|
idamnjanovic@51
|
284 iX = 0;
|
idamnjanovic@51
|
285 for (i3=0; i3<k; ++i3) {
|
idamnjanovic@51
|
286 iA = i2_n;
|
idamnjanovic@51
|
287 b = B[i2+i3*m];
|
idamnjanovic@51
|
288 for (i1=0; i1<n; ++i1) {
|
idamnjanovic@51
|
289 X[iX++] += A[iA++]*b;
|
idamnjanovic@51
|
290 }
|
idamnjanovic@51
|
291 }
|
idamnjanovic@51
|
292 }
|
idamnjanovic@51
|
293
|
idamnjanovic@51
|
294 for (i1=0; i1<n*k; i1++) {
|
idamnjanovic@51
|
295 X[i1] *= alpha;
|
idamnjanovic@51
|
296 }
|
idamnjanovic@51
|
297 }
|
idamnjanovic@51
|
298
|
idamnjanovic@51
|
299
|
idamnjanovic@51
|
300 /* matrix-transpose-matrix multiplication */
|
idamnjanovic@51
|
301
|
idamnjanovic@51
|
302 void matT_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
|
idamnjanovic@51
|
303 {
|
idamnjanovic@51
|
304 mwIndex i1, i2, i3, iX, iA, i2_n;
|
idamnjanovic@51
|
305 double *x, sum0, sum1, sum2, sum3;
|
idamnjanovic@51
|
306
|
idamnjanovic@51
|
307 for (i2=0; i2<m; ++i2) {
|
idamnjanovic@51
|
308 for (i3=0; i3<k; ++i3) {
|
idamnjanovic@51
|
309 sum0 = sum1 = sum2 = sum3 = 0;
|
idamnjanovic@51
|
310 for (i1=0; i1+4<n; i1+=4) {
|
idamnjanovic@51
|
311 sum0 += A[i1+0+i2*n]*B[i1+0+i3*n];
|
idamnjanovic@51
|
312 sum1 += A[i1+1+i2*n]*B[i1+1+i3*n];
|
idamnjanovic@51
|
313 sum2 += A[i1+2+i2*n]*B[i1+2+i3*n];
|
idamnjanovic@51
|
314 sum3 += A[i1+3+i2*n]*B[i1+3+i3*n];
|
idamnjanovic@51
|
315 }
|
idamnjanovic@51
|
316 X[i2+i3*m] = (sum0+sum1) + (sum2+sum3);
|
idamnjanovic@51
|
317 while(i1<n) {
|
idamnjanovic@51
|
318 X[i2+i3*m] += A[i1+i2*n]*B[i1+i3*n];
|
idamnjanovic@51
|
319 i1++;
|
idamnjanovic@51
|
320 }
|
idamnjanovic@51
|
321 }
|
idamnjanovic@51
|
322 }
|
idamnjanovic@51
|
323
|
idamnjanovic@51
|
324 for (i1=0; i1<m*k; i1++) {
|
idamnjanovic@51
|
325 X[i1] *= alpha;
|
idamnjanovic@51
|
326 }
|
idamnjanovic@51
|
327 }
|
idamnjanovic@51
|
328
|
idamnjanovic@51
|
329
|
idamnjanovic@51
|
330 /* tensor-matrix product */
|
idamnjanovic@51
|
331
|
idamnjanovic@51
|
332 void tens_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l)
|
idamnjanovic@51
|
333 {
|
idamnjanovic@51
|
334 mwIndex i1, i2, i3, i4, i2_n, nml;
|
idamnjanovic@51
|
335 double b;
|
idamnjanovic@51
|
336
|
idamnjanovic@51
|
337 nml = n*m*l;
|
idamnjanovic@51
|
338 for (i1=0; i1<nml; ++i1) {
|
idamnjanovic@51
|
339 X[i1] = 0;
|
idamnjanovic@51
|
340 }
|
idamnjanovic@51
|
341
|
idamnjanovic@51
|
342 for (i2=0; i2<m; ++i2) {
|
idamnjanovic@51
|
343 i2_n = i2*n;
|
idamnjanovic@51
|
344 for (i3=0; i3<k; ++i3) {
|
idamnjanovic@51
|
345 for (i4=0; i4<l; ++i4) {
|
idamnjanovic@51
|
346 b = B[i4+i3*l];
|
idamnjanovic@51
|
347 for (i1=0; i1<n; ++i1) {
|
idamnjanovic@51
|
348 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b;
|
idamnjanovic@51
|
349 }
|
idamnjanovic@51
|
350 }
|
idamnjanovic@51
|
351 }
|
idamnjanovic@51
|
352 }
|
idamnjanovic@51
|
353
|
idamnjanovic@51
|
354 for (i1=0; i1<nml; ++i1) {
|
idamnjanovic@51
|
355 X[i1] *= alpha;
|
idamnjanovic@51
|
356 }
|
idamnjanovic@51
|
357 }
|
idamnjanovic@51
|
358
|
idamnjanovic@51
|
359
|
idamnjanovic@51
|
360 /* tensor-matrix-transpose product */
|
idamnjanovic@51
|
361
|
idamnjanovic@51
|
362 void tens_matT(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l)
|
idamnjanovic@51
|
363 {
|
idamnjanovic@51
|
364 mwIndex i1, i2, i3, i4, i2_n, nml;
|
idamnjanovic@51
|
365 double b;
|
idamnjanovic@51
|
366
|
idamnjanovic@51
|
367 nml = n*m*l;
|
idamnjanovic@51
|
368 for (i1=0; i1<nml; ++i1) {
|
idamnjanovic@51
|
369 X[i1] = 0;
|
idamnjanovic@51
|
370 }
|
idamnjanovic@51
|
371
|
idamnjanovic@51
|
372 for (i2=0; i2<m; ++i2) {
|
idamnjanovic@51
|
373 i2_n = i2*n;
|
idamnjanovic@51
|
374 for (i4=0; i4<l; ++i4) {
|
idamnjanovic@51
|
375 for (i3=0; i3<k; ++i3) {
|
idamnjanovic@51
|
376 b = B[i3+i4*k];
|
idamnjanovic@51
|
377 for (i1=0; i1<n; ++i1) {
|
idamnjanovic@51
|
378 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b;
|
idamnjanovic@51
|
379 }
|
idamnjanovic@51
|
380 }
|
idamnjanovic@51
|
381 }
|
idamnjanovic@51
|
382 }
|
idamnjanovic@51
|
383
|
idamnjanovic@51
|
384 for (i1=0; i1<nml; ++i1) {
|
idamnjanovic@51
|
385 X[i1] *= alpha;
|
idamnjanovic@51
|
386 }
|
idamnjanovic@51
|
387 }
|
idamnjanovic@51
|
388
|
idamnjanovic@51
|
389
|
idamnjanovic@51
|
390 /* dot product */
|
idamnjanovic@51
|
391
|
idamnjanovic@51
|
392 double dotprod(double a[], double b[], mwSize n)
|
idamnjanovic@51
|
393 {
|
idamnjanovic@51
|
394 double sum = 0;
|
idamnjanovic@51
|
395 mwIndex i;
|
idamnjanovic@51
|
396 for (i=0; i<n; ++i)
|
idamnjanovic@51
|
397 sum += a[i]*b[i];
|
idamnjanovic@51
|
398 return sum;
|
idamnjanovic@51
|
399 }
|
idamnjanovic@51
|
400
|
idamnjanovic@51
|
401
|
idamnjanovic@51
|
402 /* find maximum of vector */
|
idamnjanovic@51
|
403
|
idamnjanovic@51
|
404 mwIndex maxpos(double c[], mwSize m)
|
idamnjanovic@51
|
405 {
|
idamnjanovic@51
|
406 mwIndex maxid=0, k;
|
idamnjanovic@51
|
407 double val, maxval = *c;
|
idamnjanovic@51
|
408
|
idamnjanovic@51
|
409 for (k=1; k<m; ++k) {
|
idamnjanovic@51
|
410 val = c[k];
|
idamnjanovic@51
|
411 if (val > maxval) {
|
idamnjanovic@51
|
412 maxval = val;
|
idamnjanovic@51
|
413 maxid = k;
|
idamnjanovic@51
|
414 }
|
idamnjanovic@51
|
415 }
|
idamnjanovic@51
|
416 return maxid;
|
idamnjanovic@51
|
417 }
|
idamnjanovic@51
|
418
|
idamnjanovic@51
|
419
|
idamnjanovic@51
|
420 /* solve L*x = b */
|
idamnjanovic@51
|
421
|
idamnjanovic@51
|
422 void backsubst_L(double L[], double b[], double x[], mwSize n, mwSize k)
|
idamnjanovic@51
|
423 {
|
idamnjanovic@51
|
424 mwIndex i, j;
|
idamnjanovic@51
|
425 double rhs;
|
idamnjanovic@51
|
426
|
idamnjanovic@51
|
427 for (i=0; i<k; ++i) {
|
idamnjanovic@51
|
428 rhs = b[i];
|
idamnjanovic@51
|
429 for (j=0; j<i; ++j) {
|
idamnjanovic@51
|
430 rhs -= L[j*n+i]*x[j];
|
idamnjanovic@51
|
431 }
|
idamnjanovic@51
|
432 x[i] = rhs/L[i*n+i];
|
idamnjanovic@51
|
433 }
|
idamnjanovic@51
|
434 }
|
idamnjanovic@51
|
435
|
idamnjanovic@51
|
436
|
idamnjanovic@51
|
437 /* solve L'*x = b */
|
idamnjanovic@51
|
438
|
idamnjanovic@51
|
439 void backsubst_Lt(double L[], double b[], double x[], mwSize n, mwSize k)
|
idamnjanovic@51
|
440 {
|
idamnjanovic@51
|
441 mwIndex i, j;
|
idamnjanovic@51
|
442 double rhs;
|
idamnjanovic@51
|
443
|
idamnjanovic@51
|
444 for (i=k; i>=1; --i) {
|
idamnjanovic@51
|
445 rhs = b[i-1];
|
idamnjanovic@51
|
446 for (j=i; j<k; ++j) {
|
idamnjanovic@51
|
447 rhs -= L[(i-1)*n+j]*x[j];
|
idamnjanovic@51
|
448 }
|
idamnjanovic@51
|
449 x[i-1] = rhs/L[(i-1)*n+i-1];
|
idamnjanovic@51
|
450 }
|
idamnjanovic@51
|
451 }
|
idamnjanovic@51
|
452
|
idamnjanovic@51
|
453
|
idamnjanovic@51
|
454 /* solve U*x = b */
|
idamnjanovic@51
|
455
|
idamnjanovic@51
|
456 void backsubst_U(double U[], double b[], double x[], mwSize n, mwSize k)
|
idamnjanovic@51
|
457 {
|
idamnjanovic@51
|
458 mwIndex i, j;
|
idamnjanovic@51
|
459 double rhs;
|
idamnjanovic@51
|
460
|
idamnjanovic@51
|
461 for (i=k; i>=1; --i) {
|
idamnjanovic@51
|
462 rhs = b[i-1];
|
idamnjanovic@51
|
463 for (j=i; j<k; ++j) {
|
idamnjanovic@51
|
464 rhs -= U[j*n+i-1]*x[j];
|
idamnjanovic@51
|
465 }
|
idamnjanovic@51
|
466 x[i-1] = rhs/U[(i-1)*n+i-1];
|
idamnjanovic@51
|
467 }
|
idamnjanovic@51
|
468 }
|
idamnjanovic@51
|
469
|
idamnjanovic@51
|
470
|
idamnjanovic@51
|
471 /* solve U'*x = b */
|
idamnjanovic@51
|
472
|
idamnjanovic@51
|
473 void backsubst_Ut(double U[], double b[], double x[], mwSize n, mwSize k)
|
idamnjanovic@51
|
474 {
|
idamnjanovic@51
|
475 mwIndex i, j;
|
idamnjanovic@51
|
476 double rhs;
|
idamnjanovic@51
|
477
|
idamnjanovic@51
|
478 for (i=0; i<k; ++i) {
|
idamnjanovic@51
|
479 rhs = b[i];
|
idamnjanovic@51
|
480 for (j=0; j<i; ++j) {
|
idamnjanovic@51
|
481 rhs -= U[i*n+j]*x[j];
|
idamnjanovic@51
|
482 }
|
idamnjanovic@51
|
483 x[i] = rhs/U[i*n+i];
|
idamnjanovic@51
|
484 }
|
idamnjanovic@51
|
485 }
|
idamnjanovic@51
|
486
|
idamnjanovic@51
|
487
|
idamnjanovic@51
|
488 /* back substitution solver */
|
idamnjanovic@51
|
489
|
idamnjanovic@51
|
490 void backsubst(char ul, double A[], double b[], double x[], mwSize n, mwSize k)
|
idamnjanovic@51
|
491 {
|
idamnjanovic@51
|
492 if (tolower(ul) == 'u') {
|
idamnjanovic@51
|
493 backsubst_U(A, b, x, n, k);
|
idamnjanovic@51
|
494 }
|
idamnjanovic@51
|
495 else if (tolower(ul) == 'l') {
|
idamnjanovic@51
|
496 backsubst_L(A, b, x, n, k);
|
idamnjanovic@51
|
497 }
|
idamnjanovic@51
|
498 else {
|
idamnjanovic@51
|
499 mexErrMsgTxt("Invalid triangular matrix type: must be ''U'' or ''L''");
|
idamnjanovic@51
|
500 }
|
idamnjanovic@51
|
501 }
|
idamnjanovic@51
|
502
|
idamnjanovic@51
|
503
|
idamnjanovic@51
|
504 /* solve equation set using cholesky decomposition */
|
idamnjanovic@51
|
505
|
idamnjanovic@51
|
506 void cholsolve(char ul, double A[], double b[], double x[], mwSize n, mwSize k)
|
idamnjanovic@51
|
507 {
|
idamnjanovic@51
|
508 double *tmp;
|
idamnjanovic@51
|
509
|
idamnjanovic@51
|
510 tmp = mxMalloc(k*sizeof(double));
|
idamnjanovic@51
|
511
|
idamnjanovic@51
|
512 if (tolower(ul) == 'l') {
|
idamnjanovic@51
|
513 backsubst_L(A, b, tmp, n, k);
|
idamnjanovic@51
|
514 backsubst_Lt(A, tmp, x, n, k);
|
idamnjanovic@51
|
515 }
|
idamnjanovic@51
|
516 else if (tolower(ul) == 'u') {
|
idamnjanovic@51
|
517 backsubst_Ut(A, b, tmp, n, k);
|
idamnjanovic@51
|
518 backsubst_U(A, tmp, x, n, k);
|
idamnjanovic@51
|
519 }
|
idamnjanovic@51
|
520 else {
|
idamnjanovic@51
|
521 mexErrMsgTxt("Invalid triangular matrix type: must be either ''U'' or ''L''");
|
idamnjanovic@51
|
522 }
|
idamnjanovic@51
|
523
|
idamnjanovic@51
|
524 mxFree(tmp);
|
idamnjanovic@51
|
525 }
|
idamnjanovic@51
|
526
|
idamnjanovic@51
|
527
|
idamnjanovic@51
|
528 /* perform a permutation assignment y := x(ind(1:k)) */
|
idamnjanovic@51
|
529
|
idamnjanovic@51
|
530 void vec_assign(double y[], double x[], mwIndex ind[], mwSize k)
|
idamnjanovic@51
|
531 {
|
idamnjanovic@51
|
532 mwIndex i;
|
idamnjanovic@51
|
533
|
idamnjanovic@51
|
534 for (i=0; i<k; ++i)
|
idamnjanovic@51
|
535 y[i] = x[ind[i]];
|
idamnjanovic@51
|
536 }
|
idamnjanovic@51
|
537
|
idamnjanovic@51
|
538
|
idamnjanovic@51
|
539 /* matrix transpose */
|
idamnjanovic@51
|
540
|
idamnjanovic@51
|
541 void transpose(double X[], double Y[], mwSize n, mwSize m)
|
idamnjanovic@51
|
542 {
|
idamnjanovic@51
|
543 mwIndex i, j, i_m, j_n;
|
idamnjanovic@51
|
544
|
idamnjanovic@51
|
545 if (n<m) {
|
idamnjanovic@51
|
546 for (j=0; j<m; ++j) {
|
idamnjanovic@51
|
547 j_n = j*n;
|
idamnjanovic@51
|
548 for (i=0; i<n; ++i) {
|
idamnjanovic@51
|
549 Y[j+i*m] = X[i+j_n];
|
idamnjanovic@51
|
550 }
|
idamnjanovic@51
|
551 }
|
idamnjanovic@51
|
552 }
|
idamnjanovic@51
|
553 else {
|
idamnjanovic@51
|
554 for (i=0; i<n; ++i) {
|
idamnjanovic@51
|
555 i_m = i*m;
|
idamnjanovic@51
|
556 for (j=0; j<m; ++j) {
|
idamnjanovic@51
|
557 Y[j+i_m] = X[i+j*n];
|
idamnjanovic@51
|
558 }
|
idamnjanovic@51
|
559 }
|
idamnjanovic@51
|
560 }
|
idamnjanovic@51
|
561 }
|
idamnjanovic@51
|
562
|
idamnjanovic@51
|
563
|
idamnjanovic@51
|
564 /* print contents of matrix */
|
idamnjanovic@51
|
565
|
idamnjanovic@51
|
566 void printmat(double A[], int n, int m, char* matname)
|
idamnjanovic@51
|
567 {
|
idamnjanovic@51
|
568 int i, j;
|
idamnjanovic@51
|
569 mexPrintf("\n%s = \n\n", matname);
|
idamnjanovic@51
|
570
|
idamnjanovic@51
|
571 if (n*m==0) {
|
idamnjanovic@51
|
572 mexPrintf(" Empty matrix: %d-by-%d\n\n", n, m);
|
idamnjanovic@51
|
573 return;
|
idamnjanovic@51
|
574 }
|
idamnjanovic@51
|
575
|
idamnjanovic@51
|
576 for (i=0; i<n; ++i) {
|
idamnjanovic@51
|
577 for (j=0; j<m; ++j)
|
idamnjanovic@51
|
578 mexPrintf(" %lf", A[j*n+i]);
|
idamnjanovic@51
|
579 mexPrintf("\n");
|
idamnjanovic@51
|
580 }
|
idamnjanovic@51
|
581 mexPrintf("\n");
|
idamnjanovic@51
|
582 }
|
idamnjanovic@51
|
583
|
idamnjanovic@51
|
584
|
idamnjanovic@51
|
585 /* print contents of sparse matrix */
|
idamnjanovic@51
|
586
|
idamnjanovic@51
|
587 void printspmat(mxArray *a, char* matname)
|
idamnjanovic@51
|
588 {
|
idamnjanovic@51
|
589 mwIndex *aJc = mxGetJc(a);
|
idamnjanovic@51
|
590 mwIndex *aIr = mxGetIr(a);
|
idamnjanovic@51
|
591 double *aPr = mxGetPr(a);
|
idamnjanovic@51
|
592
|
idamnjanovic@51
|
593 int i;
|
idamnjanovic@51
|
594
|
idamnjanovic@51
|
595 mexPrintf("\n%s = \n\n", matname);
|
idamnjanovic@51
|
596
|
idamnjanovic@51
|
597 for (i=0; i<aJc[1]; ++i)
|
idamnjanovic@51
|
598 printf(" (%d,1) = %lf\n", aIr[i]+1,aPr[i]);
|
idamnjanovic@51
|
599
|
idamnjanovic@51
|
600 mexPrintf("\n");
|
idamnjanovic@51
|
601 }
|
idamnjanovic@51
|
602
|
idamnjanovic@51
|
603
|
idamnjanovic@51
|
604
|
idamnjanovic@51
|
605 /* matrix multiplication using Winograd's algorithm */
|
idamnjanovic@51
|
606
|
idamnjanovic@51
|
607 /*
|
idamnjanovic@51
|
608 void mat_mat2(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
|
idamnjanovic@51
|
609 {
|
idamnjanovic@51
|
610
|
idamnjanovic@51
|
611 mwIndex i1, i2, i3, iX, iA, i2_n;
|
idamnjanovic@51
|
612 double b, *AA, *BB;
|
idamnjanovic@51
|
613
|
idamnjanovic@51
|
614 AA = mxCalloc(n,sizeof(double));
|
idamnjanovic@51
|
615 BB = mxCalloc(k,sizeof(double));
|
idamnjanovic@51
|
616
|
idamnjanovic@51
|
617 for (i1=0; i1<n*k; i1++) {
|
idamnjanovic@51
|
618 X[i1] = 0;
|
idamnjanovic@51
|
619 }
|
idamnjanovic@51
|
620
|
idamnjanovic@51
|
621 for (i1=0; i1<n; ++i1) {
|
idamnjanovic@51
|
622 for (i2=0; i2<m/2; ++i2) {
|
idamnjanovic@51
|
623 AA[i1] += A[i1+2*i2*n]*A[i1+(2*i2+1)*n];
|
idamnjanovic@51
|
624 }
|
idamnjanovic@51
|
625 }
|
idamnjanovic@51
|
626
|
idamnjanovic@51
|
627 for (i2=0; i2<k; ++i2) {
|
idamnjanovic@51
|
628 for (i1=0; i1<m/2; ++i1) {
|
idamnjanovic@51
|
629 BB[i2] += B[2*i1+i2*m]*B[2*i1+1+i2*m];
|
idamnjanovic@51
|
630 }
|
idamnjanovic@51
|
631 }
|
idamnjanovic@51
|
632
|
idamnjanovic@51
|
633 for (i2=0; i2<k; ++i2) {
|
idamnjanovic@51
|
634 for (i3=0; i3<m/2; ++i3) {
|
idamnjanovic@51
|
635 for (i1=0; i1<n; ++i1) {
|
idamnjanovic@51
|
636 X[i1+i2*n] += (A[i1+(2*i3)*n]+B[2*i3+1+i2*m])*(A[i1+(2*i3+1)*n]+B[2*i3+i2*m]);
|
idamnjanovic@51
|
637 }
|
idamnjanovic@51
|
638 }
|
idamnjanovic@51
|
639 }
|
idamnjanovic@51
|
640
|
idamnjanovic@51
|
641 if (m%2) {
|
idamnjanovic@51
|
642 for (i2=0; i2<k; ++i2) {
|
idamnjanovic@51
|
643 for (i1=0; i1<n; ++i1) {
|
idamnjanovic@51
|
644 X[i1+i2*n] += A[i1+(m-1)*n]*B[m-1+i2*m];
|
idamnjanovic@51
|
645 }
|
idamnjanovic@51
|
646 }
|
idamnjanovic@51
|
647 }
|
idamnjanovic@51
|
648
|
idamnjanovic@51
|
649 for (i2=0; i2<k; ++i2) {
|
idamnjanovic@51
|
650 for (i1=0; i1<n; ++i1) {
|
idamnjanovic@51
|
651 X[i1+i2*n] -= (AA[i1] + BB[i2]);
|
idamnjanovic@51
|
652 X[i1+i2*n] *= alpha;
|
idamnjanovic@51
|
653 }
|
idamnjanovic@51
|
654 }
|
idamnjanovic@51
|
655
|
idamnjanovic@51
|
656 mxFree(AA);
|
idamnjanovic@51
|
657 mxFree(BB);
|
idamnjanovic@51
|
658 }
|
idamnjanovic@51
|
659 */
|
idamnjanovic@51
|
660
|
idamnjanovic@51
|
661
|
idamnjanovic@51
|
662
|
idamnjanovic@51
|
663
|