annotate DL/RLS-DLA/private/myblas.c @ 63:e08af264ef93

(none)
author idamnjanovic
date Tue, 15 Mar 2011 12:23:35 +0000
parents ad36f80e2ccf
children
rev   line source
idamnjanovic@60 1 /**************************************************************************
idamnjanovic@60 2 *
idamnjanovic@60 3 * File name: myblas.c
idamnjanovic@60 4 *
idamnjanovic@60 5 * Ron Rubinstein
idamnjanovic@60 6 * Computer Science Department
idamnjanovic@60 7 * Technion, Haifa 32000 Israel
idamnjanovic@60 8 * ronrubin@cs
idamnjanovic@60 9 *
idamnjanovic@60 10 * Version: 1.1
idamnjanovic@60 11 * Last updated: 13.8.2009
idamnjanovic@60 12 *
idamnjanovic@60 13 *************************************************************************/
idamnjanovic@60 14
idamnjanovic@60 15
idamnjanovic@60 16 #include "myblas.h"
idamnjanovic@60 17 #include <ctype.h>
idamnjanovic@60 18
idamnjanovic@60 19
idamnjanovic@60 20 /* find maximum of absolute values */
idamnjanovic@60 21
idamnjanovic@60 22 mwIndex maxabs(double c[], mwSize m)
idamnjanovic@60 23 {
idamnjanovic@60 24 mwIndex maxid=0, k;
idamnjanovic@60 25 double absval, maxval = SQR(*c); /* use square which is quicker than absolute value */
idamnjanovic@60 26
idamnjanovic@60 27 for (k=1; k<m; ++k) {
idamnjanovic@60 28 absval = SQR(c[k]);
idamnjanovic@60 29 if (absval > maxval) {
idamnjanovic@60 30 maxval = absval;
idamnjanovic@60 31 maxid = k;
idamnjanovic@60 32 }
idamnjanovic@60 33 }
idamnjanovic@60 34 return maxid;
idamnjanovic@60 35 }
idamnjanovic@60 36
idamnjanovic@60 37
idamnjanovic@60 38 /* compute y := alpha*x + y */
idamnjanovic@60 39
idamnjanovic@60 40 void vec_sum(double alpha, double x[], double y[], mwSize n)
idamnjanovic@60 41 {
idamnjanovic@60 42 mwIndex i;
idamnjanovic@60 43
idamnjanovic@60 44 for (i=0; i<n; ++i) {
idamnjanovic@60 45 y[i] += alpha*x[i];
idamnjanovic@60 46 }
idamnjanovic@60 47 }
idamnjanovic@60 48
idamnjanovic@60 49
idamnjanovic@60 50 /* compute y := alpha*A*x */
idamnjanovic@60 51
idamnjanovic@60 52 void mat_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m)
idamnjanovic@60 53 {
idamnjanovic@60 54 mwIndex i, j, i_n;
idamnjanovic@60 55 double *Ax;
idamnjanovic@60 56
idamnjanovic@60 57 Ax = mxCalloc(n,sizeof(double));
idamnjanovic@60 58
idamnjanovic@60 59 for (i=0; i<m; ++i) {
idamnjanovic@60 60 i_n = i*n;
idamnjanovic@60 61 for (j=0; j<n; ++j) {
idamnjanovic@60 62 Ax[j] += A[i_n+j] * x[i];
idamnjanovic@60 63 }
idamnjanovic@60 64 }
idamnjanovic@60 65
idamnjanovic@60 66 for (j=0; j<n; ++j) {
idamnjanovic@60 67 y[j] = alpha*Ax[j];
idamnjanovic@60 68 }
idamnjanovic@60 69
idamnjanovic@60 70 mxFree(Ax);
idamnjanovic@60 71 }
idamnjanovic@60 72
idamnjanovic@60 73
idamnjanovic@60 74 /* compute y := alpha*A'*x */
idamnjanovic@60 75
idamnjanovic@60 76 void matT_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m)
idamnjanovic@60 77 {
idamnjanovic@60 78 mwIndex i, j, n_i;
idamnjanovic@60 79 double sum0, sum1, sum2, sum3;
idamnjanovic@60 80
idamnjanovic@60 81 for (j=0; j<m; ++j) {
idamnjanovic@60 82 y[j] = 0;
idamnjanovic@60 83 }
idamnjanovic@60 84
idamnjanovic@60 85 /* use loop unrolling to accelerate computation */
idamnjanovic@60 86
idamnjanovic@60 87 for (i=0; i<m; ++i) {
idamnjanovic@60 88 n_i = n*i;
idamnjanovic@60 89 sum0 = sum1 = sum2 = sum3 = 0;
idamnjanovic@60 90 for (j=0; j+4<n; j+=4) {
idamnjanovic@60 91 sum0 += A[n_i+j]*x[j];
idamnjanovic@60 92 sum1 += A[n_i+j+1]*x[j+1];
idamnjanovic@60 93 sum2 += A[n_i+j+2]*x[j+2];
idamnjanovic@60 94 sum3 += A[n_i+j+3]*x[j+3];
idamnjanovic@60 95 }
idamnjanovic@60 96 y[i] += alpha * ((sum0 + sum1) + (sum2 + sum3));
idamnjanovic@60 97 while (j<n) {
idamnjanovic@60 98 y[i] += alpha*A[n_i+j]*x[j];
idamnjanovic@60 99 j++;
idamnjanovic@60 100 }
idamnjanovic@60 101 }
idamnjanovic@60 102 }
idamnjanovic@60 103
idamnjanovic@60 104
idamnjanovic@60 105 /* compute y := alpha*A*x */
idamnjanovic@60 106
idamnjanovic@60 107 void mat_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m)
idamnjanovic@60 108 {
idamnjanovic@60 109
idamnjanovic@60 110 mwIndex i, j, j1, j2;
idamnjanovic@60 111
idamnjanovic@60 112 for (i=0; i<n; ++i) {
idamnjanovic@60 113 y[i] = 0;
idamnjanovic@60 114 }
idamnjanovic@60 115
idamnjanovic@60 116 j2 = jc[0];
idamnjanovic@60 117 for (i=0; i<m; ++i) {
idamnjanovic@60 118 j1 = j2; j2 = jc[i+1];
idamnjanovic@60 119 for (j=j1; j<j2; ++j) {
idamnjanovic@60 120 y[ir[j]] += alpha * pr[j] * x[i];
idamnjanovic@60 121 }
idamnjanovic@60 122 }
idamnjanovic@60 123
idamnjanovic@60 124 }
idamnjanovic@60 125
idamnjanovic@60 126
idamnjanovic@60 127 /* compute y := alpha*A'*x */
idamnjanovic@60 128
idamnjanovic@60 129 void matT_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m)
idamnjanovic@60 130 {
idamnjanovic@60 131
idamnjanovic@60 132 mwIndex i, j, j1, j2;
idamnjanovic@60 133
idamnjanovic@60 134 for (i=0; i<m; ++i) {
idamnjanovic@60 135 y[i] = 0;
idamnjanovic@60 136 }
idamnjanovic@60 137
idamnjanovic@60 138 j2 = jc[0];
idamnjanovic@60 139 for (i=0; i<m; ++i) {
idamnjanovic@60 140 j1 = j2; j2 = jc[i+1];
idamnjanovic@60 141 for (j=j1; j<j2; ++j) {
idamnjanovic@60 142 y[i] += alpha * pr[j] * x[ir[j]];
idamnjanovic@60 143 }
idamnjanovic@60 144 }
idamnjanovic@60 145
idamnjanovic@60 146 }
idamnjanovic@60 147
idamnjanovic@60 148
idamnjanovic@60 149 /* compute y := alpha*A*x */
idamnjanovic@60 150
idamnjanovic@60 151 void mat_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m)
idamnjanovic@60 152 {
idamnjanovic@60 153
idamnjanovic@60 154 mwIndex i, j, j_n, k, kend;
idamnjanovic@60 155
idamnjanovic@60 156 for (i=0; i<n; ++i) {
idamnjanovic@60 157 y[i] = 0;
idamnjanovic@60 158 }
idamnjanovic@60 159
idamnjanovic@60 160 kend = jc[1];
idamnjanovic@60 161 if (kend==0) { /* x is empty */
idamnjanovic@60 162 return;
idamnjanovic@60 163 }
idamnjanovic@60 164
idamnjanovic@60 165 for (k=0; k<kend; ++k) {
idamnjanovic@60 166 j = ir[k];
idamnjanovic@60 167 j_n = j*n;
idamnjanovic@60 168 for (i=0; i<n; ++i) {
idamnjanovic@60 169 y[i] += alpha * A[i+j_n] * pr[k];
idamnjanovic@60 170 }
idamnjanovic@60 171 }
idamnjanovic@60 172
idamnjanovic@60 173 }
idamnjanovic@60 174
idamnjanovic@60 175
idamnjanovic@60 176 /* compute y := alpha*A'*x */
idamnjanovic@60 177
idamnjanovic@60 178 void matT_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m)
idamnjanovic@60 179 {
idamnjanovic@60 180
idamnjanovic@60 181 mwIndex i, j, j_n, k, kend;
idamnjanovic@60 182
idamnjanovic@60 183 for (i=0; i<m; ++i) {
idamnjanovic@60 184 y[i] = 0;
idamnjanovic@60 185 }
idamnjanovic@60 186
idamnjanovic@60 187 kend = jc[1];
idamnjanovic@60 188 if (kend==0) { /* x is empty */
idamnjanovic@60 189 return;
idamnjanovic@60 190 }
idamnjanovic@60 191
idamnjanovic@60 192 for (j=0; j<m; ++j) {
idamnjanovic@60 193 j_n = j*n;
idamnjanovic@60 194 for (k=0; k<kend; ++k) {
idamnjanovic@60 195 i = ir[k];
idamnjanovic@60 196 y[j] += alpha * A[i+j_n] * pr[k];
idamnjanovic@60 197 }
idamnjanovic@60 198 }
idamnjanovic@60 199
idamnjanovic@60 200 }
idamnjanovic@60 201
idamnjanovic@60 202
idamnjanovic@60 203 /* compute y := alpha*A*x */
idamnjanovic@60 204
idamnjanovic@60 205 void mat_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m)
idamnjanovic@60 206 {
idamnjanovic@60 207
idamnjanovic@60 208 mwIndex i, j, k, kend, j1, j2;
idamnjanovic@60 209
idamnjanovic@60 210 for (i=0; i<n; ++i) {
idamnjanovic@60 211 y[i] = 0;
idamnjanovic@60 212 }
idamnjanovic@60 213
idamnjanovic@60 214 kend = jcx[1];
idamnjanovic@60 215 if (kend==0) { /* x is empty */
idamnjanovic@60 216 return;
idamnjanovic@60 217 }
idamnjanovic@60 218
idamnjanovic@60 219 for (k=0; k<kend; ++k) {
idamnjanovic@60 220 i = irx[k];
idamnjanovic@60 221 j1 = jc[i]; j2 = jc[i+1];
idamnjanovic@60 222 for (j=j1; j<j2; ++j) {
idamnjanovic@60 223 y[ir[j]] += alpha * pr[j] * prx[k];
idamnjanovic@60 224 }
idamnjanovic@60 225 }
idamnjanovic@60 226
idamnjanovic@60 227 }
idamnjanovic@60 228
idamnjanovic@60 229
idamnjanovic@60 230 /* compute y := alpha*A'*x */
idamnjanovic@60 231
idamnjanovic@60 232 void matT_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m)
idamnjanovic@60 233 {
idamnjanovic@60 234
idamnjanovic@60 235 mwIndex i, j, k, jend, kend, jadd, kadd, delta;
idamnjanovic@60 236
idamnjanovic@60 237 for (i=0; i<m; ++i) {
idamnjanovic@60 238 y[i] = 0;
idamnjanovic@60 239 }
idamnjanovic@60 240
idamnjanovic@60 241 kend = jcx[1];
idamnjanovic@60 242 if (kend==0) { /* x is empty */
idamnjanovic@60 243 return;
idamnjanovic@60 244 }
idamnjanovic@60 245
idamnjanovic@60 246 for (i=0; i<m; ++i) {
idamnjanovic@60 247 j = jc[i];
idamnjanovic@60 248 jend = jc[i+1];
idamnjanovic@60 249 k = 0;
idamnjanovic@60 250 while (j<jend && k<kend) {
idamnjanovic@60 251
idamnjanovic@60 252 delta = ir[j] - irx[k];
idamnjanovic@60 253
idamnjanovic@60 254 if (delta) { /* if indices differ - increment the smaller one */
idamnjanovic@60 255 jadd = delta<0;
idamnjanovic@60 256 kadd = 1-jadd;
idamnjanovic@60 257 j += jadd;
idamnjanovic@60 258 k += kadd;
idamnjanovic@60 259 }
idamnjanovic@60 260
idamnjanovic@60 261 else { /* indices are equal - add to result and increment both */
idamnjanovic@60 262 y[i] += alpha * pr[j] * prx[k];
idamnjanovic@60 263 j++; k++;
idamnjanovic@60 264 }
idamnjanovic@60 265 }
idamnjanovic@60 266 }
idamnjanovic@60 267
idamnjanovic@60 268 }
idamnjanovic@60 269
idamnjanovic@60 270
idamnjanovic@60 271 /* matrix-matrix multiplication */
idamnjanovic@60 272
idamnjanovic@60 273 void mat_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
idamnjanovic@60 274 {
idamnjanovic@60 275 mwIndex i1, i2, i3, iX, iA, i2_n;
idamnjanovic@60 276 double b;
idamnjanovic@60 277
idamnjanovic@60 278 for (i1=0; i1<n*k; i1++) {
idamnjanovic@60 279 X[i1] = 0;
idamnjanovic@60 280 }
idamnjanovic@60 281
idamnjanovic@60 282 for (i2=0; i2<m; ++i2) {
idamnjanovic@60 283 i2_n = i2*n;
idamnjanovic@60 284 iX = 0;
idamnjanovic@60 285 for (i3=0; i3<k; ++i3) {
idamnjanovic@60 286 iA = i2_n;
idamnjanovic@60 287 b = B[i2+i3*m];
idamnjanovic@60 288 for (i1=0; i1<n; ++i1) {
idamnjanovic@60 289 X[iX++] += A[iA++]*b;
idamnjanovic@60 290 }
idamnjanovic@60 291 }
idamnjanovic@60 292 }
idamnjanovic@60 293
idamnjanovic@60 294 for (i1=0; i1<n*k; i1++) {
idamnjanovic@60 295 X[i1] *= alpha;
idamnjanovic@60 296 }
idamnjanovic@60 297 }
idamnjanovic@60 298
idamnjanovic@60 299
idamnjanovic@60 300 /* matrix-transpose-matrix multiplication */
idamnjanovic@60 301
idamnjanovic@60 302 void matT_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
idamnjanovic@60 303 {
idamnjanovic@60 304 mwIndex i1, i2, i3, iX, iA, i2_n;
idamnjanovic@60 305 double *x, sum0, sum1, sum2, sum3;
idamnjanovic@60 306
idamnjanovic@60 307 for (i2=0; i2<m; ++i2) {
idamnjanovic@60 308 for (i3=0; i3<k; ++i3) {
idamnjanovic@60 309 sum0 = sum1 = sum2 = sum3 = 0;
idamnjanovic@60 310 for (i1=0; i1+4<n; i1+=4) {
idamnjanovic@60 311 sum0 += A[i1+0+i2*n]*B[i1+0+i3*n];
idamnjanovic@60 312 sum1 += A[i1+1+i2*n]*B[i1+1+i3*n];
idamnjanovic@60 313 sum2 += A[i1+2+i2*n]*B[i1+2+i3*n];
idamnjanovic@60 314 sum3 += A[i1+3+i2*n]*B[i1+3+i3*n];
idamnjanovic@60 315 }
idamnjanovic@60 316 X[i2+i3*m] = (sum0+sum1) + (sum2+sum3);
idamnjanovic@60 317 while(i1<n) {
idamnjanovic@60 318 X[i2+i3*m] += A[i1+i2*n]*B[i1+i3*n];
idamnjanovic@60 319 i1++;
idamnjanovic@60 320 }
idamnjanovic@60 321 }
idamnjanovic@60 322 }
idamnjanovic@60 323
idamnjanovic@60 324 for (i1=0; i1<m*k; i1++) {
idamnjanovic@60 325 X[i1] *= alpha;
idamnjanovic@60 326 }
idamnjanovic@60 327 }
idamnjanovic@60 328
idamnjanovic@60 329
idamnjanovic@60 330 /* tensor-matrix product */
idamnjanovic@60 331
idamnjanovic@60 332 void tens_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l)
idamnjanovic@60 333 {
idamnjanovic@60 334 mwIndex i1, i2, i3, i4, i2_n, nml;
idamnjanovic@60 335 double b;
idamnjanovic@60 336
idamnjanovic@60 337 nml = n*m*l;
idamnjanovic@60 338 for (i1=0; i1<nml; ++i1) {
idamnjanovic@60 339 X[i1] = 0;
idamnjanovic@60 340 }
idamnjanovic@60 341
idamnjanovic@60 342 for (i2=0; i2<m; ++i2) {
idamnjanovic@60 343 i2_n = i2*n;
idamnjanovic@60 344 for (i3=0; i3<k; ++i3) {
idamnjanovic@60 345 for (i4=0; i4<l; ++i4) {
idamnjanovic@60 346 b = B[i4+i3*l];
idamnjanovic@60 347 for (i1=0; i1<n; ++i1) {
idamnjanovic@60 348 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b;
idamnjanovic@60 349 }
idamnjanovic@60 350 }
idamnjanovic@60 351 }
idamnjanovic@60 352 }
idamnjanovic@60 353
idamnjanovic@60 354 for (i1=0; i1<nml; ++i1) {
idamnjanovic@60 355 X[i1] *= alpha;
idamnjanovic@60 356 }
idamnjanovic@60 357 }
idamnjanovic@60 358
idamnjanovic@60 359
idamnjanovic@60 360 /* tensor-matrix-transpose product */
idamnjanovic@60 361
idamnjanovic@60 362 void tens_matT(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l)
idamnjanovic@60 363 {
idamnjanovic@60 364 mwIndex i1, i2, i3, i4, i2_n, nml;
idamnjanovic@60 365 double b;
idamnjanovic@60 366
idamnjanovic@60 367 nml = n*m*l;
idamnjanovic@60 368 for (i1=0; i1<nml; ++i1) {
idamnjanovic@60 369 X[i1] = 0;
idamnjanovic@60 370 }
idamnjanovic@60 371
idamnjanovic@60 372 for (i2=0; i2<m; ++i2) {
idamnjanovic@60 373 i2_n = i2*n;
idamnjanovic@60 374 for (i4=0; i4<l; ++i4) {
idamnjanovic@60 375 for (i3=0; i3<k; ++i3) {
idamnjanovic@60 376 b = B[i3+i4*k];
idamnjanovic@60 377 for (i1=0; i1<n; ++i1) {
idamnjanovic@60 378 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b;
idamnjanovic@60 379 }
idamnjanovic@60 380 }
idamnjanovic@60 381 }
idamnjanovic@60 382 }
idamnjanovic@60 383
idamnjanovic@60 384 for (i1=0; i1<nml; ++i1) {
idamnjanovic@60 385 X[i1] *= alpha;
idamnjanovic@60 386 }
idamnjanovic@60 387 }
idamnjanovic@60 388
idamnjanovic@60 389
idamnjanovic@60 390 /* dot product */
idamnjanovic@60 391
idamnjanovic@60 392 double dotprod(double a[], double b[], mwSize n)
idamnjanovic@60 393 {
idamnjanovic@60 394 double sum = 0;
idamnjanovic@60 395 mwIndex i;
idamnjanovic@60 396 for (i=0; i<n; ++i)
idamnjanovic@60 397 sum += a[i]*b[i];
idamnjanovic@60 398 return sum;
idamnjanovic@60 399 }
idamnjanovic@60 400
idamnjanovic@60 401
idamnjanovic@60 402 /* find maximum of vector */
idamnjanovic@60 403
idamnjanovic@60 404 mwIndex maxpos(double c[], mwSize m)
idamnjanovic@60 405 {
idamnjanovic@60 406 mwIndex maxid=0, k;
idamnjanovic@60 407 double val, maxval = *c;
idamnjanovic@60 408
idamnjanovic@60 409 for (k=1; k<m; ++k) {
idamnjanovic@60 410 val = c[k];
idamnjanovic@60 411 if (val > maxval) {
idamnjanovic@60 412 maxval = val;
idamnjanovic@60 413 maxid = k;
idamnjanovic@60 414 }
idamnjanovic@60 415 }
idamnjanovic@60 416 return maxid;
idamnjanovic@60 417 }
idamnjanovic@60 418
idamnjanovic@60 419
idamnjanovic@60 420 /* solve L*x = b */
idamnjanovic@60 421
idamnjanovic@60 422 void backsubst_L(double L[], double b[], double x[], mwSize n, mwSize k)
idamnjanovic@60 423 {
idamnjanovic@60 424 mwIndex i, j;
idamnjanovic@60 425 double rhs;
idamnjanovic@60 426
idamnjanovic@60 427 for (i=0; i<k; ++i) {
idamnjanovic@60 428 rhs = b[i];
idamnjanovic@60 429 for (j=0; j<i; ++j) {
idamnjanovic@60 430 rhs -= L[j*n+i]*x[j];
idamnjanovic@60 431 }
idamnjanovic@60 432 x[i] = rhs/L[i*n+i];
idamnjanovic@60 433 }
idamnjanovic@60 434 }
idamnjanovic@60 435
idamnjanovic@60 436
idamnjanovic@60 437 /* solve L'*x = b */
idamnjanovic@60 438
idamnjanovic@60 439 void backsubst_Lt(double L[], double b[], double x[], mwSize n, mwSize k)
idamnjanovic@60 440 {
idamnjanovic@60 441 mwIndex i, j;
idamnjanovic@60 442 double rhs;
idamnjanovic@60 443
idamnjanovic@60 444 for (i=k; i>=1; --i) {
idamnjanovic@60 445 rhs = b[i-1];
idamnjanovic@60 446 for (j=i; j<k; ++j) {
idamnjanovic@60 447 rhs -= L[(i-1)*n+j]*x[j];
idamnjanovic@60 448 }
idamnjanovic@60 449 x[i-1] = rhs/L[(i-1)*n+i-1];
idamnjanovic@60 450 }
idamnjanovic@60 451 }
idamnjanovic@60 452
idamnjanovic@60 453
idamnjanovic@60 454 /* solve U*x = b */
idamnjanovic@60 455
idamnjanovic@60 456 void backsubst_U(double U[], double b[], double x[], mwSize n, mwSize k)
idamnjanovic@60 457 {
idamnjanovic@60 458 mwIndex i, j;
idamnjanovic@60 459 double rhs;
idamnjanovic@60 460
idamnjanovic@60 461 for (i=k; i>=1; --i) {
idamnjanovic@60 462 rhs = b[i-1];
idamnjanovic@60 463 for (j=i; j<k; ++j) {
idamnjanovic@60 464 rhs -= U[j*n+i-1]*x[j];
idamnjanovic@60 465 }
idamnjanovic@60 466 x[i-1] = rhs/U[(i-1)*n+i-1];
idamnjanovic@60 467 }
idamnjanovic@60 468 }
idamnjanovic@60 469
idamnjanovic@60 470
idamnjanovic@60 471 /* solve U'*x = b */
idamnjanovic@60 472
idamnjanovic@60 473 void backsubst_Ut(double U[], double b[], double x[], mwSize n, mwSize k)
idamnjanovic@60 474 {
idamnjanovic@60 475 mwIndex i, j;
idamnjanovic@60 476 double rhs;
idamnjanovic@60 477
idamnjanovic@60 478 for (i=0; i<k; ++i) {
idamnjanovic@60 479 rhs = b[i];
idamnjanovic@60 480 for (j=0; j<i; ++j) {
idamnjanovic@60 481 rhs -= U[i*n+j]*x[j];
idamnjanovic@60 482 }
idamnjanovic@60 483 x[i] = rhs/U[i*n+i];
idamnjanovic@60 484 }
idamnjanovic@60 485 }
idamnjanovic@60 486
idamnjanovic@60 487
idamnjanovic@60 488 /* back substitution solver */
idamnjanovic@60 489
idamnjanovic@60 490 void backsubst(char ul, double A[], double b[], double x[], mwSize n, mwSize k)
idamnjanovic@60 491 {
idamnjanovic@60 492 if (tolower(ul) == 'u') {
idamnjanovic@60 493 backsubst_U(A, b, x, n, k);
idamnjanovic@60 494 }
idamnjanovic@60 495 else if (tolower(ul) == 'l') {
idamnjanovic@60 496 backsubst_L(A, b, x, n, k);
idamnjanovic@60 497 }
idamnjanovic@60 498 else {
idamnjanovic@60 499 mexErrMsgTxt("Invalid triangular matrix type: must be ''U'' or ''L''");
idamnjanovic@60 500 }
idamnjanovic@60 501 }
idamnjanovic@60 502
idamnjanovic@60 503
idamnjanovic@60 504 /* solve equation set using cholesky decomposition */
idamnjanovic@60 505
idamnjanovic@60 506 void cholsolve(char ul, double A[], double b[], double x[], mwSize n, mwSize k)
idamnjanovic@60 507 {
idamnjanovic@60 508 double *tmp;
idamnjanovic@60 509
idamnjanovic@60 510 tmp = mxMalloc(k*sizeof(double));
idamnjanovic@60 511
idamnjanovic@60 512 if (tolower(ul) == 'l') {
idamnjanovic@60 513 backsubst_L(A, b, tmp, n, k);
idamnjanovic@60 514 backsubst_Lt(A, tmp, x, n, k);
idamnjanovic@60 515 }
idamnjanovic@60 516 else if (tolower(ul) == 'u') {
idamnjanovic@60 517 backsubst_Ut(A, b, tmp, n, k);
idamnjanovic@60 518 backsubst_U(A, tmp, x, n, k);
idamnjanovic@60 519 }
idamnjanovic@60 520 else {
idamnjanovic@60 521 mexErrMsgTxt("Invalid triangular matrix type: must be either ''U'' or ''L''");
idamnjanovic@60 522 }
idamnjanovic@60 523
idamnjanovic@60 524 mxFree(tmp);
idamnjanovic@60 525 }
idamnjanovic@60 526
idamnjanovic@60 527
idamnjanovic@60 528 /* perform a permutation assignment y := x(ind(1:k)) */
idamnjanovic@60 529
idamnjanovic@60 530 void vec_assign(double y[], double x[], mwIndex ind[], mwSize k)
idamnjanovic@60 531 {
idamnjanovic@60 532 mwIndex i;
idamnjanovic@60 533
idamnjanovic@60 534 for (i=0; i<k; ++i)
idamnjanovic@60 535 y[i] = x[ind[i]];
idamnjanovic@60 536 }
idamnjanovic@60 537
idamnjanovic@60 538
idamnjanovic@60 539 /* matrix transpose */
idamnjanovic@60 540
idamnjanovic@60 541 void transpose(double X[], double Y[], mwSize n, mwSize m)
idamnjanovic@60 542 {
idamnjanovic@60 543 mwIndex i, j, i_m, j_n;
idamnjanovic@60 544
idamnjanovic@60 545 if (n<m) {
idamnjanovic@60 546 for (j=0; j<m; ++j) {
idamnjanovic@60 547 j_n = j*n;
idamnjanovic@60 548 for (i=0; i<n; ++i) {
idamnjanovic@60 549 Y[j+i*m] = X[i+j_n];
idamnjanovic@60 550 }
idamnjanovic@60 551 }
idamnjanovic@60 552 }
idamnjanovic@60 553 else {
idamnjanovic@60 554 for (i=0; i<n; ++i) {
idamnjanovic@60 555 i_m = i*m;
idamnjanovic@60 556 for (j=0; j<m; ++j) {
idamnjanovic@60 557 Y[j+i_m] = X[i+j*n];
idamnjanovic@60 558 }
idamnjanovic@60 559 }
idamnjanovic@60 560 }
idamnjanovic@60 561 }
idamnjanovic@60 562
idamnjanovic@60 563
idamnjanovic@60 564 /* print contents of matrix */
idamnjanovic@60 565
idamnjanovic@60 566 void printmat(double A[], int n, int m, char* matname)
idamnjanovic@60 567 {
idamnjanovic@60 568 int i, j;
idamnjanovic@60 569 mexPrintf("\n%s = \n\n", matname);
idamnjanovic@60 570
idamnjanovic@60 571 if (n*m==0) {
idamnjanovic@60 572 mexPrintf(" Empty matrix: %d-by-%d\n\n", n, m);
idamnjanovic@60 573 return;
idamnjanovic@60 574 }
idamnjanovic@60 575
idamnjanovic@60 576 for (i=0; i<n; ++i) {
idamnjanovic@60 577 for (j=0; j<m; ++j)
idamnjanovic@60 578 mexPrintf(" %lf", A[j*n+i]);
idamnjanovic@60 579 mexPrintf("\n");
idamnjanovic@60 580 }
idamnjanovic@60 581 mexPrintf("\n");
idamnjanovic@60 582 }
idamnjanovic@60 583
idamnjanovic@60 584
idamnjanovic@60 585 /* print contents of sparse matrix */
idamnjanovic@60 586
idamnjanovic@60 587 void printspmat(mxArray *a, char* matname)
idamnjanovic@60 588 {
idamnjanovic@60 589 mwIndex *aJc = mxGetJc(a);
idamnjanovic@60 590 mwIndex *aIr = mxGetIr(a);
idamnjanovic@60 591 double *aPr = mxGetPr(a);
idamnjanovic@60 592
idamnjanovic@60 593 int i;
idamnjanovic@60 594
idamnjanovic@60 595 mexPrintf("\n%s = \n\n", matname);
idamnjanovic@60 596
idamnjanovic@60 597 for (i=0; i<aJc[1]; ++i)
idamnjanovic@60 598 printf(" (%d,1) = %lf\n", aIr[i]+1,aPr[i]);
idamnjanovic@60 599
idamnjanovic@60 600 mexPrintf("\n");
idamnjanovic@60 601 }
idamnjanovic@60 602
idamnjanovic@60 603
idamnjanovic@60 604
idamnjanovic@60 605 /* matrix multiplication using Winograd's algorithm */
idamnjanovic@60 606
idamnjanovic@60 607 /*
idamnjanovic@60 608 void mat_mat2(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
idamnjanovic@60 609 {
idamnjanovic@60 610
idamnjanovic@60 611 mwIndex i1, i2, i3, iX, iA, i2_n;
idamnjanovic@60 612 double b, *AA, *BB;
idamnjanovic@60 613
idamnjanovic@60 614 AA = mxCalloc(n,sizeof(double));
idamnjanovic@60 615 BB = mxCalloc(k,sizeof(double));
idamnjanovic@60 616
idamnjanovic@60 617 for (i1=0; i1<n*k; i1++) {
idamnjanovic@60 618 X[i1] = 0;
idamnjanovic@60 619 }
idamnjanovic@60 620
idamnjanovic@60 621 for (i1=0; i1<n; ++i1) {
idamnjanovic@60 622 for (i2=0; i2<m/2; ++i2) {
idamnjanovic@60 623 AA[i1] += A[i1+2*i2*n]*A[i1+(2*i2+1)*n];
idamnjanovic@60 624 }
idamnjanovic@60 625 }
idamnjanovic@60 626
idamnjanovic@60 627 for (i2=0; i2<k; ++i2) {
idamnjanovic@60 628 for (i1=0; i1<m/2; ++i1) {
idamnjanovic@60 629 BB[i2] += B[2*i1+i2*m]*B[2*i1+1+i2*m];
idamnjanovic@60 630 }
idamnjanovic@60 631 }
idamnjanovic@60 632
idamnjanovic@60 633 for (i2=0; i2<k; ++i2) {
idamnjanovic@60 634 for (i3=0; i3<m/2; ++i3) {
idamnjanovic@60 635 for (i1=0; i1<n; ++i1) {
idamnjanovic@60 636 X[i1+i2*n] += (A[i1+(2*i3)*n]+B[2*i3+1+i2*m])*(A[i1+(2*i3+1)*n]+B[2*i3+i2*m]);
idamnjanovic@60 637 }
idamnjanovic@60 638 }
idamnjanovic@60 639 }
idamnjanovic@60 640
idamnjanovic@60 641 if (m%2) {
idamnjanovic@60 642 for (i2=0; i2<k; ++i2) {
idamnjanovic@60 643 for (i1=0; i1<n; ++i1) {
idamnjanovic@60 644 X[i1+i2*n] += A[i1+(m-1)*n]*B[m-1+i2*m];
idamnjanovic@60 645 }
idamnjanovic@60 646 }
idamnjanovic@60 647 }
idamnjanovic@60 648
idamnjanovic@60 649 for (i2=0; i2<k; ++i2) {
idamnjanovic@60 650 for (i1=0; i1<n; ++i1) {
idamnjanovic@60 651 X[i1+i2*n] -= (AA[i1] + BB[i2]);
idamnjanovic@60 652 X[i1+i2*n] *= alpha;
idamnjanovic@60 653 }
idamnjanovic@60 654 }
idamnjanovic@60 655
idamnjanovic@60 656 mxFree(AA);
idamnjanovic@60 657 mxFree(BB);
idamnjanovic@60 658 }
idamnjanovic@60 659 */
idamnjanovic@60 660
idamnjanovic@60 661
idamnjanovic@60 662
idamnjanovic@60 663