annotate Problems/private/myblas.c @ 57:3a58e70e8cbe

(none)
author idamnjanovic
date Mon, 14 Mar 2011 17:06:07 +0000
parents 217a33ac374e
children
rev   line source
idamnjanovic@51 1 /**************************************************************************
idamnjanovic@51 2 *
idamnjanovic@51 3 * File name: myblas.c
idamnjanovic@51 4 *
idamnjanovic@51 5 * Ron Rubinstein
idamnjanovic@51 6 * Computer Science Department
idamnjanovic@51 7 * Technion, Haifa 32000 Israel
idamnjanovic@51 8 * ronrubin@cs
idamnjanovic@51 9 *
idamnjanovic@51 10 * Version: 1.1
idamnjanovic@51 11 * Last updated: 13.8.2009
idamnjanovic@51 12 *
idamnjanovic@51 13 *************************************************************************/
idamnjanovic@51 14
idamnjanovic@51 15
idamnjanovic@51 16 #include "myblas.h"
idamnjanovic@51 17 #include <ctype.h>
idamnjanovic@51 18
idamnjanovic@51 19
idamnjanovic@51 20 /* find maximum of absolute values */
idamnjanovic@51 21
idamnjanovic@51 22 mwIndex maxabs(double c[], mwSize m)
idamnjanovic@51 23 {
idamnjanovic@51 24 mwIndex maxid=0, k;
idamnjanovic@51 25 double absval, maxval = SQR(*c); /* use square which is quicker than absolute value */
idamnjanovic@51 26
idamnjanovic@51 27 for (k=1; k<m; ++k) {
idamnjanovic@51 28 absval = SQR(c[k]);
idamnjanovic@51 29 if (absval > maxval) {
idamnjanovic@51 30 maxval = absval;
idamnjanovic@51 31 maxid = k;
idamnjanovic@51 32 }
idamnjanovic@51 33 }
idamnjanovic@51 34 return maxid;
idamnjanovic@51 35 }
idamnjanovic@51 36
idamnjanovic@51 37
idamnjanovic@51 38 /* compute y := alpha*x + y */
idamnjanovic@51 39
idamnjanovic@51 40 void vec_sum(double alpha, double x[], double y[], mwSize n)
idamnjanovic@51 41 {
idamnjanovic@51 42 mwIndex i;
idamnjanovic@51 43
idamnjanovic@51 44 for (i=0; i<n; ++i) {
idamnjanovic@51 45 y[i] += alpha*x[i];
idamnjanovic@51 46 }
idamnjanovic@51 47 }
idamnjanovic@51 48
idamnjanovic@51 49
idamnjanovic@51 50 /* compute y := alpha*A*x */
idamnjanovic@51 51
idamnjanovic@51 52 void mat_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m)
idamnjanovic@51 53 {
idamnjanovic@51 54 mwIndex i, j, i_n;
idamnjanovic@51 55 double *Ax;
idamnjanovic@51 56
idamnjanovic@51 57 Ax = mxCalloc(n,sizeof(double));
idamnjanovic@51 58
idamnjanovic@51 59 for (i=0; i<m; ++i) {
idamnjanovic@51 60 i_n = i*n;
idamnjanovic@51 61 for (j=0; j<n; ++j) {
idamnjanovic@51 62 Ax[j] += A[i_n+j] * x[i];
idamnjanovic@51 63 }
idamnjanovic@51 64 }
idamnjanovic@51 65
idamnjanovic@51 66 for (j=0; j<n; ++j) {
idamnjanovic@51 67 y[j] = alpha*Ax[j];
idamnjanovic@51 68 }
idamnjanovic@51 69
idamnjanovic@51 70 mxFree(Ax);
idamnjanovic@51 71 }
idamnjanovic@51 72
idamnjanovic@51 73
idamnjanovic@51 74 /* compute y := alpha*A'*x */
idamnjanovic@51 75
idamnjanovic@51 76 void matT_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m)
idamnjanovic@51 77 {
idamnjanovic@51 78 mwIndex i, j, n_i;
idamnjanovic@51 79 double sum0, sum1, sum2, sum3;
idamnjanovic@51 80
idamnjanovic@51 81 for (j=0; j<m; ++j) {
idamnjanovic@51 82 y[j] = 0;
idamnjanovic@51 83 }
idamnjanovic@51 84
idamnjanovic@51 85 /* use loop unrolling to accelerate computation */
idamnjanovic@51 86
idamnjanovic@51 87 for (i=0; i<m; ++i) {
idamnjanovic@51 88 n_i = n*i;
idamnjanovic@51 89 sum0 = sum1 = sum2 = sum3 = 0;
idamnjanovic@51 90 for (j=0; j+4<n; j+=4) {
idamnjanovic@51 91 sum0 += A[n_i+j]*x[j];
idamnjanovic@51 92 sum1 += A[n_i+j+1]*x[j+1];
idamnjanovic@51 93 sum2 += A[n_i+j+2]*x[j+2];
idamnjanovic@51 94 sum3 += A[n_i+j+3]*x[j+3];
idamnjanovic@51 95 }
idamnjanovic@51 96 y[i] += alpha * ((sum0 + sum1) + (sum2 + sum3));
idamnjanovic@51 97 while (j<n) {
idamnjanovic@51 98 y[i] += alpha*A[n_i+j]*x[j];
idamnjanovic@51 99 j++;
idamnjanovic@51 100 }
idamnjanovic@51 101 }
idamnjanovic@51 102 }
idamnjanovic@51 103
idamnjanovic@51 104
idamnjanovic@51 105 /* compute y := alpha*A*x */
idamnjanovic@51 106
idamnjanovic@51 107 void mat_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m)
idamnjanovic@51 108 {
idamnjanovic@51 109
idamnjanovic@51 110 mwIndex i, j, j1, j2;
idamnjanovic@51 111
idamnjanovic@51 112 for (i=0; i<n; ++i) {
idamnjanovic@51 113 y[i] = 0;
idamnjanovic@51 114 }
idamnjanovic@51 115
idamnjanovic@51 116 j2 = jc[0];
idamnjanovic@51 117 for (i=0; i<m; ++i) {
idamnjanovic@51 118 j1 = j2; j2 = jc[i+1];
idamnjanovic@51 119 for (j=j1; j<j2; ++j) {
idamnjanovic@51 120 y[ir[j]] += alpha * pr[j] * x[i];
idamnjanovic@51 121 }
idamnjanovic@51 122 }
idamnjanovic@51 123
idamnjanovic@51 124 }
idamnjanovic@51 125
idamnjanovic@51 126
idamnjanovic@51 127 /* compute y := alpha*A'*x */
idamnjanovic@51 128
idamnjanovic@51 129 void matT_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m)
idamnjanovic@51 130 {
idamnjanovic@51 131
idamnjanovic@51 132 mwIndex i, j, j1, j2;
idamnjanovic@51 133
idamnjanovic@51 134 for (i=0; i<m; ++i) {
idamnjanovic@51 135 y[i] = 0;
idamnjanovic@51 136 }
idamnjanovic@51 137
idamnjanovic@51 138 j2 = jc[0];
idamnjanovic@51 139 for (i=0; i<m; ++i) {
idamnjanovic@51 140 j1 = j2; j2 = jc[i+1];
idamnjanovic@51 141 for (j=j1; j<j2; ++j) {
idamnjanovic@51 142 y[i] += alpha * pr[j] * x[ir[j]];
idamnjanovic@51 143 }
idamnjanovic@51 144 }
idamnjanovic@51 145
idamnjanovic@51 146 }
idamnjanovic@51 147
idamnjanovic@51 148
idamnjanovic@51 149 /* compute y := alpha*A*x */
idamnjanovic@51 150
idamnjanovic@51 151 void mat_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m)
idamnjanovic@51 152 {
idamnjanovic@51 153
idamnjanovic@51 154 mwIndex i, j, j_n, k, kend;
idamnjanovic@51 155
idamnjanovic@51 156 for (i=0; i<n; ++i) {
idamnjanovic@51 157 y[i] = 0;
idamnjanovic@51 158 }
idamnjanovic@51 159
idamnjanovic@51 160 kend = jc[1];
idamnjanovic@51 161 if (kend==0) { /* x is empty */
idamnjanovic@51 162 return;
idamnjanovic@51 163 }
idamnjanovic@51 164
idamnjanovic@51 165 for (k=0; k<kend; ++k) {
idamnjanovic@51 166 j = ir[k];
idamnjanovic@51 167 j_n = j*n;
idamnjanovic@51 168 for (i=0; i<n; ++i) {
idamnjanovic@51 169 y[i] += alpha * A[i+j_n] * pr[k];
idamnjanovic@51 170 }
idamnjanovic@51 171 }
idamnjanovic@51 172
idamnjanovic@51 173 }
idamnjanovic@51 174
idamnjanovic@51 175
idamnjanovic@51 176 /* compute y := alpha*A'*x */
idamnjanovic@51 177
idamnjanovic@51 178 void matT_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m)
idamnjanovic@51 179 {
idamnjanovic@51 180
idamnjanovic@51 181 mwIndex i, j, j_n, k, kend;
idamnjanovic@51 182
idamnjanovic@51 183 for (i=0; i<m; ++i) {
idamnjanovic@51 184 y[i] = 0;
idamnjanovic@51 185 }
idamnjanovic@51 186
idamnjanovic@51 187 kend = jc[1];
idamnjanovic@51 188 if (kend==0) { /* x is empty */
idamnjanovic@51 189 return;
idamnjanovic@51 190 }
idamnjanovic@51 191
idamnjanovic@51 192 for (j=0; j<m; ++j) {
idamnjanovic@51 193 j_n = j*n;
idamnjanovic@51 194 for (k=0; k<kend; ++k) {
idamnjanovic@51 195 i = ir[k];
idamnjanovic@51 196 y[j] += alpha * A[i+j_n] * pr[k];
idamnjanovic@51 197 }
idamnjanovic@51 198 }
idamnjanovic@51 199
idamnjanovic@51 200 }
idamnjanovic@51 201
idamnjanovic@51 202
idamnjanovic@51 203 /* compute y := alpha*A*x */
idamnjanovic@51 204
idamnjanovic@51 205 void mat_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m)
idamnjanovic@51 206 {
idamnjanovic@51 207
idamnjanovic@51 208 mwIndex i, j, k, kend, j1, j2;
idamnjanovic@51 209
idamnjanovic@51 210 for (i=0; i<n; ++i) {
idamnjanovic@51 211 y[i] = 0;
idamnjanovic@51 212 }
idamnjanovic@51 213
idamnjanovic@51 214 kend = jcx[1];
idamnjanovic@51 215 if (kend==0) { /* x is empty */
idamnjanovic@51 216 return;
idamnjanovic@51 217 }
idamnjanovic@51 218
idamnjanovic@51 219 for (k=0; k<kend; ++k) {
idamnjanovic@51 220 i = irx[k];
idamnjanovic@51 221 j1 = jc[i]; j2 = jc[i+1];
idamnjanovic@51 222 for (j=j1; j<j2; ++j) {
idamnjanovic@51 223 y[ir[j]] += alpha * pr[j] * prx[k];
idamnjanovic@51 224 }
idamnjanovic@51 225 }
idamnjanovic@51 226
idamnjanovic@51 227 }
idamnjanovic@51 228
idamnjanovic@51 229
idamnjanovic@51 230 /* compute y := alpha*A'*x */
idamnjanovic@51 231
idamnjanovic@51 232 void matT_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m)
idamnjanovic@51 233 {
idamnjanovic@51 234
idamnjanovic@51 235 mwIndex i, j, k, jend, kend, jadd, kadd, delta;
idamnjanovic@51 236
idamnjanovic@51 237 for (i=0; i<m; ++i) {
idamnjanovic@51 238 y[i] = 0;
idamnjanovic@51 239 }
idamnjanovic@51 240
idamnjanovic@51 241 kend = jcx[1];
idamnjanovic@51 242 if (kend==0) { /* x is empty */
idamnjanovic@51 243 return;
idamnjanovic@51 244 }
idamnjanovic@51 245
idamnjanovic@51 246 for (i=0; i<m; ++i) {
idamnjanovic@51 247 j = jc[i];
idamnjanovic@51 248 jend = jc[i+1];
idamnjanovic@51 249 k = 0;
idamnjanovic@51 250 while (j<jend && k<kend) {
idamnjanovic@51 251
idamnjanovic@51 252 delta = ir[j] - irx[k];
idamnjanovic@51 253
idamnjanovic@51 254 if (delta) { /* if indices differ - increment the smaller one */
idamnjanovic@51 255 jadd = delta<0;
idamnjanovic@51 256 kadd = 1-jadd;
idamnjanovic@51 257 j += jadd;
idamnjanovic@51 258 k += kadd;
idamnjanovic@51 259 }
idamnjanovic@51 260
idamnjanovic@51 261 else { /* indices are equal - add to result and increment both */
idamnjanovic@51 262 y[i] += alpha * pr[j] * prx[k];
idamnjanovic@51 263 j++; k++;
idamnjanovic@51 264 }
idamnjanovic@51 265 }
idamnjanovic@51 266 }
idamnjanovic@51 267
idamnjanovic@51 268 }
idamnjanovic@51 269
idamnjanovic@51 270
idamnjanovic@51 271 /* matrix-matrix multiplication */
idamnjanovic@51 272
idamnjanovic@51 273 void mat_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
idamnjanovic@51 274 {
idamnjanovic@51 275 mwIndex i1, i2, i3, iX, iA, i2_n;
idamnjanovic@51 276 double b;
idamnjanovic@51 277
idamnjanovic@51 278 for (i1=0; i1<n*k; i1++) {
idamnjanovic@51 279 X[i1] = 0;
idamnjanovic@51 280 }
idamnjanovic@51 281
idamnjanovic@51 282 for (i2=0; i2<m; ++i2) {
idamnjanovic@51 283 i2_n = i2*n;
idamnjanovic@51 284 iX = 0;
idamnjanovic@51 285 for (i3=0; i3<k; ++i3) {
idamnjanovic@51 286 iA = i2_n;
idamnjanovic@51 287 b = B[i2+i3*m];
idamnjanovic@51 288 for (i1=0; i1<n; ++i1) {
idamnjanovic@51 289 X[iX++] += A[iA++]*b;
idamnjanovic@51 290 }
idamnjanovic@51 291 }
idamnjanovic@51 292 }
idamnjanovic@51 293
idamnjanovic@51 294 for (i1=0; i1<n*k; i1++) {
idamnjanovic@51 295 X[i1] *= alpha;
idamnjanovic@51 296 }
idamnjanovic@51 297 }
idamnjanovic@51 298
idamnjanovic@51 299
idamnjanovic@51 300 /* matrix-transpose-matrix multiplication */
idamnjanovic@51 301
idamnjanovic@51 302 void matT_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
idamnjanovic@51 303 {
idamnjanovic@51 304 mwIndex i1, i2, i3, iX, iA, i2_n;
idamnjanovic@51 305 double *x, sum0, sum1, sum2, sum3;
idamnjanovic@51 306
idamnjanovic@51 307 for (i2=0; i2<m; ++i2) {
idamnjanovic@51 308 for (i3=0; i3<k; ++i3) {
idamnjanovic@51 309 sum0 = sum1 = sum2 = sum3 = 0;
idamnjanovic@51 310 for (i1=0; i1+4<n; i1+=4) {
idamnjanovic@51 311 sum0 += A[i1+0+i2*n]*B[i1+0+i3*n];
idamnjanovic@51 312 sum1 += A[i1+1+i2*n]*B[i1+1+i3*n];
idamnjanovic@51 313 sum2 += A[i1+2+i2*n]*B[i1+2+i3*n];
idamnjanovic@51 314 sum3 += A[i1+3+i2*n]*B[i1+3+i3*n];
idamnjanovic@51 315 }
idamnjanovic@51 316 X[i2+i3*m] = (sum0+sum1) + (sum2+sum3);
idamnjanovic@51 317 while(i1<n) {
idamnjanovic@51 318 X[i2+i3*m] += A[i1+i2*n]*B[i1+i3*n];
idamnjanovic@51 319 i1++;
idamnjanovic@51 320 }
idamnjanovic@51 321 }
idamnjanovic@51 322 }
idamnjanovic@51 323
idamnjanovic@51 324 for (i1=0; i1<m*k; i1++) {
idamnjanovic@51 325 X[i1] *= alpha;
idamnjanovic@51 326 }
idamnjanovic@51 327 }
idamnjanovic@51 328
idamnjanovic@51 329
idamnjanovic@51 330 /* tensor-matrix product */
idamnjanovic@51 331
idamnjanovic@51 332 void tens_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l)
idamnjanovic@51 333 {
idamnjanovic@51 334 mwIndex i1, i2, i3, i4, i2_n, nml;
idamnjanovic@51 335 double b;
idamnjanovic@51 336
idamnjanovic@51 337 nml = n*m*l;
idamnjanovic@51 338 for (i1=0; i1<nml; ++i1) {
idamnjanovic@51 339 X[i1] = 0;
idamnjanovic@51 340 }
idamnjanovic@51 341
idamnjanovic@51 342 for (i2=0; i2<m; ++i2) {
idamnjanovic@51 343 i2_n = i2*n;
idamnjanovic@51 344 for (i3=0; i3<k; ++i3) {
idamnjanovic@51 345 for (i4=0; i4<l; ++i4) {
idamnjanovic@51 346 b = B[i4+i3*l];
idamnjanovic@51 347 for (i1=0; i1<n; ++i1) {
idamnjanovic@51 348 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b;
idamnjanovic@51 349 }
idamnjanovic@51 350 }
idamnjanovic@51 351 }
idamnjanovic@51 352 }
idamnjanovic@51 353
idamnjanovic@51 354 for (i1=0; i1<nml; ++i1) {
idamnjanovic@51 355 X[i1] *= alpha;
idamnjanovic@51 356 }
idamnjanovic@51 357 }
idamnjanovic@51 358
idamnjanovic@51 359
idamnjanovic@51 360 /* tensor-matrix-transpose product */
idamnjanovic@51 361
idamnjanovic@51 362 void tens_matT(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l)
idamnjanovic@51 363 {
idamnjanovic@51 364 mwIndex i1, i2, i3, i4, i2_n, nml;
idamnjanovic@51 365 double b;
idamnjanovic@51 366
idamnjanovic@51 367 nml = n*m*l;
idamnjanovic@51 368 for (i1=0; i1<nml; ++i1) {
idamnjanovic@51 369 X[i1] = 0;
idamnjanovic@51 370 }
idamnjanovic@51 371
idamnjanovic@51 372 for (i2=0; i2<m; ++i2) {
idamnjanovic@51 373 i2_n = i2*n;
idamnjanovic@51 374 for (i4=0; i4<l; ++i4) {
idamnjanovic@51 375 for (i3=0; i3<k; ++i3) {
idamnjanovic@51 376 b = B[i3+i4*k];
idamnjanovic@51 377 for (i1=0; i1<n; ++i1) {
idamnjanovic@51 378 X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b;
idamnjanovic@51 379 }
idamnjanovic@51 380 }
idamnjanovic@51 381 }
idamnjanovic@51 382 }
idamnjanovic@51 383
idamnjanovic@51 384 for (i1=0; i1<nml; ++i1) {
idamnjanovic@51 385 X[i1] *= alpha;
idamnjanovic@51 386 }
idamnjanovic@51 387 }
idamnjanovic@51 388
idamnjanovic@51 389
idamnjanovic@51 390 /* dot product */
idamnjanovic@51 391
idamnjanovic@51 392 double dotprod(double a[], double b[], mwSize n)
idamnjanovic@51 393 {
idamnjanovic@51 394 double sum = 0;
idamnjanovic@51 395 mwIndex i;
idamnjanovic@51 396 for (i=0; i<n; ++i)
idamnjanovic@51 397 sum += a[i]*b[i];
idamnjanovic@51 398 return sum;
idamnjanovic@51 399 }
idamnjanovic@51 400
idamnjanovic@51 401
idamnjanovic@51 402 /* find maximum of vector */
idamnjanovic@51 403
idamnjanovic@51 404 mwIndex maxpos(double c[], mwSize m)
idamnjanovic@51 405 {
idamnjanovic@51 406 mwIndex maxid=0, k;
idamnjanovic@51 407 double val, maxval = *c;
idamnjanovic@51 408
idamnjanovic@51 409 for (k=1; k<m; ++k) {
idamnjanovic@51 410 val = c[k];
idamnjanovic@51 411 if (val > maxval) {
idamnjanovic@51 412 maxval = val;
idamnjanovic@51 413 maxid = k;
idamnjanovic@51 414 }
idamnjanovic@51 415 }
idamnjanovic@51 416 return maxid;
idamnjanovic@51 417 }
idamnjanovic@51 418
idamnjanovic@51 419
idamnjanovic@51 420 /* solve L*x = b */
idamnjanovic@51 421
idamnjanovic@51 422 void backsubst_L(double L[], double b[], double x[], mwSize n, mwSize k)
idamnjanovic@51 423 {
idamnjanovic@51 424 mwIndex i, j;
idamnjanovic@51 425 double rhs;
idamnjanovic@51 426
idamnjanovic@51 427 for (i=0; i<k; ++i) {
idamnjanovic@51 428 rhs = b[i];
idamnjanovic@51 429 for (j=0; j<i; ++j) {
idamnjanovic@51 430 rhs -= L[j*n+i]*x[j];
idamnjanovic@51 431 }
idamnjanovic@51 432 x[i] = rhs/L[i*n+i];
idamnjanovic@51 433 }
idamnjanovic@51 434 }
idamnjanovic@51 435
idamnjanovic@51 436
idamnjanovic@51 437 /* solve L'*x = b */
idamnjanovic@51 438
idamnjanovic@51 439 void backsubst_Lt(double L[], double b[], double x[], mwSize n, mwSize k)
idamnjanovic@51 440 {
idamnjanovic@51 441 mwIndex i, j;
idamnjanovic@51 442 double rhs;
idamnjanovic@51 443
idamnjanovic@51 444 for (i=k; i>=1; --i) {
idamnjanovic@51 445 rhs = b[i-1];
idamnjanovic@51 446 for (j=i; j<k; ++j) {
idamnjanovic@51 447 rhs -= L[(i-1)*n+j]*x[j];
idamnjanovic@51 448 }
idamnjanovic@51 449 x[i-1] = rhs/L[(i-1)*n+i-1];
idamnjanovic@51 450 }
idamnjanovic@51 451 }
idamnjanovic@51 452
idamnjanovic@51 453
idamnjanovic@51 454 /* solve U*x = b */
idamnjanovic@51 455
idamnjanovic@51 456 void backsubst_U(double U[], double b[], double x[], mwSize n, mwSize k)
idamnjanovic@51 457 {
idamnjanovic@51 458 mwIndex i, j;
idamnjanovic@51 459 double rhs;
idamnjanovic@51 460
idamnjanovic@51 461 for (i=k; i>=1; --i) {
idamnjanovic@51 462 rhs = b[i-1];
idamnjanovic@51 463 for (j=i; j<k; ++j) {
idamnjanovic@51 464 rhs -= U[j*n+i-1]*x[j];
idamnjanovic@51 465 }
idamnjanovic@51 466 x[i-1] = rhs/U[(i-1)*n+i-1];
idamnjanovic@51 467 }
idamnjanovic@51 468 }
idamnjanovic@51 469
idamnjanovic@51 470
idamnjanovic@51 471 /* solve U'*x = b */
idamnjanovic@51 472
idamnjanovic@51 473 void backsubst_Ut(double U[], double b[], double x[], mwSize n, mwSize k)
idamnjanovic@51 474 {
idamnjanovic@51 475 mwIndex i, j;
idamnjanovic@51 476 double rhs;
idamnjanovic@51 477
idamnjanovic@51 478 for (i=0; i<k; ++i) {
idamnjanovic@51 479 rhs = b[i];
idamnjanovic@51 480 for (j=0; j<i; ++j) {
idamnjanovic@51 481 rhs -= U[i*n+j]*x[j];
idamnjanovic@51 482 }
idamnjanovic@51 483 x[i] = rhs/U[i*n+i];
idamnjanovic@51 484 }
idamnjanovic@51 485 }
idamnjanovic@51 486
idamnjanovic@51 487
idamnjanovic@51 488 /* back substitution solver */
idamnjanovic@51 489
idamnjanovic@51 490 void backsubst(char ul, double A[], double b[], double x[], mwSize n, mwSize k)
idamnjanovic@51 491 {
idamnjanovic@51 492 if (tolower(ul) == 'u') {
idamnjanovic@51 493 backsubst_U(A, b, x, n, k);
idamnjanovic@51 494 }
idamnjanovic@51 495 else if (tolower(ul) == 'l') {
idamnjanovic@51 496 backsubst_L(A, b, x, n, k);
idamnjanovic@51 497 }
idamnjanovic@51 498 else {
idamnjanovic@51 499 mexErrMsgTxt("Invalid triangular matrix type: must be ''U'' or ''L''");
idamnjanovic@51 500 }
idamnjanovic@51 501 }
idamnjanovic@51 502
idamnjanovic@51 503
idamnjanovic@51 504 /* solve equation set using cholesky decomposition */
idamnjanovic@51 505
idamnjanovic@51 506 void cholsolve(char ul, double A[], double b[], double x[], mwSize n, mwSize k)
idamnjanovic@51 507 {
idamnjanovic@51 508 double *tmp;
idamnjanovic@51 509
idamnjanovic@51 510 tmp = mxMalloc(k*sizeof(double));
idamnjanovic@51 511
idamnjanovic@51 512 if (tolower(ul) == 'l') {
idamnjanovic@51 513 backsubst_L(A, b, tmp, n, k);
idamnjanovic@51 514 backsubst_Lt(A, tmp, x, n, k);
idamnjanovic@51 515 }
idamnjanovic@51 516 else if (tolower(ul) == 'u') {
idamnjanovic@51 517 backsubst_Ut(A, b, tmp, n, k);
idamnjanovic@51 518 backsubst_U(A, tmp, x, n, k);
idamnjanovic@51 519 }
idamnjanovic@51 520 else {
idamnjanovic@51 521 mexErrMsgTxt("Invalid triangular matrix type: must be either ''U'' or ''L''");
idamnjanovic@51 522 }
idamnjanovic@51 523
idamnjanovic@51 524 mxFree(tmp);
idamnjanovic@51 525 }
idamnjanovic@51 526
idamnjanovic@51 527
idamnjanovic@51 528 /* perform a permutation assignment y := x(ind(1:k)) */
idamnjanovic@51 529
idamnjanovic@51 530 void vec_assign(double y[], double x[], mwIndex ind[], mwSize k)
idamnjanovic@51 531 {
idamnjanovic@51 532 mwIndex i;
idamnjanovic@51 533
idamnjanovic@51 534 for (i=0; i<k; ++i)
idamnjanovic@51 535 y[i] = x[ind[i]];
idamnjanovic@51 536 }
idamnjanovic@51 537
idamnjanovic@51 538
idamnjanovic@51 539 /* matrix transpose */
idamnjanovic@51 540
idamnjanovic@51 541 void transpose(double X[], double Y[], mwSize n, mwSize m)
idamnjanovic@51 542 {
idamnjanovic@51 543 mwIndex i, j, i_m, j_n;
idamnjanovic@51 544
idamnjanovic@51 545 if (n<m) {
idamnjanovic@51 546 for (j=0; j<m; ++j) {
idamnjanovic@51 547 j_n = j*n;
idamnjanovic@51 548 for (i=0; i<n; ++i) {
idamnjanovic@51 549 Y[j+i*m] = X[i+j_n];
idamnjanovic@51 550 }
idamnjanovic@51 551 }
idamnjanovic@51 552 }
idamnjanovic@51 553 else {
idamnjanovic@51 554 for (i=0; i<n; ++i) {
idamnjanovic@51 555 i_m = i*m;
idamnjanovic@51 556 for (j=0; j<m; ++j) {
idamnjanovic@51 557 Y[j+i_m] = X[i+j*n];
idamnjanovic@51 558 }
idamnjanovic@51 559 }
idamnjanovic@51 560 }
idamnjanovic@51 561 }
idamnjanovic@51 562
idamnjanovic@51 563
idamnjanovic@51 564 /* print contents of matrix */
idamnjanovic@51 565
idamnjanovic@51 566 void printmat(double A[], int n, int m, char* matname)
idamnjanovic@51 567 {
idamnjanovic@51 568 int i, j;
idamnjanovic@51 569 mexPrintf("\n%s = \n\n", matname);
idamnjanovic@51 570
idamnjanovic@51 571 if (n*m==0) {
idamnjanovic@51 572 mexPrintf(" Empty matrix: %d-by-%d\n\n", n, m);
idamnjanovic@51 573 return;
idamnjanovic@51 574 }
idamnjanovic@51 575
idamnjanovic@51 576 for (i=0; i<n; ++i) {
idamnjanovic@51 577 for (j=0; j<m; ++j)
idamnjanovic@51 578 mexPrintf(" %lf", A[j*n+i]);
idamnjanovic@51 579 mexPrintf("\n");
idamnjanovic@51 580 }
idamnjanovic@51 581 mexPrintf("\n");
idamnjanovic@51 582 }
idamnjanovic@51 583
idamnjanovic@51 584
idamnjanovic@51 585 /* print contents of sparse matrix */
idamnjanovic@51 586
idamnjanovic@51 587 void printspmat(mxArray *a, char* matname)
idamnjanovic@51 588 {
idamnjanovic@51 589 mwIndex *aJc = mxGetJc(a);
idamnjanovic@51 590 mwIndex *aIr = mxGetIr(a);
idamnjanovic@51 591 double *aPr = mxGetPr(a);
idamnjanovic@51 592
idamnjanovic@51 593 int i;
idamnjanovic@51 594
idamnjanovic@51 595 mexPrintf("\n%s = \n\n", matname);
idamnjanovic@51 596
idamnjanovic@51 597 for (i=0; i<aJc[1]; ++i)
idamnjanovic@51 598 printf(" (%d,1) = %lf\n", aIr[i]+1,aPr[i]);
idamnjanovic@51 599
idamnjanovic@51 600 mexPrintf("\n");
idamnjanovic@51 601 }
idamnjanovic@51 602
idamnjanovic@51 603
idamnjanovic@51 604
idamnjanovic@51 605 /* matrix multiplication using Winograd's algorithm */
idamnjanovic@51 606
idamnjanovic@51 607 /*
idamnjanovic@51 608 void mat_mat2(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k)
idamnjanovic@51 609 {
idamnjanovic@51 610
idamnjanovic@51 611 mwIndex i1, i2, i3, iX, iA, i2_n;
idamnjanovic@51 612 double b, *AA, *BB;
idamnjanovic@51 613
idamnjanovic@51 614 AA = mxCalloc(n,sizeof(double));
idamnjanovic@51 615 BB = mxCalloc(k,sizeof(double));
idamnjanovic@51 616
idamnjanovic@51 617 for (i1=0; i1<n*k; i1++) {
idamnjanovic@51 618 X[i1] = 0;
idamnjanovic@51 619 }
idamnjanovic@51 620
idamnjanovic@51 621 for (i1=0; i1<n; ++i1) {
idamnjanovic@51 622 for (i2=0; i2<m/2; ++i2) {
idamnjanovic@51 623 AA[i1] += A[i1+2*i2*n]*A[i1+(2*i2+1)*n];
idamnjanovic@51 624 }
idamnjanovic@51 625 }
idamnjanovic@51 626
idamnjanovic@51 627 for (i2=0; i2<k; ++i2) {
idamnjanovic@51 628 for (i1=0; i1<m/2; ++i1) {
idamnjanovic@51 629 BB[i2] += B[2*i1+i2*m]*B[2*i1+1+i2*m];
idamnjanovic@51 630 }
idamnjanovic@51 631 }
idamnjanovic@51 632
idamnjanovic@51 633 for (i2=0; i2<k; ++i2) {
idamnjanovic@51 634 for (i3=0; i3<m/2; ++i3) {
idamnjanovic@51 635 for (i1=0; i1<n; ++i1) {
idamnjanovic@51 636 X[i1+i2*n] += (A[i1+(2*i3)*n]+B[2*i3+1+i2*m])*(A[i1+(2*i3+1)*n]+B[2*i3+i2*m]);
idamnjanovic@51 637 }
idamnjanovic@51 638 }
idamnjanovic@51 639 }
idamnjanovic@51 640
idamnjanovic@51 641 if (m%2) {
idamnjanovic@51 642 for (i2=0; i2<k; ++i2) {
idamnjanovic@51 643 for (i1=0; i1<n; ++i1) {
idamnjanovic@51 644 X[i1+i2*n] += A[i1+(m-1)*n]*B[m-1+i2*m];
idamnjanovic@51 645 }
idamnjanovic@51 646 }
idamnjanovic@51 647 }
idamnjanovic@51 648
idamnjanovic@51 649 for (i2=0; i2<k; ++i2) {
idamnjanovic@51 650 for (i1=0; i1<n; ++i1) {
idamnjanovic@51 651 X[i1+i2*n] -= (AA[i1] + BB[i2]);
idamnjanovic@51 652 X[i1+i2*n] *= alpha;
idamnjanovic@51 653 }
idamnjanovic@51 654 }
idamnjanovic@51 655
idamnjanovic@51 656 mxFree(AA);
idamnjanovic@51 657 mxFree(BB);
idamnjanovic@51 658 }
idamnjanovic@51 659 */
idamnjanovic@51 660
idamnjanovic@51 661
idamnjanovic@51 662
idamnjanovic@51 663