Mercurial > hg > smallbox
diff Problems/private/myblas.c @ 51:217a33ac374e
(none)
author | idamnjanovic |
---|---|
date | Mon, 14 Mar 2011 16:52:27 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Problems/private/myblas.c Mon Mar 14 16:52:27 2011 +0000 @@ -0,0 +1,663 @@ +/************************************************************************** + * + * File name: myblas.c + * + * Ron Rubinstein + * Computer Science Department + * Technion, Haifa 32000 Israel + * ronrubin@cs + * + * Version: 1.1 + * Last updated: 13.8.2009 + * + *************************************************************************/ + + +#include "myblas.h" +#include <ctype.h> + + +/* find maximum of absolute values */ + +mwIndex maxabs(double c[], mwSize m) +{ + mwIndex maxid=0, k; + double absval, maxval = SQR(*c); /* use square which is quicker than absolute value */ + + for (k=1; k<m; ++k) { + absval = SQR(c[k]); + if (absval > maxval) { + maxval = absval; + maxid = k; + } + } + return maxid; +} + + +/* compute y := alpha*x + y */ + +void vec_sum(double alpha, double x[], double y[], mwSize n) +{ + mwIndex i; + + for (i=0; i<n; ++i) { + y[i] += alpha*x[i]; + } +} + + +/* compute y := alpha*A*x */ + +void mat_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m) +{ + mwIndex i, j, i_n; + double *Ax; + + Ax = mxCalloc(n,sizeof(double)); + + for (i=0; i<m; ++i) { + i_n = i*n; + for (j=0; j<n; ++j) { + Ax[j] += A[i_n+j] * x[i]; + } + } + + for (j=0; j<n; ++j) { + y[j] = alpha*Ax[j]; + } + + mxFree(Ax); +} + + +/* compute y := alpha*A'*x */ + +void matT_vec(double alpha, double A[], double x[], double y[], mwSize n, mwSize m) +{ + mwIndex i, j, n_i; + double sum0, sum1, sum2, sum3; + + for (j=0; j<m; ++j) { + y[j] = 0; + } + + /* use loop unrolling to accelerate computation */ + + for (i=0; i<m; ++i) { + n_i = n*i; + sum0 = sum1 = sum2 = sum3 = 0; + for (j=0; j+4<n; j+=4) { + sum0 += A[n_i+j]*x[j]; + sum1 += A[n_i+j+1]*x[j+1]; + sum2 += A[n_i+j+2]*x[j+2]; + sum3 += A[n_i+j+3]*x[j+3]; + } + y[i] += alpha * ((sum0 + sum1) + (sum2 + sum3)); + while (j<n) { + y[i] += alpha*A[n_i+j]*x[j]; + j++; + } + } +} + + +/* compute y := alpha*A*x */ + +void mat_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m) +{ + + mwIndex i, j, j1, j2; + + for (i=0; i<n; ++i) { + y[i] = 0; + } + + j2 = jc[0]; + for (i=0; i<m; ++i) { + j1 = j2; j2 = jc[i+1]; + for (j=j1; j<j2; ++j) { + y[ir[j]] += alpha * pr[j] * x[i]; + } + } + +} + + +/* compute y := alpha*A'*x */ + +void matT_sp_vec(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double x[], double y[], mwSize n, mwSize m) +{ + + mwIndex i, j, j1, j2; + + for (i=0; i<m; ++i) { + y[i] = 0; + } + + j2 = jc[0]; + for (i=0; i<m; ++i) { + j1 = j2; j2 = jc[i+1]; + for (j=j1; j<j2; ++j) { + y[i] += alpha * pr[j] * x[ir[j]]; + } + } + +} + + +/* compute y := alpha*A*x */ + +void mat_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m) +{ + + mwIndex i, j, j_n, k, kend; + + for (i=0; i<n; ++i) { + y[i] = 0; + } + + kend = jc[1]; + if (kend==0) { /* x is empty */ + return; + } + + for (k=0; k<kend; ++k) { + j = ir[k]; + j_n = j*n; + for (i=0; i<n; ++i) { + y[i] += alpha * A[i+j_n] * pr[k]; + } + } + +} + + +/* compute y := alpha*A'*x */ + +void matT_vec_sp(double alpha, double A[], double pr[], mwIndex ir[], mwIndex jc[], double y[], mwSize n, mwSize m) +{ + + mwIndex i, j, j_n, k, kend; + + for (i=0; i<m; ++i) { + y[i] = 0; + } + + kend = jc[1]; + if (kend==0) { /* x is empty */ + return; + } + + for (j=0; j<m; ++j) { + j_n = j*n; + for (k=0; k<kend; ++k) { + i = ir[k]; + y[j] += alpha * A[i+j_n] * pr[k]; + } + } + +} + + +/* compute y := alpha*A*x */ + +void mat_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m) +{ + + mwIndex i, j, k, kend, j1, j2; + + for (i=0; i<n; ++i) { + y[i] = 0; + } + + kend = jcx[1]; + if (kend==0) { /* x is empty */ + return; + } + + for (k=0; k<kend; ++k) { + i = irx[k]; + j1 = jc[i]; j2 = jc[i+1]; + for (j=j1; j<j2; ++j) { + y[ir[j]] += alpha * pr[j] * prx[k]; + } + } + +} + + +/* compute y := alpha*A'*x */ + +void matT_sp_vec_sp(double alpha, double pr[], mwIndex ir[], mwIndex jc[], double prx[], mwIndex irx[], mwIndex jcx[], double y[], mwSize n, mwSize m) +{ + + mwIndex i, j, k, jend, kend, jadd, kadd, delta; + + for (i=0; i<m; ++i) { + y[i] = 0; + } + + kend = jcx[1]; + if (kend==0) { /* x is empty */ + return; + } + + for (i=0; i<m; ++i) { + j = jc[i]; + jend = jc[i+1]; + k = 0; + while (j<jend && k<kend) { + + delta = ir[j] - irx[k]; + + if (delta) { /* if indices differ - increment the smaller one */ + jadd = delta<0; + kadd = 1-jadd; + j += jadd; + k += kadd; + } + + else { /* indices are equal - add to result and increment both */ + y[i] += alpha * pr[j] * prx[k]; + j++; k++; + } + } + } + +} + + +/* matrix-matrix multiplication */ + +void mat_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k) +{ + mwIndex i1, i2, i3, iX, iA, i2_n; + double b; + + for (i1=0; i1<n*k; i1++) { + X[i1] = 0; + } + + for (i2=0; i2<m; ++i2) { + i2_n = i2*n; + iX = 0; + for (i3=0; i3<k; ++i3) { + iA = i2_n; + b = B[i2+i3*m]; + for (i1=0; i1<n; ++i1) { + X[iX++] += A[iA++]*b; + } + } + } + + for (i1=0; i1<n*k; i1++) { + X[i1] *= alpha; + } +} + + +/* matrix-transpose-matrix multiplication */ + +void matT_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k) +{ + mwIndex i1, i2, i3, iX, iA, i2_n; + double *x, sum0, sum1, sum2, sum3; + + for (i2=0; i2<m; ++i2) { + for (i3=0; i3<k; ++i3) { + sum0 = sum1 = sum2 = sum3 = 0; + for (i1=0; i1+4<n; i1+=4) { + sum0 += A[i1+0+i2*n]*B[i1+0+i3*n]; + sum1 += A[i1+1+i2*n]*B[i1+1+i3*n]; + sum2 += A[i1+2+i2*n]*B[i1+2+i3*n]; + sum3 += A[i1+3+i2*n]*B[i1+3+i3*n]; + } + X[i2+i3*m] = (sum0+sum1) + (sum2+sum3); + while(i1<n) { + X[i2+i3*m] += A[i1+i2*n]*B[i1+i3*n]; + i1++; + } + } + } + + for (i1=0; i1<m*k; i1++) { + X[i1] *= alpha; + } +} + + +/* tensor-matrix product */ + +void tens_mat(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l) +{ + mwIndex i1, i2, i3, i4, i2_n, nml; + double b; + + nml = n*m*l; + for (i1=0; i1<nml; ++i1) { + X[i1] = 0; + } + + for (i2=0; i2<m; ++i2) { + i2_n = i2*n; + for (i3=0; i3<k; ++i3) { + for (i4=0; i4<l; ++i4) { + b = B[i4+i3*l]; + for (i1=0; i1<n; ++i1) { + X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b; + } + } + } + } + + for (i1=0; i1<nml; ++i1) { + X[i1] *= alpha; + } +} + + +/* tensor-matrix-transpose product */ + +void tens_matT(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k, mwSize l) +{ + mwIndex i1, i2, i3, i4, i2_n, nml; + double b; + + nml = n*m*l; + for (i1=0; i1<nml; ++i1) { + X[i1] = 0; + } + + for (i2=0; i2<m; ++i2) { + i2_n = i2*n; + for (i4=0; i4<l; ++i4) { + for (i3=0; i3<k; ++i3) { + b = B[i3+i4*k]; + for (i1=0; i1<n; ++i1) { + X[i1 + i2_n + i4*n*m] += A[i1 + i2_n + i3*n*m] * b; + } + } + } + } + + for (i1=0; i1<nml; ++i1) { + X[i1] *= alpha; + } +} + + +/* dot product */ + +double dotprod(double a[], double b[], mwSize n) +{ + double sum = 0; + mwIndex i; + for (i=0; i<n; ++i) + sum += a[i]*b[i]; + return sum; +} + + +/* find maximum of vector */ + +mwIndex maxpos(double c[], mwSize m) +{ + mwIndex maxid=0, k; + double val, maxval = *c; + + for (k=1; k<m; ++k) { + val = c[k]; + if (val > maxval) { + maxval = val; + maxid = k; + } + } + return maxid; +} + + +/* solve L*x = b */ + +void backsubst_L(double L[], double b[], double x[], mwSize n, mwSize k) +{ + mwIndex i, j; + double rhs; + + for (i=0; i<k; ++i) { + rhs = b[i]; + for (j=0; j<i; ++j) { + rhs -= L[j*n+i]*x[j]; + } + x[i] = rhs/L[i*n+i]; + } +} + + +/* solve L'*x = b */ + +void backsubst_Lt(double L[], double b[], double x[], mwSize n, mwSize k) +{ + mwIndex i, j; + double rhs; + + for (i=k; i>=1; --i) { + rhs = b[i-1]; + for (j=i; j<k; ++j) { + rhs -= L[(i-1)*n+j]*x[j]; + } + x[i-1] = rhs/L[(i-1)*n+i-1]; + } +} + + +/* solve U*x = b */ + +void backsubst_U(double U[], double b[], double x[], mwSize n, mwSize k) +{ + mwIndex i, j; + double rhs; + + for (i=k; i>=1; --i) { + rhs = b[i-1]; + for (j=i; j<k; ++j) { + rhs -= U[j*n+i-1]*x[j]; + } + x[i-1] = rhs/U[(i-1)*n+i-1]; + } +} + + +/* solve U'*x = b */ + +void backsubst_Ut(double U[], double b[], double x[], mwSize n, mwSize k) +{ + mwIndex i, j; + double rhs; + + for (i=0; i<k; ++i) { + rhs = b[i]; + for (j=0; j<i; ++j) { + rhs -= U[i*n+j]*x[j]; + } + x[i] = rhs/U[i*n+i]; + } +} + + +/* back substitution solver */ + +void backsubst(char ul, double A[], double b[], double x[], mwSize n, mwSize k) +{ + if (tolower(ul) == 'u') { + backsubst_U(A, b, x, n, k); + } + else if (tolower(ul) == 'l') { + backsubst_L(A, b, x, n, k); + } + else { + mexErrMsgTxt("Invalid triangular matrix type: must be ''U'' or ''L''"); + } +} + + +/* solve equation set using cholesky decomposition */ + +void cholsolve(char ul, double A[], double b[], double x[], mwSize n, mwSize k) +{ + double *tmp; + + tmp = mxMalloc(k*sizeof(double)); + + if (tolower(ul) == 'l') { + backsubst_L(A, b, tmp, n, k); + backsubst_Lt(A, tmp, x, n, k); + } + else if (tolower(ul) == 'u') { + backsubst_Ut(A, b, tmp, n, k); + backsubst_U(A, tmp, x, n, k); + } + else { + mexErrMsgTxt("Invalid triangular matrix type: must be either ''U'' or ''L''"); + } + + mxFree(tmp); +} + + +/* perform a permutation assignment y := x(ind(1:k)) */ + +void vec_assign(double y[], double x[], mwIndex ind[], mwSize k) +{ + mwIndex i; + + for (i=0; i<k; ++i) + y[i] = x[ind[i]]; +} + + +/* matrix transpose */ + +void transpose(double X[], double Y[], mwSize n, mwSize m) +{ + mwIndex i, j, i_m, j_n; + + if (n<m) { + for (j=0; j<m; ++j) { + j_n = j*n; + for (i=0; i<n; ++i) { + Y[j+i*m] = X[i+j_n]; + } + } + } + else { + for (i=0; i<n; ++i) { + i_m = i*m; + for (j=0; j<m; ++j) { + Y[j+i_m] = X[i+j*n]; + } + } + } +} + + +/* print contents of matrix */ + +void printmat(double A[], int n, int m, char* matname) +{ + int i, j; + mexPrintf("\n%s = \n\n", matname); + + if (n*m==0) { + mexPrintf(" Empty matrix: %d-by-%d\n\n", n, m); + return; + } + + for (i=0; i<n; ++i) { + for (j=0; j<m; ++j) + mexPrintf(" %lf", A[j*n+i]); + mexPrintf("\n"); + } + mexPrintf("\n"); +} + + +/* print contents of sparse matrix */ + +void printspmat(mxArray *a, char* matname) +{ + mwIndex *aJc = mxGetJc(a); + mwIndex *aIr = mxGetIr(a); + double *aPr = mxGetPr(a); + + int i; + + mexPrintf("\n%s = \n\n", matname); + + for (i=0; i<aJc[1]; ++i) + printf(" (%d,1) = %lf\n", aIr[i]+1,aPr[i]); + + mexPrintf("\n"); +} + + + +/* matrix multiplication using Winograd's algorithm */ + +/* +void mat_mat2(double alpha, double A[], double B[], double X[], mwSize n, mwSize m, mwSize k) +{ + + mwIndex i1, i2, i3, iX, iA, i2_n; + double b, *AA, *BB; + + AA = mxCalloc(n,sizeof(double)); + BB = mxCalloc(k,sizeof(double)); + + for (i1=0; i1<n*k; i1++) { + X[i1] = 0; + } + + for (i1=0; i1<n; ++i1) { + for (i2=0; i2<m/2; ++i2) { + AA[i1] += A[i1+2*i2*n]*A[i1+(2*i2+1)*n]; + } + } + + for (i2=0; i2<k; ++i2) { + for (i1=0; i1<m/2; ++i1) { + BB[i2] += B[2*i1+i2*m]*B[2*i1+1+i2*m]; + } + } + + for (i2=0; i2<k; ++i2) { + for (i3=0; i3<m/2; ++i3) { + for (i1=0; i1<n; ++i1) { + X[i1+i2*n] += (A[i1+(2*i3)*n]+B[2*i3+1+i2*m])*(A[i1+(2*i3+1)*n]+B[2*i3+i2*m]); + } + } + } + + if (m%2) { + for (i2=0; i2<k; ++i2) { + for (i1=0; i1<n; ++i1) { + X[i1+i2*n] += A[i1+(m-1)*n]*B[m-1+i2*m]; + } + } + } + + for (i2=0; i2<k; ++i2) { + for (i1=0; i1<n; ++i1) { + X[i1+i2*n] -= (AA[i1] + BB[i2]); + X[i1+i2*n] *= alpha; + } + } + + mxFree(AA); + mxFree(BB); +} +*/ + + + +