Mercurial > hg > nnls-chroma
changeset 29:da3195577172 matthiasm-plugin
* Revert to the previous version of nnls.c[c], which used single-precision floats (better for us).
But we don't like the static local variables -- change those for thread-safety.
author | Chris Cannam |
---|---|
date | Thu, 21 Oct 2010 20:50:22 +0100 |
parents | 52b6dbd61553 |
children | 608b0c8ad3f8 9fe53dd1c41a |
files | Makefile.cc-linux NNLSChroma.cpp nnls.c nnls.h |
diffstat | 4 files changed, 540 insertions(+), 671 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile.cc-linux Thu Oct 21 20:19:29 2010 +0100 +++ b/Makefile.cc-linux Thu Oct 21 20:50:22 2010 +0100 @@ -27,7 +27,7 @@ VAMP_SDK_DIR = ../vamp-plugin-sdk -ARCHFLAGS = -g +ARCHFLAGS = -O3 -ftree-vectorize -ffast-math CFLAGS = $(ARCHFLAGS) -I$(VAMP_SDK_DIR) -Wall -fPIC CXXFLAGS = $(ARCHFLAGS) -I$(VAMP_SDK_DIR) -Wall -fPIC
--- a/NNLSChroma.cpp Thu Oct 21 20:19:29 2010 +0100 +++ b/NNLSChroma.cpp Thu Oct 21 20:50:22 2010 +0100 @@ -827,7 +827,7 @@ f6.hasTimestamp = true; f6.timestamp = f2.timestamp; - double b[256]; + float b[256]; bool some_b_greater_zero = false; float sumb = 0; @@ -861,7 +861,7 @@ } } else { - double x[84+1000]; + float x[84+1000]; for (int i = 1; i < 1084; ++i) x[i] = 1.0; vector<int> signifIndex; int index=0; @@ -875,22 +875,20 @@ f3.values.push_back(0); // fill the values, change later index++; } - double rnorm; - double w[84+1000]; - double zz[84+1000]; + float rnorm; + float w[84+1000]; + float zz[84+1000]; int indx[84+1000]; int mode; int dictsize = 256*signifIndex.size(); // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl; - double *curr_dict = new double[dictsize]; + float *curr_dict = new float[dictsize]; for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { for (unsigned iBin = 0; iBin < 256; iBin++) { curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin]; } } - int sz = signifIndex.size(); - int nn = nNote; - NNLS(curr_dict, &nn, &nn, &sz, b, x, &rnorm, w, zz, indx, &mode); + nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); delete [] curr_dict; for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) { f3.values[signifIndex[iNote]] = x[iNote];
--- a/nnls.c Thu Oct 21 20:19:29 2010 +0100 +++ b/nnls.c Thu Oct 21 20:50:22 2010 +0100 @@ -1,705 +1,578 @@ - #include "nnls.h" -typedef int integer; -typedef unsigned int uinteger; -typedef char *address; -typedef short int shortint; -typedef float real; -typedef double doublereal; +#include <math.h> -#define abs(x) ((x) >= 0 ? (x) : -(x)) -#define dabs(x) (doublereal)abs(x) -#define min(a,b) ((a) <= (b) ? (a) : (b)) -#define max(a,b) ((a) >= (b) ? (a) : (b)) -#define dmin(a,b) (doublereal)min(a,b) -#define dmax(a,b) (doublereal)max(a,b) -#define bit_test(a,b) ((a) >> (b) & 1) -#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) -#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) +#define nnls_max(a,b) ((a) >= (b) ? (a) : (b)) +#define nnls_abs(x) ((x) >= 0 ? (x) : -(x)) -double d_sign(doublereal *a, doublereal *b) +float d_sign(float a, float b) { - double x; - x = (*a >= 0 ? *a : - *a); - return (*b >= 0 ? x : -x); + float x; + x = (a >= 0 ? a : - a); + return (b >= 0 ? x : -x); } /* Table of constant values */ -static integer c__1 = 1; -static integer c__0 = 0; -static integer c__2 = 2; +int c__1 = 1; +int c__0 = 0; +int c__2 = 2; -doublereal diff_(doublereal *x, doublereal *y) + +int g1(float* a, float* b, float* cterm, float* sterm, float* sig) { - /* System generated locals */ - doublereal ret_val; + /* System generated locals */ + float d; + float xr, yr; -/* Function used in tests that depend on machine precision. */ -/* The original version of this code was developed by */ -/* Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory */ -/* 1973 JUN 7, and published in the book */ -/* "SOLVING LEAST SQUARES PROBLEMS", Prentice-HalL, 1974. */ -/* Revised FEB 1995 to accompany reprinting of the book by SIAM. */ - - ret_val = *x - *y; - return ret_val; -} /* diff_ */ - -/* Subroutine */ int g1_(doublereal *a, doublereal *b, doublereal *cterm, - doublereal *sterm, doublereal *sig) -{ - /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static doublereal xr, yr; - - -/* COMPUTE ORTHOGONAL ROTATION MATRIX.. */ - -/* The original version of this code was developed by */ -/* Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory */ -/* 1973 JUN 12, and published in the book */ -/* "SOLVING LEAST SQUARES PROBLEMS", Prentice-HalL, 1974. */ -/* Revised FEB 1995 to accompany reprinting of the book by SIAM. */ - -/* COMPUTE.. MATRIX (C, S) SO THAT (C, S)(A) = (SQRT(A**2+B**2)) */ -/* (-S,C) (-S,C)(B) ( 0 ) */ -/* COMPUTE SIG = SQRT(A**2+B**2) */ -/* SIG IS COMPUTED LAST TO ALLOW FOR THE POSSIBILITY THAT */ -/* SIG MAY BE IN THE SAME LOCATION AS A OR B . */ -/* ------------------------------------------------------------------ */ -/* ------------------------------------------------------------------ */ - if (abs(*a) > abs(*b)) { - xr = *b / *a; -/* Computing 2nd power */ - d__1 = xr; - yr = sqrt(d__1 * d__1 + 1.); - d__1 = 1. / yr; - *cterm = d_sign(&d__1, a); - *sterm = *cterm * xr; - *sig = abs(*a) * yr; - return 0; - } - if (*b != 0.) { - xr = *a / *b; -/* Computing 2nd power */ - d__1 = xr; - yr = sqrt(d__1 * d__1 + 1.); - d__1 = 1. / yr; - *sterm = d_sign(&d__1, b); - *cterm = *sterm * xr; - *sig = abs(*b) * yr; - return 0; - } - *sig = 0.; - *cterm = 0.; - *sterm = 1.; + if (nnls_abs(*a) > nnls_abs(*b)) { + xr = *b / *a; + /* Computing 2nd power */ + d = xr; + yr = sqrt(d * d + 1.); + d = 1. / yr; + *cterm = d_sign(d, *a); + *sterm = *cterm * xr; + *sig = nnls_abs(*a) * yr; return 0; + } + if (*b != 0.) { + xr = *a / *b; + /* Computing 2nd power */ + d = xr; + yr = sqrt(d * d + 1.); + d = 1. / yr; + *sterm = d_sign(d, *b); + *cterm = *sterm * xr; + *sig = nnls_abs(*b) * yr; + return 0; + } + *sig = 0.; + *cterm = 0.; + *sterm = 1.; + return 0; } /* g1_ */ -/* SUBROUTINE H12 (MODE,LPIVOT,L1,M,U,IUE,UP,C,ICE,ICV,NCV) */ +int h12(int mode, int* lpivot, int* l1, + int m, float* u, int* iue, float* up, float* c__, + int* ice, int* icv, int* ncv) +{ + /* System generated locals */ + int u_dim1, u_offset, idx1, idx2; + float d, d2; -/* CONSTRUCTION AND/OR APPLICATION OF A SINGLE */ -/* HOUSEHOLDER TRANSFORMATION.. Q = I + U*(U**T)/B */ + /* Local variables */ + int incr; + float b; + int i__, j; + float clinv; + int i2, i3, i4; + float cl, sm; -/* The original version of this code was developed by */ -/* Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory */ -/* 1973 JUN 12, and published in the book */ -/* "SOLVING LEAST SQUARES PROBLEMS", Prentice-HalL, 1974. */ -/* Revised FEB 1995 to accompany reprinting of the book by SIAM. */ -/* ------------------------------------------------------------------ */ -/* Subroutine Arguments */ + /* ------------------------------------------------------------------ + */ + /* float precision U(IUE,M) */ + /* ------------------------------------------------------------------ + */ + /* Parameter adjustments */ + u_dim1 = *iue; + u_offset = u_dim1 + 1; + u -= u_offset; + --c__; -/* MODE = 1 OR 2 Selects Algorithm H1 to construct and apply a */ -/* Householder transformation, or Algorithm H2 to apply a */ -/* previously constructed transformation. */ -/* LPIVOT IS THE INDEX OF THE PIVOT ELEMENT. */ -/* L1,M IF L1 .LE. M THE TRANSFORMATION WILL BE CONSTRUCTED TO */ -/* ZERO ELEMENTS INDEXED FROM L1 THROUGH M. IF L1 GT. M */ -/* THE SUBROUTINE DOES AN IDENTITY TRANSFORMATION. */ -/* U(),IUE,UP On entry with MODE = 1, U() contains the pivot */ -/* vector. IUE is the storage increment between elements. */ -/* On exit when MODE = 1, U() and UP contain quantities */ -/* defining the vector U of the Householder transformation. */ -/* on entry with MODE = 2, U() and UP should contain */ -/* quantities previously computed with MODE = 1. These will */ -/* not be modified during the entry with MODE = 2. */ -/* C() ON ENTRY with MODE = 1 or 2, C() CONTAINS A MATRIX WHICH */ -/* WILL BE REGARDED AS A SET OF VECTORS TO WHICH THE */ -/* HOUSEHOLDER TRANSFORMATION IS TO BE APPLIED. */ -/* ON EXIT C() CONTAINS THE SET OF TRANSFORMED VECTORS. */ -/* ICE STORAGE INCREMENT BETWEEN ELEMENTS OF VECTORS IN C(). */ -/* ICV STORAGE INCREMENT BETWEEN VECTORS IN C(). */ -/* NCV NUMBER OF VECTORS IN C() TO BE TRANSFORMED. IF NCV .LE. 0 */ -/* NO OPERATIONS WILL BE DONE ON C(). */ -/* ------------------------------------------------------------------ */ -/* Subroutine */ int h12_(integer *mode, integer *lpivot, integer *l1, - integer *m, doublereal *u, integer *iue, doublereal *up, doublereal * - c__, integer *ice, integer *icv, integer *ncv) + /* Function Body */ + if (0 >= *lpivot || *lpivot >= *l1 || *l1 > m) { + return 0; + } + cl = (d = u[*lpivot * u_dim1 + 1], nnls_abs(d)); + if (mode == 2) { + goto L60; + } + /* ****** CONSTRUCT THE TRANSFORMATION. ****** + */ + idx1 = m; + for (j = *l1; j <= idx1; ++j) { + /* L10: */ + /* Computing MAX */ + d2 = (d = u[j * u_dim1 + 1], nnls_abs(d)); + cl = nnls_max(d2,cl); + } + if (cl <= 0.) { + goto L130; + } else { + goto L20; + } + L20: + clinv = 1. / cl; + /* Computing 2nd power */ + d = u[*lpivot * u_dim1 + 1] * clinv; + sm = d * d; + idx1 = m; + for (j = *l1; j <= idx1; ++j) { + /* L30: */ + /* Computing 2nd power */ + d = u[j * u_dim1 + 1] * clinv; + sm += d * d; + } + cl *= sqrt(sm); + if (u[*lpivot * u_dim1 + 1] <= 0.) { + goto L50; + } else { + goto L40; + } + L40: + cl = -cl; + L50: + *up = u[*lpivot * u_dim1 + 1] - cl; + u[*lpivot * u_dim1 + 1] = cl; + goto L70; + /* ****** APPLY THE TRANSFORMATION I+U*(U**T)/B TO C. ****** + */ + + L60: + if (cl <= 0.) { + goto L130; + } else { + goto L70; + } + L70: + if (*ncv <= 0) { + return 0; + } + b = *up * u[*lpivot * u_dim1 + 1]; + /* B MUST BE NONPOSITIVE HERE. IF B = 0., RETURN. + */ + + if (b >= 0.) { + goto L130; + } else { + goto L80; + } + L80: + b = 1. / b; + i2 = 1 - *icv + *ice * (*lpivot - 1); + incr = *ice * (*l1 - *lpivot); + idx1 = *ncv; + for (j = 1; j <= idx1; ++j) { + i2 += *icv; + i3 = i2 + incr; + i4 = i3; + sm = c__[i2] * *up; + idx2 = m; + for (i__ = *l1; i__ <= idx2; ++i__) { + sm += c__[i3] * u[i__ * u_dim1 + 1]; + /* L90: */ + i3 += *ice; + } + if (sm != 0.) { + goto L100; + } else { + goto L120; + } + L100: + sm *= b; + c__[i2] += sm * *up; + idx2 = m; + for (i__ = *l1; i__ <= idx2; ++i__) { + c__[i4] += sm * u[i__ * u_dim1 + 1]; + /* L110: */ + i4 += *ice; + } + L120: + ; + } + L130: + return 0; +} /* h12 */ + +int nnls(float* a, int mda, int m, int n, float* b, + float* x, float* rnorm, float* w, float* zz, int* index, + int* mode) { - /* System generated locals */ - integer u_dim1, u_offset, i__1, i__2; - doublereal d__1, d__2; + /* System generated locals */ + int a_dim1, a_offset, idx1, idx2; + float d1, d2; - /* Builtin functions */ - double sqrt(doublereal); - /* Local variables */ - static doublereal b; - static integer i__, j, i2, i3, i4; - static doublereal cl, sm; - static integer incr; - static doublereal clinv; + /* Local variables */ + int iter; + float temp, wmax; + int i__, j, l; + float t, alpha, asave; + int itmax, izmax, nsetp; + float unorm, ztest, cc; + float dummy[2]; + int ii, jj, ip; + float sm; + int iz, jz; + float up, ss; + int rtnkey, iz1, iz2, npp1; -/* ------------------------------------------------------------------ */ -/* double precision U(IUE,M) */ -/* ------------------------------------------------------------------ */ - /* Parameter adjustments */ - u_dim1 = *iue; - u_offset = 1 + u_dim1; - u -= u_offset; - --c__; + /* ------------------------------------------------------------------ + */ + /* int INDEX(N) */ + /* float precision A(MDA,N), B(M), W(N), X(N), ZZ(M) */ + /* ------------------------------------------------------------------ + */ + /* Parameter adjustments */ + a_dim1 = mda; + a_offset = a_dim1 + 1; + a -= a_offset; + --b; + --x; + --w; + --zz; + --index; - /* Function Body */ - if (0 >= *lpivot || *lpivot >= *l1 || *l1 > *m) { - return 0; + /* Function Body */ + *mode = 1; + if (m <= 0 || n <= 0) { + *mode = 2; + return 0; + } + iter = 0; + itmax = n * 3; + + /* INITIALIZE THE ARRAYS INDEX() AND X(). */ + + idx1 = n; + for (i__ = 1; i__ <= idx1; ++i__) { + x[i__] = 0.; + /* L20: */ + index[i__] = i__; + } + + iz2 = n; + iz1 = 1; + nsetp = 0; + npp1 = 1; + /* ****** MAIN LOOP BEGINS HERE ****** */ + L30: + /* QUIT IF ALL COEFFICIENTS ARE ALREADY IN THE SOLUTION. + */ + /* OR IF M COLS OF A HAVE BEEN TRIANGULARIZED. */ + + if (iz1 > iz2 || nsetp >= m) { + goto L350; + } + + /* COMPUTE COMPONENTS OF THE DUAL (NEGATIVE GRADIENT) VECTOR W(). + */ + + idx1 = iz2; + for (iz = iz1; iz <= idx1; ++iz) { + j = index[iz]; + sm = 0.; + idx2 = m; + for (l = npp1; l <= idx2; ++l) { + /* L40: */ + sm += a[l + j * a_dim1] * b[l]; } - cl = (d__1 = u[*lpivot * u_dim1 + 1], abs(d__1)); - if (*mode == 2) { - goto L60; + w[j] = sm; + /* L50: */ + } + /* FIND LARGEST POSITIVE W(J). */ + L60: + wmax = 0.; + idx1 = iz2; + for (iz = iz1; iz <= idx1; ++iz) { + j = index[iz]; + if (w[j] > wmax) { + wmax = w[j]; + izmax = iz; } -/* ****** CONSTRUCT THE TRANSFORMATION. ****** */ - i__1 = *m; - for (j = *l1; j <= i__1; ++j) { -/* L10: */ -/* Computing MAX */ - d__2 = (d__1 = u[j * u_dim1 + 1], abs(d__1)); - cl = max(d__2,cl); + /* L70: */ + } + + /* IF WMAX .LE. 0. GO TO TERMINATION. */ + /* THIS INDICATES SATISFACTION OF THE KUHN-TUCKER CONDITIONS. + */ + + if (wmax <= 0.) { + goto L350; + } + iz = izmax; + j = index[iz]; + + /* THE SIGN OF W(J) IS OK FOR J TO BE MOVED TO SET P. */ + /* BEGIN THE TRANSFORMATION AND CHECK NEW DIAGONAL ELEMENT TO AVOID */ + /* NEAR LINEAR DEPENDENCE. */ + + asave = a[npp1 + j * a_dim1]; + idx1 = npp1 + 1; + h12(c__1, &npp1, &idx1, m, &a[j * a_dim1 + 1], &c__1, &up, dummy, & + c__1, &c__1, &c__0); + unorm = 0.; + if (nsetp != 0) { + idx1 = nsetp; + for (l = 1; l <= idx1; ++l) { + /* L90: */ + /* Computing 2nd power */ + d1 = a[l + j * a_dim1]; + unorm += d1 * d1; } - if (cl <= 0.) { - goto L130; - } else { - goto L20; + } + unorm = sqrt(unorm); + d2 = unorm + (d1 = a[npp1 + j * a_dim1], nnls_abs(d1)) * .01; + if ((d2- unorm) > 0.) { + + /* COL J IS SUFFICIENTLY INDEPENDENT. COPY B INTO ZZ, UPDATE Z + Z */ + /* AND SOLVE FOR ZTEST ( = PROPOSED NEW VALUE FOR X(J) ). */ + + idx1 = m; + for (l = 1; l <= idx1; ++l) { + /* L120: */ + zz[l] = b[l]; } -L20: - clinv = 1. / cl; -/* Computing 2nd power */ - d__1 = u[*lpivot * u_dim1 + 1] * clinv; - sm = d__1 * d__1; - i__1 = *m; - for (j = *l1; j <= i__1; ++j) { -/* L30: */ -/* Computing 2nd power */ - d__1 = u[j * u_dim1 + 1] * clinv; - sm += d__1 * d__1; + idx1 = npp1 + 1; + h12(c__2, &npp1, &idx1, m, &a[j * a_dim1 + 1], &c__1, &up, (zz+1), & + c__1, &c__1, &c__1); + ztest = zz[npp1] / a[npp1 + j * a_dim1]; + + /* SEE IF ZTEST IS POSITIVE */ + + if (ztest > 0.) { + goto L140; } - cl *= sqrt(sm); - if (u[*lpivot * u_dim1 + 1] <= 0.) { - goto L50; - } else { - goto L40; + } + + /* REJECT J AS A CANDIDATE TO BE MOVED FROM SET Z TO SET P. */ + /* RESTORE A(NPP1,J), SET W(J)=0., AND LOOP BACK TO TEST DUAL */ + /* COEFFS AGAIN. */ + + a[npp1 + j * a_dim1] = asave; + w[j] = 0.; + goto L60; + + /* THE INDEX J=INDEX(IZ) HAS BEEN SELECTED TO BE MOVED FROM */ + /* SET Z TO SET P. UPDATE B, UPDATE INDICES, APPLY HOUSEHOLDER */ + /* TRANSFORMATIONS TO COLS IN NEW SET Z, ZERO SUBDIAGONAL ELTS IN */ + /* COL J, SET W(J)=0. */ + + L140: + idx1 = m; + for (l = 1; l <= idx1; ++l) { + /* L150: */ + b[l] = zz[l]; + } + + index[iz] = index[iz1]; + index[iz1] = j; + ++iz1; + nsetp = npp1; + ++npp1; + + if (iz1 <= iz2) { + idx1 = iz2; + for (jz = iz1; jz <= idx1; ++jz) { + jj = index[jz]; + h12(c__2, &nsetp, &npp1, m, + &a[j * a_dim1 + 1], &c__1, &up, + &a[jj * a_dim1 + 1], &c__1, &mda, &c__1); + /* L160: */ } -L40: - cl = -cl; -L50: - *up = u[*lpivot * u_dim1 + 1] - cl; - u[*lpivot * u_dim1 + 1] = cl; - goto L70; -/* ****** APPLY THE TRANSFORMATION I+U*(U**T)/B TO C. ****** */ + } -L60: - if (cl <= 0.) { - goto L130; - } else { - goto L70; + if (nsetp != m) { + idx1 = m; + for (l = npp1; l <= idx1; ++l) { + /* L180: */ + // SS: CHECK THIS DAMAGE.... + a[l + j * a_dim1] = 0.; } -L70: - if (*ncv <= 0) { - return 0; + } + + w[j] = 0.; + /* SOLVE THE TRIANGULAR SYSTEM. */ + /* STORE THE SOLUTION TEMPORARILY IN ZZ(). + */ + rtnkey = 1; + goto L400; + L200: + + /* ****** SECONDARY LOOP BEGINS HERE ****** */ + + /* ITERATION COUNTER. */ + + L210: + ++iter; + if (iter > itmax) { + *mode = 3; + goto L350; + } + + /* SEE IF ALL NEW CONSTRAINED COEFFS ARE FEASIBLE. */ + /* IF NOT COMPUTE ALPHA. */ + + alpha = 2.; + idx1 = nsetp; + for (ip = 1; ip <= idx1; ++ip) { + l = index[ip]; + if (zz[ip] <= 0.) { + t = -x[l] / (zz[ip] - x[l]); + if (alpha > t) { + alpha = t; + jj = ip; + } } - b = *up * u[*lpivot * u_dim1 + 1]; -/* B MUST BE NONPOSITIVE HERE. IF B = 0., RETURN. */ + /* L240: */ + } - if (b >= 0.) { - goto L130; - } else { - goto L80; + /* IF ALL NEW CONSTRAINED COEFFS ARE FEASIBLE THEN ALPHA WILL */ + /* STILL = 2. IF SO EXIT FROM SECONDARY LOOP TO MAIN LOOP. */ + + if (alpha == 2.) { + goto L330; + } + + /* OTHERWISE USE ALPHA WHICH WILL BE BETWEEN 0. AND 1. TO */ + /* INTERPOLATE BETWEEN THE OLD X AND THE NEW ZZ. */ + + idx1 = nsetp; + for (ip = 1; ip <= idx1; ++ip) { + l = index[ip]; + x[l] += alpha * (zz[ip] - x[l]); + /* L250: */ + } + + /* MODIFY A AND B AND THE INDEX ARRAYS TO MOVE COEFFICIENT I */ + /* FROM SET P TO SET Z. */ + + i__ = index[jj]; + L260: + x[i__] = 0.; + + if (jj != nsetp) { + ++jj; + idx1 = nsetp; + for (j = jj; j <= idx1; ++j) { + ii = index[j]; + index[j - 1] = ii; + g1(&a[j - 1 + ii * a_dim1], &a[j + ii * a_dim1], + &cc, &ss, &a[j - 1 + ii * a_dim1]); + // SS: CHECK THIS DAMAGE... + a[j + ii * a_dim1] = 0.; + idx2 = n; + for (l = 1; l <= idx2; ++l) { + if (l != ii) { + + /* Apply procedure G2 (CC,SS,A(J-1,L),A(J, + L)) */ + + temp = a[j - 1 + l * a_dim1]; + // SS: CHECK THIS DAMAGE + a[j - 1 + l * a_dim1] = cc * temp + ss * a[j + l * a_dim1]; + a[j + l * a_dim1] = -ss * temp + cc * a[j + l * a_dim1]; + } + /* L270: */ + } + + /* Apply procedure G2 (CC,SS,B(J-1),B(J)) */ + + temp = b[j - 1]; + b[j - 1] = cc * temp + ss * b[j]; + b[j] = -ss * temp + cc * b[j]; + /* L280: */ } -L80: - b = 1. / b; - i2 = 1 - *icv + *ice * (*lpivot - 1); - incr = *ice * (*l1 - *lpivot); - i__1 = *ncv; - for (j = 1; j <= i__1; ++j) { - i2 += *icv; - i3 = i2 + incr; - i4 = i3; - sm = c__[i2] * *up; - i__2 = *m; - for (i__ = *l1; i__ <= i__2; ++i__) { - sm += c__[i3] * u[i__ * u_dim1 + 1]; -/* L90: */ - i3 += *ice; - } - if (sm != 0.) { - goto L100; - } else { - goto L120; - } -L100: - sm *= b; - c__[i2] += sm * *up; - i__2 = *m; - for (i__ = *l1; i__ <= i__2; ++i__) { - c__[i4] += sm * u[i__ * u_dim1 + 1]; -/* L110: */ - i4 += *ice; - } -L120: - ; + } + + npp1 = nsetp; + --nsetp; + --iz1; + index[iz1] = i__; + + /* SEE IF THE REMAINING COEFFS IN SET P ARE FEASIBLE. THEY SHOULD + */ + /* BE BECAUSE OF THE WAY ALPHA WAS DETERMINED. */ + /* IF ANY ARE INFEASIBLE IT IS DUE TO ROUND-OFF ERROR. ANY */ + /* THAT ARE NONPOSITIVE WILL BE SET TO ZERO */ + /* AND MOVED FROM SET P TO SET Z. */ + + idx1 = nsetp; + for (jj = 1; jj <= idx1; ++jj) { + i__ = index[jj]; + if (x[i__] <= 0.) { + goto L260; } -L130: - return 0; -} /* h12_ */ + /* L300: */ + } -/* SUBROUTINE NNLS (A,MDA,M,N,B,X,RNORM,W,ZZ,INDEX,MODE) */ + /* COPY B( ) INTO ZZ( ). THEN SOLVE AGAIN AND LOOP BACK. */ -/* Algorithm NNLS: NONNEGATIVE LEAST SQUARES */ + idx1 = m; + for (i__ = 1; i__ <= idx1; ++i__) { + /* L310: */ + zz[i__] = b[i__]; + } + rtnkey = 2; + goto L400; + L320: + goto L210; + /* ****** END OF SECONDARY LOOP ****** */ -/* The original version of this code was developed by */ -/* Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory */ -/* 1973 JUN 15, and published in the book */ -/* "SOLVING LEAST SQUARES PROBLEMS", Prentice-HalL, 1974. */ -/* Revised FEB 1995 to accompany reprinting of the book by SIAM. */ + L330: + idx1 = nsetp; + for (ip = 1; ip <= idx1; ++ip) { + i__ = index[ip]; + /* L340: */ + x[i__] = zz[ip]; + } + /* ALL NEW COEFFS ARE POSITIVE. LOOP BACK TO BEGINNING. */ + goto L30; -/* GIVEN AN M BY N MATRIX, A, AND AN M-VECTOR, B, COMPUTE AN */ -/* N-VECTOR, X, THAT SOLVES THE LEAST SQUARES PROBLEM */ + /* ****** END OF MAIN LOOP ****** */ -/* A * X = B SUBJECT TO X .GE. 0 */ -/* ------------------------------------------------------------------ */ -/* Subroutine Arguments */ + /* COME TO HERE FOR TERMINATION. */ + /* COMPUTE THE NORM OF THE FINAL RESIDUAL VECTOR. */ -/* A(),MDA,M,N MDA IS THE FIRST DIMENSIONING PARAMETER FOR THE */ -/* ARRAY, A(). ON ENTRY A() CONTAINS THE M BY N */ -/* MATRIX, A. ON EXIT A() CONTAINS */ -/* THE PRODUCT MATRIX, Q*A , WHERE Q IS AN */ -/* M BY M ORTHOGONAL MATRIX GENERATED IMPLICITLY BY */ -/* THIS SUBROUTINE. */ -/* B() ON ENTRY B() CONTAINS THE M-VECTOR, B. ON EXIT B() CON- */ -/* TAINS Q*B. */ -/* X() ON ENTRY X() NEED NOT BE INITIALIZED. ON EXIT X() WILL */ -/* CONTAIN THE SOLUTION VECTOR. */ -/* RNORM ON EXIT RNORM CONTAINS THE EUCLIDEAN NORM OF THE */ -/* RESIDUAL VECTOR. */ -/* W() AN N-ARRAY OF WORKING SPACE. ON EXIT W() WILL CONTAIN */ -/* THE DUAL SOLUTION VECTOR. W WILL SATISFY W(I) = 0. */ -/* FOR ALL I IN SET P AND W(I) .LE. 0. FOR ALL I IN SET Z */ -/* ZZ() AN M-ARRAY OF WORKING SPACE. */ -/* INDEX() AN INTEGER WORKING ARRAY OF LENGTH AT LEAST N. */ -/* ON EXIT THE CONTENTS OF THIS ARRAY DEFINE THE SETS */ -/* P AND Z AS FOLLOWS.. */ + L350: + sm = 0.; + if (npp1 <= m) { + idx1 = m; + for (i__ = npp1; i__ <= idx1; ++i__) { + /* L360: */ + /* Computing 2nd power */ + d1 = b[i__]; + sm += d1 * d1; + } + } else { + idx1 = n; + for (j = 1; j <= idx1; ++j) { + /* L380: */ + w[j] = 0.; + } + } + *rnorm = sqrt(sm); + return 0; -/* INDEX(1) THRU INDEX(NSETP) = SET P. */ -/* INDEX(IZ1) THRU INDEX(IZ2) = SET Z. */ -/* IZ1 = NSETP + 1 = NPP1 */ -/* IZ2 = N */ -/* MODE THIS IS A SUCCESS-FAILURE FLAG WITH THE FOLLOWING */ -/* MEANINGS. */ -/* 1 THE SOLUTION HAS BEEN COMPUTED SUCCESSFULLY. */ -/* 2 THE DIMENSIONS OF THE PROBLEM ARE BAD. */ -/* EITHER M .LE. 0 OR N .LE. 0. */ -/* 3 ITERATION COUNT EXCEEDED. MORE THAN 3*N ITERATIONS. */ + /* THE FOLLOWING BLOCK OF CODE IS USED AS AN INTERNAL SUBROUTINE */ + /* TO SOLVE THE TRIANGULAR SYSTEM, PUTTING THE SOLUTION IN ZZ(). */ -/* ------------------------------------------------------------------ */ -/* Subroutine */ int nnls_(doublereal *a, integer *mda, integer *m, integer * - n, doublereal *b, doublereal *x, doublereal *rnorm, doublereal *w, - doublereal *zz, integer *index, integer *mode) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal d__1, d__2; + L400: + idx1 = nsetp; + for (l = 1; l <= idx1; ++l) { + ip = nsetp + 1 - l; + if (l != 1) { + idx2 = ip; + for (ii = 1; ii <= idx2; ++ii) { + zz[ii] -= a[ii + jj * a_dim1] * zz[ip + 1]; + /* L410: */ + } + } + jj = index[ip]; + zz[ip] /= a[ip + jj * a_dim1]; + /* L430: */ + } + switch ((int)rtnkey) { + case 1: goto L200; + case 2: goto L320; + } - /* Builtin functions */ - double sqrt(doublereal); + return 0; - /* Local variables */ - static integer i__, j, l; - static doublereal t; - extern /* Subroutine */ int g1_(doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *); - static doublereal cc; - extern /* Subroutine */ int h12_(integer *, integer *, integer *, integer - *, doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *, integer *); - static integer ii, jj, ip; - static doublereal sm; - static integer iz, jz; - static doublereal up, ss; - static integer iz1, iz2, npp1; - extern doublereal diff_(doublereal *, doublereal *); - static integer iter; - static doublereal temp, wmax, alpha, asave; - static integer itmax, izmax, nsetp; - static doublereal dummy, unorm, ztest; - static integer rtnkey; - -/* ------------------------------------------------------------------ */ -/* integer INDEX(N) */ -/* double precision A(MDA,N), B(M), W(N), X(N), ZZ(M) */ -/* ------------------------------------------------------------------ */ - /* Parameter adjustments */ - a_dim1 = *mda; - a_offset = 1 + a_dim1; - a -= a_offset; - --b; - --x; - --w; - --zz; - --index; - - /* Function Body */ - *mode = 1; - if (*m <= 0 || *n <= 0) { - *mode = 2; - return 0; - } - iter = 0; - itmax = *n * 3; - -/* INITIALIZE THE ARRAYS INDEX() AND X(). */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - x[i__] = 0.; -/* L20: */ - index[i__] = i__; - } - - iz2 = *n; - iz1 = 1; - nsetp = 0; - npp1 = 1; -/* ****** MAIN LOOP BEGINS HERE ****** */ -L30: -/* QUIT IF ALL COEFFICIENTS ARE ALREADY IN THE SOLUTION. */ -/* OR IF M COLS OF A HAVE BEEN TRIANGULARIZED. */ - - if (iz1 > iz2 || nsetp >= *m) { - goto L350; - } - -/* COMPUTE COMPONENTS OF THE DUAL (NEGATIVE GRADIENT) VECTOR W(). */ - - i__1 = iz2; - for (iz = iz1; iz <= i__1; ++iz) { - j = index[iz]; - sm = 0.; - i__2 = *m; - for (l = npp1; l <= i__2; ++l) { -/* L40: */ - sm += a[l + j * a_dim1] * b[l]; - } - w[j] = sm; -/* L50: */ - } -/* FIND LARGEST POSITIVE W(J). */ -L60: - wmax = 0.; - i__1 = iz2; - for (iz = iz1; iz <= i__1; ++iz) { - j = index[iz]; - if (w[j] > wmax) { - wmax = w[j]; - izmax = iz; - } -/* L70: */ - } - -/* IF WMAX .LE. 0. GO TO TERMINATION. */ -/* THIS INDICATES SATISFACTION OF THE KUHN-TUCKER CONDITIONS. */ - - if (wmax <= 0.) { - goto L350; - } - iz = izmax; - j = index[iz]; - -/* THE SIGN OF W(J) IS OK FOR J TO BE MOVED TO SET P. */ -/* BEGIN THE TRANSFORMATION AND CHECK NEW DIAGONAL ELEMENT TO AVOID */ -/* NEAR LINEAR DEPENDENCE. */ - - asave = a[npp1 + j * a_dim1]; - i__1 = npp1 + 1; - h12_(&c__1, &npp1, &i__1, m, &a[j * a_dim1 + 1], &c__1, &up, &dummy, & - c__1, &c__1, &c__0); - unorm = 0.; - if (nsetp != 0) { - i__1 = nsetp; - for (l = 1; l <= i__1; ++l) { -/* L90: */ -/* Computing 2nd power */ - d__1 = a[l + j * a_dim1]; - unorm += d__1 * d__1; - } - } - unorm = sqrt(unorm); - d__2 = unorm + (d__1 = a[npp1 + j * a_dim1], abs(d__1)) * .01; - if (diff_(&d__2, &unorm) > 0.) { - -/* COL J IS SUFFICIENTLY INDEPENDENT. COPY B INTO ZZ, UPDATE ZZ */ -/* AND SOLVE FOR ZTEST ( = PROPOSED NEW VALUE FOR X(J) ). */ - - i__1 = *m; - for (l = 1; l <= i__1; ++l) { -/* L120: */ - zz[l] = b[l]; - } - i__1 = npp1 + 1; - h12_(&c__2, &npp1, &i__1, m, &a[j * a_dim1 + 1], &c__1, &up, &zz[1], & - c__1, &c__1, &c__1); - ztest = zz[npp1] / a[npp1 + j * a_dim1]; - -/* SEE IF ZTEST IS POSITIVE */ - - if (ztest > 0.) { - goto L140; - } - } - -/* REJECT J AS A CANDIDATE TO BE MOVED FROM SET Z TO SET P. */ -/* RESTORE A(NPP1,J), SET W(J)=0., AND LOOP BACK TO TEST DUAL */ -/* COEFFS AGAIN. */ - - a[npp1 + j * a_dim1] = asave; - w[j] = 0.; - goto L60; - -/* THE INDEX J=INDEX(IZ) HAS BEEN SELECTED TO BE MOVED FROM */ -/* SET Z TO SET P. UPDATE B, UPDATE INDICES, APPLY HOUSEHOLDER */ -/* TRANSFORMATIONS TO COLS IN NEW SET Z, ZERO SUBDIAGONAL ELTS IN */ -/* COL J, SET W(J)=0. */ - -L140: - i__1 = *m; - for (l = 1; l <= i__1; ++l) { -/* L150: */ - b[l] = zz[l]; - } - - index[iz] = index[iz1]; - index[iz1] = j; - ++iz1; - nsetp = npp1; - ++npp1; - - if (iz1 <= iz2) { - i__1 = iz2; - for (jz = iz1; jz <= i__1; ++jz) { - jj = index[jz]; - h12_(&c__2, &nsetp, &npp1, m, &a[j * a_dim1 + 1], &c__1, &up, &a[ - jj * a_dim1 + 1], &c__1, mda, &c__1); -/* L160: */ - } - } - - if (nsetp != *m) { - i__1 = *m; - for (l = npp1; l <= i__1; ++l) { -/* L180: */ - a[l + j * a_dim1] = 0.; - } - } - - w[j] = 0.; -/* SOLVE THE TRIANGULAR SYSTEM. */ -/* STORE THE SOLUTION TEMPORARILY IN ZZ(). */ - rtnkey = 1; - goto L400; -L200: - -/* ****** SECONDARY LOOP BEGINS HERE ****** */ - -/* ITERATION COUNTER. */ - -L210: - ++iter; - if (iter > itmax) { - *mode = 3; -/* write (*,'(/a)') ' NNLS quitting on iteration count.' */ - goto L350; - } - -/* SEE IF ALL NEW CONSTRAINED COEFFS ARE FEASIBLE. */ -/* IF NOT COMPUTE ALPHA. */ - - alpha = 2.; - i__1 = nsetp; - for (ip = 1; ip <= i__1; ++ip) { - l = index[ip]; - if (zz[ip] <= 0.) { - t = -x[l] / (zz[ip] - x[l]); - if (alpha > t) { - alpha = t; - jj = ip; - } - } -/* L240: */ - } - -/* IF ALL NEW CONSTRAINED COEFFS ARE FEASIBLE THEN ALPHA WILL */ -/* STILL = 2. IF SO EXIT FROM SECONDARY LOOP TO MAIN LOOP. */ - - if (alpha == 2.) { - goto L330; - } - -/* OTHERWISE USE ALPHA WHICH WILL BE BETWEEN 0. AND 1. TO */ -/* INTERPOLATE BETWEEN THE OLD X AND THE NEW ZZ. */ - - i__1 = nsetp; - for (ip = 1; ip <= i__1; ++ip) { - l = index[ip]; - x[l] += alpha * (zz[ip] - x[l]); -/* L250: */ - } - -/* MODIFY A AND B AND THE INDEX ARRAYS TO MOVE COEFFICIENT I */ -/* FROM SET P TO SET Z. */ - - i__ = index[jj]; -L260: - x[i__] = 0.; - - if (jj != nsetp) { - ++jj; - i__1 = nsetp; - for (j = jj; j <= i__1; ++j) { - ii = index[j]; - index[j - 1] = ii; - g1_(&a[j - 1 + ii * a_dim1], &a[j + ii * a_dim1], &cc, &ss, &a[j - - 1 + ii * a_dim1]); - a[j + ii * a_dim1] = 0.; - i__2 = *n; - for (l = 1; l <= i__2; ++l) { - if (l != ii) { - -/* Apply procedure G2 (CC,SS,A(J-1,L),A(J,L)) */ - - temp = a[j - 1 + l * a_dim1]; - a[j - 1 + l * a_dim1] = cc * temp + ss * a[j + l * a_dim1] - ; - a[j + l * a_dim1] = -ss * temp + cc * a[j + l * a_dim1]; - } -/* L270: */ - } - -/* Apply procedure G2 (CC,SS,B(J-1),B(J)) */ - - temp = b[j - 1]; - b[j - 1] = cc * temp + ss * b[j]; - b[j] = -ss * temp + cc * b[j]; -/* L280: */ - } - } - - npp1 = nsetp; - --nsetp; - --iz1; - index[iz1] = i__; - -/* SEE IF THE REMAINING COEFFS IN SET P ARE FEASIBLE. THEY SHOULD */ -/* BE BECAUSE OF THE WAY ALPHA WAS DETERMINED. */ -/* IF ANY ARE INFEASIBLE IT IS DUE TO ROUND-OFF ERROR. ANY */ -/* THAT ARE NONPOSITIVE WILL BE SET TO ZERO */ -/* AND MOVED FROM SET P TO SET Z. */ - - i__1 = nsetp; - for (jj = 1; jj <= i__1; ++jj) { - i__ = index[jj]; - if (x[i__] <= 0.) { - goto L260; - } -/* L300: */ - } - -/* COPY B( ) INTO ZZ( ). THEN SOLVE AGAIN AND LOOP BACK. */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { -/* L310: */ - zz[i__] = b[i__]; - } - rtnkey = 2; - goto L400; -L320: - goto L210; -/* ****** END OF SECONDARY LOOP ****** */ - -L330: - i__1 = nsetp; - for (ip = 1; ip <= i__1; ++ip) { - i__ = index[ip]; -/* L340: */ - x[i__] = zz[ip]; - } -/* ALL NEW COEFFS ARE POSITIVE. LOOP BACK TO BEGINNING. */ - goto L30; - -/* ****** END OF MAIN LOOP ****** */ - -/* COME TO HERE FOR TERMINATION. */ -/* COMPUTE THE NORM OF THE FINAL RESIDUAL VECTOR. */ - -L350: - sm = 0.; - if (npp1 <= *m) { - i__1 = *m; - for (i__ = npp1; i__ <= i__1; ++i__) { -/* L360: */ -/* Computing 2nd power */ - d__1 = b[i__]; - sm += d__1 * d__1; - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* L380: */ - w[j] = 0.; - } - } - *rnorm = sqrt(sm); - return 0; - -/* THE FOLLOWING BLOCK OF CODE IS USED AS AN INTERNAL SUBROUTINE */ -/* TO SOLVE THE TRIANGULAR SYSTEM, PUTTING THE SOLUTION IN ZZ(). */ - -L400: - i__1 = nsetp; - for (l = 1; l <= i__1; ++l) { - ip = nsetp + 1 - l; - if (l != 1) { - i__2 = ip; - for (ii = 1; ii <= i__2; ++ii) { - zz[ii] -= a[ii + jj * a_dim1] * zz[ip + 1]; -/* L410: */ - } - } - jj = index[ip]; - zz[ip] /= a[ip + jj * a_dim1]; -/* L430: */ - } - switch (rtnkey) { - case 1: goto L200; - case 2: goto L320; - } - return 0; } /* nnls_ */ +
--- a/nnls.h Thu Oct 21 20:19:29 2010 +0100 +++ b/nnls.h Thu Oct 21 20:50:22 2010 +0100 @@ -5,11 +5,9 @@ extern "C" { #endif -int nnls_(double *a, int *mda, int *m, int *n, - double *b, double *x, double *rnorm, - double *w, double *zz, int *index, int *mode); - -#define NNLS nnls_ +int nnls(float *a, int mda, int m, int n, + float *b, float *x, float *rnorm, + float *w, float *zz, int *index, int *mode); #ifdef __cplusplus }