changeset 29:da3195577172 matthiasm-plugin

* Revert to the previous version of nnls.c[c], which used single-precision floats (better for us). But we don't like the static local variables -- change those for thread-safety.
author Chris Cannam
date Thu, 21 Oct 2010 20:50:22 +0100
parents 52b6dbd61553
children 608b0c8ad3f8 9fe53dd1c41a
files Makefile.cc-linux NNLSChroma.cpp nnls.c nnls.h
diffstat 4 files changed, 540 insertions(+), 671 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile.cc-linux	Thu Oct 21 20:19:29 2010 +0100
+++ b/Makefile.cc-linux	Thu Oct 21 20:50:22 2010 +0100
@@ -27,7 +27,7 @@
 VAMP_SDK_DIR = ../vamp-plugin-sdk
 
 
-ARCHFLAGS = -g
+ARCHFLAGS = -O3 -ftree-vectorize -ffast-math
 
 CFLAGS = $(ARCHFLAGS) -I$(VAMP_SDK_DIR) -Wall -fPIC
 CXXFLAGS = $(ARCHFLAGS) -I$(VAMP_SDK_DIR) -Wall -fPIC
--- a/NNLSChroma.cpp	Thu Oct 21 20:19:29 2010 +0100
+++ b/NNLSChroma.cpp	Thu Oct 21 20:50:22 2010 +0100
@@ -827,7 +827,7 @@
         f6.hasTimestamp = true;
         f6.timestamp = f2.timestamp;
 	        
-        double b[256];
+			float b[256];
 	
         bool some_b_greater_zero = false;
         float sumb = 0;
@@ -861,7 +861,7 @@
                 }
 		        
             } else {
-                double x[84+1000];
+					float x[84+1000];
                 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
                 vector<int> signifIndex;
                 int index=0;
@@ -875,22 +875,20 @@
                     f3.values.push_back(0); // fill the values, change later
                     index++;
                 }
-                double rnorm;
-                double w[84+1000];
-                double zz[84+1000];
+				    float rnorm;
+				    float w[84+1000];
+				    float zz[84+1000];
                 int indx[84+1000];
                 int mode;
                 int dictsize = 256*signifIndex.size();
                 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
-                double *curr_dict = new double[dictsize];
+					float *curr_dict = new float[dictsize];
                 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
                     for (unsigned iBin = 0; iBin < 256; iBin++) {
                         curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
                     }
                 }
-                int sz = signifIndex.size();
-                int nn = nNote;
-                NNLS(curr_dict, &nn, &nn, &sz, b, x, &rnorm, w, zz, indx, &mode);
+					nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
                 delete [] curr_dict;
                 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
                     f3.values[signifIndex[iNote]] = x[iNote];
--- a/nnls.c	Thu Oct 21 20:19:29 2010 +0100
+++ b/nnls.c	Thu Oct 21 20:50:22 2010 +0100
@@ -1,705 +1,578 @@
-
 #include "nnls.h"
 
-typedef int integer;
-typedef unsigned int uinteger;
-typedef char *address;
-typedef short int shortint;
-typedef float real;
-typedef double doublereal;
+#include <math.h>
 
-#define abs(x) ((x) >= 0 ? (x) : -(x))
-#define dabs(x) (doublereal)abs(x)
-#define min(a,b) ((a) <= (b) ? (a) : (b))
-#define max(a,b) ((a) >= (b) ? (a) : (b))
-#define dmin(a,b) (doublereal)min(a,b)
-#define dmax(a,b) (doublereal)max(a,b)
-#define bit_test(a,b)	((a) >> (b) & 1)
-#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))
-#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))
+#define nnls_max(a,b) ((a) >= (b) ? (a) : (b))
+#define nnls_abs(x) ((x) >= 0 ? (x) : -(x))
 
-double d_sign(doublereal *a, doublereal *b)
+float d_sign(float a, float b)
 {
-    double x;
-    x = (*a >= 0 ? *a : - *a);
-    return (*b >= 0 ? x : -x);
+  float x;
+  x = (a >= 0 ? a : - a);
+  return (b >= 0 ? x : -x);
 }
 
 /* Table of constant values */
 
-static integer c__1 = 1;
-static integer c__0 = 0;
-static integer c__2 = 2;
+int c__1 = 1;
+int c__0 = 0;
+int c__2 = 2;
 
-doublereal diff_(doublereal *x, doublereal *y)
+
+int g1(float* a, float* b, float* cterm, float* sterm, float* sig)
 {
-    /* System generated locals */
-    doublereal ret_val;
+  /* System generated locals */
+  float d;
 
+  float xr, yr;
 
-/*  Function used in tests that depend on machine precision. */
 
-/*  The original version of this code was developed by */
-/*  Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory */
-/*  1973 JUN 7, and published in the book */
-/*  "SOLVING LEAST SQUARES PROBLEMS", Prentice-HalL, 1974. */
-/*  Revised FEB 1995 to accompany reprinting of the book by SIAM. */
-
-    ret_val = *x - *y;
-    return ret_val;
-} /* diff_ */
-
-/* Subroutine */ int g1_(doublereal *a, doublereal *b, doublereal *cterm, 
-	doublereal *sterm, doublereal *sig)
-{
-    /* System generated locals */
-    doublereal d__1;
-
-    /* Builtin functions */
-    double sqrt(doublereal), d_sign(doublereal *, doublereal *);
-
-    /* Local variables */
-    static doublereal xr, yr;
-
-
-/*     COMPUTE ORTHOGONAL ROTATION MATRIX.. */
-
-/*  The original version of this code was developed by */
-/*  Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory */
-/*  1973 JUN 12, and published in the book */
-/*  "SOLVING LEAST SQUARES PROBLEMS", Prentice-HalL, 1974. */
-/*  Revised FEB 1995 to accompany reprinting of the book by SIAM. */
-
-/*     COMPUTE.. MATRIX   (C, S) SO THAT (C, S)(A) = (SQRT(A**2+B**2)) */
-/*                        (-S,C)         (-S,C)(B)   (   0          ) */
-/*     COMPUTE SIG = SQRT(A**2+B**2) */
-/*        SIG IS COMPUTED LAST TO ALLOW FOR THE POSSIBILITY THAT */
-/*        SIG MAY BE IN THE SAME LOCATION AS A OR B . */
-/*     ------------------------------------------------------------------ */
-/*     ------------------------------------------------------------------ */
-    if (abs(*a) > abs(*b)) {
-	xr = *b / *a;
-/* Computing 2nd power */
-	d__1 = xr;
-	yr = sqrt(d__1 * d__1 + 1.);
-	d__1 = 1. / yr;
-	*cterm = d_sign(&d__1, a);
-	*sterm = *cterm * xr;
-	*sig = abs(*a) * yr;
-	return 0;
-    }
-    if (*b != 0.) {
-	xr = *a / *b;
-/* Computing 2nd power */
-	d__1 = xr;
-	yr = sqrt(d__1 * d__1 + 1.);
-	d__1 = 1. / yr;
-	*sterm = d_sign(&d__1, b);
-	*cterm = *sterm * xr;
-	*sig = abs(*b) * yr;
-	return 0;
-    }
-    *sig = 0.;
-    *cterm = 0.;
-    *sterm = 1.;
+  if (nnls_abs(*a) > nnls_abs(*b)) {
+    xr = *b / *a;
+    /* Computing 2nd power */
+    d = xr;
+    yr = sqrt(d * d + 1.);
+    d = 1. / yr;
+    *cterm = d_sign(d, *a);
+    *sterm = *cterm * xr;
+    *sig = nnls_abs(*a) * yr;
     return 0;
+  }
+  if (*b != 0.) {
+    xr = *a / *b;
+    /* Computing 2nd power */
+    d = xr;
+    yr = sqrt(d * d + 1.);
+    d = 1. / yr;
+    *sterm = d_sign(d, *b);
+    *cterm = *sterm * xr;
+    *sig = nnls_abs(*b) * yr;
+    return 0;
+  }
+  *sig = 0.;
+  *cterm = 0.;
+  *sterm = 1.;
+  return 0;
 } /* g1_ */
 
-/*     SUBROUTINE H12 (MODE,LPIVOT,L1,M,U,IUE,UP,C,ICE,ICV,NCV) */
+int h12(int mode, int* lpivot, int* l1,
+        int m, float* u, int* iue, float* up, float* c__,
+        int* ice, int* icv, int* ncv)
+{
+  /* System generated locals */
+  int u_dim1, u_offset, idx1, idx2;
+  float d, d2;
 
-/*  CONSTRUCTION AND/OR APPLICATION OF A SINGLE */
-/*  HOUSEHOLDER TRANSFORMATION..     Q = I + U*(U**T)/B */
+  /* Local variables */
+  int incr;
+  float b;
+  int i__, j;
+  float clinv;
+  int i2, i3, i4;
+  float cl, sm;
 
-/*  The original version of this code was developed by */
-/*  Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory */
-/*  1973 JUN 12, and published in the book */
-/*  "SOLVING LEAST SQUARES PROBLEMS", Prentice-HalL, 1974. */
-/*  Revised FEB 1995 to accompany reprinting of the book by SIAM. */
-/*     ------------------------------------------------------------------ */
-/*                     Subroutine Arguments */
+  /*     ------------------------------------------------------------------
+   */
+  /*     float precision U(IUE,M) */
+  /*     ------------------------------------------------------------------
+   */
+  /* Parameter adjustments */
+  u_dim1 = *iue;
+  u_offset = u_dim1 + 1;
+  u -= u_offset;
+  --c__;
 
-/*     MODE   = 1 OR 2   Selects Algorithm H1 to construct and apply a */
-/*            Householder transformation, or Algorithm H2 to apply a */
-/*            previously constructed transformation. */
-/*     LPIVOT IS THE INDEX OF THE PIVOT ELEMENT. */
-/*     L1,M   IF L1 .LE. M   THE TRANSFORMATION WILL BE CONSTRUCTED TO */
-/*            ZERO ELEMENTS INDEXED FROM L1 THROUGH M.   IF L1 GT. M */
-/*            THE SUBROUTINE DOES AN IDENTITY TRANSFORMATION. */
-/*     U(),IUE,UP    On entry with MODE = 1, U() contains the pivot */
-/*            vector.  IUE is the storage increment between elements. */
-/*            On exit when MODE = 1, U() and UP contain quantities */
-/*            defining the vector U of the Householder transformation. */
-/*            on entry with MODE = 2, U() and UP should contain */
-/*            quantities previously computed with MODE = 1.  These will */
-/*            not be modified during the entry with MODE = 2. */
-/*     C()    ON ENTRY with MODE = 1 or 2, C() CONTAINS A MATRIX WHICH */
-/*            WILL BE REGARDED AS A SET OF VECTORS TO WHICH THE */
-/*            HOUSEHOLDER TRANSFORMATION IS TO BE APPLIED. */
-/*            ON EXIT C() CONTAINS THE SET OF TRANSFORMED VECTORS. */
-/*     ICE    STORAGE INCREMENT BETWEEN ELEMENTS OF VECTORS IN C(). */
-/*     ICV    STORAGE INCREMENT BETWEEN VECTORS IN C(). */
-/*     NCV    NUMBER OF VECTORS IN C() TO BE TRANSFORMED. IF NCV .LE. 0 */
-/*            NO OPERATIONS WILL BE DONE ON C(). */
-/*     ------------------------------------------------------------------ */
-/* Subroutine */ int h12_(integer *mode, integer *lpivot, integer *l1, 
-	integer *m, doublereal *u, integer *iue, doublereal *up, doublereal *
-	c__, integer *ice, integer *icv, integer *ncv)
+  /* Function Body */
+  if (0 >= *lpivot || *lpivot >= *l1 || *l1 > m) {
+    return 0;
+  }
+  cl = (d = u[*lpivot * u_dim1 + 1], nnls_abs(d));
+  if (mode == 2) {
+    goto L60;
+  }
+  /*                            ****** CONSTRUCT THE TRANSFORMATION. ******
+   */
+  idx1 = m;
+  for (j = *l1; j <= idx1; ++j) {
+    /* L10: */
+    /* Computing MAX */
+    d2 = (d = u[j * u_dim1 + 1], nnls_abs(d));
+    cl = nnls_max(d2,cl);
+  }
+  if (cl <= 0.) {
+    goto L130;
+  } else {
+    goto L20;
+  }
+ L20:
+  clinv = 1. / cl;
+  /* Computing 2nd power */
+  d = u[*lpivot * u_dim1 + 1] * clinv;
+  sm = d * d;
+  idx1 = m;
+  for (j = *l1; j <= idx1; ++j) {
+    /* L30: */
+    /* Computing 2nd power */
+    d = u[j * u_dim1 + 1] * clinv;
+    sm += d * d;
+  }
+  cl *= sqrt(sm);
+  if (u[*lpivot * u_dim1 + 1] <= 0.) {
+    goto L50;
+  } else {
+    goto L40;
+  }
+ L40:
+  cl = -cl;
+ L50:
+  *up = u[*lpivot * u_dim1 + 1] - cl;
+  u[*lpivot * u_dim1 + 1] = cl;
+  goto L70;
+  /*            ****** APPLY THE TRANSFORMATION  I+U*(U**T)/B  TO C. ******
+   */
+
+ L60:
+  if (cl <= 0.) {
+    goto L130;
+  } else {
+    goto L70;
+  }
+ L70:
+  if (*ncv <= 0) {
+    return 0;
+  }
+  b = *up * u[*lpivot * u_dim1 + 1];
+  /*                       B  MUST BE NONPOSITIVE HERE.  IF B = 0., RETURN.
+   */
+
+  if (b >= 0.) {
+    goto L130;
+  } else {
+    goto L80;
+  }
+ L80:
+  b = 1. / b;
+  i2 = 1 - *icv + *ice * (*lpivot - 1);
+  incr = *ice * (*l1 - *lpivot);
+  idx1 = *ncv;
+  for (j = 1; j <= idx1; ++j) {
+    i2 += *icv;
+    i3 = i2 + incr;
+    i4 = i3;
+    sm = c__[i2] * *up;
+    idx2 = m;
+    for (i__ = *l1; i__ <= idx2; ++i__) {
+      sm += c__[i3] * u[i__ * u_dim1 + 1];
+      /* L90: */
+      i3 += *ice;
+    }
+    if (sm != 0.) {
+      goto L100;
+    } else {
+      goto L120;
+    }
+  L100:
+    sm *= b;
+    c__[i2] += sm * *up;
+    idx2 = m;
+    for (i__ = *l1; i__ <= idx2; ++i__) {
+      c__[i4] += sm * u[i__ * u_dim1 + 1];
+      /* L110: */
+      i4 += *ice;
+    }
+  L120:
+    ;
+  }
+ L130:
+  return 0;
+} /* h12 */
+
+int nnls(float* a,  int mda,  int m,  int n, float* b,
+         float* x, float* rnorm, float* w, float* zz, int* index,
+         int* mode)
 {
-    /* System generated locals */
-    integer u_dim1, u_offset, i__1, i__2;
-    doublereal d__1, d__2;
+  /* System generated locals */
+  int a_dim1, a_offset, idx1, idx2;
+  float d1, d2;
 
-    /* Builtin functions */
-    double sqrt(doublereal);
 
-    /* Local variables */
-    static doublereal b;
-    static integer i__, j, i2, i3, i4;
-    static doublereal cl, sm;
-    static integer incr;
-    static doublereal clinv;
+  /* Local variables */
+  int iter;
+  float temp, wmax;
+  int i__, j, l;
+  float t, alpha, asave;
+  int itmax, izmax, nsetp;
+  float unorm, ztest, cc;
+  float dummy[2];
+  int ii, jj, ip;
+  float sm;
+  int iz, jz;
+  float up, ss;
+  int rtnkey, iz1, iz2, npp1;
 
-/*     ------------------------------------------------------------------ */
-/*     double precision U(IUE,M) */
-/*     ------------------------------------------------------------------ */
-    /* Parameter adjustments */
-    u_dim1 = *iue;
-    u_offset = 1 + u_dim1;
-    u -= u_offset;
-    --c__;
+  /*     ------------------------------------------------------------------
+   */
+  /*     int INDEX(N) */
+  /*     float precision A(MDA,N), B(M), W(N), X(N), ZZ(M) */
+  /*     ------------------------------------------------------------------
+   */
+  /* Parameter adjustments */
+  a_dim1 = mda;
+  a_offset = a_dim1 + 1;
+  a -= a_offset;
+  --b;
+  --x;
+  --w;
+  --zz;
+  --index;
 
-    /* Function Body */
-    if (0 >= *lpivot || *lpivot >= *l1 || *l1 > *m) {
-	return 0;
+  /* Function Body */
+  *mode = 1;
+  if (m <= 0 || n <= 0) {
+    *mode = 2;
+    return 0;
+  }
+  iter = 0;
+  itmax = n * 3;
+
+  /*                    INITIALIZE THE ARRAYS INDEX() AND X(). */
+
+  idx1 = n;
+  for (i__ = 1; i__ <= idx1; ++i__) {
+    x[i__] = 0.;
+    /* L20: */
+    index[i__] = i__;
+  }
+
+  iz2 = n;
+  iz1 = 1;
+  nsetp = 0;
+  npp1 = 1;
+  /*                             ******  MAIN LOOP BEGINS HERE  ****** */
+ L30:
+  /*                  QUIT IF ALL COEFFICIENTS ARE ALREADY IN THE SOLUTION.
+   */
+  /*                        OR IF M COLS OF A HAVE BEEN TRIANGULARIZED. */
+
+  if (iz1 > iz2 || nsetp >= m) {
+    goto L350;
+  }
+
+  /*         COMPUTE COMPONENTS OF THE DUAL (NEGATIVE GRADIENT) VECTOR W().
+   */
+
+  idx1 = iz2;
+  for (iz = iz1; iz <= idx1; ++iz) {
+    j = index[iz];
+    sm = 0.;
+    idx2 = m;
+    for (l = npp1; l <= idx2; ++l) {
+      /* L40: */
+      sm += a[l + j * a_dim1] * b[l];
     }
-    cl = (d__1 = u[*lpivot * u_dim1 + 1], abs(d__1));
-    if (*mode == 2) {
-	goto L60;
+    w[j] = sm;
+    /* L50: */
+  }
+  /*                                   FIND LARGEST POSITIVE W(J). */
+ L60:
+  wmax = 0.;
+  idx1 = iz2;
+  for (iz = iz1; iz <= idx1; ++iz) {
+    j = index[iz];
+    if (w[j] > wmax) {
+      wmax = w[j];
+      izmax = iz;
     }
-/*                            ****** CONSTRUCT THE TRANSFORMATION. ****** */
-    i__1 = *m;
-    for (j = *l1; j <= i__1; ++j) {
-/* L10: */
-/* Computing MAX */
-	d__2 = (d__1 = u[j * u_dim1 + 1], abs(d__1));
-	cl = max(d__2,cl);
+    /* L70: */
+  }
+
+  /*             IF WMAX .LE. 0. GO TO TERMINATION. */
+  /*             THIS INDICATES SATISFACTION OF THE KUHN-TUCKER CONDITIONS.
+   */
+
+  if (wmax <= 0.) {
+    goto L350;
+  }
+  iz = izmax;
+  j = index[iz];
+
+  /*     THE SIGN OF W(J) IS OK FOR J TO BE MOVED TO SET P. */
+  /*     BEGIN THE TRANSFORMATION AND CHECK NEW DIAGONAL ELEMENT TO AVOID */
+  /*     NEAR LINEAR DEPENDENCE. */
+
+  asave = a[npp1 + j * a_dim1];
+  idx1 = npp1 + 1;
+  h12(c__1, &npp1, &idx1, m, &a[j * a_dim1 + 1], &c__1, &up, dummy, &
+      c__1, &c__1, &c__0);
+  unorm = 0.;
+  if (nsetp != 0) {
+    idx1 = nsetp;
+    for (l = 1; l <= idx1; ++l) {
+      /* L90: */
+      /* Computing 2nd power */
+      d1 = a[l + j * a_dim1];
+      unorm += d1 * d1;
     }
-    if (cl <= 0.) {
-	goto L130;
-    } else {
-	goto L20;
+  }
+  unorm = sqrt(unorm);
+  d2 = unorm + (d1 = a[npp1 + j * a_dim1], nnls_abs(d1)) * .01;
+  if ((d2- unorm) > 0.) {
+
+    /*        COL J IS SUFFICIENTLY INDEPENDENT.  COPY B INTO ZZ, UPDATE Z
+              Z */
+    /*        AND SOLVE FOR ZTEST ( = PROPOSED NEW VALUE FOR X(J) ). */
+
+    idx1 = m;
+    for (l = 1; l <= idx1; ++l) {
+      /* L120: */
+      zz[l] = b[l];
     }
-L20:
-    clinv = 1. / cl;
-/* Computing 2nd power */
-    d__1 = u[*lpivot * u_dim1 + 1] * clinv;
-    sm = d__1 * d__1;
-    i__1 = *m;
-    for (j = *l1; j <= i__1; ++j) {
-/* L30: */
-/* Computing 2nd power */
-	d__1 = u[j * u_dim1 + 1] * clinv;
-	sm += d__1 * d__1;
+    idx1 = npp1 + 1;
+    h12(c__2, &npp1, &idx1, m, &a[j * a_dim1 + 1], &c__1, &up, (zz+1), &
+        c__1, &c__1, &c__1);
+    ztest = zz[npp1] / a[npp1 + j * a_dim1];
+
+    /*                                     SEE IF ZTEST IS POSITIVE */
+
+    if (ztest > 0.) {
+      goto L140;
     }
-    cl *= sqrt(sm);
-    if (u[*lpivot * u_dim1 + 1] <= 0.) {
-	goto L50;
-    } else {
-	goto L40;
+  }
+
+  /*     REJECT J AS A CANDIDATE TO BE MOVED FROM SET Z TO SET P. */
+  /*     RESTORE A(NPP1,J), SET W(J)=0., AND LOOP BACK TO TEST DUAL */
+  /*     COEFFS AGAIN. */
+
+  a[npp1 + j * a_dim1] = asave;
+  w[j] = 0.;
+  goto L60;
+
+  /*     THE INDEX  J=INDEX(IZ)  HAS BEEN SELECTED TO BE MOVED FROM */
+  /*     SET Z TO SET P.    UPDATE B,  UPDATE INDICES,  APPLY HOUSEHOLDER */
+  /*     TRANSFORMATIONS TO COLS IN NEW SET Z,  ZERO SUBDIAGONAL ELTS IN */
+  /*     COL J,  SET W(J)=0. */
+
+ L140:
+  idx1 = m;
+  for (l = 1; l <= idx1; ++l) {
+    /* L150: */
+    b[l] = zz[l];
+  }
+
+  index[iz] = index[iz1];
+  index[iz1] = j;
+  ++iz1;
+  nsetp = npp1;
+  ++npp1;
+
+  if (iz1 <= iz2) {
+    idx1 = iz2;
+    for (jz = iz1; jz <= idx1; ++jz) {
+      jj = index[jz];
+      h12(c__2, &nsetp, &npp1, m,
+          &a[j * a_dim1 + 1], &c__1, &up,
+          &a[jj * a_dim1 + 1], &c__1, &mda, &c__1);
+      /* L160: */
     }
-L40:
-    cl = -cl;
-L50:
-    *up = u[*lpivot * u_dim1 + 1] - cl;
-    u[*lpivot * u_dim1 + 1] = cl;
-    goto L70;
-/*            ****** APPLY THE TRANSFORMATION  I+U*(U**T)/B  TO C. ****** */
+  }
 
-L60:
-    if (cl <= 0.) {
-	goto L130;
-    } else {
-	goto L70;
+  if (nsetp != m) {
+    idx1 = m;
+    for (l = npp1; l <= idx1; ++l) {
+      /* L180: */
+      // SS: CHECK THIS DAMAGE....
+      a[l + j * a_dim1] = 0.;
     }
-L70:
-    if (*ncv <= 0) {
-	return 0;
+  }
+
+  w[j] = 0.;
+  /*                                SOLVE THE TRIANGULAR SYSTEM. */
+  /*                                STORE THE SOLUTION TEMPORARILY IN ZZ().
+   */
+  rtnkey = 1;
+  goto L400;
+ L200:
+
+  /*                       ******  SECONDARY LOOP BEGINS HERE ****** */
+
+  /*                          ITERATION COUNTER. */
+
+ L210:
+  ++iter;
+  if (iter > itmax) {
+    *mode = 3;
+    goto L350;
+  }
+
+  /*                    SEE IF ALL NEW CONSTRAINED COEFFS ARE FEASIBLE. */
+  /*                                  IF NOT COMPUTE ALPHA. */
+
+  alpha = 2.;
+  idx1 = nsetp;
+  for (ip = 1; ip <= idx1; ++ip) {
+    l = index[ip];
+    if (zz[ip] <= 0.) {
+      t = -x[l] / (zz[ip] - x[l]);
+      if (alpha > t) {
+        alpha = t;
+        jj = ip;
+      }
     }
-    b = *up * u[*lpivot * u_dim1 + 1];
-/*                       B  MUST BE NONPOSITIVE HERE.  IF B = 0., RETURN. */
+    /* L240: */
+  }
 
-    if (b >= 0.) {
-	goto L130;
-    } else {
-	goto L80;
+  /*          IF ALL NEW CONSTRAINED COEFFS ARE FEASIBLE THEN ALPHA WILL */
+  /*          STILL = 2.    IF SO EXIT FROM SECONDARY LOOP TO MAIN LOOP. */
+
+  if (alpha == 2.) {
+    goto L330;
+  }
+
+  /*          OTHERWISE USE ALPHA WHICH WILL BE BETWEEN 0. AND 1. TO */
+  /*          INTERPOLATE BETWEEN THE OLD X AND THE NEW ZZ. */
+
+  idx1 = nsetp;
+  for (ip = 1; ip <= idx1; ++ip) {
+    l = index[ip];
+    x[l] += alpha * (zz[ip] - x[l]);
+    /* L250: */
+  }
+
+  /*        MODIFY A AND B AND THE INDEX ARRAYS TO MOVE COEFFICIENT I */
+  /*        FROM SET P TO SET Z. */
+
+  i__ = index[jj];
+ L260:
+  x[i__] = 0.;
+
+  if (jj != nsetp) {
+    ++jj;
+    idx1 = nsetp;
+    for (j = jj; j <= idx1; ++j) {
+      ii = index[j];
+      index[j - 1] = ii;
+      g1(&a[j - 1 + ii * a_dim1], &a[j + ii * a_dim1],
+         &cc, &ss, &a[j - 1 + ii * a_dim1]);
+      // SS: CHECK THIS DAMAGE...
+      a[j + ii * a_dim1] = 0.;
+      idx2 = n;
+      for (l = 1; l <= idx2; ++l) {
+        if (l != ii) {
+
+          /*                 Apply procedure G2 (CC,SS,A(J-1,L),A(J,
+                             L)) */
+
+          temp = a[j - 1 + l * a_dim1];
+          // SS: CHECK THIS DAMAGE
+          a[j - 1 + l * a_dim1] = cc * temp + ss * a[j + l * a_dim1];
+          a[j + l * a_dim1] = -ss * temp + cc * a[j + l * a_dim1];
+        }
+        /* L270: */
+      }
+
+      /*                 Apply procedure G2 (CC,SS,B(J-1),B(J)) */
+
+      temp = b[j - 1];
+      b[j - 1] = cc * temp + ss * b[j];
+      b[j] = -ss * temp + cc * b[j];
+      /* L280: */
     }
-L80:
-    b = 1. / b;
-    i2 = 1 - *icv + *ice * (*lpivot - 1);
-    incr = *ice * (*l1 - *lpivot);
-    i__1 = *ncv;
-    for (j = 1; j <= i__1; ++j) {
-	i2 += *icv;
-	i3 = i2 + incr;
-	i4 = i3;
-	sm = c__[i2] * *up;
-	i__2 = *m;
-	for (i__ = *l1; i__ <= i__2; ++i__) {
-	    sm += c__[i3] * u[i__ * u_dim1 + 1];
-/* L90: */
-	    i3 += *ice;
-	}
-	if (sm != 0.) {
-	    goto L100;
-	} else {
-	    goto L120;
-	}
-L100:
-	sm *= b;
-	c__[i2] += sm * *up;
-	i__2 = *m;
-	for (i__ = *l1; i__ <= i__2; ++i__) {
-	    c__[i4] += sm * u[i__ * u_dim1 + 1];
-/* L110: */
-	    i4 += *ice;
-	}
-L120:
-	;
+  }
+
+  npp1 = nsetp;
+  --nsetp;
+  --iz1;
+  index[iz1] = i__;
+
+  /*        SEE IF THE REMAINING COEFFS IN SET P ARE FEASIBLE.  THEY SHOULD
+   */
+  /*        BE BECAUSE OF THE WAY ALPHA WAS DETERMINED. */
+  /*        IF ANY ARE INFEASIBLE IT IS DUE TO ROUND-OFF ERROR.  ANY */
+  /*        THAT ARE NONPOSITIVE WILL BE SET TO ZERO */
+  /*        AND MOVED FROM SET P TO SET Z. */
+
+  idx1 = nsetp;
+  for (jj = 1; jj <= idx1; ++jj) {
+    i__ = index[jj];
+    if (x[i__] <= 0.) {
+      goto L260;
     }
-L130:
-    return 0;
-} /* h12_ */
+    /* L300: */
+  }
 
-/*     SUBROUTINE NNLS  (A,MDA,M,N,B,X,RNORM,W,ZZ,INDEX,MODE) */
+  /*         COPY B( ) INTO ZZ( ).  THEN SOLVE AGAIN AND LOOP BACK. */
 
-/*  Algorithm NNLS: NONNEGATIVE LEAST SQUARES */
+  idx1 = m;
+  for (i__ = 1; i__ <= idx1; ++i__) {
+    /* L310: */
+    zz[i__] = b[i__];
+  }
+  rtnkey = 2;
+  goto L400;
+ L320:
+  goto L210;
+  /*                      ******  END OF SECONDARY LOOP  ****** */
 
-/*  The original version of this code was developed by */
-/*  Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory */
-/*  1973 JUN 15, and published in the book */
-/*  "SOLVING LEAST SQUARES PROBLEMS", Prentice-HalL, 1974. */
-/*  Revised FEB 1995 to accompany reprinting of the book by SIAM. */
+ L330:
+  idx1 = nsetp;
+  for (ip = 1; ip <= idx1; ++ip) {
+    i__ = index[ip];
+    /* L340: */
+    x[i__] = zz[ip];
+  }
+  /*        ALL NEW COEFFS ARE POSITIVE.  LOOP BACK TO BEGINNING. */
+  goto L30;
 
-/*     GIVEN AN M BY N MATRIX, A, AND AN M-VECTOR, B,  COMPUTE AN */
-/*     N-VECTOR, X, THAT SOLVES THE LEAST SQUARES PROBLEM */
+  /*                        ******  END OF MAIN LOOP  ****** */
 
-/*                      A * X = B  SUBJECT TO X .GE. 0 */
-/*     ------------------------------------------------------------------ */
-/*                     Subroutine Arguments */
+  /*                        COME TO HERE FOR TERMINATION. */
+  /*                     COMPUTE THE NORM OF THE FINAL RESIDUAL VECTOR. */
 
-/*     A(),MDA,M,N     MDA IS THE FIRST DIMENSIONING PARAMETER FOR THE */
-/*                     ARRAY, A().   ON ENTRY A() CONTAINS THE M BY N */
-/*                     MATRIX, A.           ON EXIT A() CONTAINS */
-/*                     THE PRODUCT MATRIX, Q*A , WHERE Q IS AN */
-/*                     M BY M ORTHOGONAL MATRIX GENERATED IMPLICITLY BY */
-/*                     THIS SUBROUTINE. */
-/*     B()     ON ENTRY B() CONTAINS THE M-VECTOR, B.   ON EXIT B() CON- */
-/*             TAINS Q*B. */
-/*     X()     ON ENTRY X() NEED NOT BE INITIALIZED.  ON EXIT X() WILL */
-/*             CONTAIN THE SOLUTION VECTOR. */
-/*     RNORM   ON EXIT RNORM CONTAINS THE EUCLIDEAN NORM OF THE */
-/*             RESIDUAL VECTOR. */
-/*     W()     AN N-ARRAY OF WORKING SPACE.  ON EXIT W() WILL CONTAIN */
-/*             THE DUAL SOLUTION VECTOR.   W WILL SATISFY W(I) = 0. */
-/*             FOR ALL I IN SET P  AND W(I) .LE. 0. FOR ALL I IN SET Z */
-/*     ZZ()     AN M-ARRAY OF WORKING SPACE. */
-/*     INDEX()     AN INTEGER WORKING ARRAY OF LENGTH AT LEAST N. */
-/*                 ON EXIT THE CONTENTS OF THIS ARRAY DEFINE THE SETS */
-/*                 P AND Z AS FOLLOWS.. */
+ L350:
+  sm = 0.;
+  if (npp1 <= m) {
+    idx1 = m;
+    for (i__ = npp1; i__ <= idx1; ++i__) {
+      /* L360: */
+      /* Computing 2nd power */
+      d1 = b[i__];
+      sm += d1 * d1;
+    }
+  } else {
+    idx1 = n;
+    for (j = 1; j <= idx1; ++j) {
+      /* L380: */
+      w[j] = 0.;
+    }
+  }
+  *rnorm = sqrt(sm);
+  return 0;
 
-/*                 INDEX(1)   THRU INDEX(NSETP) = SET P. */
-/*                 INDEX(IZ1) THRU INDEX(IZ2)   = SET Z. */
-/*                 IZ1 = NSETP + 1 = NPP1 */
-/*                 IZ2 = N */
-/*     MODE    THIS IS A SUCCESS-FAILURE FLAG WITH THE FOLLOWING */
-/*             MEANINGS. */
-/*             1     THE SOLUTION HAS BEEN COMPUTED SUCCESSFULLY. */
-/*             2     THE DIMENSIONS OF THE PROBLEM ARE BAD. */
-/*                   EITHER M .LE. 0 OR N .LE. 0. */
-/*             3    ITERATION COUNT EXCEEDED.  MORE THAN 3*N ITERATIONS. */
+  /*     THE FOLLOWING BLOCK OF CODE IS USED AS AN INTERNAL SUBROUTINE */
+  /*     TO SOLVE THE TRIANGULAR SYSTEM, PUTTING THE SOLUTION IN ZZ(). */
 
-/*     ------------------------------------------------------------------ */
-/* Subroutine */ int nnls_(doublereal *a, integer *mda, integer *m, integer *
-	n, doublereal *b, doublereal *x, doublereal *rnorm, doublereal *w, 
-	doublereal *zz, integer *index, integer *mode)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    doublereal d__1, d__2;
+ L400:
+  idx1 = nsetp;
+  for (l = 1; l <= idx1; ++l) {
+    ip = nsetp + 1 - l;
+    if (l != 1) {
+      idx2 = ip;
+      for (ii = 1; ii <= idx2; ++ii) {
+        zz[ii] -= a[ii + jj * a_dim1] * zz[ip + 1];
+        /* L410: */
+      }
+    }
+    jj = index[ip];
+    zz[ip] /= a[ip + jj * a_dim1];
+    /* L430: */
+  }
+  switch ((int)rtnkey) {
+  case 1:  goto L200;
+  case 2:  goto L320;
+  }
 
-    /* Builtin functions */
-    double sqrt(doublereal);
+  return 0;
 
-    /* Local variables */
-    static integer i__, j, l;
-    static doublereal t;
-    extern /* Subroutine */ int g1_(doublereal *, doublereal *, doublereal *, 
-	    doublereal *, doublereal *);
-    static doublereal cc;
-    extern /* Subroutine */ int h12_(integer *, integer *, integer *, integer 
-	    *, doublereal *, integer *, doublereal *, doublereal *, integer *,
-	     integer *, integer *);
-    static integer ii, jj, ip;
-    static doublereal sm;
-    static integer iz, jz;
-    static doublereal up, ss;
-    static integer iz1, iz2, npp1;
-    extern doublereal diff_(doublereal *, doublereal *);
-    static integer iter;
-    static doublereal temp, wmax, alpha, asave;
-    static integer itmax, izmax, nsetp;
-    static doublereal dummy, unorm, ztest;
-    static integer rtnkey;
-
-/*     ------------------------------------------------------------------ */
-/*     integer INDEX(N) */
-/*     double precision A(MDA,N), B(M), W(N), X(N), ZZ(M) */
-/*     ------------------------------------------------------------------ */
-    /* Parameter adjustments */
-    a_dim1 = *mda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-    --b;
-    --x;
-    --w;
-    --zz;
-    --index;
-
-    /* Function Body */
-    *mode = 1;
-    if (*m <= 0 || *n <= 0) {
-	*mode = 2;
-	return 0;
-    }
-    iter = 0;
-    itmax = *n * 3;
-
-/*                    INITIALIZE THE ARRAYS INDEX() AND X(). */
-
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	x[i__] = 0.;
-/* L20: */
-	index[i__] = i__;
-    }
-
-    iz2 = *n;
-    iz1 = 1;
-    nsetp = 0;
-    npp1 = 1;
-/*                             ******  MAIN LOOP BEGINS HERE  ****** */
-L30:
-/*                  QUIT IF ALL COEFFICIENTS ARE ALREADY IN THE SOLUTION. */
-/*                        OR IF M COLS OF A HAVE BEEN TRIANGULARIZED. */
-
-    if (iz1 > iz2 || nsetp >= *m) {
-	goto L350;
-    }
-
-/*         COMPUTE COMPONENTS OF THE DUAL (NEGATIVE GRADIENT) VECTOR W(). */
-
-    i__1 = iz2;
-    for (iz = iz1; iz <= i__1; ++iz) {
-	j = index[iz];
-	sm = 0.;
-	i__2 = *m;
-	for (l = npp1; l <= i__2; ++l) {
-/* L40: */
-	    sm += a[l + j * a_dim1] * b[l];
-	}
-	w[j] = sm;
-/* L50: */
-    }
-/*                                   FIND LARGEST POSITIVE W(J). */
-L60:
-    wmax = 0.;
-    i__1 = iz2;
-    for (iz = iz1; iz <= i__1; ++iz) {
-	j = index[iz];
-	if (w[j] > wmax) {
-	    wmax = w[j];
-	    izmax = iz;
-	}
-/* L70: */
-    }
-
-/*             IF WMAX .LE. 0. GO TO TERMINATION. */
-/*             THIS INDICATES SATISFACTION OF THE KUHN-TUCKER CONDITIONS. */
-
-    if (wmax <= 0.) {
-	goto L350;
-    }
-    iz = izmax;
-    j = index[iz];
-
-/*     THE SIGN OF W(J) IS OK FOR J TO BE MOVED TO SET P. */
-/*     BEGIN THE TRANSFORMATION AND CHECK NEW DIAGONAL ELEMENT TO AVOID */
-/*     NEAR LINEAR DEPENDENCE. */
-
-    asave = a[npp1 + j * a_dim1];
-    i__1 = npp1 + 1;
-    h12_(&c__1, &npp1, &i__1, m, &a[j * a_dim1 + 1], &c__1, &up, &dummy, &
-	    c__1, &c__1, &c__0);
-    unorm = 0.;
-    if (nsetp != 0) {
-	i__1 = nsetp;
-	for (l = 1; l <= i__1; ++l) {
-/* L90: */
-/* Computing 2nd power */
-	    d__1 = a[l + j * a_dim1];
-	    unorm += d__1 * d__1;
-	}
-    }
-    unorm = sqrt(unorm);
-    d__2 = unorm + (d__1 = a[npp1 + j * a_dim1], abs(d__1)) * .01;
-    if (diff_(&d__2, &unorm) > 0.) {
-
-/*        COL J IS SUFFICIENTLY INDEPENDENT.  COPY B INTO ZZ, UPDATE ZZ */
-/*        AND SOLVE FOR ZTEST ( = PROPOSED NEW VALUE FOR X(J) ). */
-
-	i__1 = *m;
-	for (l = 1; l <= i__1; ++l) {
-/* L120: */
-	    zz[l] = b[l];
-	}
-	i__1 = npp1 + 1;
-	h12_(&c__2, &npp1, &i__1, m, &a[j * a_dim1 + 1], &c__1, &up, &zz[1], &
-		c__1, &c__1, &c__1);
-	ztest = zz[npp1] / a[npp1 + j * a_dim1];
-
-/*                                     SEE IF ZTEST IS POSITIVE */
-
-	if (ztest > 0.) {
-	    goto L140;
-	}
-    }
-
-/*     REJECT J AS A CANDIDATE TO BE MOVED FROM SET Z TO SET P. */
-/*     RESTORE A(NPP1,J), SET W(J)=0., AND LOOP BACK TO TEST DUAL */
-/*     COEFFS AGAIN. */
-
-    a[npp1 + j * a_dim1] = asave;
-    w[j] = 0.;
-    goto L60;
-
-/*     THE INDEX  J=INDEX(IZ)  HAS BEEN SELECTED TO BE MOVED FROM */
-/*     SET Z TO SET P.    UPDATE B,  UPDATE INDICES,  APPLY HOUSEHOLDER */
-/*     TRANSFORMATIONS TO COLS IN NEW SET Z,  ZERO SUBDIAGONAL ELTS IN */
-/*     COL J,  SET W(J)=0. */
-
-L140:
-    i__1 = *m;
-    for (l = 1; l <= i__1; ++l) {
-/* L150: */
-	b[l] = zz[l];
-    }
-
-    index[iz] = index[iz1];
-    index[iz1] = j;
-    ++iz1;
-    nsetp = npp1;
-    ++npp1;
-
-    if (iz1 <= iz2) {
-	i__1 = iz2;
-	for (jz = iz1; jz <= i__1; ++jz) {
-	    jj = index[jz];
-	    h12_(&c__2, &nsetp, &npp1, m, &a[j * a_dim1 + 1], &c__1, &up, &a[
-		    jj * a_dim1 + 1], &c__1, mda, &c__1);
-/* L160: */
-	}
-    }
-
-    if (nsetp != *m) {
-	i__1 = *m;
-	for (l = npp1; l <= i__1; ++l) {
-/* L180: */
-	    a[l + j * a_dim1] = 0.;
-	}
-    }
-
-    w[j] = 0.;
-/*                                SOLVE THE TRIANGULAR SYSTEM. */
-/*                                STORE THE SOLUTION TEMPORARILY IN ZZ(). */
-    rtnkey = 1;
-    goto L400;
-L200:
-
-/*                       ******  SECONDARY LOOP BEGINS HERE ****** */
-
-/*                          ITERATION COUNTER. */
-
-L210:
-    ++iter;
-    if (iter > itmax) {
-	*mode = 3;
-/*         write (*,'(/a)') ' NNLS quitting on iteration count.' */
-	goto L350;
-    }
-
-/*                    SEE IF ALL NEW CONSTRAINED COEFFS ARE FEASIBLE. */
-/*                                  IF NOT COMPUTE ALPHA. */
-
-    alpha = 2.;
-    i__1 = nsetp;
-    for (ip = 1; ip <= i__1; ++ip) {
-	l = index[ip];
-	if (zz[ip] <= 0.) {
-	    t = -x[l] / (zz[ip] - x[l]);
-	    if (alpha > t) {
-		alpha = t;
-		jj = ip;
-	    }
-	}
-/* L240: */
-    }
-
-/*          IF ALL NEW CONSTRAINED COEFFS ARE FEASIBLE THEN ALPHA WILL */
-/*          STILL = 2.    IF SO EXIT FROM SECONDARY LOOP TO MAIN LOOP. */
-
-    if (alpha == 2.) {
-	goto L330;
-    }
-
-/*          OTHERWISE USE ALPHA WHICH WILL BE BETWEEN 0. AND 1. TO */
-/*          INTERPOLATE BETWEEN THE OLD X AND THE NEW ZZ. */
-
-    i__1 = nsetp;
-    for (ip = 1; ip <= i__1; ++ip) {
-	l = index[ip];
-	x[l] += alpha * (zz[ip] - x[l]);
-/* L250: */
-    }
-
-/*        MODIFY A AND B AND THE INDEX ARRAYS TO MOVE COEFFICIENT I */
-/*        FROM SET P TO SET Z. */
-
-    i__ = index[jj];
-L260:
-    x[i__] = 0.;
-
-    if (jj != nsetp) {
-	++jj;
-	i__1 = nsetp;
-	for (j = jj; j <= i__1; ++j) {
-	    ii = index[j];
-	    index[j - 1] = ii;
-	    g1_(&a[j - 1 + ii * a_dim1], &a[j + ii * a_dim1], &cc, &ss, &a[j 
-		    - 1 + ii * a_dim1]);
-	    a[j + ii * a_dim1] = 0.;
-	    i__2 = *n;
-	    for (l = 1; l <= i__2; ++l) {
-		if (l != ii) {
-
-/*                 Apply procedure G2 (CC,SS,A(J-1,L),A(J,L)) */
-
-		    temp = a[j - 1 + l * a_dim1];
-		    a[j - 1 + l * a_dim1] = cc * temp + ss * a[j + l * a_dim1]
-			    ;
-		    a[j + l * a_dim1] = -ss * temp + cc * a[j + l * a_dim1];
-		}
-/* L270: */
-	    }
-
-/*                 Apply procedure G2 (CC,SS,B(J-1),B(J)) */
-
-	    temp = b[j - 1];
-	    b[j - 1] = cc * temp + ss * b[j];
-	    b[j] = -ss * temp + cc * b[j];
-/* L280: */
-	}
-    }
-
-    npp1 = nsetp;
-    --nsetp;
-    --iz1;
-    index[iz1] = i__;
-
-/*        SEE IF THE REMAINING COEFFS IN SET P ARE FEASIBLE.  THEY SHOULD */
-/*        BE BECAUSE OF THE WAY ALPHA WAS DETERMINED. */
-/*        IF ANY ARE INFEASIBLE IT IS DUE TO ROUND-OFF ERROR.  ANY */
-/*        THAT ARE NONPOSITIVE WILL BE SET TO ZERO */
-/*        AND MOVED FROM SET P TO SET Z. */
-
-    i__1 = nsetp;
-    for (jj = 1; jj <= i__1; ++jj) {
-	i__ = index[jj];
-	if (x[i__] <= 0.) {
-	    goto L260;
-	}
-/* L300: */
-    }
-
-/*         COPY B( ) INTO ZZ( ).  THEN SOLVE AGAIN AND LOOP BACK. */
-
-    i__1 = *m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-/* L310: */
-	zz[i__] = b[i__];
-    }
-    rtnkey = 2;
-    goto L400;
-L320:
-    goto L210;
-/*                      ******  END OF SECONDARY LOOP  ****** */
-
-L330:
-    i__1 = nsetp;
-    for (ip = 1; ip <= i__1; ++ip) {
-	i__ = index[ip];
-/* L340: */
-	x[i__] = zz[ip];
-    }
-/*        ALL NEW COEFFS ARE POSITIVE.  LOOP BACK TO BEGINNING. */
-    goto L30;
-
-/*                        ******  END OF MAIN LOOP  ****** */
-
-/*                        COME TO HERE FOR TERMINATION. */
-/*                     COMPUTE THE NORM OF THE FINAL RESIDUAL VECTOR. */
-
-L350:
-    sm = 0.;
-    if (npp1 <= *m) {
-	i__1 = *m;
-	for (i__ = npp1; i__ <= i__1; ++i__) {
-/* L360: */
-/* Computing 2nd power */
-	    d__1 = b[i__];
-	    sm += d__1 * d__1;
-	}
-    } else {
-	i__1 = *n;
-	for (j = 1; j <= i__1; ++j) {
-/* L380: */
-	    w[j] = 0.;
-	}
-    }
-    *rnorm = sqrt(sm);
-    return 0;
-
-/*     THE FOLLOWING BLOCK OF CODE IS USED AS AN INTERNAL SUBROUTINE */
-/*     TO SOLVE THE TRIANGULAR SYSTEM, PUTTING THE SOLUTION IN ZZ(). */
-
-L400:
-    i__1 = nsetp;
-    for (l = 1; l <= i__1; ++l) {
-	ip = nsetp + 1 - l;
-	if (l != 1) {
-	    i__2 = ip;
-	    for (ii = 1; ii <= i__2; ++ii) {
-		zz[ii] -= a[ii + jj * a_dim1] * zz[ip + 1];
-/* L410: */
-	    }
-	}
-	jj = index[ip];
-	zz[ip] /= a[ip + jj * a_dim1];
-/* L430: */
-    }
-    switch (rtnkey) {
-	case 1:  goto L200;
-	case 2:  goto L320;
-    }
-    return 0;
 } /* nnls_ */
 
+
--- a/nnls.h	Thu Oct 21 20:19:29 2010 +0100
+++ b/nnls.h	Thu Oct 21 20:50:22 2010 +0100
@@ -5,11 +5,9 @@
 extern "C" {
 #endif
 
-int nnls_(double *a, int *mda, int *m, int *n, 
-	  double *b, double *x, double *rnorm, 
-	  double *w, double *zz, int *index, int *mode);
-
-#define NNLS nnls_
+int nnls(float *a, int mda, int m, int n, 
+	 float *b, float *x, float *rnorm, 
+	 float *w, float *zz, int *index, int *mode);
 
 #ifdef __cplusplus
 }