view Matrix.cpp @ 1:6422640a802f

first upload
author Wen X <xue.wen@elec.qmul.ac.uk>
date Tue, 05 Oct 2010 10:45:57 +0100
parents
children
line wrap: on
line source
//---------------------------------------------------------------------------
#include <math.h>
#include <memory.h>
#include "Matrix.h"
//---------------------------------------------------------------------------
/*
  function BalanceSim: applies a similarity transformation to matrix a so that a is "balanced". This is
  used by various eigenvalue evaluation routines.

  In: matrix A[n][n]
  Out: balanced matrix a

  No return value.
*/
void BalanceSim(int n, double** A)
{
  if (n<2) return;
  const int radix=2;
  double sqrdx;
  sqrdx=radix*radix;
  bool finish=false;
  while (!finish)
  {
    finish=true;
    for (int i=0; i<n; i++)
    {
      double s, sr=0, sc=0, ar, ac;
      for (int j=0; j<n; j++)
        if (j!=i)
        {
          sc+=fabs(A[j][i]);
          sr+=fabs(A[i][j]);
        }
      if (sc!=0 && sr!=0)
      {
        ar=sr/radix;
        ac=1.0;
        s=sr+sc;
        while (sc<ar)
        {
          ac*=radix;
          sc*=sqrdx;
        }
        ar=sr*radix;
        while (sc>ar)
        {
          ac/=radix;
          sc/=sqrdx;
        }
      }
      if ((sc+sr)/ac<0.95*s)
      {
        finish=false;
        ar=1.0/ac;
        for (int j=0; j<n; j++) A[i][j]*=ar;
        for (int j=0; j<n; j++) A[j][i]*=ac;
      }
    }
  }
}//BalanceSim

//---------------------------------------------------------------------------
/*
  function Choleski: Choleski factorization A=LL', where L is lower triangular. The symmetric matrix
  A[N][N] is positive definite iff A can be factored as LL', where L is lower triangular with nonzero
  diagonl entries.

  In: matrix A[N][N]
  Out: mstrix L[N][N].

  Returns 0 if successful. On return content of matrix a is not changed.
*/
int Choleski(int N, double** L, double** A)
{
  if (A[0][0]==0) return 1;
  L[0][0]=sqrt(A[0][0]);
  memset(&L[0][1], 0, sizeof(double)*(N-1));
  for (int j=1; j<N; j++) L[j][0]=A[j][0]/L[0][0];
  for (int i=1; i<N-1; i++)
  {
    L[i][i]=A[i][i]; for (int k=0; k<i; k++) L[i][i]-=L[i][k]*L[i][k]; L[i][i]=sqrt(L[i][i]);
    if (L[i][i]==0) return 1;
    for (int j=i+1; j<N; j++)
    {
      L[j][i]=A[j][i]; for (int k=0; k<i; k++) L[j][i]-=L[j][k]*L[i][k]; L[j][i]/=L[i][i];
    }
    memset(&L[i][i+1], 0, sizeof(double)*(N-1-i));
  }
  L[N-1][N-1]=A[N-1][N-1]; for (int k=0; k<N-1; k++) L[N-1][N-1]-=L[N-1][k]*L[N-1][k]; L[N-1][N-1]=sqrt(L[N-1][N-1]);
  return 0;
}//Choleski

//---------------------------------------------------------------------------
//matrix duplication routines

/*
  function Copy: duplicate the matrix A as matrix Z.

  In: matrix A[M][N]
  Out: matrix Z[M][N]

  Returns pointer to Z. Z is created anew if Z=0 is supplied on start.
*/
double** Copy(int M, int N, double** Z, double** A, MList* List)
{
  if (!Z) {Allocate2(double, M, N, Z); if (List) List->Add(Z, 2);}
  int sizeN=sizeof(double)*N;
  for (int m=0; m<M; m++) memcpy(Z[m], A[m], sizeN);
  return Z;
}//Copy
//complex version
cdouble** Copy(int M, int N, cdouble** Z, cdouble** A, MList* List)
{
  if (!Z) {Allocate2(cdouble, M, N, Z); if (List) List->Add(Z, 2);}
  int sizeN=sizeof(cdouble)*N;
  for (int m=0; m<M; m++) memcpy(Z[m], A[m], sizeN);
  return Z;
}//Copy
//version without specifying pre-allocated z
double** Copy(int M, int N, double** A, MList* List){return Copy(M, N, 0, A, List);}
cdouble** Copy(int M, int N, cdouble** A, MList* List){return Copy(M, N, 0, A, List);}
//for square matrices
double** Copy(int N, double** Z, double ** A, MList* List){return Copy(N, N, Z, A, List);}
double** Copy(int N, double** A, MList* List){return Copy(N, N, 0, A, List);}
cdouble** Copy(int N, cdouble** Z, cdouble** A, MList* List){return Copy(N, N, Z, A, List);}
cdouble** Copy(int N, cdouble** A, MList* List){return Copy(N, N, 0, A, List);}

//---------------------------------------------------------------------------
//vector duplication routines

/*
  function Copy: duplicating vector a as vector z

  In: vector a[N]
  Out: vector z[N]

  Returns pointer to z. z is created anew is z=0 is specified on start.
*/
double* Copy(int N, double* z, double* a, MList* List)
{
  if (!z){z=new double[N]; if (List) List->Add(z, 1);}
  memcpy(z, a, sizeof(double)*N);
  return z;
}//Copy
cdouble* Copy(int N, cdouble* z, cdouble* a, MList* List)
{
  if (!z){z=new cdouble[N]; if (List) List->Add(z, 1);}
  memcpy(z, a, sizeof(cdouble)*N);
  return z;
}//Copy
//version without specifying pre-allocated z
double* Copy(int N, double* a, MList* List){return Copy(N, 0, a, List);}
cdouble* Copy(int N, cdouble* a, MList* List){return Copy(N, 0, a, List);}

//---------------------------------------------------------------------------
/*
  function det: computes determinant by Gaussian elimination method with column pivoting

  In: matrix A[N][N]

  Returns det(A). On return content of matrix A is unchanged if mode=0.
*/
double det(int N, double** A, int mode)
{
  int c, p, ip, *rp=new int[N]; for (int i=0; i<N; i++) rp[i]=i;
  double m, **b, result=1;

  if (mode==0)
  {
    int sizeN=sizeof(double)*N;
    b=new double*[N]; b[0]=new double[N*N]; for (int i=0; i<N; i++) {b[i]=&b[0][i*N]; memcpy(b[i], A[i], sizeN);}
    A=b;
  }

  //Gaussian eliminating
  for (int i=0; i<N-1; i++)
  {
    p=i, ip=i+1;
    while (ip<N){if (fabs(A[rp[ip]][i])>fabs(A[rp[p]][i])) p=ip; ip++;}
    if (A[rp[p]][i]==0) {result=0; goto ret;}
    if (p!=i) {c=rp[i]; rp[i]=rp[p]; rp[p]=c; result=-result;}
    for (int j=i+1; j<N; j++)
    {
      m=A[rp[j]][i]/A[rp[i]][i];
      A[rp[j]][i]=0;
      for (int k=i+1; k<N; k++) A[rp[j]][k]-=m*A[rp[i]][k];
    }
  }
  if (A[rp[N-1]][N-1]==0) {result=0; goto ret;}

	for (int i=0; i<N; i++)
    result*=A[rp[i]][i];

ret:
  if (mode==0) {delete[] b[0]; delete[] b;}
  delete[] rp;
  return result;
}//det
//complex version
cdouble det(int N, cdouble** A, int mode)
{
  int c, p, ip, *rp=new int[N]; for (int i=0; i<N; i++) rp[i]=i;
  double mm, mp;
  cdouble m, **b, result=1;

  if (mode==0)
  {
    int sizeN=sizeof(cdouble)*N;
    b=new cdouble*[N]; b[0]=new cdouble[N*N];
    for (int i=0; i<N; i++) {b[i]=&b[0][i*N]; memcpy(b[i], A[i], sizeN);}
    A=b;
  }

  //Gaussian elimination
  for (int i=0; i<N-1; i++)
  {
    p=i, ip=i+1; m=A[rp[p]][i]; mp=~m;
    while (ip<N){m=A[rp[ip]][i]; mm=~m; if (mm>mp) mp=mm, p=ip; ip++;}
    if (mp==0) {result=0; goto ret;}
    if (p!=i) {c=rp[i]; rp[i]=rp[p]; rp[p]=c;}
    for (int j=i+1; j<N; j++)
    {
      m=A[rp[j]][i]/A[rp[i]][i];
      A[rp[j]][i]=0;
      for (int k=i+1; k<N; k++) A[rp[j]][k]-=m*A[rp[i]][k];
    }
  }
  if (operator==(A[rp[N-1]][N-1],0)) {result=0; goto ret;}

  for (int i=0; i<N; i++) result=result*A[rp[i]][i];
ret:
  if (mode==0) {delete[] b[0]; delete[] b;}
  delete[] rp;
  return result;
}//det

//---------------------------------------------------------------------------
/*
  function EigPower: power method for solving dominant eigenvalue and eigenvector

  In: matrix A[N][N], initial arbitrary vector x[N].
  Out: eigenvalue l, eigenvector x[N].

  Returns 0 is successful. Content of matrix A is unchangd on return. Initial x[N] must not be zero.
*/
int EigPower(int N, double& l, double* x, double** A, double ep, int maxiter)
{
  int k=0;
  int p=0; for (int i=1; i<N; i++) if (fabs(x[p])<fabs(x[i])) p=i;
  Multiply(N, x, x, 1/x[p]);
  double e, ty,te, *y=new double[N];

  while (k<maxiter)
  {
    MultiplyXy(N, N, y, A, x);
    l=y[p];
    int p=0; for (int i=1; i<N; i++) if (fabs(y[p])<fabs(y[i])) p=i;
    if (y[p]==0) {l=0; delete[] y; return 0;}
    ty=y[0]/y[p]; e=fabs(x[0]-ty); x[0]=ty;
    for (int i=1; i<N; i++)
    {
      ty=y[i]/y[p]; te=fabs(x[i]-ty); if (e<te) e=te; x[i]=ty;
    }
    if (e<ep) {delete[] y; return 0;}
    k++;
  }
  delete[] y; return 1;
}//EigPower

//---------------------------------------------------------------------------
/*
  function EigPowerA: EigPower with Aitken acceleration

  In: matrix A[N][N], initial arbitrary vector x[N].
  Out: eigenvalue l, eigenvector x[N].

  Returns 0 is successful. Content of matrix A is unchangd on return. Initial x[N] must not be zero.
*/
int EigPowerA(int N, double& l, double* x, double** A, double ep, int maxiter)
{
  int k=0;
  int p=0; for (int i=1; i<N; i++) if (fabs(x[p])<fabs(x[i])) p=i;
  Multiply(N, x, x, 1/x[p]);
  double m, m0=0, m1=0, e, ty,te, *y=new double[N];

  while (k<maxiter)
  {
    MultiplyXy(N, N, y, A, x);
    m=y[p];
    int p=0; for (int i=1; i<N; i++) if (fabs(y[p])<fabs(y[i])) p=i;
    if (y[p]==0) {l=0; delete[] y; return 0;}
    ty=y[0]/y[p]; e=fabs(x[0]-ty); x[0]=ty;
    for (int i=1; i<N; i++)
    {
      ty=y[i]/y[p]; te=fabs(x[i]-ty); if (e<te) e=te; x[i]=ty;
    }
    if (e<ep && k>2) {l=m0-(m1-m0)*(m1-m0)/(m-2*m1+m0); delete[] y; return 0;}
    k++; m0=m1; m1=m;
  }
  delete[] y; return 1;
}//EigPowerA

//---------------------------------------------------------------------------
/*
  function EigPowerI: Inverse power method for solving the eigenvalue given an approximate non-zero
  eigenvector.

  In: matrix A[N][N], approximate eigenvector x[N].
  Out: eigenvalue l, eigenvector x[N].

  Returns 0 is successful. Content of matrix A is unchangd on return. Initial x[N] must not be zero.
*/
int EigPowerI(int N, double& l, double* x, double** A, double ep, int maxiter)
{
  int sizeN=sizeof(double)*N;
  double* y=new double[N]; MultiplyXy(N, N, y, A, x);
  double q=Inner(N, x, y)/Inner(N, x, x), dt;
  double** aa=new double*[N]; aa[0]=new double[N*N];
  for (int i=0; i<N; i++) {aa[i]=&aa[0][i*N]; memcpy(aa[i], A[i], sizeN); aa[i][i]-=q;}
  dt=GISCP(N, aa);
  if (dt==0) {l=q; delete[] aa[0]; delete[] aa; delete[] y; return 0;}

  int k=0;
  int p=0; for (int i=1; i<N; i++) if (fabs(x[p])<fabs(x[i])) p=i;
  Multiply(N, x, x, 1/x[p]);

  double m, e, ty, te;
  while (k<N)
  {
    MultiplyXy(N, N, y, aa, x);
    m=y[p];
    p=0; for (int i=1; i<N; i++) if (fabs(y[p])<fabs(y[i])) p=i;
    ty=y[0]/y[p]; te=x[0]-ty; e=fabs(te); x[0]=ty;
    for (int i=1; i<N; i++)
    {
      ty=y[i]/y[p]; te=fabs(x[i]-ty); if (e<te) e=te; x[i]=ty;
    }
    if (e<ep) {l=1/m+q; delete[] aa[0]; delete[] aa; delete[] y; return 0;}
  }
  delete[] aa[0]; delete[] aa;
  delete[] y; return 1;
}//EigPowerI

//---------------------------------------------------------------------------
/*
  function EigPowerS: symmetric power method for solving the dominant eigenvalue with its eigenvector

  In: matrix A[N][N], initial arbitrary vector x[N].
  Out: eigenvalue l, eigenvector x[N].

  Returns 0 is successful. Content of matrix A is unchangd on return. Initial x[N] must not be zero.
*/
int EigPowerS(int N, double& l, double* x, double** A, double ep, int maxiter)
{
  int k=0;
  Multiply(N, x, x, 1/sqrt(Inner(N, x, x)));
  double y2, e, ty, te, *y=new double[N];
  while (k<maxiter)
  {
    MultiplyXy(N, N, y, A, x);
    l=Inner(N, x, y);
    y2=sqrt(Inner(N, y, y));
    if (y2==0) {l=0; delete[] y; return 0;}
    ty=y[0]/y2; te=x[0]-ty; e=te*te; x[0]=ty;
    for (int i=1; i<N; i++)
    {
      ty=y[i]/y2; te=x[i]-ty; e+=te*te; x[i]=ty;
    }
    e=sqrt(e);
    if (e<ep) {delete[] y; return 0;}
    k++;
  }
  delete[] y;
  return 1;
}//EigPowerS

//---------------------------------------------------------------------------
/*
  function EigPowerWielandt: Wielandt's deflation algorithm for solving a second dominant eigenvalue and
  eigenvector (m,u) given the dominant eigenvalue and eigenvector (l,v).

  In: matrix A[N][N], first eigenvalue l with eigenvector v[N]
  Out: second eigenvalue m with eigenvector u

  Returns 0 if successful. Content of matrix A is unchangd on return. Initial u[N] must not be zero.
*/
int EigPowerWielandt(int N, double& m, double* u, double l, double* v, double** A, double ep, int maxiter)
{
  int result;
  double** b=new double*[N-1]; b[0]=new double[(N-1)*(N-1)]; for (int i=1; i<N-1; i++) b[i]=&b[0][i*(N-1)];
  double* w=new double[N];
  int i=0; for (int j=1; j<N; j++) if (fabs(v[i])<fabs(v[j])) i=j;
  if (i!=0)
    for (int k=0; k<i; k++)
      for (int j=0; j<i; j++)
        b[k][j]=A[k][j]-v[k]*A[i][j]/v[i];
  if (i!=0 && i!=N-1)
    for (int k=i; k<N-1; k++)
      for (int j=0; j<i; j++)
        b[k][j]=A[k+1][j]-v[k+1]*A[i][j]/v[i], b[j][k]=A[j][k+1]-v[j]*A[i][k+1]/v[i];
  if (i!=N-1)
    for (int k=i; k<N-1; k++)
      for (int j=i; j<N-1; j++) b[k][j]=A[k+1][j+1]-v[k+1]*A[i][j+1]/v[i];
  memcpy(w, u, sizeof(double)*(N-1));
  if ((result=EigPower(N-1, m, w, b, ep, maxiter))==0)
  { //*
    if (i!=N-1) memmove(&w[i+1], &w[i], sizeof(double)*(N-i-1));
    w[i]=0;
    for (int k=0; k<N; k++) u[k]=(m-l)*w[k]+Inner(N, A[i], w)*v[k]/v[i];   //*/
  }
  delete[] w; delete[] b[0]; delete[] b;
  return result;
}//EigPowerWielandt

//---------------------------------------------------------------------------
//NR versions of eigensystem

/*
  function EigenValues: solves for eigenvalues of general system

  In: matrix A[N][N]
  Out: eigenvalues ev[N]

  Returns 0 if successful. Content of matrix A is destroyed on return.
*/
int EigenValues(int N, double** A, cdouble* ev)
{
  BalanceSim(N, A);
  Hessenb(N, A);
  return QR(N, A, ev);
}//EigenValues

/*
  function EigSym: Solves real symmetric eigensystem A

  In: matrix A[N][N]
  Out: eigenvalues d[N], transform matrix Q[N][N], so that diag(d)=Q'AQ, A=Q diag(d) Q', AQ=Q diag(d)

  Returns 0 if successful. Content of matrix A is unchanged on return.
*/
int EigSym(int N, double** A, double* d, double** Q)
{
  Copy(N, Q, A);
  double* t=new double[N];
  HouseHolder(5, Q, d, t);
  double result=QL(5, d, t, Q);
  delete[] t;
  return result;
}//EigSym

//---------------------------------------------------------------------------
/*
  function GEB: Gaussian elimination with backward substitution for solving linear system Ax=b.

  In: coefficient matrix A[N][N], vector b[N]
  Out: vector x[N]

  Returns 0 if successful. Contents of matrix A and vector b are destroyed on return.
*/
int GEB(int N, double* x, double** A, double* b)
{
  //Gaussian eliminating
  int c, p, *rp=new int[N]; for (int i=0; i<N; i++) rp[i]=i;
  double m;
  for (int i=0; i<N-1; i++)
  {
    p=i;
    while (p<N && A[rp[p]][i]==0) p++;
    if (p>=N) {delete[] rp; return 1;}
    if (p!=i){c=rp[i]; rp[i]=rp[p]; rp[p]=c;}
    for (int j=i+1; j<N; j++)
    {
      m=A[rp[j]][i]/A[rp[i]][i];
      A[rp[j]][i]=0;
      for (int k=i+1; k<N; k++) A[rp[j]][k]-=m*A[rp[i]][k];
      b[rp[j]]-=m*b[rp[i]];
    }
  }
  if (A[rp[N-1]][N-1]==0) {delete[] rp; return 1;}
  else
  {
    //backward substitution
    x[N-1]=b[rp[N-1]]/A[rp[N-1]][N-1];
    for (int i=N-2; i>=0; i--)
    {
      x[i]=b[rp[i]]; for (int j=i+1; j<N; j++) x[i]-=A[rp[i]][j]*x[j]; x[i]/=A[rp[i]][i];
    }
  }
  delete[] rp;
  return 0;
}//GEB

//---------------------------------------------------------------------------
/*
  function GESCP: Gaussian elimination with scaled column pivoting for solving linear system Ax=b

  In: matrix A[N][N], vector b[N]
  Out: vector x[N]

  Returns 0 is successful. Contents of matrix A and vector b are destroyed on return.
*/
int GESCP(int N, double* x, double** A, double *b)
{
  int c, p, ip, *rp=new int[N];
  double m, *s=new double[N];
  for (int i=0; i<N; i++)
  {
    s[i]=fabs(A[i][0]);
    for (int j=1; j<N; j++) if (s[i]<fabs(A[i][j])) s[i]=fabs(A[i][j]);
    if (s[i]==0) {delete[] s; delete[] rp; return 1;}
    rp[i]=i;
  }
  //Gaussian eliminating
  for (int i=0; i<N-1; i++)
  {
    p=i, ip=i+1;
    while (ip<N){if (fabs(A[rp[ip]][i])/s[rp[ip]]>fabs(A[rp[p]][i])/s[rp[p]]) p=ip; ip++;}
    if (A[rp[p]][i]==0) {delete[] s; delete[] rp; return 1;}
    if (p!=i) {c=rp[i]; rp[i]=rp[p]; rp[p]=c;}
    for (int j=i+1; j<N; j++)
    {
      m=A[rp[j]][i]/A[rp[i]][i];
      A[rp[j]][i]=0;
      for (int k=i+1; k<N; k++) A[rp[j]][k]-=m*A[rp[i]][k];
      b[rp[j]]-=m*b[rp[i]];
    }
  }
  if (A[rp[N-1]][N-1]==0) {delete[] s; delete[] rp; return 1;}
  //backward substitution
  x[N-1]=b[rp[N-1]]/A[rp[N-1]][N-1];
  for (int i=N-2; i>=0; i--)
  {
    x[i]=b[rp[i]]; for (int j=i+1; j<N; j++) x[i]-=A[rp[i]][j]*x[j]; x[i]/=A[rp[i]][i];
  }
  delete[] s; delete[] rp;
  return 0;
}//GESCP

//---------------------------------------------------------------------------
/*
  function GExL: solves linear system xL=a, L being lower-triangular. This is used in LU factorization
  for solving linear systems.

  In: lower-triangular matrix L[N][N], vector a[N]
  Out: vector x[N]

  No return value. Contents of matrix L and vector a are unchanged at return.
*/
void GExL(int N, double* x, double** L, double* a)
{
  for (int n=N-1; n>=0; n--)
  {
    double xn=a[n];
    for (int m=n+1; m<N; m++) xn-=x[m]*L[m][n];
    x[n]=xn/L[n][n];
  }
}//GExL

/*
  function GExLAdd: solves linear system *L=a, L being lower-triangular, and add the solution * to x[].

  In: lower-triangular matrix L[N][N], vector a[N]
  Out: updated vector x[N]

  No return value. Contents of matrix L and vector a are unchanged at return.
*/
void GExLAdd(int N, double* x, double** L, double* a)
{
  double* lx=new double[N];
  GExL(N, lx, L, a);
  for (int i=0; i<N; i++) x[i]+=lx[i];
  delete[] lx;
}//GExLAdd

/*
  function GExL1: solves linear system xL=(0, 0, ..., 0, a)', L being lower-triangular.

  In: lower-triangular matrix L[N][N], a
  Out: vector x[N]

  No return value. Contents of matrix L and vector a are unchanged at return.
*/
void GExL1(int N, double* x, double** L, double a)
{
  double xn=a;
  for (int n=N-1; n>=0; n--)
  {
    for (int m=n+1; m<N; m++) xn-=x[m]*L[m][n];
    x[n]=xn/L[n][n];
    xn=0;
  }
}//GExL1

/*
  function GExL1Add: solves linear system *L=(0, 0, ..., 0, a)', L being lower-triangular, and add the
  solution * to x[].

  In: lower-triangular matrix L[N][N], vector a
  Out: updated vector x[N]

  No return value. Contents of matrix L and vector a are unchanged at return.
*/
void GExL1Add(int N, double* x, double** L, double a)
{
  double* lx=new double[N];
  GExL1(N, lx, L, a);
  for (int i=0; i<N; i++) x[i]+=lx[i];
  delete[] lx;
}//GExL1Add

//---------------------------------------------------------------------------
/*
  function GICP: matrix inverse using Gaussian elimination with column pivoting: inv(A)->A.

  In: matrix A[N][N]
  Out: matrix A[N][N]

  Returns the determinant of the inverse matrix, 0 on failure.
*/
double GICP(int N, double** A)
{
  int c, p, ip, *rp=new int[N]; for (int i=0; i<N; i++) rp[i]=i;
  double m, result=1;

  //Gaussian eliminating
  for (int i=0; i<N-1; i++)
  {
    p=i, ip=i+1;
    while (ip<N){if (fabs(A[rp[ip]][i])>fabs(A[rp[p]][i])) p=ip; ip++;}
    if (A[rp[p]][i]==0) {delete[] rp; return 0;}
    if (p!=i) {c=rp[i]; rp[i]=rp[p]; rp[p]=c; result=-result;}
    result/=A[rp[i]][i];
    for (int j=i+1; j<N; j++)
    {
      m=A[rp[j]][i]/A[rp[i]][i];
      A[rp[j]][i]=-m;
      for (int k=i+1; k<N; k++) A[rp[j]][k]-=m*A[rp[i]][k];
      for (int k=0; k<i; k++) A[rp[j]][k]-=m*A[rp[i]][k];
    }
  }
  if (A[rp[N-1]][N-1]==0) {delete[] rp; return 0;}
  result/=A[rp[N-1]][N-1];
  //backward substitution
  for (int i=0; i<N-1; i++)
  {
    m=A[rp[i]][i]; for (int k=0; k<N; k++) A[rp[i]][k]/=m; A[rp[i]][i]=1/m;
    for (int j=i+1; j<N; j++)
    {
      m=A[rp[i]][j]/A[rp[j]][j]; for (int k=0; k<N; k++) A[rp[i]][k]-=A[rp[j]][k]*m; A[rp[i]][j]=-m;
    }
  }
  m=A[rp[N-1]][N-1]; for (int k=0; k<N-1; k++) A[rp[N-1]][k]/=m; A[rp[N-1]][N-1]=1/m;
  //recover column and row exchange
  double* tm=new double[N]; int sizeN=sizeof(double)*N;
  for (int i=0; i<N; i++) { for (int j=0; j<N; j++) tm[rp[j]]=A[i][j]; memcpy(A[i], tm, sizeN); }
  for (int j=0; j<N; j++) { for (int i=0; i<N; i++) tm[i]=A[rp[i]][j]; for (int i=0; i<N; i++) A[i][j]=tm[i];}

  delete[] tm; delete[] rp;
  return result;
}//GICP
//complex version
cdouble GICP(int N, cdouble** A)
{
  int c, p, ip, *rp=new int[N]; for (int i=0; i<N; i++) rp[i]=i;
  cdouble m, result=1;

  //Gaussian eliminating
  for (int i=0; i<N-1; i++)
  {
    p=i, ip=i+1;
    while (ip<N){if (~A[rp[ip]][i]>~A[rp[p]][i]) p=ip; ip++;}
    if (A[rp[p]][i]==0) {delete[] rp; return 0;}
    if (p!=i) {c=rp[i]; rp[i]=rp[p]; rp[p]=c; result=-result;}
    result=result/(A[rp[i]][i]);
    for (int j=i+1; j<N; j++)
    {
      m=A[rp[j]][i]/A[rp[i]][i];
      A[rp[j]][i]=-m;
      for (int k=i+1; k<N; k++) A[rp[j]][k]-=m*A[rp[i]][k];
      for (int k=0; k<i; k++) A[rp[j]][k]-=m*A[rp[i]][k];
    }
  }
  if (A[rp[N-1]][N-1]==0) {delete[] rp; return 0;}
  result=result/A[rp[N-1]][N-1];
  //backward substitution
  for (int i=0; i<N-1; i++)
  {
    m=A[rp[i]][i]; for (int k=0; k<N; k++) A[rp[i]][k]=A[rp[i]][k]/m; A[rp[i]][i]=cdouble(1)/m;
    for (int j=i+1; j<N; j++)
    {
      m=A[rp[i]][j]/A[rp[j]][j]; for (int k=0; k<N; k++) A[rp[i]][k]-=A[rp[j]][k]*m; A[rp[i]][j]=-m;
    }
  }
  m=A[rp[N-1]][N-1]; for (int k=0; k<N-1; k++) A[rp[N-1]][k]=A[rp[N-1]][k]/m; A[rp[N-1]][N-1]=cdouble(1)/m;
  //recover column and row exchange
  cdouble* tm=new cdouble[N]; int sizeN=sizeof(cdouble)*N;
  for (int i=0; i<N; i++) { for (int j=0; j<N; j++) tm[rp[j]]=A[i][j]; memcpy(A[i], tm, sizeN); }
  for (int j=0; j<N; j++) { for (int i=0; i<N; i++) tm[i]=A[rp[i]][j]; for (int i=0; i<N; i++) A[i][j]=tm[i];}

  delete[] tm; delete[] rp;
  return result;
}//GICP

/*
  function GICP: wrapper function that does not overwrite the input matrix: inv(A)->X.

  In: matrix A[N][N]
  Out: matrix X[N][N]

  Returns the determinant of the inverse matrix, 0 on failure.
*/
double GICP(int N, double** X, double** A)
{
  Copy(N, X, A);
  return GICP(N, X);
}//GICP

//---------------------------------------------------------------------------
/*
  function GILT: inv(lower trangular of A)->lower trangular of A

  In: matrix A[N][N]
  Out: matrix A[N][N]

  Returns the determinant of the lower trangular of A
*/
double GILT(int N, double** A)
{
  double result=1;
  A[0][0]=1/A[0][0];
  for (int i=1; i<N; i++)
  {
    result*=A[i][i];
    double tmp=1/A[i][i];
    for (int k=0; k<i; k++) A[i][k]*=tmp; A[i][i]=tmp;
    for (int j=0; j<i; j++)
    {
      double tmp2=A[i][j];
      for (int k=0; k<j; k++) A[i][k]-=A[j][k]*tmp2; A[i][j]=-A[j][j]*tmp2;
    }
  }
  return result;
}//GILT

/*
  function GIUT: inv(upper trangular of A)->upper trangular of A

  In: matrix A[N][N]
  Out: matrix A[N][N]

  Returns the determinant of the upper trangular of A
*/
double GIUT(int N, double** A)
{
  double result=1;
  A[0][0]=1/A[0][0];
  for (int i=1; i<N; i++)
  {
    result*=A[i][i];
    double tmp=1/A[i][i];
    for (int k=0; k<i; k++) A[k][i]*=tmp; A[i][i]=tmp;
    for (int j=0; j<i; j++)
    {
      double tmp2=A[j][i];
      for (int k=0; k<j; k++) A[k][i]-=A[k][j]*tmp2; A[j][i]=-A[j][j]*tmp2;
    }
  }
  return result;
}//GIUT

//---------------------------------------------------------------------------
/*
  function GISCP: matrix inverse using Gaussian elimination w. scaled column pivoting: inv(A)->A.

  In: matrix A[N][N]
  Out: matrix A[N][N]

  Returns the determinant of the inverse matrix, 0 on failure.
*/
double GISCP(int N, double** A)
{
  int c, p, ip, *rp=new int[N]; for (int i=0; i<N; i++) rp[i]=i;
  double m, result=1, *s=new double[N];

  for (int i=0; i<N; i++)
  {
    s[i]=A[i][0];
    for (int j=1; j<N; j++) if (fabs(s[i])<fabs(A[i][j])) s[i]=A[i][j];
    if (s[i]==0) {delete[] s; delete[] rp; return 0;}
    rp[i]=i;
  }

  //Gaussian eliminating
  for (int i=0; i<N-1; i++)
  {
    p=i, ip=i+1;
    while (ip<N){if (fabs(A[rp[ip]][i]/s[rp[ip]])>fabs(A[rp[p]][i]/s[rp[p]])) p=ip; ip++;}
    if (A[rp[p]][i]==0) {delete[] s; delete[] rp; return 0;}
    if (p!=i) {c=rp[i]; rp[i]=rp[p]; rp[p]=c; result=-result;}
    result/=A[rp[i]][i];
    for (int j=i+1; j<N; j++)
    {
      m=A[rp[j]][i]/A[rp[i]][i];
      A[rp[j]][i]=-m;
      for (int k=i+1; k<N; k++) A[rp[j]][k]-=m*A[rp[i]][k];
      for (int k=0; k<i; k++) A[rp[j]][k]-=m*A[rp[i]][k];
    }
  }
  if (A[rp[N-1]][N-1]==0) {delete[] s; delete[] rp; return 0;}
  result/=A[rp[N-1]][N-1];
  //backward substitution
  for (int i=0; i<N-1; i++)
  {
    m=A[rp[i]][i]; for (int k=0; k<N; k++) A[rp[i]][k]/=m; A[rp[i]][i]=1/m;
    for (int j=i+1; j<N; j++)
    {
      m=A[rp[i]][j]/A[rp[j]][j]; for (int k=0; k<N; k++) A[rp[i]][k]-=A[rp[j]][k]*m; A[rp[i]][j]=-m;
    }
  }
  m=A[rp[N-1]][N-1]; for (int k=0; k<N-1; k++) A[rp[N-1]][k]/=m; A[rp[N-1]][N-1]=1/m;
  //recover column and row exchange
  double* tm=new double[N]; int sizeN=sizeof(double)*N;
  for (int i=0; i<N; i++) { for (int j=0; j<N; j++) tm[rp[j]]=A[i][j]; memcpy(A[i], tm, sizeN); }
  for (int j=0; j<N; j++) { for (int i=0; i<N; i++) tm[i]=A[rp[i]][j]; for (int i=0; i<N; i++) A[i][j]=tm[i];}

  delete[] tm; delete[] s; delete[] rp;
  return result;
}//GISCP

/*
  function GISCP: wrapper function that does not overwrite input matrix A: inv(A)->X.

  In: matrix A[N][N]
  Out: matrix X[N][N]

  Returns the determinant of the inverse matrix, 0 on failure.
*/
double GISCP(int N, double** X, double** A)
{
  Copy(N, X, A);
  return GISCP(N, X);
}//GISCP

//---------------------------------------------------------------------------
/*
  function GSI: Gaussian-Seidel iterative algorithm for solving linear system Ax=b. Breaks down if any
  Aii=0, like the Jocobi method JI(...).

  Gaussian-Seidel iteration is x(k)=(D-L)^(-1)(Ux(k-1)+b), where D is diagonal, L is lower triangular,
  U is upper triangular and A=L+D+U.

  In: matrix A[N][N], vector b[N], initial vector x0[N]
  Out: vector x0[N]

  Returns 0 is successful. Contents of matrix A and vector b remain unchanged on return.
*/
int GSI(int N, double* x0, double** A, double* b, double ep, int maxiter)
{
  double e, *x=new double[N];
  int k=0, sizeN=sizeof(double)*N;
  while (k<maxiter)
  {
    for (int i=0; i<N; i++)
    {
      x[i]=b[i];
      for (int j=0; j<i; j++) x[i]-=A[i][j]*x[j];
      for (int j=i+1; j<N; j++) x[i]-=A[i][j]*x0[j];
      x[i]/=A[i][i];
    }
    e=0; for (int j=0; j<N; j++) e+=fabs(x[j]-x0[j]);
    memcpy(x0, x, sizeN);
    if (e<ep) break;
    k++;
  }
  delete[] x;
  if (k>=maxiter) return 1;
  return 0;
}//GSI

//---------------------------------------------------------------------------
/*
  function Hessenb: reducing a square matrix A to upper Hessenberg form

  In: matrix A[N][N]
  Out: matrix A[N][N], in upper Hessenberg form

  No return value.
*/
void Hessenb(int N, double** A)
{
  double x, y;
  for (int m=1; m<N-1; m++)
  {
    x=0;
    int i=m;
    for (int j=m; j<N; j++)
    {
      if (fabs(A[j][m-1]) > fabs(x))
      {
        x=A[j][m-1];
        i=j;
      }
    }
    if (i!=m)
    {
      for (int j=m-1; j<N; j++)
      {
        double tmp=A[i][j];
        A[i][j]=A[m][j];
        A[m][j]=tmp;
      }
      for (int j=0; j<N; j++)
      {
        double tmp=A[j][i];
        A[j][i]=A[j][m];
        A[j][m]=tmp;
      }
    }
    if (x!=0)
    {
      for (i=m+1; i<N; i++)
      {
        if ((y=A[i][m-1])!=0)
        {
          y/=x;
          A[i][m-1]=0;
          for (int j=m; j<N; j++) A[i][j]-=y*A[m][j];
          for (int j=0; j<N; j++) A[j][m]+=y*A[j][i];
        }
      }
    }
  }
}//Hessenb

//---------------------------------------------------------------------------
/*
  function HouseHolder: house holder method converting a symmetric matrix into a tridiagonal symmetric
  matrix, or a non-symmetric matrix into an upper-Hessenberg matrix, using similarity transformation.

  In: matrix A[N][N]
  Out: matrix A[N][N] after transformation

  No return value.
*/
void HouseHolder(int N, double** A)
{
  double q, alf, prod, r2, *v=new double[N], *u=new double[N], *z=new double[N];
  for (int k=0; k<N-2; k++)
  {
    q=Inner(N-1-k, &A[k][k+1], &A[k][k+1]);

    if (A[k][k+1]==0) alf=sqrt(q);
    else alf=-sqrt(q)*A[k+1][k]/fabs(A[k+1][k]);

    r2=alf*(alf-A[k+1][k]);

    v[k]=0; v[k+1]=A[k][k+1]-alf;
    memcpy(&v[k+2], &A[k][k+2], sizeof(double)*(N-k-2));

    for (int j=k; j<N; j++) u[j]=Inner(N-1-k, &A[j][k+1], &v[k+1])/r2;

    prod=Inner(N-1-k, &v[k+1], &u[k+1]);

    MultiAdd(N-k, &z[k], &u[k], &v[k], -prod/2/r2);

    for (int l=k+1; l<N-1; l++)
    {
      for (int j=l+1; j<N; j++) A[l][j]=A[j][l]=A[j][l]-v[l]*z[j]-v[j]*z[l];
      A[l][l]=A[l][l]-2*v[l]*z[l];
    }

    A[N-1][N-1]=A[N-1][N-1]-2*v[N-1]*z[N-1];

    for (int j=k+2; j<N; j++) A[k][j]=A[j][k]=0;

    A[k][k+1]=A[k+1][k]=A[k+1][k]-v[k+1]*z[k];
  }
  delete[] u; delete[] v; delete[] z;
}//HouseHolder

/*
  function HouseHolder: house holder transformation T=Q'AQ or A=QTQ', where T is tridiagonal and Q is
  unitary i.e. QQ'=I.

  In: matrix A[N][N]
  Out: matrix tridiagonal matrix T[N][N] and unitary matrix Q[N][N]

  No return value. Identical A and T allowed. Content of matrix A is unchanged if A!=T.
*/
void HouseHolder(int N, double** T, double** Q, double** A)
{
  double g, alf, prod, r2, *v=new double[N], *u=new double[N], *z=new double[N];
  int sizeN=sizeof(double)*N;
  if (T!=A) for (int i=0; i<N; i++) memcpy(T[i], A[i], sizeN);
  for (int i=0; i<N; i++) {memset(Q[i], 0, sizeN); Q[i][i]=1;}
  for (int k=0; k<N-2; k++)
  {
    g=Inner(N-1-k, &T[k][k+1], &T[k][k+1]);

    if (T[k][k+1]==0) alf=sqrt(g);
    else alf=-sqrt(g)*T[k+1][k]/fabs(T[k+1][k]);

    r2=alf*(alf-T[k+1][k]);

    v[k]=0; v[k+1]=T[k][k+1]-alf;
    memcpy(&v[k+2], &T[k][k+2], sizeof(double)*(N-k-2));

    for (int j=k; j<N; j++) u[j]=Inner(N-1-k, &T[j][k+1], &v[k+1])/r2;

    prod=Inner(N-1-k, &v[k+1], &u[k+1]);

    MultiAdd(N-k, &z[k], &u[k], &v[k], -prod/2/r2);

    for (int l=k+1; l<N-1; l++)
    {
      for (int j=l+1; j<N; j++) T[l][j]=T[j][l]=T[j][l]-v[l]*z[j]-v[j]*z[l];
      T[l][l]=T[l][l]-2*v[l]*z[l];
    }

    T[N-1][N-1]=T[N-1][N-1]-2*v[N-1]*z[N-1];

    for (int j=k+2; j<N; j++) T[k][j]=T[j][k]=0;

    T[k][k+1]=T[k+1][k]=T[k+1][k]-v[k+1]*z[k];

    for (int i=0; i<N; i++)
      MultiAdd(N-k, &Q[i][k], &Q[i][k], &v[k], -Inner(N-k, &Q[i][k], &v[k])/r2);
  }
  delete[] u; delete[] v; delete[] z;
}//HouseHolder

/*
  function HouseHolder: nr version of householder method for transforming symmetric matrix A to QTQ',
  where T is tridiagonal and Q is orthonormal.

  In: matrix A[N][N]
  Out: A[N][N]: now containing Q
       d[N]: containing diagonal elements of T
       sd[N]: containing subdiagonal elements of T as sd[1:N-1].

  No return value.
*/
void HouseHolder(int N, double **A, double* d, double* sd)
{
  for (int i=N-1; i>=1; i--)
  {
    int l=i-1;
    double h=0, scale=0;
    if (l>0)
    {
      for (int k=0; k<=l; k++) scale+=fabs(A[i][k]);
      if (scale==0.0) sd[i]=A[i][l];
      else
      {
        for (int k=0; k<=l; k++)
        {
          A[i][k]/=scale;
          h+=A[i][k]*A[i][k];
        }
        double f=A[i][l];
        double g=(f>=0?-sqrt(h): sqrt(h));
        sd[i]=scale*g;
        h-=f*g;
        A[i][l]=f-g;
        f=0;
        for (int j=0; j<=l; j++)
        {
          A[j][i]=A[i][j]/h;
          g=0;
          for (int k=0; k<=j; k++) g+=A[j][k]*A[i][k];
          for (int k=j+1; k<=l; k++) g+=A[k][j]*A[i][k];
          sd[j]=g/h;
          f+=sd[j]*A[i][j];
        }
        double hh=f/(h+h);
        for (int j=0; j<=l; j++)
        {
          f=A[i][j];
          sd[j]=g=sd[j]-hh*f;
          for (int k=0; k<=j; k++) A[j][k]-=(f*sd[k]+g*A[i][k]);
        }
      }
    }
    else
      sd[i]=A[i][l];
    d[i]=h;
  }

  d[0]=sd[0]=0;

  for (int i=0; i<N; i++)
  {
    int l=i-1;
    if (d[i])
    {
      for (int j=0; j<=l; j++)
      {
        double g=0.0;
        for (int k=0; k<=l; k++) g+=A[i][k]*A[k][j];
        for (int k=0; k<=l; k++) A[k][j]-=g*A[k][i];
      }
    }
    d[i]=A[i][i];
    A[i][i]=1.0;
    for (int j=0; j<=l; j++) A[j][i]=A[i][j]=0.0;
  }
}//HouseHolder

//---------------------------------------------------------------------------
/*
  function Inner: inner product z=y'x

  In: vectors x[N], y[N]

  Returns inner product of x and y.
*/
double Inner(int N, double* x, double* y)
{
  double result=0;
  for (int i=0; i<N; i++) result+=x[i]*y[i];
  return result;
}//Inner
//complex versions
cdouble Inner(int N, double* x, cdouble* y)
{
  cdouble result=0;
  for (int i=0; i<N; i++) result+=x[i]**y[i];
  return result;
}//Inner
cdouble Inner(int N, cdouble* x, cdouble* y)
{
  cdouble result=0;
  for (int i=0; i<N; i++) result+=x[i]^y[i];
  return result;
}//Inner
cdouble Inner(int N, cfloat* x, cdouble* y)
{
  cdouble result=0;
  for (int i=0; i<N; i++) result+=x[i]^y[i];
  return result;
}//Inner
cfloat Inner(int N, cfloat* x, cfloat* y)
{
  cfloat result=0;
  for (int i=0; i<N; i++) result+=x[i]^y[i];
  return result;
}//Inner

/*
  function Inner: inner product z=tr(Y'X)

  In: matrices X[M][N], Y[M][N]

  Returns inner product of X and Y.
*/
double Inner(int M, int N, double** X, double** Y)
{
	double result=0;
  for (int m=0; m<M; m++) for (int n=0; n<N; n++) result+=X[m][n]*Y[m][n];
	return result;
}//Inner

//---------------------------------------------------------------------------
/*
  function JI: Jacobi interative algorithm for solving linear system Ax=b Breaks down if A[i][i]=0 for
  any i. Reorder A so that this does not happen.

  Jacobi iteration is x(k)=D^(-1)((L+U)x(k-1)+b), D is diagonal, L is lower triangular, U is upper
  triangular and A=L+D+U.

  In: matrix A[N][N], vector b[N], initial vector x0[N]
  Out: vector x0[N]

  Returns 0 if successful. Contents of matrix A and vector b are unchanged on return.
*/
int JI(int N, double* x0, double** A, double* b, double ep, int maxiter)
{
  double e, *x=new double[N];
  int k=0, sizeN=sizeof(double)*N;
  while (k<maxiter)
  {
    for (int i=0; i<N; i++)
    {
      x[i]=b[i]; for (int j=0; j<N; j++) if (j!=i) x[i]-=A[i][j]*x0[j]; x[i]=x[i]/A[i][i];
    }
    e=0; for (int j=0; j<N; j++) e+=fabs(x[j]-x0[j]); //inf-norm used here
    memcpy(x0, x, sizeN);
    if (e<ep) break;
    k++;
  }
  delete[] x;
  if (k>=maxiter) return 1;
  else return 0;
}//JI

//---------------------------------------------------------------------------
/*
  function LDL: LDL' decomposition A=LDL', where L is lower triangular and D is diagonal identical l and
  a allowed.

  The symmetric matrix A is positive definite iff A can be factorized as LDL', where L is lower
  triangular with ones on its diagonal and D is diagonal with positive diagonal entries.

  If a symmetric matrix A can be reduced by Gaussian elimination without row interchanges, then it can
  be factored into LDL', where L is lower triangular with ones on its diagonal and D is diagonal with
  non-zero diagonal entries.

  In: matrix A[N][N]
  Out: lower triangular matrix L[N][N], vector d[N] containing diagonal elements of D

  Returns 0 if successful. Content of matrix A is unchanged on return.
*/
int LDL(int N, double** L, double* d, double** A)
{
  double* v=new double[N];

  if (A[0][0]==0) {delete[] v; return 1;}
  d[0]=A[0][0]; for (int j=1; j<N; j++) L[j][0]=A[j][0]/d[0];
  for (int i=1; i<N; i++)
  {
    for (int j=0; j<i; j++) v[j]=L[i][j]*d[j];
    d[i]=A[i][i]; for (int j=0; j<i; j++) d[i]-=L[i][j]*v[j];
    if (d[i]==0) {delete[] v; return 1;}
    for (int j=i+1; j<N; j++)
    {
      L[j][i]=A[j][i]; for (int k=0; k<i; k++) L[j][i]-=L[j][k]*v[k]; L[j][i]/=d[i];
    }
  }
  delete[] v;

  for (int i=0; i<N; i++) {L[i][i]=1; memset(&L[i][i+1], 0, sizeof(double)*(N-1-i));}
  return 0;
}//LDL

//---------------------------------------------------------------------------
/*
  function LQ_GS: LQ decomposition using Gram-Schmidt method

  In: matrix A[M][N], M<=N
  Out: matrices L[M][M], Q[M][N]

  No return value.
*/
void LQ_GS(int M, int N, double** A, double** L, double** Q)
{
  double *u=new double[N];
  for (int m=0; m<M; m++)
  {
    memset(L[m], 0, sizeof(double)*M);
    memcpy(u, A[m], sizeof(double)*N);
    for (int k=0; k<m; k++)
    {
      double ip=0; for (int n=0; n<N; n++) ip+=Q[k][n]*u[n];
      for (int n=0; n<N; n++) u[n]-=ip*Q[k][n];
      L[m][k]=ip;
    }
    double iu=0; for (int n=0; n<N; n++) iu+=u[n]*u[n]; iu=sqrt(iu);
    L[m][m]=iu; iu=1.0/iu;
    for (int n=0; n<N; n++) Q[m][n]=u[n]*iu;
  }
  delete[] u;
}//LQ_GS

//---------------------------------------------------------------------------
/*
  function LSLinear2: 2-dtage LS solution of A[M][N]x[N][1]=y[M][1], M>=N. Use of this function requires
  the submatrix A[N][N] be invertible.

  In: matrix A[M][N], vector y[M], M>=N.
  Out: vector x[N].

  No return value. Contents of matrix A and vector y are unchanged on return.
*/
void LSLinear2(int M, int N, double* x, double** A, double* y)
{
  double** A1=Copy(N, N, 0, A);
  LU(N, x, A1, y);
  if (M>N)
  {
    double** B=&A[N];
    double* Del=MultiplyXy(M-N, N, B, x);
    MultiAdd(M-N, Del, Del, &y[N], -1);
    double** A2=MultiplyXtX(N, N, A);
    MultiplyXtX(N, M-N, A1, B);
    MultiAdd(N, N, A2, A2, A1, 1);
    double* b2=MultiplyXty(N, M-N, B, Del);
    double* dx=new double[N];
    GESCP(N, dx, A2, b2);
    MultiAdd(N, x, x, dx, -1);
    delete[] dx;
    delete[] Del;
    delete[] b2;
    DeAlloc2(A2);
  }
  DeAlloc2(A1);
}//LSLinear2

//---------------------------------------------------------------------------
/*
  function LU: LU decomposition A=LU, where L is lower triangular with diagonal entries 1 and U is upper
  triangular.

  LU is possible if A can be reduced by Gaussian elimination without row interchanges.

  In: matrix A[N][N]
  Out: matrices L[N][N] and U[N][N], subject to input values of L and U:
        if L euqals NULL, L is not returned
        if U equals NULL or A, U is returned in A, s.t. A is modified
        if L equals A, L is returned in A, s.t. A is modified
        if L equals U, L and U are returned in the same matrix
        when L and U are returned in the same matrix, diagonal of L (all 1) is not returned

  Returns 0 if successful.
*/
int LU(int N, double** L, double** U, double** A)
{
  double* diagl=new double[N];
  for (int i=0; i<N; i++) diagl[i]=1;

  int sizeN=sizeof(double)*N;
  if (U==0) U=A;
  if (U!=A) for (int i=0; i<N; i++) memcpy(U[i], A[i], sizeN);
  int result=LU_Direct(0, N, diagl, U);
  if (result==0)
  {
    if (L!=U)
    {
      if (L!=0) for (int i=0; i<N; i++) {memcpy(L[i], U[i], sizeof(double)*i); L[i][i]=1; memset(&L[i][i+1], 0, sizeof(double)*(N-i-1));}
      for (int i=1; i<N; i++) memset(U[i], 0, sizeof(double)*i);
    }
  }
  delete[] diagl;
  return result;
}//LU

/*
  function LU: Solving linear system Ax=y by LU factorization

  In: matrix A[N][N], vector y[N]
  Out: x[N]

  No return value. On return A contains its LU factorization (with pivoting, diag mode 1), y remains
  unchanged.
*/
void LU(int N, double* x, double** A, double* y, int* ind)
{
  int parity;
  bool allocind=!ind;
  if (allocind) ind=new int[N];
  LUCP(A, N, ind, parity, 1);
  for (int i=0; i<N; i++) x[i]=y[ind[i]];
  for (int i=0; i<N; i++)
  {
    for (int j=i+1; j<N; j++) x[j]=x[j]-x[i]*A[j][i];
  }
  for (int i=N-1; i>=0; i--)
  {
    x[i]/=A[i][i];
    for (int j=0; j<i; j++) x[j]=x[j]-x[i]*A[j][i];
  }
  if (allocind) delete[] ind;
}//LU

//---------------------------------------------------------------------------
/*
  LU_DiagL shows the original procedure for calculating A=LU in separate buffers substitute l and u by a
  gives the stand-still method LU_Direct().
*//*
void LU_DiagL(int N, double** l, double* diagl, double** u, double** a)
{
  l[0][0]=diagl[0]; u[0][0]=a[0][0]/l[0][0]; //here to signal failure if l[00]u[00]=0
  for (int j=1; j<N; j++) u[0][j]=a[0][j]/l[0][0], l[j][0]=a[j][0]/u[0][0];
  memset(&l[0][1], 0, sizeof(double)*(N-1));
  for (int i=1; i<N-1; i++)
  {
    l[i][i]=diagl[i];
    u[i][i]=a[i][i]; for (int k=0; k<i; k++) u[i][i]-=l[i][k]*u[k][i]; u[i][i]/=l[i][i]; //here to signal failure if l[ii]u[ii]=0
    for (int j=i+1; j<N; j++)
    {
      u[i][j]=a[i][j]; for (int k=0; k<i; k++) u[i][j]-=l[i][k]*u[k][j]; u[i][j]/=l[i][i];
      l[j][i]=a[j][i]; for (int k=0; k<i; k++) l[j][i]-=l[j][k]*u[k][i]; l[j][i]/=u[i][i];
    }
    memset(&l[i][i+1], 0, sizeof(double)*(N-1-i)), memset(u[i], 0, sizeof(double)*i);
  }
  l[N-1][N-1]=diagl[N-1];
  u[N-1][N-1]=a[N-1][N-1]; for (int k=0; k<N-1; k++) u[N-1][N-1]-=l[N-1][k]*u[k][N-1]; u[N-1][N-1]/=l[N-1][N-1];
  memset(u[N-1], 0, sizeof(double)*(N-1));
} //LU_DiagL*/

//---------------------------------------------------------------------------
/*
  function LU_Direct: LU factorization A=LU.

  In: matrix A[N][N], vector diag[N] specifying main diagonal of L or U, according to mode (0=LDiag,
        1=UDiag).
  Out: matrix A[N][N] now containing L and U.

  Returns 0 if successful.
*/
int LU_Direct(int mode, int N, double* diag, double** A)
{
  if (mode==0)
  {
    if (A[0][0]==0) return 1;
    A[0][0]=A[0][0]/diag[0];
    for (int j=1; j<N; j++) A[0][j]=A[0][j]/diag[0], A[j][0]=A[j][0]/A[0][0];
    for (int i=1; i<N-1; i++)
    {
      for (int k=0; k<i; k++) A[i][i]-=A[i][k]*A[k][i]; A[i][i]/=diag[i];
      if (A[i][i]==0) return 2;
      for (int j=i+1; j<N; j++)
      {
        for (int k=0; k<i; k++) A[i][j]-=A[i][k]*A[k][j]; A[i][j]/=diag[i];
        for (int k=0; k<i; k++) A[j][i]-=A[j][k]*A[k][i]; A[j][i]/=A[i][i];
      }
    }
    for (int k=0; k<N-1; k++) A[N-1][N-1]-=A[N-1][k]*A[k][N-1]; A[N-1][N-1]/=diag[N-1];
  }
  else if (mode==1)
  {
    A[0][0]=A[0][0]/diag[0];
    if (A[0][0]==0) return 1;
    for (int j=1; j<N; j++) A[0][j]=A[0][j]/A[0][0], A[j][0]=A[j][0]/diag[0];
    for (int i=1; i<N-1; i++)
    {
      for (int k=0; k<i; k++) A[i][i]-=A[i][k]*A[k][i]; A[i][i]/=diag[i];
      if (A[i][i]==0) return 2;
      for (int j=i+1; j<N; j++)
      {
        for (int k=0; k<i; k++) A[i][j]-=A[i][k]*A[k][j]; A[i][j]/=A[i][i];
        for (int k=0; k<i; k++) A[j][i]-=A[j][k]*A[k][i]; A[j][i]/=diag[i];
      }
    }
    for (int k=0; k<N-1; k++) A[N-1][N-1]-=A[N-1][k]*A[k][N-1]; A[N-1][N-1]/=diag[N-1];
  }
  return 0;
}//LU_Direct

//---------------------------------------------------------------------------
/*
  function LU_PD: LU factorization for pentadiagonal A=LU

  In: pentadiagonal matrix A[N][N] stored in a compact format, i.e. A[i][j]->b[i-j, j]
        the main diagonal is b[0][0]~b[0][N-1]
        the 1st upper subdiagonal is b[-1][1]~b[-1][N-1]
        the 2nd upper subdiagonal is b[-2][2]~b[-2][N-1]
        the 1st lower subdiagonal is b[1][0]~b[1][N-2]
        the 2nd lower subdiagonal is b[2][0]~b[2][N-3]

  Out: L[N][N] and U[N][N], main diagonal of L being all 1 (probably), stored in a compact format in
        b[-2:2][N].

  Returns 0 if successful.
*/
int LU_PD(int N, double** b)
{
  if (b[0][0]==0) return 1;
  b[1][0]/=b[0][0], b[2][0]/=b[0][0];

    //i=1, not to double b[*][i-2], b[-2][i]
    b[0][1]-=b[1][0]*b[-1][1];
    if (b[0][1]==0) return 2;
    b[-1][2]-=b[1][0]*b[-2][2];
    b[1][1]-=b[2][0]*b[-1][1];
    b[1][1]/=b[0][1];
    b[2][1]/=b[0][1];

  for (int i=2; i<N-2; i++)
  {
    b[0][i]-=b[2][i-2]*b[-2][i];
    b[0][i]-=b[1][i-1]*b[-1][i];
    if (b[0][i]==0) return 2;
    b[-1][i+1]-=b[1][i-1]*b[-2][i+1];
    b[1][i]-=b[2][i-1]*b[-1][i];
    b[1][i]/=b[0][i];
    b[2][i]/=b[0][i];
  }
    //i=N-2, not to tough b[2][i]
    b[0][N-2]-=b[2][N-4]*b[-2][N-2];
    b[0][N-2]-=b[1][N-3]*b[-1][N-2];
    if (b[0][N-2]==0) return 2;
    b[-1][N-1]-=b[1][N-3]*b[-2][N-1];
    b[1][N-2]-=b[2][N-3]*b[-1][N-2];
    b[1][N-2]/=b[0][N-2];

  b[0][N-1]-=b[2][N-3]*b[-2][N-1];
  b[0][N-1]-=b[1][N-2]*b[-1][N-1];
  return 0;
}//LU_PD

/*
  This old version is kept here as a reference.
*//*
int LU_PD(int N, double** b)
{
  if (b[0][0]==0) return 1;
  for (int j=1; j<3; j++) b[j][0]=b[j][0]/b[0][0];
  for (int i=1; i<N-1; i++)
  {
    for (int k=i-2; k<i; k++) b[0][i]-=b[i-k][k]*b[k-i][i];
    if (b[0][i]==0) return 2;
    for (int j=i+1; j<i+3; j++)
    {
      for (int k=j-2; k<i; k++) b[i-j][j]-=b[i-k][k]*b[k-j][j];
      for (int k=j-2; k<i; k++) b[j-i][i]-=b[j-k][k]*b[k-i][i];
      b[j-i][i]/=b[0][i];
    }
  }
  for (int k=N-3; k<N-1; k++) b[0][N-1]-=b[N-1-k][k]*b[k-N+1][N-1];
  return 0;
}//LU_PD*/

/*
  function LU_PD: solve pentadiagonal system Ax=c

  In: pentadiagonal matrix A[N][N] stored in a compact format in b[-2:2][N], vector c[N]
  Out: vector c now containing x.

  Returns 0 if successful. On return b is in the LU form.
*/
int LU_PD(int N, double** b, double* c)
{
  int result=LU_PD(N, b);
  if (result==0)
  {
    //L loop
    c[1]=c[1]-b[1][0]*c[0];
    for (int i=2; i<N; i++)
      c[i]=c[i]-b[1][i-1]*c[i-1]-b[2][i-2]*c[i-2];
    //U loop
    c[N-1]/=b[0][N-1];
    c[N-2]=(c[N-2]-b[-1][N-1]*c[N-1])/b[0][N-2];
    for (int i=N-3; i>=0; i--)
      c[i]=(c[i]-b[-1][i+1]*c[i+1]-b[-2][i+2]*c[i+2])/b[0][i];
  }
  return result;
}//LU_PD

//---------------------------------------------------------------------------
/*
  function LUCP: LU decomposition A=LU with column pivoting

  In: matrix A[N][N]
  Out: matrix A[N][N] now holding L and U by L_U[i][j]=A[ind[i]][j], where L_U
        hosts L and U according to mode:
        mode=0: L diag=abs(U diag), U diag as return
        mode=1: L diag=1, U diag as return
        mode=2: U diag=1, L diag as return

  Returns the determinant of A.
*/
double LUCP(double **A, int N, int *ind, int &parity, int mode)
{
  double det=1;
  parity=1;

  for (int i=0; i<N; i++) ind[i]=i;
  double vmax, *norm=new double[N]; //norm[n] is the maxima of row n
  for (int i=0; i<N; i++)
  {
    vmax=fabs(A[i][0]);
    double tmp;
    for (int j=1; j<N; j++) if ((tmp=fabs(A[i][j]))>vmax) vmax=tmp;
    if (vmax==0) { parity=0; goto deletenorm; } //det=0 at this point
    norm[i]=1/vmax;
  }

  int maxind;
  for (int j=0; j<N; j++)
  { //Column j
    for (int i=0; i<j; i++)
    {
      //row i, i<j
      double tmp=A[i][j];
      for (int k=0; k<i; k++) tmp-=A[i][k]*A[k][j];
      A[i][j]=tmp;
    }
    for (int i=j; i<N; i++)
    {
      //row i, i>=j
      double tmp=A[i][j]; for (int k=0; k<j; k++) tmp-=A[i][k]*A[k][j]; A[i][j]=tmp;
      double tmp2=norm[i]*fabs(tmp);
      if (i==j || tmp2>=vmax) maxind=i, vmax=tmp2;
    }
    if (vmax==0) { parity=0; goto deletenorm; } //pivot being zero
    if (j!=maxind)
    {
      //do column pivoting: switching rows
      for (int k=0; k<N; k++) { double tmp=A[maxind][k]; A[maxind][k]=A[j][k]; A[j][k]=tmp; }
      parity=-parity;
      norm[maxind]=norm[j];
    }
    int itmp=ind[j]; ind[j]=ind[maxind]; ind[maxind]=itmp;
    if (j!=N-1)
    {
      double den=1/A[j][j];
      for (int i=j+1; i<N; i++) A[i][j]*=den;
    }
    det*=A[j][j];
  } //Go back for the next column in the reduction.

  if (mode==0)
  {
    for (int i=0; i<N-1; i++)
    {
      double den=sqrt(fabs(A[i][i]));
      double iden=1/den;
      for (int j=i+1; j<N; j++) A[j][i]*=den, A[i][j]*=iden;
      A[i][i]*=iden;
    }
    A[N-1][N-1]/=sqrt(fabs(A[N-1][N-1]));
  }
  else if (mode==2)
  {
    for (int i=0; i<N-1; i++)
    {
      double den=A[i][i];
      double iden=1/den;
      for (int j=i+1; j<N; j++) A[j][i]*=den, A[i][j]*=iden;
    }
  }

deletenorm:
  delete[] norm;
  return det*parity;
}//LUCP

//---------------------------------------------------------------------------
/*
  function maxind: returns the index of the maximal value of data[from:(to-1)].

  In: vector data containing at least $to entries.
  Out: the index to the maximal entry of data[from:(to-1)]

  Returns the index to the maximal value.
*/
int maxind(double* data, int from, int to)
{
  int result=from;
  for (int i=from+1; i<to; i++) if (data[result]<data[i]) result=i;
  return result;
}//maxind

//---------------------------------------------------------------------------
/*
  macro Multiply_vect: matrix-vector multiplications

  Each expansion of this macro implements two functions named $MULTIPLY that do matrix-vector
  multiplication. Functions are named after their exact functions. For example, MultiplyXty() does
  multiplication of the transpose of matrix X with vector y, where postfix "t" attched to Y stands for
  transpose. Likewise, the postfix "c" stands for conjugate, and "h" stnads for Hermitian (conjugate
  transpose).

  Two dimension arguments are needed by each function. The first of the two is the number of entries to
  the output vector; the second of the two is the "other" dimension of the matrix multiplier.
*/
#define Multiply_vect(MULTIPLY, DbZ, DbX, DbY, xx, yy) \
  DbZ* MULTIPLY(int M, int N, DbZ* z, DbX* x, DbY* y, MList* List) \
  { \
    if (!z){z=new DbZ[M]; if (List) List->Add(z, 1);} \
    for (int m=0; m<M; m++){z[m]=0; for (int n=0; n<N; n++) z[m]+=xx*yy;} \
    return z; \
  } \
  DbZ* MULTIPLY(int M, int N, DbX* x, DbY* y, MList* List) \
  { \
    DbZ* z=new DbZ[M]; if (List) List->Add(z, 1); \
    for (int m=0; m<M; m++){z[m]=0; for (int n=0; n<N; n++) z[m]+=xx*yy;} \
    return z; \
  }
//function MultiplyXy: z[M]=x[M][N]y[N], identical z and y NOT ALLOWED
Multiply_vect(MultiplyXy, double, double*, double, x[m][n], y[n])
Multiply_vect(MultiplyXy, cdouble, cdouble*, cdouble, x[m][n], y[n])
Multiply_vect(MultiplyXy, cdouble, double*, cdouble, x[m][n], y[n])
//function MultiplyxY: z[M]=x[N]y[N][M], identical z and x NOT ALLOWED
Multiply_vect(MultiplyxY, double, double, double*, x[n], y[n][m])
Multiply_vect(MultiplyxY, cdouble, cdouble, cdouble*, x[n], y[n][m])
//function MultiplyXty: z[M]=xt[M][N]y[N]
Multiply_vect(MultiplyXty, double, double*, double, x[n][m], y[n])
Multiply_vect(MultiplyXty, cdouble, cdouble*, cdouble, x[n][m], y[n])
//function MultiplyXhy: z[M]=xh[M][N]y[N]
Multiply_vect(MultiplyXhy, cdouble, cdouble*, cdouble, *x[n][m], y[n])
//function MultiplyxYt: z[M]=x[N]yt[N][M]
Multiply_vect(MultiplyxYt, double, double, double*, x[n], y[m][n])
//function MultiplyXcy: z[M]=(x*)[M][N]y[N]
Multiply_vect(MultiplyXcy, cdouble, cdouble*, cdouble, *x[m][n], y[n])
Multiply_vect(MultiplyXcy, cdouble, cdouble*, cfloat, *x[m][n], y[n])

//---------------------------------------------------------------------------
/*
  function Norm1: L-1 norm of a square matrix A

  In: matrix A[N][N]
  Out: its L-1 norm

  Returns the L-1 norm.
*/
double Norm1(int N, double** A)
{
  double result=0, norm;
  for (int i=0; i<N; i++)
  {
    norm=0; for (int j=0; j<N; j++) norm+=fabs(A[i][j]);
    if (result<norm) result=norm;
  }
  return result;
}//Norm1

//---------------------------------------------------------------------------
/*
  function QL: QL method for solving tridiagonal symmetric matrix eigenvalue problem.

  In: A[N][N]: tridiagonal symmetric matrix stored in d[N] and sd[] arranged so that d[0:n-1] contains
        the diagonal elements of A, sd[0]=0, sd[1:n-1] contains the subdiagonal elements of A.
      z[N][N]: pre-transform matrix z[N][N] compatible with HouseHolder() routine.
  Out: d[N]: the eigenvalues of A
       z[N][N] the eigenvectors of A.

  Returns 0 if successful. sd[] should have storage for at least N+1 entries.
*/
int QL(int N, double* d, double* sd, double** z)
{
  const int maxiter=30;
  for (int i=1; i<N; i++) sd[i-1]=sd[i];
  sd[N]=0.0;
  for (int l=0; l<N; l++)
  {
    int iter=0, m;
    do
    {
      for (m=l; m<N-1; m++)
      {
        double dd=fabs(d[m])+fabs(d[m+1]);
        if (fabs(sd[m])+dd==dd) break;
      }
      if (m!=l)
      {
        iter++;
        if (iter>=maxiter) return 1;
        double g=(d[l+1]-d[l])/(2*sd[l]);
        double r=sqrt(g*g+1);
        g=d[m]-d[l]+sd[l]/(g+(g>=0?r:-r));
        double s=1, c=1, p=0;
        int i;
        for (i=m-1; i>=l; i--)
        {
          double f=s*sd[i], b=c*sd[i];
          sd[i+1]=(r=sqrt(f*f+g*g));
          if (r==0)
          {
            d[i+1]-=p;
            sd[m]=0;
            break;
          }
          s=f/r, c=g/r;
          g=d[i+1]-p;
          r=(d[i]-g)*s+2.0*c*b;
          p=s*r;
          d[i+1]=g+p;
          g=c*r-b;
          for (int k=0; k<N; k++)
          {
            f=z[k][i+1];
            z[k][i+1]=s*z[k][i]+c*f;
            z[k][i]=c*z[k][i]-s*f;
          }
        }
        if (r==0 && i>=l) continue;
        d[l]-=p;
        sd[l]=g;
        sd[m]=0.0;
      }
    }
    while (m!=l);
  }
  return 0;
}//QL

//---------------------------------------------------------------------------
/*
  function QR: nr version of QR method for solving upper Hessenberg system A. This is compatible with
  Hessenb method.

  In: matrix A[N][N]
  Out: vector ev[N] of eigenvalues

  Returns 0 on success. Content of matrix A is destroyed on return.
*/
int QR(int N, double **A, cdouble* ev)
{
  int n=N, m, l, k, j, iter, i, mmin, maxiter=30;
  double **a=A, z, y, x, w, v, u, t=0, s, r, q, p, a1=0;
  for (i=0; i<n; i++) for (j=i-1>0?i-1:0; j<n; j++) a1+=fabs(a[i][j]);
  n--;
  while (n>=0)
  {
    iter=0;
    do
    {
      for (l=n; l>0; l--)
      {
        s=fabs(a[l-1][l-1])+fabs(a[l][l]);
        if (s==0) s=a1;
        if (fabs(a[l][l-1])+s==s) {a[l][l-1]=0; break;}
      }
      x=a[n][n];
      if (l==n) {ev[n].x=x+t; ev[n--].y=0;}
      else
      {
        y=a[n-1][n-1], w=a[n][n-1]*a[n-1][n];
        if (l==(n-1))
        {
          p=0.5*(y-x);
          q=p*p+w;
          z=sqrt(fabs(q));
          x+=t;
          if (q>=0)
          {
            z=p+(p>=0?z:-z);
            ev[n-1].x=ev[n].x=x+z;
            if (z) ev[n].x=x-w/z;
            ev[n-1].y=ev[n].y=0;
          }
          else
          {
            ev[n-1].x=ev[n].x=x+p;
            ev[n].y=z; ev[n-1].y=-z;
          }
          n-=2;
        }
        else
        {
          if (iter>=maxiter) return 1;
          if (iter%10==9)
          {
            t+=x;
            for (i=0; i<=n; i++) a[i][i]-=x;
            s=fabs(a[n][n-1])+fabs(a[n-1][n-2]);
            y=x=0.75*s;
            w=-0.4375*s*s;
          }
          iter++;
          for (m=n-2; m>=l; m--)
          {
            z=a[m][m];
            r=x-z; s=y-z;
            p=(r*s-w)/a[m+1][m]+a[m][m+1]; q=a[m+1][m+1]-z-r-s; r=a[m+2][m+1];
            s=fabs(p)+fabs(q)+fabs(r);
            p/=s; q/=s; r/=s;
            if (m==l) break;
            u=fabs(a[m][m-1])*(fabs(q)+fabs(r));
            v=fabs(p)*(fabs(a[m-1][m-1])+fabs(z)+fabs(a[m+1][m+1]));
            if (u+v==v) break;
          }
          for (i=m+2; i<=n; i++)
          {
            a[i][i-2]=0;
            if (i!=m+2) a[i][i-3]=0;
          }
          for (k=m; k<=n-1; k++)
          {
            if (k!=m)
            {
              p=a[k][k-1];
              q=a[k+1][k-1];
              r=0;
              if (k!=n-1) r=a[k+2][k-1];
              x=fabs(p)+fabs(q)+fabs(r);
              if (x!=0) p/=x, q/=x, r/=x;
            }
            if (p>=0) s=sqrt(p*p+q*q+r*r);
            else s=-sqrt(p*p+q*q+r*r);
            if (s!=0)
            {
              if (k==m)
              {
                if (l!=m) a[k][k-1]=-a[k][k-1];
              }
              else a[k][k-1]=-s*x;
              p+=s;
              x=p/s; y=q/s; z=r/s; q/=p; r/=p;
              for (j=k; j<=n; j++)
              {
                p=a[k][j]+q*a[k+1][j];
                if (k!=n-1)
                {
                  p+=r*a[k+2][j];
                  a[k+2][j]-=p*z;
                }
                a[k+1][j]-=p*y; a[k][j]-=p*x;
              }
              mmin=n<k+3?n:k+3;
              for (i=l; i<=mmin; i++)
              {
                p=x*a[i][k]+y*a[i][k+1];
                if (k!=(n-1))
                {
                  p+=z*a[i][k+2];
                  a[i][k+2]-=p*r;
                }
                a[i][k+1]-=p*q; a[i][k]-=p;
              }
            }
          }
        }
      }
    } while (n>l+1);
  }
  return 0;
}//QR

/*
  function QR_GS: QR decomposition A=QR using Gram-Schmidt method

  In: matrix A[M][N], M>=N
  Out: Q[M][N], R[N][N]

  No return value.
*/
void QR_GS(int M, int N, double** A, double** Q, double** R)
{
  double *u=new double[M];
  for (int n=0; n<N; n++)
  {
    memset(R[n], 0, sizeof(double)*N);
    for (int m=0; m<M; m++) u[m]=A[m][n];
    for (int k=0; k<n; k++)
    {
      double ip=0; for (int m=0; m<M; m++) ip+=u[m]*Q[m][k];
      for (int m=0; m<M; m++) u[m]-=ip*Q[m][k];
      R[k][n]=ip;
    }
    double iu=0; for (int m=0; m<M; m++) iu+=u[m]*u[m]; iu=sqrt(iu);
    R[n][n]=iu;
    iu=1.0/iu; for (int m=0; m<M; m++) Q[m][n]=u[m]*iu;
  }
  delete[] u;
}//QR_GS

/*
  function QR_householder: QR decomposition using householder transform

  In: A[M][N], M>=N
  Out: Q[M][M], R[M][N]

  No return value.
*/
void QR_householder(int M, int N, double** A, double** Q, double** R)
{
  double *u=new double[M*3], *ur=&u[M], *qu=&u[M*2];
  for (int m=0; m<M; m++)
  {
    memcpy(R[m], A[m], sizeof(double)*N);
    memset(Q[m], 0, sizeof(double)*M); Q[m][m]=1;
  }
  for (int n=0; n<N; n++)
  {
    double alf=0; for (int m=n; m<M; m++) alf+=R[m][n]*R[m][n]; alf=sqrt(alf);
    if (R[n][n]>0) alf=-alf;
    for (int m=n; m<M; m++) u[m]=R[m][n]; u[n]=u[n]-alf;
    double iu2=0; for (int m=n; m<M; m++) iu2+=u[m]*u[m]; iu2=2.0/iu2;
    for (int m=n; m<N; m++)
    {
      ur[m]=0; for (int k=n; k<M; k++) ur[m]+=u[k]*R[k][m];
    }
    for (int m=0; m<M; m++)
    {
      qu[m]=0; for (int k=n; k<M; k++) qu[m]+=Q[m][k]*u[k];
    }
    for (int m=n; m<M; m++) u[m]=u[m]*iu2;
    for (int m=n; m<M; m++) for (int k=n; k<N; k++) R[m][k]-=u[m]*ur[k];
    for (int m=0; m<M; m++) for (int k=n; k<M; k++) Q[m][k]-=qu[m]*u[k];
  }
  delete[] u;
}//QR_householder

//---------------------------------------------------------------------------
/*
  function QU: Unitary decomposition A=QU, where Q is unitary and U is upper triangular

  In: matrix A[N][N]
  Out: matrices Q[N][N], A[n][n] now containing U

  No return value.
*/
void QU(int N, double** Q, double** A)
{
  int sizeN=sizeof(double)*N;
  for (int i=0; i<N; i++) {memset(Q[i], 0, sizeN); Q[i][i]=1;}

  double m, s, c, *tmpi=new double[N], *tmpj=new double[N];
  for (int i=1; i<N; i++) for (int j=0; j<i; j++)
    if (A[i][j]!=0)
    {
      m=sqrt(A[j][j]*A[j][j]+A[i][j]*A[i][j]);
      s=A[i][j]/m;
      c=A[j][j]/m;
      for (int k=0; k<N; k++) tmpi[k]=-s*A[j][k]+c*A[i][k], tmpj[k]=c*A[j][k]+s*A[i][k];
      memcpy(A[i], tmpi, sizeN), memcpy(A[j], tmpj, sizeN);
      for (int k=0; k<N; k++) tmpi[k]=-s*Q[j][k]+c*Q[i][k], tmpj[k]=c*Q[j][k]+s*Q[i][k];
      memcpy(Q[i], tmpi, sizeN), memcpy(Q[j], tmpj, sizeN);
    }
  delete[] tmpi; delete[] tmpj;
  transpose(N, Q);
}//QU

//---------------------------------------------------------------------------
/*
  function Real: extracts the real part of matrix X

  In: matrix x[M][N];
  Out: matrix z[M][N]

  Returns pointer to z. z is created anew if z=0 is specified on start.
*/
double** Real(int M, int N, double** z, cdouble** x, MList* List)
{
  if (!z){Allocate2(double, M, N, z); if (List) List->Add(z, 2);}
  for (int m=0; m<M; m++) for (int n=0; n<N; n++) z[m][n]=x[m][n].x;
  return z;
}//Real
double** Real(int M, int N, cdouble** x, MList* List){return Real(M, N, 0, x, List);}

//---------------------------------------------------------------------------
/*
  function Roots: finds the roots of a polynomial. x^N+p[N-1]x^(N-1)+p[N-2]x^(N-2)...+p[0]

  In: vector p[N] of polynomial coefficients.
  Out: vector r[N] of roots.

  Returns 0 if successful.
*/
int Roots(int N, double* p, cdouble* r)
{
  double** A=new double*[N]; A[0]=new double[N*N]; for (int i=1; i<N; i++) A[i]=&A[0][i*N];
  for (int i=0; i<N; i++) A[0][i]=-p[N-1-i];
  if (N>1) memset(A[1], 0, sizeof(double)*N*(N-1));
  for (int i=1; i<N; i++) A[i][i-1]=1;
  BalanceSim(N, A);
  double result=QR(N, A, r);
  delete[] A[0]; delete[] A;
  return result;
}//Roots
//real implementation
int Roots(int N, double* p, double* rr, double* ri)
{
  cdouble* r=new cdouble[N];
  int result=Roots(N, p, r);
  for (int n=0; n<N; n++) rr[n]=r[n].x, ri[n]=r[n].y;
  delete[] r;
  return result;
}//Roots

//---------------------------------------------------------------------------
/*
  function SorI: Sor iteration algorithm for solving linear system Ax=b.

  Sor method is an extension of the Gaussian-Siedel method, with the latter equivalent to the former
  with w set to 1. The Sor iteration is given by x(k)=(D-wL)^(-1)(((1-w)D+wU)x(k-1)+wb), where 0<w<2, D
  is diagonal, L is lower triangular, U is upper triangular and A=L+D+U. Sor method converges if A is
  positive definite.

  In: matrix A[N][N], vector b[N], initial vector x0[N]
  Out: vector x0[N]

  Returns 0 if successful. Contents of matrix A and vector b are unchanged on return.
*/
int SorI(int N, double* x0, double** a, double* b, double w, double ep, int maxiter)
{
  double e, v=1-w, *x=new double[N];
  int k=0, sizeN=sizeof(double)*N;
  while (k<maxiter)
  {
    for (int i=0; i<N; i++)
    {
      x[i]=b[i];
      for (int j=0; j<i; j++) x[i]-=a[i][j]*x[j];
      for (int j=i+1; j<N; j++) x[i]-=a[i][j]*x0[j];
      x[i]=v*x0[i]+w*x[i]/a[i][i];
    }
    e=0; for (int j=0; j<N; j++) e+=fabs(x[j]-x0[j]);
    memcpy(x0, x, sizeN);
    if (e<ep) break;
    k++;
  }
  delete[] x;
  if (k>=maxiter) return 1;
  return 0;
}//SorI

//---------------------------------------------------------------------------
//Submatrix routines

/*
  function SetSubMatrix: copy matrix x[Y][X] into matrix z at (Y1, X1).

  In: matrix x[Y][X], matrix z with dimensions no less than [Y+Y1][X+X1]
  Out: matrix z, updated.

  No return value.
*/
void SetSubMatrix(double** z, double** x, int Y1, int Y, int X1, int X)
{
  for (int y=0; y<Y; y++) memcpy(&z[Y1+y][X1], x[y], sizeof(double)*X);
}//SetSubMatrix
//complex version
void SetSubMatrix(cdouble** z, cdouble** x, int Y1, int Y, int X1, int X)
{
  for (int y=0; y<Y; y++) memcpy(&z[Y1+y][X1], x[y], sizeof(cdouble)*X);
}//SetSubMatrix

/*
  function SubMatrix: extract a submatrix of x at (Y1, X1) to z[Y][X].

  In: matrix x of dimensions no less than [Y+Y1][X+X1]
  Out: matrix z[Y][X].

  Returns pointer to z. z is created anew if z=0 is specifid on start.
*/
cdouble** SubMatrix(cdouble** z, cdouble** x, int Y1, int Y, int X1, int X, MList* List)
{
  if (!z) {Allocate2(cdouble, Y, X, z); if (List) List->Add(z, 2);}
  for (int y=0; y<Y; y++) memcpy(z[y], &x[Y1+y][X1], sizeof(cdouble)*X);
  return z;
}//SetSubMatrix
//wrapper function
cdouble** SubMatrix(cdouble** x, int Y1, int Y, int X1, int X, MList* List)
{
  return SubMatrix(0, x, Y1, Y, X1, X, List);
}//SetSubMatrix

/*
  function SubVector: extract a subvector of x at X1 to z[X].

  In: vector x no shorter than X+X1.
  Out: vector z[X].

  Returns pointer to z. z is created anew if z=0 is specifid on start.
*/
cdouble* SubVector(cdouble* z, cdouble* x, int X1, int X, MList* List)
{
  if (!z){z=new cdouble[X]; if (List) List->Add(z, 1);}
  memcpy(z, &x[X1], sizeof(cdouble)*X);
  return z;
}//SubVector
//wrapper function
cdouble* SubVector(cdouble* x, int X1, int X, MList* List)
{
  return SubVector(0, x, X1, X, List);
}//SubVector

//---------------------------------------------------------------------------
/*
  function transpose: matrix transpose: A'->A

  In: matrix a[N][N]
  Out: matrix a[N][N] after transpose

  No return value.
*/
void transpose(int N, double** a)
{
  double tmp;
  for (int i=1; i<N; i++) for (int j=0; j<i; j++) {tmp=a[i][j]; a[i][j]=a[j][i]; a[j][i]=tmp;}
}//transpose
//complex version
void transpose(int N, cdouble** a)
{
  cdouble tmp;
  for (int i=1; i<N; i++) for (int j=0; j<i; j++) {tmp=a[i][j]; a[i][j]=a[j][i]; a[j][i]=tmp;}
}//transpose

/*
  function transpose: matrix transpose: A'->Z

  In: matrix a[M][N]
  Out: matrix z[N][M]

  Returns pointer to z. z is created anew if z=0 is specifid on start.
*/
double** transpose(int N, int M, double** ta, double** a, MList* List)
{
  if (!ta) {Allocate2(double, N, M, ta);  if (List) List->Add(ta, 2);}
  for (int n=0; n<N; n++) for (int m=0; m<M; m++) ta[n][m]=a[m][n];
  return ta;
}//transpose
//wrapper function
double** transpose(int N, int M, double** a, MList* List)
{
  return transpose(N, M, 0, a, List);
}//transpose

//---------------------------------------------------------------------------
/*
  function Unitary: given x & y s.t. |x|=|y|, find unitary matrix P s.t. Px=y. P is given in closed form
  as I-(x-y)(x-y)'/(x-y)'x

  In: vectors x[N] and y[N]
  Out: matrix P[N][N]

  Returns pointer to P. P is created anew if P=0 is specified on start.
*/
double** Unitary(int N, double** P, double* x, double* y, MList* List)
{
  if (!P) {Allocate2(double, N, N, P); if (List) List->Add(P, 2);}
  int sizeN=sizeof(double)*N;
  for (int i=0; i<N; i++) {memset(P[i], 0, sizeN); P[i][i]=1;}

  double* w=MultiAdd(N, x, y, -1.0); //w=x-y
  double m=Inner(N, x, w); //m=(x-y)'x
  if (m!=0)
  {
    m=1.0/m; //m=1/(x-y)'x
    double* mw=Multiply(N, w, m);
    for (int i=0; i<N; i++) for (int j=0; j<N; j++) P[i][j]=P[i][j]-mw[i]*w[j];
    delete[] mw;
  }
  delete[] w;
  return P;
}//Unitary
//complex version
cdouble** Unitary(int N, cdouble** P, cdouble* x, cdouble* y, MList* List)
{
  if (!P) {Allocate2(cdouble, N, N, P);}
  int sizeN=sizeof(cdouble)*N;
  for (int i=0; i<N; i++) {memset(P[i], 0, sizeN); P[i][i]=1;}

  cdouble *w=MultiAdd(N, x, y, -1);
  cdouble m=Inner(N, x, w);
  if (m!=0)
  {
    m=m.cinv();
    cdouble *mw=Multiply(N, w, m);
    for (int i=0; i<N; i++) for (int j=0; j<N; j++) P[i][j]=P[i][j]-(mw[i]^w[j]),
    delete[] mw;
  }
  delete[] w;
  if (List) List->Add(P, 2);
  return P;
}//Unitary
//wrapper functions
double** Unitary(int N, double* x, double* y, MList* List){return Unitary(N, 0, x, y, List);}
cdouble** Unitary(int N, cdouble* x, cdouble* y, MList* List){return Unitary(N, 0, x, y, List);}