view main.cpp @ 10:c6528c38b23c

a few minor fixes
author Wen X <xue.wen@elec.qmul.ac.uk>
date Thu, 28 Jul 2011 10:36:57 +0100
parents 91301b3d02c5
children 977f541d6683
line wrap: on
line source
/*
  Sample program: analysis of harmonic sinusoid in low-noise monophonic context.

  Syntax:
  (executable path) filename <setting=value> <setting=value> ... <setting=value>

  filename: path input wave form audio file, 16-bit PCM
  setting: optional algorithmic parameters; search "stricmp" in the source code for a complete list
  value: non-default values given to the algorithmic parameters

  Output:
  a *.evt file containing RIFF chunks, each starting with id "EVT\0" and hosting a harmonic sinusoid.

  The "EVT\0" chunk body contains a header ("HDR\0") chunk followed by atom ("ATM\0") chunks.

  The "HDR\0" chunk body contains 4 __int32 values: channel index, number of partials, number of
  measurement points (frames), and a tag.

  The "ATM\0" chunk body contains an atom structure (see hs.h).

*/

#include <QtCore/QCoreApplication>
#include "arrayalloc.h"
#include "hs.h"
#include "matrix.h"
#include "quickspec.h"
#include "procedures.h"
#include "vibrato.h"
#include <string.h>
#include <stdio.h>
#include <conio.h>

//structure hosting wave header data
struct wavehdr
{
  __int16 fmttag;
  __int16 channels;
  __int32 samplespersec;
  __int32 bytespersec;
  __int16 blockalign;
  __int16 bitspersample;
};

//tells if a wave header contains consistent data
bool isvalidwavehdr(wavehdr* hdr)
{
  if (hdr->fmttag!=1) return false;
  if (hdr->bitspersample!=8 && hdr->bitspersample!=16) return false;
  if (hdr->channels*hdr->bitspersample!=hdr->blockalign*8) return false;
  if (hdr->samplespersec*hdr->blockalign!=hdr->bytespersec) return false;
  return true;
}

//structure hosting a waveform audio
struct waveaudio
{
  union
  {
    wavehdr hdr;
    struct
    {
      __int16 fmttag;
      __int16 channels;
      __int32 samplespersec;
      __int32 bytespersec;
      __int16 blockalign;
      __int16 bitspersample;
    };
  };
  int length;
  union
  {
    unsigned char* data;
    unsigned char* data8;
    __int16* data16;
  };
  waveaudio(){memset(this, 0, sizeof(waveaudio));}
  ~waveaudio(){delete[] data;}
};

void freewaveaudio(waveaudio* wa)
{
  delete[] wa->data;
  memset(wa, 0, sizeof(waveaudio));
}

//error messages for reading waveform audio file
enum err_wavefile
{
  err_success,
  err_RIFF_hdr,
  err_wave_hdr,
  err_data_hdr,
  err_data_chunk,
  err_io
};

//reads wave header from file stream
int loadwavehdr(wavehdr* hdr, FILE* fp)
{
  int rc, hdrlen;
  char s[5];

  rc=fread(s, 1, 4, fp); s[4]=0;
  if (rc<4) return err_wave_hdr;
  if (strcmp(s, "WAVE")) return err_wave_hdr;

  rc=fread(s, 1, 4, fp); s[4]=0;
  if (rc<4) return err_wave_hdr;
  if (strcmp(s, "fmt ")) return err_wave_hdr;

  rc=fread(&hdrlen, 1, 4, fp);
  if (rc<4) return err_wave_hdr;
  if (hdrlen<16) return err_wave_hdr;

  rc=fread(hdr, 1, sizeof(wavehdr), fp);
  if (rc<sizeof(wavehdr)) return err_wave_hdr;
  if (!isvalidwavehdr(hdr)) return err_wave_hdr;

  if (hdrlen>16)
  {
    rc=fseek(fp, hdrlen-16, SEEK_CUR);
    if (rc!=0) return err_wave_hdr;
  }

  return err_success;
}

//reads waveform audio, including header, from a file stream
int loadwaveaudio(waveaudio* wa, FILE* fp)
{
  int rc, mainlen;
  char s[5];
  wavehdr hdr;

  rc=fread(s, 1, 4, fp); s[4]=0;
  if (rc<4) return err_RIFF_hdr;
  if (strcmp(s, "RIFF")) return err_RIFF_hdr;

  rc=fread(&mainlen, 1, 4, fp);
  if (rc<4) return err_RIFF_hdr;

  rc=loadwavehdr(&hdr, fp);
  if (rc!=err_success) return rc;

  rc=fread(s, 1, 4, fp); s[4]=0;
  if (rc<4) return err_data_hdr;
  if (strcmp(s, "data")) return err_data_hdr;

  rc=fread(&mainlen, 1, 4, fp);
  if (rc<4) return err_data_hdr;

  if (mainlen<=0) return err_data_hdr;

  unsigned char* data=new unsigned char[mainlen];
  rc=fread(data, 1, mainlen, fp);
  if (rc<mainlen)
  {
    delete[] data;
    return err_data_chunk;
  }

  wa->hdr=hdr;
  wa->length=rc/hdr.blockalign;
  delete[] wa->data;
  wa->data=data;

  return err_success;
}

//reads waveform audio, including header, from a file
int loadwavefile(waveaudio* wa, char* filename)
{
  FILE* fp;
  if ((fp=fopen(filename, "rb"))==NULL) return err_io;
  int result=loadwaveaudio(wa, fp);
  fclose(fp);
  return result;
}

//returns new file path with the specific extension replacing the original one
char* ChangeFileExt(char* FileName, char* Ext)
{
  char* oldext=strrchr(FileName, '.');
  int namelen=strlen(FileName)-strlen(oldext);
  char* dest=new char[namelen+strlen(Ext)+1];
  memcpy(dest, FileName, namelen); dest[namelen]=0;
  strcat(dest, Ext);
  return dest;
}

//ACPower for __int16 data input
double ACPower(__int16* data, int Count)
{
  if (Count<=0) return 0;
  double power=0, avg=0, tmp;
  for (int i=0; i<Count; i++)
  {
    tmp=*(data++);
    power+=tmp*tmp;
    avg+=tmp;
  }
  power=(power-avg*avg/Count)/Count;
  return power;
}//ACPower

//double QIE(double* y, double& x){double a=0.5*(y[1]+y[-1])-y[0], b=0.5*(y[1]-y[-1]); x=-0.5*b/a; return y[0]-0.25*b*b/a;}

//correlation coefficient
double InnerC(int N, __int16* x, __int16* y)
{
  double inner=0, inn1=0, inn2=0;
  for (int n=0; n<N; n++) inner+=1.0*x[n]*y[n], inn1+=1.0*x[n]*x[n], inn2+=1.0*y[n]*y[n];
  double inn=sqrt(inn1*inn2);
  return (inn<=0)?0:inner/inn;
}

//monophonic pitch estimation by autocorrelation
double pitchautocor(__int16* data, int wid, double minf0bin, double maxf0bin, double& hsr)
{
  int hwid=wid/2, minT=wid/maxf0bin, maxT=wid/minf0bin;
  double ene=InnerC(wid, data, data);
  if (ene<=0){hsr=0; return 0;}
  double* autocor=new double[hwid];
  for (int i=1; i<maxT+2; i++)
  {
    autocor[i]=InnerC(wid-i, data, &data[i]);
  }

  int prd=0, m=minT;
  while(m<=maxT)
  {
    while (m<=maxT && (autocor[m]<0 || autocor[m]<autocor[m-1] || autocor[m]<autocor[m+1])) m++;
    if (m<=maxT)
    {
      int mm=ceil(m*0.6667), mp=floor(m*1.3333), cont=0;
      //for (int im=mm; im<=mp; im++) if (m!=im && autocor[im]>=autocor[m]) {cont=1; break;}
      if (cont==0)
      {
        if (prd==0 || autocor[m]>autocor[prd]*1.05) prd=m;
      }
      m++;
    }
  }

  double pitchbin=0;
  if (prd>=minT && prd<=maxT)
  {
    double mshift; hsr=QIE(&autocor[prd], mshift); pitchbin=wid/(prd+mshift);
  }
  else{hsr=0;}
  delete[] autocor;
  return pitchbin;
}

//main function
int main(int argc, char* argv[])
{
  //read audio file
  if (argc<2){printf("Please specify input wave file."); getch(); return 0;}
  printf("Loading wave file %s... ", argv[1]);
  waveaudio* wa=new waveaudio;
  int waverr=loadwavefile(wa, argv[1]); printf("[%d]\n", waverr);
  if (waverr!=err_success){printf("Aborted: error loading wave file."); getch(); return 0;}
  if (wa->bitspersample!=16){printf("Aborted: this program accepts 16-bit pcm only."); getch(); return 0;}

  int length=wa->length, sps=wa->samplespersec;
  __int16* data16=wa->data16;
  if (wa->channels>1)
  {
    printf("Extracting channel 0... ");
    for (int i=1; i<length; i++) data16[i]=data16[i*wa->channels];
    printf("[0]\n");
  }
  wa->data=0;
  delete wa;

  //default settings
  int maxhscount=100;
  float wid_s=0.02, offstwidratio=0.5;
  float dur_s=0.5;
  float intv_s=0.05; //duration, in seconds, to scan for peaks
  float minf0=55, maxf0=3520;
  WindowType wintype=wtHann;

  NMSettings settings; memset(&settings, 0, sizeof(NMSettings));
  settings.hB=3;          //spectral truncation half width
  settings.maxp=50;         //maximal number of partials
  settings.maxB=0.001;      //stiffness coefficient upper bound
  settings.epf=1e-4;        //frequency estimation error tolerance for LSE estimation
  settings.epf0=1;          //input frequency error bound for harmonic grouping
  settings.delm=1.1;        //frequency error bound for harmonic grouping
  settings.delp=1.1;        //pitch jump upper bound

  double mina=0.5;

  //user settings through commandline arguments
  for (int i=2; i<argc; i++)
  {
    char* s1=strchr(argv[i], '=');
    if (s1)
    {
      s1[0]=0; s1++;
      if (!stricmp(argv[i], "hB")){settings.hB=atof(s1); continue;}
      if (!stricmp(argv[i], "maxp")){settings.maxp=atoi(s1); continue;}
      if (!stricmp(argv[i], "maxB")){settings.maxB=atof(s1); continue;}
      if (!stricmp(argv[i], "epf")){settings.epf=atof(s1); continue;}
      if (!stricmp(argv[i], "epf0")){settings.epf0=atof(s1); continue;}
      if (!stricmp(argv[i], "delm")){settings.delm=atof(s1); continue;}
      if (!stricmp(argv[i], "delp")){settings.delp=atof(s1); continue;}
      if (!stricmp(argv[i], "minf0")){minf0=atof(s1); continue;}
      if (!stricmp(argv[i], "maxf0")){maxf0=atof(s1); continue;}
      if (!stricmp(argv[i], "wid_s")){wid_s=atof(s1); continue;}
      if (!stricmp(argv[i], "offstwidratio")){offstwidratio=atof(s1); continue;}
      if (!stricmp(argv[i], "dur_s")){dur_s=atof(s1); continue;}
      if (!stricmp(argv[i], "intv_s")){intv_s=atof(s1); continue;}
      if (!stricmp(argv[i], "maxhscount")){maxhscount=atoi(s1); continue;}
      if (!stricmp(argv[i], "wintype"))
      {
        if (!stricmp(s1, "hann")){wintype=wtHann; continue;}
        if (!stricmp(s1, "hamming")){wintype=wtHamming; continue;}
        if (!stricmp(s1, "blackman")){wintype=wtBlackman; continue;}
        if (!stricmp(s1, "rectangle")){wintype=wtRectangle; continue;}
        if (!stricmp(s1, "hannsqr")){wintype=wtHannSqr; continue;}
      }
    }
  }

  //
  MList* List=new MList; List->Add(data16, 1);

  //analysis window size and hop size
  int wid=1<<((int)floor(log2(wid_s*sps)+1)), offst=wid*offstwidratio;

  //set up the spectrogram
  TQuickSpectrogram* Sp=new TQuickSpectrogram(0, 0, true, false, 0);
  Sp->Data=data16; Sp->DataLength=length; Sp->BytesPerSample=2;
  Sp->Wid=wid; Sp->Offst=offst; Sp->WinType=wintype;
  Sp->Spec(-1); //this sets Sp->Capacity to the number of accessible frames and does nothing else

  //get number of frames
  int Fr=Sp->Capacity;

  //the program searches every intv_fr frames over a duration of dur_fr frames for a spectral peak to start tracking.
  int dur_fr=sps*dur_s/offst, intv_fr=sps*intv_s/offst;
  if (dur_fr<10) dur_fr=10;
  if (intv_fr<1) intv_fr=1;

  //minimal and maximal fundamental frequency, in bins
  settings.minf0=minf0/sps*wid;
  settings.maxf0=maxf0/sps*wid;
  if (settings.minf0<settings.hB) settings.minf0=settings.hB;

  windowspec(wintype, wid, &settings.M, settings.c, &settings.iH2);
  double *fps=new double[wid], *vps=&fps[wid/2]; List->Add(fps, 1);
  cdouble *x=new cdouble[wid/2+1]; List->Add(x, 1);

  //output file
  char* filename=ChangeFileExt(argv[1], ".evt"); List->Add(filename, 1);
  TFileStream* File=new TFileStream(filename, fmWrite);

  double* frames=new double[Fr]; memset(frames, 0, sizeof(double)*Fr); List->Add(frames, 1);
  double* framesa=new double[Fr]; memset(framesa, 0, sizeof(double)*Fr); List->Add(framesa, 1);
  double* rsr=new double[Fr]; List->Add(rsr, 1); memset(rsr, 0, sizeof(double)*Fr);
  double* f0s=new double[Fr]; List->Add(f0s, 1); memset(f0s, 0, sizeof(double)*Fr);
  for (int fr=0; fr<Fr-1; fr++)
  {
    __int16* ldata16=&data16[offst*fr];
    frames[fr]=ACPower(ldata16, wid);
    framesa[fr]=1;
 //   f0s[fr]=pitchautocor(ldata16, wid, settings.minf0, settings.maxf0, rsr[fr]); rsr[fr]=1-rsr[fr];
  }
  frames[Fr-1]=ACPower(&data16[(Fr-1)*offst], length-(Fr-1)*offst);
  framesa[Fr-1]=1;

  int start=0, writecount=0;

  while (writecount<maxhscount)
  {
    int fr_m=start+intv_fr;

    //search for the highest spectral peak to start tracking
    double fp_m=0, vp_m=0;
    for (int fr=start+intv_fr; fr<start+dur_fr && fr<Fr; fr+=intv_fr)
    {
      cmplx<QSPEC_FORMAT>* speci=Sp->Spec(fr);
      for (int k=0; k<=wid/2; k++) x[k]=speci[k];
      int pc=QuickPeaks(fps, vps, wid, x, settings.M, settings.c, settings.iH2, mina, 0, wid/4);

      for(int p=0; p<pc; p++)
        if (vps[p]>vp_m)
        {
          if (f0s[fr]==0 && rsr[fr]==0) f0s[fr]=pitchautocor(&data16[offst*fr], wid, settings.minf0, settings.maxf0, rsr[fr]); rsr[fr]=1-rsr[fr];
          if (f0s[fr]>0)
          {
            double dpind=fps[p]/f0s[fr], pind=floor(dpind+0.5);
            if (fabs(dpind-pind)<0.1) vp_m=vps[p], fr_m=fr, fp_m=fps[p];
          }
        }
    }

    if (fp_m<=0){start=start+dur_fr; if (start>Fr) break; continue;}

    f0s[fr_m]=pitchautocor(&data16[offst*fr_m], wid, settings.minf0, settings.maxf0, rsr[fr_m]); rsr[fr_m]=1-rsr[fr_m];

    int _t=fr_m*offst+wid/2;
    double _f=fp_m/wid;
    int hsM, hsFr, frst=start, fren=Fr;
    atom** hsPartials=0;

    settings.pin0=floor(fp_m/f0s[fr_m]+0.5);
    int tag=FindNote(_t, _f, hsM, hsFr, hsPartials, frst, fren, wid, offst, Sp, settings);

    int fr1=(hsPartials[0][0].t-wid/2)/offst;
    double consi=1;
    for (int hsfr=0; hsfr<hsFr; hsfr++)
    {
      double ene=0;
      for (int m=0; m<hsM; m++)
      {
        double a=(hsPartials[m][hsfr].f>0)?hsPartials[m][hsfr].a:0;

        if (hsfr==0) ene+=a*a;
        else
        {
          double b=(hsPartials[m][hsfr-1].f>0)?hsPartials[m][hsfr-1].a:0;
          ene+=(a*a+b*b+a*b)/3;
        }

        if (hsfr==hsFr-1) ene+=a*a;
        else
        {
          double b=(hsPartials[m][hsfr+1].f>0)?hsPartials[m][hsfr+1].a:0;
          ene+=(a*a+b*b+a*b)/3;
        }
      }
      framesa[fr1+hsfr]=ene;//*2;
      if (framesa[fr1+hsfr]<frames[fr1+hsfr]) consi*=frames[fr1+hsfr]/framesa[fr1+hsfr];
      else consi*=framesa[fr1+hsfr]/frames[fr1+hsfr];
      if (framesa[fr1+hsfr]>frames[fr1+hsfr]) framesa[fr1+hsfr]=0;
      else framesa[fr1+hsfr]=1-framesa[fr1+hsfr]/frames[fr1+hsfr];
    }
    consi=pow(consi, 1.0/hsFr);

    THS* HS=new THS;
    HS->M=hsM; HS->Fr=hsFr; HS->Partials=hsPartials;
    printf("%d (%.2fs), %d (%.2fs), inconsistency %f\n", start, start*offst*1.0/sps, hsFr, hsFr*offst*1.0/sps, consi-1);
    if (consi<1.5)
    {
      HS->WriteToStream(File);
      printf("Write note %d\n", writecount++);
    }
    delete HS;

    start=fr1+hsFr;

  }
  delete File; delete Sp;
  delete List;
  printf("Completed\n"); getch(); return 0;
}