Mercurial > hg > x
changeset 9:91301b3d02c5
Example main program.
author | Wen X <xue.wen@elec.qmul.ac.uk> |
---|---|
date | Fri, 15 Oct 2010 14:54:51 +0100 |
parents | f0d2c9b5d3a3 |
children | c6528c38b23c |
files | main.cpp |
diffstat | 1 files changed, 462 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/main.cpp Fri Oct 15 14:54:51 2010 +0100 @@ -0,0 +1,462 @@ +/* + Sample program: analysis of harmonic sinusoid in low-noise monophonic context. + + Syntax: + (executable path) filename <setting=value> <setting=value> ... <setting=value> + + filename: path input wave form audio file, 16-bit PCM + setting: optional algorithmic parameters; search "stricmp" in the source code for a complete list + value: non-default values given to the algorithmic parameters + + Output: + a *.evt file containing RIFF chunks, each starting with id "EVT\0" and hosting a harmonic sinusoid. + + The "EVT\0" chunk body contains a header ("HDR\0") chunk followed by atom ("ATM\0") chunks. + + The "HDR\0" chunk body contains 4 __int32 values: channel index, number of partials, number of + measurement points (frames), and a tag. + + The "ATM\0" chunk body contains an atom structure (see hs.h). + +*/ + +#include <QtCore/QCoreApplication> +#include "arrayalloc.h" +#include "hs.h" +#include "matrix.h" +#include "quickspec.h" +#include "procedures.h" +#include "vibrato.h" +#include <string.h> +#include <stdio.h> +#include <conio.h> + +//structure hosting wave header data +struct wavehdr +{ + __int16 fmttag; + __int16 channels; + __int32 samplespersec; + __int32 bytespersec; + __int16 blockalign; + __int16 bitspersample; +}; + +//tells if a wave header contains consistent data +bool isvalidwavehdr(wavehdr* hdr) +{ + if (hdr->fmttag!=1) return false; + if (hdr->bitspersample!=8 && hdr->bitspersample!=16) return false; + if (hdr->channels*hdr->bitspersample!=hdr->blockalign*8) return false; + if (hdr->samplespersec*hdr->blockalign!=hdr->bytespersec) return false; + return true; +} + +//structure hosting a waveform audio +struct waveaudio +{ + union + { + wavehdr hdr; + struct + { + __int16 fmttag; + __int16 channels; + __int32 samplespersec; + __int32 bytespersec; + __int16 blockalign; + __int16 bitspersample; + }; + }; + int length; + union + { + unsigned char* data; + unsigned char* data8; + __int16* data16; + }; + waveaudio(){memset(this, 0, sizeof(waveaudio));} + ~waveaudio(){delete[] data;} +}; + +void freewaveaudio(waveaudio* wa) +{ + delete[] wa->data; + memset(wa, 0, sizeof(waveaudio)); +} + +//error messages for reading waveform audio file +enum err_wavefile +{ + err_success, + err_RIFF_hdr, + err_wave_hdr, + err_data_hdr, + err_data_chunk, + err_io +}; + +//reads wave header from file stream +int loadwavehdr(wavehdr* hdr, FILE* fp) +{ + int rc, hdrlen; + char s[5]; + + rc=fread(s, 1, 4, fp); s[4]=0; + if (rc<4) return err_wave_hdr; + if (strcmp(s, "WAVE")) return err_wave_hdr; + + rc=fread(s, 1, 4, fp); s[4]=0; + if (rc<4) return err_wave_hdr; + if (strcmp(s, "fmt ")) return err_wave_hdr; + + rc=fread(&hdrlen, 1, 4, fp); + if (rc<4) return err_wave_hdr; + if (hdrlen<16) return err_wave_hdr; + + rc=fread(hdr, 1, sizeof(wavehdr), fp); + if (rc<sizeof(wavehdr)) return err_wave_hdr; + if (!isvalidwavehdr(hdr)) return err_wave_hdr; + + if (hdrlen>16) + { + rc=fseek(fp, hdrlen-16, SEEK_CUR); + if (rc!=0) return err_wave_hdr; + } + + return err_success; +} + +//reads waveform audio, including header, from a file stream +int loadwaveaudio(waveaudio* wa, FILE* fp) +{ + int rc, mainlen; + char s[5]; + wavehdr hdr; + + rc=fread(s, 1, 4, fp); s[4]=0; + if (rc<4) return err_RIFF_hdr; + if (strcmp(s, "RIFF")) return err_RIFF_hdr; + + rc=fread(&mainlen, 1, 4, fp); + if (rc<4) return err_RIFF_hdr; + + rc=loadwavehdr(&hdr, fp); + if (rc!=err_success) return rc; + + rc=fread(s, 1, 4, fp); s[4]=0; + if (rc<4) return err_data_hdr; + if (strcmp(s, "data")) return err_data_hdr; + + rc=fread(&mainlen, 1, 4, fp); + if (rc<4) return err_data_hdr; + + if (mainlen<=0) return err_data_hdr; + + unsigned char* data=new unsigned char[mainlen]; + rc=fread(data, 1, mainlen, fp); + if (rc<mainlen) + { + delete[] data; + return err_data_chunk; + } + + wa->hdr=hdr; + wa->length=rc/hdr.blockalign; + delete[] wa->data; + wa->data=data; + + return err_success; +} + +//reads waveform audio, including header, from a file +int loadwavefile(waveaudio* wa, char* filename) +{ + FILE* fp; + if ((fp=fopen(filename, "rb"))==NULL) return err_io; + int result=loadwaveaudio(wa, fp); + fclose(fp); + return result; +} + +//returns new file path with the specific extension replacing the original one +char* ChangeFileExt(char* FileName, char* Ext) +{ + char* oldext=strrchr(FileName, '.'); + int namelen=strlen(FileName)-strlen(oldext); + char* dest=new char[namelen+strlen(Ext)+1]; + memcpy(dest, FileName, namelen); dest[namelen]=0; + strcat(dest, Ext); + return dest; +} + +//ACPower for __int16 data input +double ACPower(__int16* data, int Count) +{ + if (Count<=0) return 0; + double power=0, avg=0, tmp; + for (int i=0; i<Count; i++) + { + tmp=*(data++); + power+=tmp*tmp; + avg+=tmp; + } + power=(power-avg*avg/Count)/Count; + return power; +}//ACPower + +//double QIE(double* y, double& x){double a=0.5*(y[1]+y[-1])-y[0], b=0.5*(y[1]-y[-1]); x=-0.5*b/a; return y[0]-0.25*b*b/a;} + +//correlation coefficient +double InnerC(int N, __int16* x, __int16* y) +{ + double inner=0, inn1=0, inn2=0; + for (int n=0; n<N; n++) inner+=1.0*x[n]*y[n], inn1+=1.0*x[n]*x[n], inn2+=1.0*y[n]*y[n]; + double inn=sqrt(inn1*inn2); + return (inn<=0)?0:inner/inn; +} + +//monophonic pitch estimation by autocorrelation +double pitchautocor(__int16* data, int wid, double minf0bin, double maxf0bin, double& hsr) +{ + int hwid=wid/2, minT=wid/maxf0bin, maxT=wid/minf0bin; + double ene=InnerC(wid, data, data); + if (ene<=0){hsr=0; return 0;} + double* autocor=new double[hwid]; + for (int i=1; i<maxT+2; i++) + { + autocor[i]=InnerC(wid-i, data, &data[i]); + } + + int prd=0, m=minT; + while(m<=maxT) + { + while (m<=maxT && (autocor[m]<0 || autocor[m]<autocor[m-1] || autocor[m]<autocor[m+1])) m++; + if (m<=maxT) + { + int mm=ceil(m*0.6667), mp=floor(m*1.3333), cont=0; + for (int im=mm; im<=mp; im++) if (m!=im && autocor[im]>=autocor[m]) {cont=1; break;} + if (cont==0) + { + if (prd==0 || autocor[m]>autocor[prd]*1.05) prd=m; + } + m++; + } + } + + double pitchbin=0; + if (prd>=minT && prd<=maxT) + { + double mshift; hsr=QIE(&autocor[prd], mshift); pitchbin=wid/(prd+mshift); + } + else{hsr=0;} + delete[] autocor; + return pitchbin; +} + +//main function +int main(int argc, char* argv[]) +{ + //read audio file + if (argc<2){printf("Please specify input wave file."); getch(); return 0;} + printf("Loading wave file %s... ", argv[1]); + waveaudio* wa=new waveaudio; + int waverr=loadwavefile(wa, argv[1]); printf("[%d]\n", waverr); + if (waverr!=err_success){printf("Aborted: error loading wave file."); getch(); return 0;} + if (wa->bitspersample!=16){printf("Aborted: this program accepts 16-bit pcm only."); getch(); return 0;} + + int length=wa->length, sps=wa->samplespersec; + __int16* data16=wa->data16; + if (wa->channels>1) + { + printf("Extracting channel 0... "); + for (int i=1; i<length; i++) data16[i]=data16[i*wa->channels]; + printf("[0]\n"); + } + wa->data=0; + delete wa; + + //default settings + int maxhscount=100; + float wid_s=0.02, offstwidratio=0.5; + float dur_s=0.5; + float intv_s=0.05; //duration, in seconds, to scan for peaks + float minf0=55, maxf0=3520; + WindowType wintype=wtHann; + + NMSettings settings; memset(&settings, 0, sizeof(NMSettings)); + settings.hB=3; //spectral truncation half width + settings.maxp=50; //maximal number of partials + settings.maxB=0.001; //stiffness coefficient upper bound + settings.epf=1e-4; //frequency estimation error tolerance for LSE estimation + settings.epf0=1; //input frequency error bound for harmonic grouping + settings.delm=1.1; //frequency error bound for harmonic grouping + settings.delp=1.1; //pitch jump upper bound + + double mina=0.5; + + //user settings through commandline arguments + for (int i=2; i<argc; i++) + { + char* s1=strchr(argv[i], '='); + if (s1) + { + s1[0]=0; s1++; + if (!stricmp(argv[i], "hB")){settings.hB=atof(s1); continue;} + if (!stricmp(argv[i], "maxp")){settings.maxp=atoi(s1); continue;} + if (!stricmp(argv[i], "maxB")){settings.maxB=atof(s1); continue;} + if (!stricmp(argv[i], "epf")){settings.epf=atof(s1); continue;} + if (!stricmp(argv[i], "epf0")){settings.epf0=atof(s1); continue;} + if (!stricmp(argv[i], "delm")){settings.delm=atof(s1); continue;} + if (!stricmp(argv[i], "delp")){settings.delp=atof(s1); continue;} + if (!stricmp(argv[i], "minf0")){minf0=atof(s1); continue;} + if (!stricmp(argv[i], "maxf0")){maxf0=atof(s1); continue;} + if (!stricmp(argv[i], "wid_s")){wid_s=atof(s1); continue;} + if (!stricmp(argv[i], "offstwidratio")){offstwidratio=atof(s1); continue;} + if (!stricmp(argv[i], "dur_s")){dur_s=atof(s1); continue;} + if (!stricmp(argv[i], "intv_s")){intv_s=atof(s1); continue;} + if (!stricmp(argv[i], "maxhscount")){maxhscount=atoi(s1); continue;} + if (!stricmp(argv[i], "wintype")) + { + if (!stricmp(s1, "hann")){wintype=wtHann; continue;} + if (!stricmp(s1, "hamming")){wintype=wtHamming; continue;} + if (!stricmp(s1, "blackman")){wintype=wtBlackman; continue;} + if (!stricmp(s1, "rectangle")){wintype=wtRectangle; continue;} + if (!stricmp(s1, "hannsqr")){wintype=wtHannSqr; continue;} + } + } + } + + // + MList* List=new MList; List->Add(data16, 1); + + //analysis window size and hop size + int wid=1<<((int)floor(log2(wid_s*sps)+1)), offst=wid*offstwidratio; + + //set up the spectrogram + TQuickSpectrogram* Sp=new TQuickSpectrogram(0, 0, true, false, 0); + Sp->Data=data16; Sp->DataLength=length; Sp->BytesPerSample=2; + Sp->Wid=wid; Sp->Offst=offst; Sp->WinType=wintype; + Sp->Spec(-1); //this sets Sp->Capacity to the number of accessible frames and does nothing else + + //get number of frames + int Fr=Sp->Capacity; + + //the program searches every intv_fr frames over a duration of dur_fr frames for a spectral peak to start tracking. + int dur_fr=sps*dur_s/offst, intv_fr=sps*intv_s/offst; + if (dur_fr<10) dur_fr=10; + if (intv_fr<1) intv_fr=1; + + //minimal and maximal fundamental frequency, in bins + settings.minf0=minf0/sps*wid; + settings.maxf0=maxf0/sps*wid; + if (settings.minf0<settings.hB) settings.minf0=settings.hB; + + windowspec(wintype, wid, &settings.M, settings.c, &settings.iH2); + double *fps=new double[wid], *vps=&fps[wid/2]; List->Add(fps, 1); + cdouble *x=new cdouble[wid/2+1]; List->Add(x, 1); + + //output file + char* filename=ChangeFileExt(argv[1], ".evt"); List->Add(filename, 1); + TFileStream* File=new TFileStream(filename, fmWrite); + + double* frames=new double[Fr]; memset(frames, 0, sizeof(double)*Fr); List->Add(frames, 1); + double* framesa=new double[Fr]; memset(framesa, 0, sizeof(double)*Fr); List->Add(framesa, 1); + double* rsr=new double[Fr]; List->Add(rsr, 1); memset(rsr, 0, sizeof(double)*Fr); + double* f0s=new double[Fr]; List->Add(f0s, 1); memset(f0s, 0, sizeof(double)*Fr); + for (int fr=0; fr<Fr-1; fr++) + { + __int16* ldata16=&data16[offst*fr]; + frames[fr]=ACPower(ldata16, wid); + framesa[fr]=1; + // f0s[fr]=pitchautocor(ldata16, wid, settings.minf0, settings.maxf0, rsr[fr]); rsr[fr]=1-rsr[fr]; + } + frames[Fr-1]=ACPower(&data16[(Fr-1)*offst], length-(Fr-1)*offst); + framesa[Fr-1]=1; + + int start=0, writecount=0; + + while (writecount<maxhscount) + { + int fr_m=start+intv_fr; + + //search for the highest spectral peak to start tracking + double fp_m=0, vp_m=0; + for (int fr=start+intv_fr; fr<start+dur_fr && fr<Fr; fr+=intv_fr) + { + cmplx<QSPEC_FORMAT>* speci=Sp->Spec(fr); + for (int k=0; k<=wid/2; k++) x[k]=speci[k]; + int pc=QuickPeaks(fps, vps, wid, x, settings.M, settings.c, settings.iH2, mina, 0, wid/4); + + for(int p=0; p<pc; p++) + if (vps[p]>vp_m) + { + if (f0s[fr]==0 && rsr[fr]==0) f0s[fr]=pitchautocor(&data16[offst*fr], wid, settings.minf0, settings.maxf0, rsr[fr]); rsr[fr]=1-rsr[fr]; + if (f0s[fr]>0) + { + double dpind=fps[p]/f0s[fr], pind=floor(dpind+0.5); + if (fabs(dpind-pind)<0.1) vp_m=vps[p], fr_m=fr, fp_m=fps[p]; + } + } + } + + if (fp_m<=0){start=start+dur_fr; if (start>Fr) break; continue;} + + f0s[fr_m]=pitchautocor(&data16[offst*fr_m], wid, settings.minf0, settings.maxf0, rsr[fr_m]); rsr[fr_m]=1-rsr[fr_m]; + + int _t=fr_m*offst+wid/2; + double _f=fp_m/wid; + int hsM, hsFr, frst=start, fren=Fr; + atom** hsPartials=0; + + settings.pin0=floor(fp_m/f0s[fr_m]+0.5); + int tag=FindNote(_t, _f, hsM, hsFr, hsPartials, frst, fren, wid, offst, Sp, settings); + + int fr1=(hsPartials[0][0].t-wid/2)/offst; + double consi=1; + for (int hsfr=0; hsfr<hsFr; hsfr++) + { + double ene=0; + for (int m=0; m<hsM; m++) + { + double a=(hsPartials[m][hsfr].f>0)?hsPartials[m][hsfr].a:0; + + if (hsfr==0) ene+=a*a; + else + { + double b=(hsPartials[m][hsfr-1].f>0)?hsPartials[m][hsfr-1].a:0; + ene+=(a*a+b*b+a*b)/3; + } + + if (hsfr==hsFr-1) ene+=a*a; + else + { + double b=(hsPartials[m][hsfr+1].f>0)?hsPartials[m][hsfr+1].a:0; + ene+=(a*a+b*b+a*b)/3; + } + } + framesa[fr1+hsfr]=ene;//*2; + if (framesa[fr1+hsfr]<frames[fr1+hsfr]) consi*=frames[fr1+hsfr]/framesa[fr1+hsfr]; + else consi*=framesa[fr1+hsfr]/frames[fr1+hsfr]; + if (framesa[fr1+hsfr]>frames[fr1+hsfr]) framesa[fr1+hsfr]=0; + else framesa[fr1+hsfr]=1-framesa[fr1+hsfr]/frames[fr1+hsfr]; + } + consi=pow(consi, 1.0/hsFr); + + THS* HS=new THS; + HS->M=hsM; HS->Fr=hsFr; HS->Partials=hsPartials; + printf("%d (%.2fs), %d (%.2fs), inconsistency %f\n", start, start*offst*1.0/sps, hsFr, hsFr*offst*1.0/sps, consi-1); + if (consi<1.5) + { + HS->WriteToStream(File); + printf("Write note %d\n", writecount++); + } + delete HS; + + start=fr1+hsFr; + + } + delete File; delete Sp; + delete List; + printf("Completed\n"); getch(); return 0; +}