Mercurial > hg > x
view main.cpp @ 12:4b35f8ac5113
Fix: do not synthesize sinusoids above Nyqvist freq.
author | wenx <xue.wen@eecs.qmul.ac.uk> |
---|---|
date | Fri, 12 Aug 2011 18:10:46 +0100 |
parents | 977f541d6683 |
children |
line wrap: on
line source
/* Sample program: analysis of harmonic sinusoid in low-noise monophonic context. Syntax: (executable path) filename <setting=value> <setting=value> ... <setting=value> filename: path input wave form audio file, 16-bit PCM setting: optional algorithmic parameters; search "stricmp" in the source code for a complete list value: non-default values given to the algorithmic parameters Output: a *.evt file containing RIFF chunks, each starting with id "EVT\0" and hosting a harmonic sinusoid. The "EVT\0" chunk body contains a header ("HDR\0") chunk followed by atom ("ATM\0") chunks. The "HDR\0" chunk body contains 4 __int32 values: channel index, number of partials, number of measurement points (frames), and a tag. The "ATM\0" chunk body contains an atom structure (see hs.h). */ #include <QtCore/QCoreApplication> #include "arrayalloc.h" #include "hs.h" #include "matrix.h" #include "quickspec.h" #include "procedures.h" #include "vibrato.h" #include <string.h> #include <stdio.h> #include <conio.h> //structure hosting wave header data struct wavehdr { __int16 fmttag; __int16 channels; __int32 samplespersec; __int32 bytespersec; __int16 blockalign; __int16 bitspersample; }; //tells if a wave header contains consistent data bool isvalidwavehdr(wavehdr* hdr) { if (hdr->fmttag!=1) return false; if (hdr->bitspersample!=8 && hdr->bitspersample!=16) return false; if (hdr->channels*hdr->bitspersample!=hdr->blockalign*8) return false; if (hdr->samplespersec*hdr->blockalign!=hdr->bytespersec) return false; return true; } //structure hosting a waveform audio struct waveaudio { union { wavehdr hdr; struct { __int16 fmttag; __int16 channels; __int32 samplespersec; __int32 bytespersec; __int16 blockalign; __int16 bitspersample; }; }; int length; union { unsigned char* data; unsigned char* data8; __int16* data16; }; waveaudio(){memset(this, 0, sizeof(waveaudio));} ~waveaudio(){delete[] data;} }; void freewaveaudio(waveaudio* wa) { delete[] wa->data; memset(wa, 0, sizeof(waveaudio)); } //error messages for reading waveform audio file enum err_wavefile { err_success, err_RIFF_hdr, err_wave_hdr, err_data_hdr, err_data_chunk, err_io }; //reads wave header from file stream int loadwavehdr(wavehdr* hdr, FILE* fp) { int rc, hdrlen; char s[5]; rc=fread(s, 1, 4, fp); s[4]=0; if (rc<4) return err_wave_hdr; if (strcmp(s, "WAVE")) return err_wave_hdr; rc=fread(s, 1, 4, fp); s[4]=0; if (rc<4) return err_wave_hdr; if (strcmp(s, "fmt ")) return err_wave_hdr; rc=fread(&hdrlen, 1, 4, fp); if (rc<4) return err_wave_hdr; if (hdrlen<16) return err_wave_hdr; rc=fread(hdr, 1, sizeof(wavehdr), fp); if (rc<sizeof(wavehdr)) return err_wave_hdr; if (!isvalidwavehdr(hdr)) return err_wave_hdr; if (hdrlen>16) { rc=fseek(fp, hdrlen-16, SEEK_CUR); if (rc!=0) return err_wave_hdr; } return err_success; } //reads waveform audio, including header, from a file stream int loadwaveaudio(waveaudio* wa, FILE* fp) { int rc, mainlen; char s[5]; wavehdr hdr; rc=fread(s, 1, 4, fp); s[4]=0; if (rc<4) return err_RIFF_hdr; if (strcmp(s, "RIFF")) return err_RIFF_hdr; rc=fread(&mainlen, 1, 4, fp); if (rc<4) return err_RIFF_hdr; rc=loadwavehdr(&hdr, fp); if (rc!=err_success) return rc; rc=fread(s, 1, 4, fp); s[4]=0; if (rc<4) return err_data_hdr; if (strcmp(s, "data")) return err_data_hdr; rc=fread(&mainlen, 1, 4, fp); if (rc<4) return err_data_hdr; if (mainlen<=0) return err_data_hdr; unsigned char* data=new unsigned char[mainlen]; rc=fread(data, 1, mainlen, fp); if (rc<mainlen) { delete[] data; return err_data_chunk; } wa->hdr=hdr; wa->length=rc/hdr.blockalign; delete[] wa->data; wa->data=data; return err_success; } //reads waveform audio, including header, from a file int loadwavefile(waveaudio* wa, char* filename) { FILE* fp; if ((fp=fopen(filename, "rb"))==NULL) return err_io; int result=loadwaveaudio(wa, fp); fclose(fp); return result; } //returns new file path with the specific extension replacing the original one char* ChangeFileExt(char* FileName, char* Ext) { char* oldext=strrchr(FileName, '.'); int namelen=strlen(FileName)-strlen(oldext); char* dest=new char[namelen+strlen(Ext)+1]; memcpy(dest, FileName, namelen); dest[namelen]=0; strcat(dest, Ext); return dest; } //ACPower for __int16 data input double ACPower(__int16* data, int Count) { if (Count<=0) return 0; double power=0, avg=0, tmp; for (int i=0; i<Count; i++) { tmp=*(data++); power+=tmp*tmp; avg+=tmp; } power=(power-avg*avg/Count)/Count; return power; }//ACPower //double QIE(double* y, double& x){double a=0.5*(y[1]+y[-1])-y[0], b=0.5*(y[1]-y[-1]); x=-0.5*b/a; return y[0]-0.25*b*b/a;} //correlation coefficient double InnerC(int N, __int16* x, __int16* y) { double inner=0, inn1=0, inn2=0; for (int n=0; n<N; n++) inner+=1.0*x[n]*y[n], inn1+=1.0*x[n]*x[n], inn2+=1.0*y[n]*y[n]; double inn=sqrt(inn1*inn2); return (inn<=0)?0:inner/inn; } //monophonic pitch estimation by autocorrelation double pitchautocor(__int16* data, int wid, double minf0bin, double maxf0bin, double& hsr) { int hwid=wid/2, minT=wid/maxf0bin, maxT=wid/minf0bin; double ene=InnerC(wid, data, data); if (ene<=0){hsr=0; return 0;} double* autocor=new double[hwid]; for (int i=1; i<maxT+2; i++) { autocor[i]=InnerC(wid-i, data, &data[i]); } int prd=0, m=minT; while(m<=maxT) { while (m<=maxT && (autocor[m]<0 || autocor[m]<autocor[m-1] || autocor[m]<autocor[m+1])) m++; if (m<=maxT) { int mm=ceil(m*0.6667), mp=floor(m*1.3333), cont=0; //for (int im=mm; im<=mp; im++) if (m!=im && autocor[im]>=autocor[m]) {cont=1; break;} if (cont==0) { if (prd==0 || autocor[m]>autocor[prd]*1.05) prd=m; } m++; } } double pitchbin=0; if (prd>=minT && prd<=maxT) { double mshift; hsr=QIE(&autocor[prd], mshift); pitchbin=wid/(prd+mshift); } else{hsr=0;} delete[] autocor; return pitchbin; } //main function int main(int argc, char* argv[]) { //read audio file if (argc<2){printf("Please specify input wave file."); getch(); return 0;} printf("Loading wave file %s... ", argv[1]); waveaudio* wa=new waveaudio; int waverr=loadwavefile(wa, argv[1]); printf("[%d]\n", waverr); if (waverr!=err_success){printf("Aborted: error loading wave file."); getch(); return 0;} if (wa->bitspersample!=16){printf("Aborted: this program accepts 16-bit pcm only."); getch(); return 0;} int length=wa->length, sps=wa->samplespersec; __int16* data16=wa->data16; if (wa->channels>1) { printf("Extracting channel 0... "); for (int i=1; i<length; i++) data16[i]=data16[i*wa->channels]; printf("[0]\n"); } wa->data=0; delete wa; //default settings int maxhscount=100; float wid_s=0.02, offstwidratio=0.5; float dur_s=0.5; float intv_s=0.05; //duration, in seconds, to scan for peaks float minf0=55, maxf0=3520; WindowType wintype=wtHann; NMSettings settings; memset(&settings, 0, sizeof(NMSettings)); settings.hB=3; //spectral truncation half width settings.maxp=50; //maximal number of partials settings.maxB=0.001; //stiffness coefficient upper bound settings.epf=1e-4; //frequency estimation error tolerance for LSE estimation settings.epf0=1; //input frequency error bound for harmonic grouping settings.delm=1.1; //frequency error bound for harmonic grouping settings.delp=1.1; //pitch jump upper bound double mina=0.5; //user settings through commandline arguments for (int i=2; i<argc; i++) { char* s1=strchr(argv[i], '='); if (s1) { s1[0]=0; s1++; if (!stricmp(argv[i], "hB")){settings.hB=atof(s1); continue;} if (!stricmp(argv[i], "maxp")){settings.maxp=atoi(s1); continue;} if (!stricmp(argv[i], "maxB")){settings.maxB=atof(s1); continue;} if (!stricmp(argv[i], "epf")){settings.epf=atof(s1); continue;} if (!stricmp(argv[i], "epf0")){settings.epf0=atof(s1); continue;} if (!stricmp(argv[i], "delm")){settings.delm=atof(s1); continue;} if (!stricmp(argv[i], "delp")){settings.delp=atof(s1); continue;} if (!stricmp(argv[i], "minf0")){minf0=atof(s1); continue;} if (!stricmp(argv[i], "maxf0")){maxf0=atof(s1); continue;} if (!stricmp(argv[i], "wid_s")){wid_s=atof(s1); continue;} if (!stricmp(argv[i], "offstwidratio")){offstwidratio=atof(s1); continue;} if (!stricmp(argv[i], "dur_s")){dur_s=atof(s1); continue;} if (!stricmp(argv[i], "intv_s")){intv_s=atof(s1); continue;} if (!stricmp(argv[i], "maxhscount")){maxhscount=atoi(s1); continue;} if (!stricmp(argv[i], "wintype")) { if (!stricmp(s1, "hann")){wintype=wtHann; continue;} if (!stricmp(s1, "hamming")){wintype=wtHamming; continue;} if (!stricmp(s1, "blackman")){wintype=wtBlackman; continue;} if (!stricmp(s1, "rectangle")){wintype=wtRectangle; continue;} if (!stricmp(s1, "hannsqr")){wintype=wtHannSqr; continue;} } } } // MList* List=new MList; List->Add(data16, 1); //analysis window size and hop size int wid=1<<((int)floor(log2(wid_s*sps)+1)), offst=wid*offstwidratio; //set up the spectrogram TQuickSpectrogram* Sp=new TQuickSpectrogram(0, 0, true, false, 0); Sp->Data=data16; Sp->DataLength=length; Sp->BytesPerSample=2; Sp->Wid=wid; Sp->Offst=offst; Sp->WinType=wintype; Sp->Spec(-1); //this sets Sp->Capacity to the number of accessible frames and does nothing else //get number of frames int Fr=Sp->Capacity; //the program searches every intv_fr frames over a duration of dur_fr frames for a spectral peak to start tracking. int dur_fr=sps*dur_s/offst, intv_fr=sps*intv_s/offst; if (dur_fr<10) dur_fr=10; if (intv_fr<1) intv_fr=1; //minimal and maximal fundamental frequency, in bins settings.minf0=minf0/sps*wid; settings.maxf0=maxf0/sps*wid; if (settings.minf0<settings.hB) settings.minf0=settings.hB; windowspec(wintype, wid, &settings.M, settings.c, &settings.iH2); double *fps=new double[wid], *vps=&fps[wid/2]; List->Add(fps, 1); cdouble *x=new cdouble[wid/2+1]; List->Add(x, 1); //output file char* filename=ChangeFileExt(argv[1], ".evt"); List->Add(filename, 1); TFileStream* File=new TFileStream(filename, fmWrite); double* frames=new double[Fr]; memset(frames, 0, sizeof(double)*Fr); List->Add(frames, 1); double* framesa=new double[Fr]; memset(framesa, 0, sizeof(double)*Fr); List->Add(framesa, 1); double* rsr=new double[Fr]; List->Add(rsr, 1); memset(rsr, 0, sizeof(double)*Fr); double* f0s=new double[Fr]; List->Add(f0s, 1); memset(f0s, 0, sizeof(double)*Fr); for (int fr=0; fr<Fr-1; fr++) { __int16* ldata16=&data16[offst*fr]; frames[fr]=ACPower(ldata16, wid); framesa[fr]=1; // f0s[fr]=pitchautocor(ldata16, wid, settings.minf0, settings.maxf0, rsr[fr]); rsr[fr]=1-rsr[fr]; } frames[Fr-1]=ACPower(&data16[(Fr-1)*offst], length-(Fr-1)*offst); framesa[Fr-1]=1; int start=0, writecount=0; while (writecount<maxhscount) { int fr_m=start+intv_fr; //search for the highest spectral peak to start tracking double fp_m=0, vp_m=0; for (int fr=start+intv_fr; fr<start+dur_fr && fr<Fr; fr+=intv_fr) { cmplx<QSPEC_FORMAT>* speci=Sp->Spec(fr); for (int k=0; k<=wid/2; k++) x[k]=speci[k]; int pc=QuickPeaks(fps, vps, wid, x, settings.M, settings.c, settings.iH2, mina, 0, wid/4); for(int p=0; p<pc; p++) if (vps[p]>vp_m) { if (f0s[fr]==0 && rsr[fr]==0) f0s[fr]=pitchautocor(&data16[offst*fr], wid, settings.minf0, settings.maxf0, rsr[fr]); rsr[fr]=1-rsr[fr]; if (f0s[fr]>0) { double dpind=fps[p]/f0s[fr], pind=floor(dpind+0.5); if (fabs(dpind-pind)<0.1) vp_m=vps[p], fr_m=fr, fp_m=fps[p]; } } } if (fp_m<=0){start=start+dur_fr; if (start>Fr) break; continue;} f0s[fr_m]=pitchautocor(&data16[offst*fr_m], wid, settings.minf0, settings.maxf0, rsr[fr_m]); rsr[fr_m]=1-rsr[fr_m]; int _t=fr_m*offst+wid/2; double _f=fp_m/wid; int hsM, hsFr, frst=start, fren=Fr; atom** hsPartials=0; settings.pin0=floor(fp_m/f0s[fr_m]+0.5); int tag=FindNote(_t, _f, hsM, hsFr, hsPartials, frst, fren, wid, offst, Sp, settings); int fr1=(hsPartials[0][0].t-wid/2)/offst; double consi=1; for (int hsfr=0; hsfr<hsFr; hsfr++) { double ene=0; for (int m=0; m<hsM; m++) { double a=(hsPartials[m][hsfr].f>0)?hsPartials[m][hsfr].a:0; if (hsfr==0) ene+=a*a; else { double b=(hsPartials[m][hsfr-1].f>0)?hsPartials[m][hsfr-1].a:0; ene+=(a*a+b*b+a*b)/3; } if (hsfr==hsFr-1) ene+=a*a; else { double b=(hsPartials[m][hsfr+1].f>0)?hsPartials[m][hsfr+1].a:0; ene+=(a*a+b*b+a*b)/3; } } framesa[fr1+hsfr]=ene;//*2; if (framesa[fr1+hsfr]<frames[fr1+hsfr]) consi*=frames[fr1+hsfr]/framesa[fr1+hsfr]; else consi*=framesa[fr1+hsfr]/frames[fr1+hsfr]; if (framesa[fr1+hsfr]>frames[fr1+hsfr]) framesa[fr1+hsfr]=0; else framesa[fr1+hsfr]=1-framesa[fr1+hsfr]/frames[fr1+hsfr]; } consi=pow(consi, 1.0/hsFr); THS* HS=new THS; HS->M=hsM; HS->Fr=hsFr; HS->Partials=hsPartials; printf("%d (%.2fs), %d (%.2fs), inconsistency %f\n", start, start*offst*1.0/sps, hsFr, hsFr*offst*1.0/sps, consi-1); if (consi<1.5) { HS->WriteToStream(File); printf("Write note %d\n", writecount++); } delete HS; start=fr1+hsFr; } delete File; delete Sp; delete List; printf("Completed\n"); getch(); return 0; }