comparison main.cpp @ 9:91301b3d02c5

Example main program.
author Wen X <xue.wen@elec.qmul.ac.uk>
date Fri, 15 Oct 2010 14:54:51 +0100
parents
children c6528c38b23c
comparison
equal deleted inserted replaced
8:f0d2c9b5d3a3 9:91301b3d02c5
1 /*
2 Sample program: analysis of harmonic sinusoid in low-noise monophonic context.
3
4 Syntax:
5 (executable path) filename <setting=value> <setting=value> ... <setting=value>
6
7 filename: path input wave form audio file, 16-bit PCM
8 setting: optional algorithmic parameters; search "stricmp" in the source code for a complete list
9 value: non-default values given to the algorithmic parameters
10
11 Output:
12 a *.evt file containing RIFF chunks, each starting with id "EVT\0" and hosting a harmonic sinusoid.
13
14 The "EVT\0" chunk body contains a header ("HDR\0") chunk followed by atom ("ATM\0") chunks.
15
16 The "HDR\0" chunk body contains 4 __int32 values: channel index, number of partials, number of
17 measurement points (frames), and a tag.
18
19 The "ATM\0" chunk body contains an atom structure (see hs.h).
20
21 */
22
23 #include <QtCore/QCoreApplication>
24 #include "arrayalloc.h"
25 #include "hs.h"
26 #include "matrix.h"
27 #include "quickspec.h"
28 #include "procedures.h"
29 #include "vibrato.h"
30 #include <string.h>
31 #include <stdio.h>
32 #include <conio.h>
33
34 //structure hosting wave header data
35 struct wavehdr
36 {
37 __int16 fmttag;
38 __int16 channels;
39 __int32 samplespersec;
40 __int32 bytespersec;
41 __int16 blockalign;
42 __int16 bitspersample;
43 };
44
45 //tells if a wave header contains consistent data
46 bool isvalidwavehdr(wavehdr* hdr)
47 {
48 if (hdr->fmttag!=1) return false;
49 if (hdr->bitspersample!=8 && hdr->bitspersample!=16) return false;
50 if (hdr->channels*hdr->bitspersample!=hdr->blockalign*8) return false;
51 if (hdr->samplespersec*hdr->blockalign!=hdr->bytespersec) return false;
52 return true;
53 }
54
55 //structure hosting a waveform audio
56 struct waveaudio
57 {
58 union
59 {
60 wavehdr hdr;
61 struct
62 {
63 __int16 fmttag;
64 __int16 channels;
65 __int32 samplespersec;
66 __int32 bytespersec;
67 __int16 blockalign;
68 __int16 bitspersample;
69 };
70 };
71 int length;
72 union
73 {
74 unsigned char* data;
75 unsigned char* data8;
76 __int16* data16;
77 };
78 waveaudio(){memset(this, 0, sizeof(waveaudio));}
79 ~waveaudio(){delete[] data;}
80 };
81
82 void freewaveaudio(waveaudio* wa)
83 {
84 delete[] wa->data;
85 memset(wa, 0, sizeof(waveaudio));
86 }
87
88 //error messages for reading waveform audio file
89 enum err_wavefile
90 {
91 err_success,
92 err_RIFF_hdr,
93 err_wave_hdr,
94 err_data_hdr,
95 err_data_chunk,
96 err_io
97 };
98
99 //reads wave header from file stream
100 int loadwavehdr(wavehdr* hdr, FILE* fp)
101 {
102 int rc, hdrlen;
103 char s[5];
104
105 rc=fread(s, 1, 4, fp); s[4]=0;
106 if (rc<4) return err_wave_hdr;
107 if (strcmp(s, "WAVE")) return err_wave_hdr;
108
109 rc=fread(s, 1, 4, fp); s[4]=0;
110 if (rc<4) return err_wave_hdr;
111 if (strcmp(s, "fmt ")) return err_wave_hdr;
112
113 rc=fread(&hdrlen, 1, 4, fp);
114 if (rc<4) return err_wave_hdr;
115 if (hdrlen<16) return err_wave_hdr;
116
117 rc=fread(hdr, 1, sizeof(wavehdr), fp);
118 if (rc<sizeof(wavehdr)) return err_wave_hdr;
119 if (!isvalidwavehdr(hdr)) return err_wave_hdr;
120
121 if (hdrlen>16)
122 {
123 rc=fseek(fp, hdrlen-16, SEEK_CUR);
124 if (rc!=0) return err_wave_hdr;
125 }
126
127 return err_success;
128 }
129
130 //reads waveform audio, including header, from a file stream
131 int loadwaveaudio(waveaudio* wa, FILE* fp)
132 {
133 int rc, mainlen;
134 char s[5];
135 wavehdr hdr;
136
137 rc=fread(s, 1, 4, fp); s[4]=0;
138 if (rc<4) return err_RIFF_hdr;
139 if (strcmp(s, "RIFF")) return err_RIFF_hdr;
140
141 rc=fread(&mainlen, 1, 4, fp);
142 if (rc<4) return err_RIFF_hdr;
143
144 rc=loadwavehdr(&hdr, fp);
145 if (rc!=err_success) return rc;
146
147 rc=fread(s, 1, 4, fp); s[4]=0;
148 if (rc<4) return err_data_hdr;
149 if (strcmp(s, "data")) return err_data_hdr;
150
151 rc=fread(&mainlen, 1, 4, fp);
152 if (rc<4) return err_data_hdr;
153
154 if (mainlen<=0) return err_data_hdr;
155
156 unsigned char* data=new unsigned char[mainlen];
157 rc=fread(data, 1, mainlen, fp);
158 if (rc<mainlen)
159 {
160 delete[] data;
161 return err_data_chunk;
162 }
163
164 wa->hdr=hdr;
165 wa->length=rc/hdr.blockalign;
166 delete[] wa->data;
167 wa->data=data;
168
169 return err_success;
170 }
171
172 //reads waveform audio, including header, from a file
173 int loadwavefile(waveaudio* wa, char* filename)
174 {
175 FILE* fp;
176 if ((fp=fopen(filename, "rb"))==NULL) return err_io;
177 int result=loadwaveaudio(wa, fp);
178 fclose(fp);
179 return result;
180 }
181
182 //returns new file path with the specific extension replacing the original one
183 char* ChangeFileExt(char* FileName, char* Ext)
184 {
185 char* oldext=strrchr(FileName, '.');
186 int namelen=strlen(FileName)-strlen(oldext);
187 char* dest=new char[namelen+strlen(Ext)+1];
188 memcpy(dest, FileName, namelen); dest[namelen]=0;
189 strcat(dest, Ext);
190 return dest;
191 }
192
193 //ACPower for __int16 data input
194 double ACPower(__int16* data, int Count)
195 {
196 if (Count<=0) return 0;
197 double power=0, avg=0, tmp;
198 for (int i=0; i<Count; i++)
199 {
200 tmp=*(data++);
201 power+=tmp*tmp;
202 avg+=tmp;
203 }
204 power=(power-avg*avg/Count)/Count;
205 return power;
206 }//ACPower
207
208 //double QIE(double* y, double& x){double a=0.5*(y[1]+y[-1])-y[0], b=0.5*(y[1]-y[-1]); x=-0.5*b/a; return y[0]-0.25*b*b/a;}
209
210 //correlation coefficient
211 double InnerC(int N, __int16* x, __int16* y)
212 {
213 double inner=0, inn1=0, inn2=0;
214 for (int n=0; n<N; n++) inner+=1.0*x[n]*y[n], inn1+=1.0*x[n]*x[n], inn2+=1.0*y[n]*y[n];
215 double inn=sqrt(inn1*inn2);
216 return (inn<=0)?0:inner/inn;
217 }
218
219 //monophonic pitch estimation by autocorrelation
220 double pitchautocor(__int16* data, int wid, double minf0bin, double maxf0bin, double& hsr)
221 {
222 int hwid=wid/2, minT=wid/maxf0bin, maxT=wid/minf0bin;
223 double ene=InnerC(wid, data, data);
224 if (ene<=0){hsr=0; return 0;}
225 double* autocor=new double[hwid];
226 for (int i=1; i<maxT+2; i++)
227 {
228 autocor[i]=InnerC(wid-i, data, &data[i]);
229 }
230
231 int prd=0, m=minT;
232 while(m<=maxT)
233 {
234 while (m<=maxT && (autocor[m]<0 || autocor[m]<autocor[m-1] || autocor[m]<autocor[m+1])) m++;
235 if (m<=maxT)
236 {
237 int mm=ceil(m*0.6667), mp=floor(m*1.3333), cont=0;
238 for (int im=mm; im<=mp; im++) if (m!=im && autocor[im]>=autocor[m]) {cont=1; break;}
239 if (cont==0)
240 {
241 if (prd==0 || autocor[m]>autocor[prd]*1.05) prd=m;
242 }
243 m++;
244 }
245 }
246
247 double pitchbin=0;
248 if (prd>=minT && prd<=maxT)
249 {
250 double mshift; hsr=QIE(&autocor[prd], mshift); pitchbin=wid/(prd+mshift);
251 }
252 else{hsr=0;}
253 delete[] autocor;
254 return pitchbin;
255 }
256
257 //main function
258 int main(int argc, char* argv[])
259 {
260 //read audio file
261 if (argc<2){printf("Please specify input wave file."); getch(); return 0;}
262 printf("Loading wave file %s... ", argv[1]);
263 waveaudio* wa=new waveaudio;
264 int waverr=loadwavefile(wa, argv[1]); printf("[%d]\n", waverr);
265 if (waverr!=err_success){printf("Aborted: error loading wave file."); getch(); return 0;}
266 if (wa->bitspersample!=16){printf("Aborted: this program accepts 16-bit pcm only."); getch(); return 0;}
267
268 int length=wa->length, sps=wa->samplespersec;
269 __int16* data16=wa->data16;
270 if (wa->channels>1)
271 {
272 printf("Extracting channel 0... ");
273 for (int i=1; i<length; i++) data16[i]=data16[i*wa->channels];
274 printf("[0]\n");
275 }
276 wa->data=0;
277 delete wa;
278
279 //default settings
280 int maxhscount=100;
281 float wid_s=0.02, offstwidratio=0.5;
282 float dur_s=0.5;
283 float intv_s=0.05; //duration, in seconds, to scan for peaks
284 float minf0=55, maxf0=3520;
285 WindowType wintype=wtHann;
286
287 NMSettings settings; memset(&settings, 0, sizeof(NMSettings));
288 settings.hB=3; //spectral truncation half width
289 settings.maxp=50; //maximal number of partials
290 settings.maxB=0.001; //stiffness coefficient upper bound
291 settings.epf=1e-4; //frequency estimation error tolerance for LSE estimation
292 settings.epf0=1; //input frequency error bound for harmonic grouping
293 settings.delm=1.1; //frequency error bound for harmonic grouping
294 settings.delp=1.1; //pitch jump upper bound
295
296 double mina=0.5;
297
298 //user settings through commandline arguments
299 for (int i=2; i<argc; i++)
300 {
301 char* s1=strchr(argv[i], '=');
302 if (s1)
303 {
304 s1[0]=0; s1++;
305 if (!stricmp(argv[i], "hB")){settings.hB=atof(s1); continue;}
306 if (!stricmp(argv[i], "maxp")){settings.maxp=atoi(s1); continue;}
307 if (!stricmp(argv[i], "maxB")){settings.maxB=atof(s1); continue;}
308 if (!stricmp(argv[i], "epf")){settings.epf=atof(s1); continue;}
309 if (!stricmp(argv[i], "epf0")){settings.epf0=atof(s1); continue;}
310 if (!stricmp(argv[i], "delm")){settings.delm=atof(s1); continue;}
311 if (!stricmp(argv[i], "delp")){settings.delp=atof(s1); continue;}
312 if (!stricmp(argv[i], "minf0")){minf0=atof(s1); continue;}
313 if (!stricmp(argv[i], "maxf0")){maxf0=atof(s1); continue;}
314 if (!stricmp(argv[i], "wid_s")){wid_s=atof(s1); continue;}
315 if (!stricmp(argv[i], "offstwidratio")){offstwidratio=atof(s1); continue;}
316 if (!stricmp(argv[i], "dur_s")){dur_s=atof(s1); continue;}
317 if (!stricmp(argv[i], "intv_s")){intv_s=atof(s1); continue;}
318 if (!stricmp(argv[i], "maxhscount")){maxhscount=atoi(s1); continue;}
319 if (!stricmp(argv[i], "wintype"))
320 {
321 if (!stricmp(s1, "hann")){wintype=wtHann; continue;}
322 if (!stricmp(s1, "hamming")){wintype=wtHamming; continue;}
323 if (!stricmp(s1, "blackman")){wintype=wtBlackman; continue;}
324 if (!stricmp(s1, "rectangle")){wintype=wtRectangle; continue;}
325 if (!stricmp(s1, "hannsqr")){wintype=wtHannSqr; continue;}
326 }
327 }
328 }
329
330 //
331 MList* List=new MList; List->Add(data16, 1);
332
333 //analysis window size and hop size
334 int wid=1<<((int)floor(log2(wid_s*sps)+1)), offst=wid*offstwidratio;
335
336 //set up the spectrogram
337 TQuickSpectrogram* Sp=new TQuickSpectrogram(0, 0, true, false, 0);
338 Sp->Data=data16; Sp->DataLength=length; Sp->BytesPerSample=2;
339 Sp->Wid=wid; Sp->Offst=offst; Sp->WinType=wintype;
340 Sp->Spec(-1); //this sets Sp->Capacity to the number of accessible frames and does nothing else
341
342 //get number of frames
343 int Fr=Sp->Capacity;
344
345 //the program searches every intv_fr frames over a duration of dur_fr frames for a spectral peak to start tracking.
346 int dur_fr=sps*dur_s/offst, intv_fr=sps*intv_s/offst;
347 if (dur_fr<10) dur_fr=10;
348 if (intv_fr<1) intv_fr=1;
349
350 //minimal and maximal fundamental frequency, in bins
351 settings.minf0=minf0/sps*wid;
352 settings.maxf0=maxf0/sps*wid;
353 if (settings.minf0<settings.hB) settings.minf0=settings.hB;
354
355 windowspec(wintype, wid, &settings.M, settings.c, &settings.iH2);
356 double *fps=new double[wid], *vps=&fps[wid/2]; List->Add(fps, 1);
357 cdouble *x=new cdouble[wid/2+1]; List->Add(x, 1);
358
359 //output file
360 char* filename=ChangeFileExt(argv[1], ".evt"); List->Add(filename, 1);
361 TFileStream* File=new TFileStream(filename, fmWrite);
362
363 double* frames=new double[Fr]; memset(frames, 0, sizeof(double)*Fr); List->Add(frames, 1);
364 double* framesa=new double[Fr]; memset(framesa, 0, sizeof(double)*Fr); List->Add(framesa, 1);
365 double* rsr=new double[Fr]; List->Add(rsr, 1); memset(rsr, 0, sizeof(double)*Fr);
366 double* f0s=new double[Fr]; List->Add(f0s, 1); memset(f0s, 0, sizeof(double)*Fr);
367 for (int fr=0; fr<Fr-1; fr++)
368 {
369 __int16* ldata16=&data16[offst*fr];
370 frames[fr]=ACPower(ldata16, wid);
371 framesa[fr]=1;
372 // f0s[fr]=pitchautocor(ldata16, wid, settings.minf0, settings.maxf0, rsr[fr]); rsr[fr]=1-rsr[fr];
373 }
374 frames[Fr-1]=ACPower(&data16[(Fr-1)*offst], length-(Fr-1)*offst);
375 framesa[Fr-1]=1;
376
377 int start=0, writecount=0;
378
379 while (writecount<maxhscount)
380 {
381 int fr_m=start+intv_fr;
382
383 //search for the highest spectral peak to start tracking
384 double fp_m=0, vp_m=0;
385 for (int fr=start+intv_fr; fr<start+dur_fr && fr<Fr; fr+=intv_fr)
386 {
387 cmplx<QSPEC_FORMAT>* speci=Sp->Spec(fr);
388 for (int k=0; k<=wid/2; k++) x[k]=speci[k];
389 int pc=QuickPeaks(fps, vps, wid, x, settings.M, settings.c, settings.iH2, mina, 0, wid/4);
390
391 for(int p=0; p<pc; p++)
392 if (vps[p]>vp_m)
393 {
394 if (f0s[fr]==0 && rsr[fr]==0) f0s[fr]=pitchautocor(&data16[offst*fr], wid, settings.minf0, settings.maxf0, rsr[fr]); rsr[fr]=1-rsr[fr];
395 if (f0s[fr]>0)
396 {
397 double dpind=fps[p]/f0s[fr], pind=floor(dpind+0.5);
398 if (fabs(dpind-pind)<0.1) vp_m=vps[p], fr_m=fr, fp_m=fps[p];
399 }
400 }
401 }
402
403 if (fp_m<=0){start=start+dur_fr; if (start>Fr) break; continue;}
404
405 f0s[fr_m]=pitchautocor(&data16[offst*fr_m], wid, settings.minf0, settings.maxf0, rsr[fr_m]); rsr[fr_m]=1-rsr[fr_m];
406
407 int _t=fr_m*offst+wid/2;
408 double _f=fp_m/wid;
409 int hsM, hsFr, frst=start, fren=Fr;
410 atom** hsPartials=0;
411
412 settings.pin0=floor(fp_m/f0s[fr_m]+0.5);
413 int tag=FindNote(_t, _f, hsM, hsFr, hsPartials, frst, fren, wid, offst, Sp, settings);
414
415 int fr1=(hsPartials[0][0].t-wid/2)/offst;
416 double consi=1;
417 for (int hsfr=0; hsfr<hsFr; hsfr++)
418 {
419 double ene=0;
420 for (int m=0; m<hsM; m++)
421 {
422 double a=(hsPartials[m][hsfr].f>0)?hsPartials[m][hsfr].a:0;
423
424 if (hsfr==0) ene+=a*a;
425 else
426 {
427 double b=(hsPartials[m][hsfr-1].f>0)?hsPartials[m][hsfr-1].a:0;
428 ene+=(a*a+b*b+a*b)/3;
429 }
430
431 if (hsfr==hsFr-1) ene+=a*a;
432 else
433 {
434 double b=(hsPartials[m][hsfr+1].f>0)?hsPartials[m][hsfr+1].a:0;
435 ene+=(a*a+b*b+a*b)/3;
436 }
437 }
438 framesa[fr1+hsfr]=ene;//*2;
439 if (framesa[fr1+hsfr]<frames[fr1+hsfr]) consi*=frames[fr1+hsfr]/framesa[fr1+hsfr];
440 else consi*=framesa[fr1+hsfr]/frames[fr1+hsfr];
441 if (framesa[fr1+hsfr]>frames[fr1+hsfr]) framesa[fr1+hsfr]=0;
442 else framesa[fr1+hsfr]=1-framesa[fr1+hsfr]/frames[fr1+hsfr];
443 }
444 consi=pow(consi, 1.0/hsFr);
445
446 THS* HS=new THS;
447 HS->M=hsM; HS->Fr=hsFr; HS->Partials=hsPartials;
448 printf("%d (%.2fs), %d (%.2fs), inconsistency %f\n", start, start*offst*1.0/sps, hsFr, hsFr*offst*1.0/sps, consi-1);
449 if (consi<1.5)
450 {
451 HS->WriteToStream(File);
452 printf("Write note %d\n", writecount++);
453 }
454 delete HS;
455
456 start=fr1+hsFr;
457
458 }
459 delete File; delete Sp;
460 delete List;
461 printf("Completed\n"); getch(); return 0;
462 }