Mercurial > hg > x
comparison main.cpp @ 9:91301b3d02c5
Example main program.
author | Wen X <xue.wen@elec.qmul.ac.uk> |
---|---|
date | Fri, 15 Oct 2010 14:54:51 +0100 |
parents | |
children | c6528c38b23c |
comparison
equal
deleted
inserted
replaced
8:f0d2c9b5d3a3 | 9:91301b3d02c5 |
---|---|
1 /* | |
2 Sample program: analysis of harmonic sinusoid in low-noise monophonic context. | |
3 | |
4 Syntax: | |
5 (executable path) filename <setting=value> <setting=value> ... <setting=value> | |
6 | |
7 filename: path input wave form audio file, 16-bit PCM | |
8 setting: optional algorithmic parameters; search "stricmp" in the source code for a complete list | |
9 value: non-default values given to the algorithmic parameters | |
10 | |
11 Output: | |
12 a *.evt file containing RIFF chunks, each starting with id "EVT\0" and hosting a harmonic sinusoid. | |
13 | |
14 The "EVT\0" chunk body contains a header ("HDR\0") chunk followed by atom ("ATM\0") chunks. | |
15 | |
16 The "HDR\0" chunk body contains 4 __int32 values: channel index, number of partials, number of | |
17 measurement points (frames), and a tag. | |
18 | |
19 The "ATM\0" chunk body contains an atom structure (see hs.h). | |
20 | |
21 */ | |
22 | |
23 #include <QtCore/QCoreApplication> | |
24 #include "arrayalloc.h" | |
25 #include "hs.h" | |
26 #include "matrix.h" | |
27 #include "quickspec.h" | |
28 #include "procedures.h" | |
29 #include "vibrato.h" | |
30 #include <string.h> | |
31 #include <stdio.h> | |
32 #include <conio.h> | |
33 | |
34 //structure hosting wave header data | |
35 struct wavehdr | |
36 { | |
37 __int16 fmttag; | |
38 __int16 channels; | |
39 __int32 samplespersec; | |
40 __int32 bytespersec; | |
41 __int16 blockalign; | |
42 __int16 bitspersample; | |
43 }; | |
44 | |
45 //tells if a wave header contains consistent data | |
46 bool isvalidwavehdr(wavehdr* hdr) | |
47 { | |
48 if (hdr->fmttag!=1) return false; | |
49 if (hdr->bitspersample!=8 && hdr->bitspersample!=16) return false; | |
50 if (hdr->channels*hdr->bitspersample!=hdr->blockalign*8) return false; | |
51 if (hdr->samplespersec*hdr->blockalign!=hdr->bytespersec) return false; | |
52 return true; | |
53 } | |
54 | |
55 //structure hosting a waveform audio | |
56 struct waveaudio | |
57 { | |
58 union | |
59 { | |
60 wavehdr hdr; | |
61 struct | |
62 { | |
63 __int16 fmttag; | |
64 __int16 channels; | |
65 __int32 samplespersec; | |
66 __int32 bytespersec; | |
67 __int16 blockalign; | |
68 __int16 bitspersample; | |
69 }; | |
70 }; | |
71 int length; | |
72 union | |
73 { | |
74 unsigned char* data; | |
75 unsigned char* data8; | |
76 __int16* data16; | |
77 }; | |
78 waveaudio(){memset(this, 0, sizeof(waveaudio));} | |
79 ~waveaudio(){delete[] data;} | |
80 }; | |
81 | |
82 void freewaveaudio(waveaudio* wa) | |
83 { | |
84 delete[] wa->data; | |
85 memset(wa, 0, sizeof(waveaudio)); | |
86 } | |
87 | |
88 //error messages for reading waveform audio file | |
89 enum err_wavefile | |
90 { | |
91 err_success, | |
92 err_RIFF_hdr, | |
93 err_wave_hdr, | |
94 err_data_hdr, | |
95 err_data_chunk, | |
96 err_io | |
97 }; | |
98 | |
99 //reads wave header from file stream | |
100 int loadwavehdr(wavehdr* hdr, FILE* fp) | |
101 { | |
102 int rc, hdrlen; | |
103 char s[5]; | |
104 | |
105 rc=fread(s, 1, 4, fp); s[4]=0; | |
106 if (rc<4) return err_wave_hdr; | |
107 if (strcmp(s, "WAVE")) return err_wave_hdr; | |
108 | |
109 rc=fread(s, 1, 4, fp); s[4]=0; | |
110 if (rc<4) return err_wave_hdr; | |
111 if (strcmp(s, "fmt ")) return err_wave_hdr; | |
112 | |
113 rc=fread(&hdrlen, 1, 4, fp); | |
114 if (rc<4) return err_wave_hdr; | |
115 if (hdrlen<16) return err_wave_hdr; | |
116 | |
117 rc=fread(hdr, 1, sizeof(wavehdr), fp); | |
118 if (rc<sizeof(wavehdr)) return err_wave_hdr; | |
119 if (!isvalidwavehdr(hdr)) return err_wave_hdr; | |
120 | |
121 if (hdrlen>16) | |
122 { | |
123 rc=fseek(fp, hdrlen-16, SEEK_CUR); | |
124 if (rc!=0) return err_wave_hdr; | |
125 } | |
126 | |
127 return err_success; | |
128 } | |
129 | |
130 //reads waveform audio, including header, from a file stream | |
131 int loadwaveaudio(waveaudio* wa, FILE* fp) | |
132 { | |
133 int rc, mainlen; | |
134 char s[5]; | |
135 wavehdr hdr; | |
136 | |
137 rc=fread(s, 1, 4, fp); s[4]=0; | |
138 if (rc<4) return err_RIFF_hdr; | |
139 if (strcmp(s, "RIFF")) return err_RIFF_hdr; | |
140 | |
141 rc=fread(&mainlen, 1, 4, fp); | |
142 if (rc<4) return err_RIFF_hdr; | |
143 | |
144 rc=loadwavehdr(&hdr, fp); | |
145 if (rc!=err_success) return rc; | |
146 | |
147 rc=fread(s, 1, 4, fp); s[4]=0; | |
148 if (rc<4) return err_data_hdr; | |
149 if (strcmp(s, "data")) return err_data_hdr; | |
150 | |
151 rc=fread(&mainlen, 1, 4, fp); | |
152 if (rc<4) return err_data_hdr; | |
153 | |
154 if (mainlen<=0) return err_data_hdr; | |
155 | |
156 unsigned char* data=new unsigned char[mainlen]; | |
157 rc=fread(data, 1, mainlen, fp); | |
158 if (rc<mainlen) | |
159 { | |
160 delete[] data; | |
161 return err_data_chunk; | |
162 } | |
163 | |
164 wa->hdr=hdr; | |
165 wa->length=rc/hdr.blockalign; | |
166 delete[] wa->data; | |
167 wa->data=data; | |
168 | |
169 return err_success; | |
170 } | |
171 | |
172 //reads waveform audio, including header, from a file | |
173 int loadwavefile(waveaudio* wa, char* filename) | |
174 { | |
175 FILE* fp; | |
176 if ((fp=fopen(filename, "rb"))==NULL) return err_io; | |
177 int result=loadwaveaudio(wa, fp); | |
178 fclose(fp); | |
179 return result; | |
180 } | |
181 | |
182 //returns new file path with the specific extension replacing the original one | |
183 char* ChangeFileExt(char* FileName, char* Ext) | |
184 { | |
185 char* oldext=strrchr(FileName, '.'); | |
186 int namelen=strlen(FileName)-strlen(oldext); | |
187 char* dest=new char[namelen+strlen(Ext)+1]; | |
188 memcpy(dest, FileName, namelen); dest[namelen]=0; | |
189 strcat(dest, Ext); | |
190 return dest; | |
191 } | |
192 | |
193 //ACPower for __int16 data input | |
194 double ACPower(__int16* data, int Count) | |
195 { | |
196 if (Count<=0) return 0; | |
197 double power=0, avg=0, tmp; | |
198 for (int i=0; i<Count; i++) | |
199 { | |
200 tmp=*(data++); | |
201 power+=tmp*tmp; | |
202 avg+=tmp; | |
203 } | |
204 power=(power-avg*avg/Count)/Count; | |
205 return power; | |
206 }//ACPower | |
207 | |
208 //double QIE(double* y, double& x){double a=0.5*(y[1]+y[-1])-y[0], b=0.5*(y[1]-y[-1]); x=-0.5*b/a; return y[0]-0.25*b*b/a;} | |
209 | |
210 //correlation coefficient | |
211 double InnerC(int N, __int16* x, __int16* y) | |
212 { | |
213 double inner=0, inn1=0, inn2=0; | |
214 for (int n=0; n<N; n++) inner+=1.0*x[n]*y[n], inn1+=1.0*x[n]*x[n], inn2+=1.0*y[n]*y[n]; | |
215 double inn=sqrt(inn1*inn2); | |
216 return (inn<=0)?0:inner/inn; | |
217 } | |
218 | |
219 //monophonic pitch estimation by autocorrelation | |
220 double pitchautocor(__int16* data, int wid, double minf0bin, double maxf0bin, double& hsr) | |
221 { | |
222 int hwid=wid/2, minT=wid/maxf0bin, maxT=wid/minf0bin; | |
223 double ene=InnerC(wid, data, data); | |
224 if (ene<=0){hsr=0; return 0;} | |
225 double* autocor=new double[hwid]; | |
226 for (int i=1; i<maxT+2; i++) | |
227 { | |
228 autocor[i]=InnerC(wid-i, data, &data[i]); | |
229 } | |
230 | |
231 int prd=0, m=minT; | |
232 while(m<=maxT) | |
233 { | |
234 while (m<=maxT && (autocor[m]<0 || autocor[m]<autocor[m-1] || autocor[m]<autocor[m+1])) m++; | |
235 if (m<=maxT) | |
236 { | |
237 int mm=ceil(m*0.6667), mp=floor(m*1.3333), cont=0; | |
238 for (int im=mm; im<=mp; im++) if (m!=im && autocor[im]>=autocor[m]) {cont=1; break;} | |
239 if (cont==0) | |
240 { | |
241 if (prd==0 || autocor[m]>autocor[prd]*1.05) prd=m; | |
242 } | |
243 m++; | |
244 } | |
245 } | |
246 | |
247 double pitchbin=0; | |
248 if (prd>=minT && prd<=maxT) | |
249 { | |
250 double mshift; hsr=QIE(&autocor[prd], mshift); pitchbin=wid/(prd+mshift); | |
251 } | |
252 else{hsr=0;} | |
253 delete[] autocor; | |
254 return pitchbin; | |
255 } | |
256 | |
257 //main function | |
258 int main(int argc, char* argv[]) | |
259 { | |
260 //read audio file | |
261 if (argc<2){printf("Please specify input wave file."); getch(); return 0;} | |
262 printf("Loading wave file %s... ", argv[1]); | |
263 waveaudio* wa=new waveaudio; | |
264 int waverr=loadwavefile(wa, argv[1]); printf("[%d]\n", waverr); | |
265 if (waverr!=err_success){printf("Aborted: error loading wave file."); getch(); return 0;} | |
266 if (wa->bitspersample!=16){printf("Aborted: this program accepts 16-bit pcm only."); getch(); return 0;} | |
267 | |
268 int length=wa->length, sps=wa->samplespersec; | |
269 __int16* data16=wa->data16; | |
270 if (wa->channels>1) | |
271 { | |
272 printf("Extracting channel 0... "); | |
273 for (int i=1; i<length; i++) data16[i]=data16[i*wa->channels]; | |
274 printf("[0]\n"); | |
275 } | |
276 wa->data=0; | |
277 delete wa; | |
278 | |
279 //default settings | |
280 int maxhscount=100; | |
281 float wid_s=0.02, offstwidratio=0.5; | |
282 float dur_s=0.5; | |
283 float intv_s=0.05; //duration, in seconds, to scan for peaks | |
284 float minf0=55, maxf0=3520; | |
285 WindowType wintype=wtHann; | |
286 | |
287 NMSettings settings; memset(&settings, 0, sizeof(NMSettings)); | |
288 settings.hB=3; //spectral truncation half width | |
289 settings.maxp=50; //maximal number of partials | |
290 settings.maxB=0.001; //stiffness coefficient upper bound | |
291 settings.epf=1e-4; //frequency estimation error tolerance for LSE estimation | |
292 settings.epf0=1; //input frequency error bound for harmonic grouping | |
293 settings.delm=1.1; //frequency error bound for harmonic grouping | |
294 settings.delp=1.1; //pitch jump upper bound | |
295 | |
296 double mina=0.5; | |
297 | |
298 //user settings through commandline arguments | |
299 for (int i=2; i<argc; i++) | |
300 { | |
301 char* s1=strchr(argv[i], '='); | |
302 if (s1) | |
303 { | |
304 s1[0]=0; s1++; | |
305 if (!stricmp(argv[i], "hB")){settings.hB=atof(s1); continue;} | |
306 if (!stricmp(argv[i], "maxp")){settings.maxp=atoi(s1); continue;} | |
307 if (!stricmp(argv[i], "maxB")){settings.maxB=atof(s1); continue;} | |
308 if (!stricmp(argv[i], "epf")){settings.epf=atof(s1); continue;} | |
309 if (!stricmp(argv[i], "epf0")){settings.epf0=atof(s1); continue;} | |
310 if (!stricmp(argv[i], "delm")){settings.delm=atof(s1); continue;} | |
311 if (!stricmp(argv[i], "delp")){settings.delp=atof(s1); continue;} | |
312 if (!stricmp(argv[i], "minf0")){minf0=atof(s1); continue;} | |
313 if (!stricmp(argv[i], "maxf0")){maxf0=atof(s1); continue;} | |
314 if (!stricmp(argv[i], "wid_s")){wid_s=atof(s1); continue;} | |
315 if (!stricmp(argv[i], "offstwidratio")){offstwidratio=atof(s1); continue;} | |
316 if (!stricmp(argv[i], "dur_s")){dur_s=atof(s1); continue;} | |
317 if (!stricmp(argv[i], "intv_s")){intv_s=atof(s1); continue;} | |
318 if (!stricmp(argv[i], "maxhscount")){maxhscount=atoi(s1); continue;} | |
319 if (!stricmp(argv[i], "wintype")) | |
320 { | |
321 if (!stricmp(s1, "hann")){wintype=wtHann; continue;} | |
322 if (!stricmp(s1, "hamming")){wintype=wtHamming; continue;} | |
323 if (!stricmp(s1, "blackman")){wintype=wtBlackman; continue;} | |
324 if (!stricmp(s1, "rectangle")){wintype=wtRectangle; continue;} | |
325 if (!stricmp(s1, "hannsqr")){wintype=wtHannSqr; continue;} | |
326 } | |
327 } | |
328 } | |
329 | |
330 // | |
331 MList* List=new MList; List->Add(data16, 1); | |
332 | |
333 //analysis window size and hop size | |
334 int wid=1<<((int)floor(log2(wid_s*sps)+1)), offst=wid*offstwidratio; | |
335 | |
336 //set up the spectrogram | |
337 TQuickSpectrogram* Sp=new TQuickSpectrogram(0, 0, true, false, 0); | |
338 Sp->Data=data16; Sp->DataLength=length; Sp->BytesPerSample=2; | |
339 Sp->Wid=wid; Sp->Offst=offst; Sp->WinType=wintype; | |
340 Sp->Spec(-1); //this sets Sp->Capacity to the number of accessible frames and does nothing else | |
341 | |
342 //get number of frames | |
343 int Fr=Sp->Capacity; | |
344 | |
345 //the program searches every intv_fr frames over a duration of dur_fr frames for a spectral peak to start tracking. | |
346 int dur_fr=sps*dur_s/offst, intv_fr=sps*intv_s/offst; | |
347 if (dur_fr<10) dur_fr=10; | |
348 if (intv_fr<1) intv_fr=1; | |
349 | |
350 //minimal and maximal fundamental frequency, in bins | |
351 settings.minf0=minf0/sps*wid; | |
352 settings.maxf0=maxf0/sps*wid; | |
353 if (settings.minf0<settings.hB) settings.minf0=settings.hB; | |
354 | |
355 windowspec(wintype, wid, &settings.M, settings.c, &settings.iH2); | |
356 double *fps=new double[wid], *vps=&fps[wid/2]; List->Add(fps, 1); | |
357 cdouble *x=new cdouble[wid/2+1]; List->Add(x, 1); | |
358 | |
359 //output file | |
360 char* filename=ChangeFileExt(argv[1], ".evt"); List->Add(filename, 1); | |
361 TFileStream* File=new TFileStream(filename, fmWrite); | |
362 | |
363 double* frames=new double[Fr]; memset(frames, 0, sizeof(double)*Fr); List->Add(frames, 1); | |
364 double* framesa=new double[Fr]; memset(framesa, 0, sizeof(double)*Fr); List->Add(framesa, 1); | |
365 double* rsr=new double[Fr]; List->Add(rsr, 1); memset(rsr, 0, sizeof(double)*Fr); | |
366 double* f0s=new double[Fr]; List->Add(f0s, 1); memset(f0s, 0, sizeof(double)*Fr); | |
367 for (int fr=0; fr<Fr-1; fr++) | |
368 { | |
369 __int16* ldata16=&data16[offst*fr]; | |
370 frames[fr]=ACPower(ldata16, wid); | |
371 framesa[fr]=1; | |
372 // f0s[fr]=pitchautocor(ldata16, wid, settings.minf0, settings.maxf0, rsr[fr]); rsr[fr]=1-rsr[fr]; | |
373 } | |
374 frames[Fr-1]=ACPower(&data16[(Fr-1)*offst], length-(Fr-1)*offst); | |
375 framesa[Fr-1]=1; | |
376 | |
377 int start=0, writecount=0; | |
378 | |
379 while (writecount<maxhscount) | |
380 { | |
381 int fr_m=start+intv_fr; | |
382 | |
383 //search for the highest spectral peak to start tracking | |
384 double fp_m=0, vp_m=0; | |
385 for (int fr=start+intv_fr; fr<start+dur_fr && fr<Fr; fr+=intv_fr) | |
386 { | |
387 cmplx<QSPEC_FORMAT>* speci=Sp->Spec(fr); | |
388 for (int k=0; k<=wid/2; k++) x[k]=speci[k]; | |
389 int pc=QuickPeaks(fps, vps, wid, x, settings.M, settings.c, settings.iH2, mina, 0, wid/4); | |
390 | |
391 for(int p=0; p<pc; p++) | |
392 if (vps[p]>vp_m) | |
393 { | |
394 if (f0s[fr]==0 && rsr[fr]==0) f0s[fr]=pitchautocor(&data16[offst*fr], wid, settings.minf0, settings.maxf0, rsr[fr]); rsr[fr]=1-rsr[fr]; | |
395 if (f0s[fr]>0) | |
396 { | |
397 double dpind=fps[p]/f0s[fr], pind=floor(dpind+0.5); | |
398 if (fabs(dpind-pind)<0.1) vp_m=vps[p], fr_m=fr, fp_m=fps[p]; | |
399 } | |
400 } | |
401 } | |
402 | |
403 if (fp_m<=0){start=start+dur_fr; if (start>Fr) break; continue;} | |
404 | |
405 f0s[fr_m]=pitchautocor(&data16[offst*fr_m], wid, settings.minf0, settings.maxf0, rsr[fr_m]); rsr[fr_m]=1-rsr[fr_m]; | |
406 | |
407 int _t=fr_m*offst+wid/2; | |
408 double _f=fp_m/wid; | |
409 int hsM, hsFr, frst=start, fren=Fr; | |
410 atom** hsPartials=0; | |
411 | |
412 settings.pin0=floor(fp_m/f0s[fr_m]+0.5); | |
413 int tag=FindNote(_t, _f, hsM, hsFr, hsPartials, frst, fren, wid, offst, Sp, settings); | |
414 | |
415 int fr1=(hsPartials[0][0].t-wid/2)/offst; | |
416 double consi=1; | |
417 for (int hsfr=0; hsfr<hsFr; hsfr++) | |
418 { | |
419 double ene=0; | |
420 for (int m=0; m<hsM; m++) | |
421 { | |
422 double a=(hsPartials[m][hsfr].f>0)?hsPartials[m][hsfr].a:0; | |
423 | |
424 if (hsfr==0) ene+=a*a; | |
425 else | |
426 { | |
427 double b=(hsPartials[m][hsfr-1].f>0)?hsPartials[m][hsfr-1].a:0; | |
428 ene+=(a*a+b*b+a*b)/3; | |
429 } | |
430 | |
431 if (hsfr==hsFr-1) ene+=a*a; | |
432 else | |
433 { | |
434 double b=(hsPartials[m][hsfr+1].f>0)?hsPartials[m][hsfr+1].a:0; | |
435 ene+=(a*a+b*b+a*b)/3; | |
436 } | |
437 } | |
438 framesa[fr1+hsfr]=ene;//*2; | |
439 if (framesa[fr1+hsfr]<frames[fr1+hsfr]) consi*=frames[fr1+hsfr]/framesa[fr1+hsfr]; | |
440 else consi*=framesa[fr1+hsfr]/frames[fr1+hsfr]; | |
441 if (framesa[fr1+hsfr]>frames[fr1+hsfr]) framesa[fr1+hsfr]=0; | |
442 else framesa[fr1+hsfr]=1-framesa[fr1+hsfr]/frames[fr1+hsfr]; | |
443 } | |
444 consi=pow(consi, 1.0/hsFr); | |
445 | |
446 THS* HS=new THS; | |
447 HS->M=hsM; HS->Fr=hsFr; HS->Partials=hsPartials; | |
448 printf("%d (%.2fs), %d (%.2fs), inconsistency %f\n", start, start*offst*1.0/sps, hsFr, hsFr*offst*1.0/sps, consi-1); | |
449 if (consi<1.5) | |
450 { | |
451 HS->WriteToStream(File); | |
452 printf("Write note %d\n", writecount++); | |
453 } | |
454 delete HS; | |
455 | |
456 start=fr1+hsFr; | |
457 | |
458 } | |
459 delete File; delete Sp; | |
460 delete List; | |
461 printf("Completed\n"); getch(); return 0; | |
462 } |