comparison toolboxes/MIRtoolbox1.3.2/MIRToolbox/mironsets.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
function varargout = mironsets(x,varargin)
%   o = mironsets(x) shows a temporal curve where peaks relate to the
%       position of note onset times, and estimates those note onset
%       positions.
%   Optional arguments:
%       mironsets(...,f) selects the strategy for the computation of the
%           onset detection function.
%           f = 'Envelope': Envelope of the audio signal. (Default choice).
%               With two methods for envelope extraction:
%                 mironsets(...,'Spectro') (Default):
%                   mironsets(...,'SpectroFrame',fl,fh) specifies the
%                       frame length fl (in s.) and the hop factor fh (as
%                       a value between 0 and 1)
%                       Default values: fl = .1 s., fh = .1
%                   The frequency reassignment method can be specified:
%                       'Freq' (default), 'Mel', 'Bark' or 'Cents'
%                       (cf. mirspectrum).
%                 mironsets(...,'Filter'):
%                   mironsets(...,'Filterbank',nc) specifies a preliminary
%                       filterbank decomposition into nc channels. If
%                       nc = 0, no decomposition is performed.
%                       Default value: 40.
%                   mironsets(...,'FilterbankType',ft) specifies the type
%                       of filterbank (see mirfilterbank).
%                       Default value: 'Gammatone';
%                   Options associated to the mirenvelope function can be
%                       passed here as well (see help mirenvelope):
%                       'FilterType','Tau','PreDecim'
%               mironsets(...,'Sum','no') does not sum back the channels at
%                   the end of the computation. The resulting onset curve
%                   remains therefore decomposed into several channels.
%               Options associated to the mirenvelope function can be
%                   passed here as well (see help mirenvelope):
%                   'HalfwaveCenter','Diff','HalfwaveDiff','Center',
%                   'Smooth','Sampling','Log','Power','Lambda',
%                   'PostDecim','UpSample'
%           f = 'SpectralFlux': Spectral flux of the audio signal.
%               Options associated to the mirflux function can be
%                   passed here as well (see help mirflux):
%                   'Inc' (toggled on by default here),
%                   'Halfwave' (toggled on by default here),
%                   'Complex' (toggled off by default),
%                   'Median' (toggled on by default here)
%           f = 'Pitch': computes a frame-decomposed autocorrelation
%               function, of same default characteristics as those
%               returned by mirpitch, with however a range of frequencies
%               set by the following options:
%                   'Min' (set by default to 30 Hz),
%                   'Max' (set by default to 1000 Hz),
%               and subsequently computes the novelty curve of the
%               resulting similarity matrix.
%               Option associated to the mirnovelty function can be
%                   passed here as well (see help mirnovelty):
%                   'KernelSize' (set by default to 32 samples)
%       mironsets(...,'Detect',d) toggles on or off the onset detection,
%           which is based on the onset detection function.
%           (By default toggled on.) d can also be 'Peaks' (default) or
%           'Valleys'.
%           Option associated to the mirpeaks function can be specified as
%               well:
%               'Contrast' with default value c = .01
%               'Threshold' with default value t = 0
%       mironsets(...,'Attack') (or 'Attacks') detects attack phases.
%       mironsets(...,'Release') (or 'Releases') detects release phases.
%           The method can be given as 'Olivier' (default) or 'Valeri'.
%       mironsets(...,'Gauss',o) estimate the attack and/or release
%           points using a gaussian envelope smoothing of order o of the
%           onset curve.
%       mironsets(...,'Frame',...) decomposes into frames, with default
%           frame length 3 seconds and hop factor .1
%   Preselected onset detection models:
%       mironsets(...,'Scheirer') corresponds to (Scheirer, 1998):
%           mironsets(...,'FilterBankType','Scheirer',...
%                         'FilterType','HalfHann','Sampling',200,...
%                         'HalfWaveDiff','Sum',0,'Detect',0)
%       mironsets(...,'Klapuri99') corresponds to most of (Klapuri, 1999).

% Each option specification is built directly with struct(); cell-array
% values are wrapped in an extra pair of braces so that struct() stores the
% cell itself instead of creating a struct array. The order in which the
% fields of 'option' are created is kept unchanged.

%% options related to 'Envelope':

option.env = struct('key','Envelope','type','Boolean','default',NaN);

option.envmethod = struct('key','Method','type','Boolean'); % optional

option.envmeth = struct('type','String',...
                        'choice',{{'Filter','Spectro'}},...
                        'default','Spectro');

%% options related to 'Filter':

option.filter = struct('key','FilterType','type','String',...
                       'choice',{{'IIR','HalfHann'}},...
                       'default','IIR');

option.tau = struct('key','Tau','type','Integer','default',.02);

option.fb = struct('key',{{'Filterbank','NbChannels'}},...
                   'type','Integer','default',40);

% No 'choice' restriction is declared for the filterbank type; the list
% previously considered was {'Gammatone','2Channels','Scheirer','Klapuri'}.
option.filtertype = struct('key','FilterbankType','type','String',...
                           'default','Gammatone');

option.decim = struct('key',{{'Decim','PreDecim'}},...
                      'type','Integer','default',0);

%% options related to 'Spectro':

option.band = struct('type','String',...
                     'choice',{{'Freq','Mel','Bark','Cents'}},...
                     'default','Freq');

option.specframe = struct('key','SpectroFrame','type','Integer',...
                          'number',2,'default',[.1 .1]);

option.sum = struct('key','Sum','type','Boolean','default',1);

option.chwr = struct('key','HalfwaveCenter','type','Boolean',...
                     'default',0,'when','After');

option.mu = struct('key','Mu','type','Boolean',...
                   'default',0,'when','After');

option.log = struct('key','Log','type','Boolean',...
                    'default',0,'when','After');

option.power = struct('key','Power','type','Boolean',...
                      'default',0,'when','After');

% 'DiffEnvelope' is obsolete, replaced by 'Diff'
option.diffenv = struct('key','DiffEnvelope','type','Boolean','default',0);

option.diff = struct('key','Diff','type','Integer',...
                     'default',0,'keydefault',1,'when','After');

option.diffhwr = struct('key','HalfwaveDiff','type','Integer',...
                        'default',0,'keydefault',1,'when','After');

option.lambda = struct('key','Lambda','type','Integer',...
                       'default',1,'when','After');

option.c = struct('key','Center','type','Boolean',...
                  'default',0,'when','After');

option.aver = struct('key','Smooth','type','Integer',...
                     'default',0,'keydefault',30,'when','After');

% The default post-decimation depends on whether the input is already a
% mirenvelope object.
if isamir(x,'mirenvelope')
    dsdefault = 1;
else
    dsdefault = NaN;
end
option.ds = struct('key',{{'Down','PostDecim'}},'type','Integer',...
                   'default',dsdefault,'when','After',...
                   'chunkcombine','During');

option.sampling = struct('key','Sampling','type','Integer',...
                         'default',0,'when','After');

option.up = struct('key',{{'UpSample'}},'type','Integer',...
                   'default',0,'keydefault',2);

%% options related to 'SpectralFlux'

option.flux = struct('key','SpectralFlux','type','Boolean','default',0);

option.complex = struct('key','Complex','type','Boolean',...
                        'when','Both','default',0);

option.inc = struct('key','Inc','type','Boolean','default',1);

option.median = struct('key','Median','type','Integer','number',2,...
                       'default',[.2 1.3],'when','After');

option.hw = struct('key','Halfwave','type','Boolean',...
                   'default',1,'when','After');

%% options related to 'Pitch':

option.pitch = struct('key','Pitch','type','Boolean','default',0);

option.min = struct('key','Min','type','Integer','default',30);

option.max = struct('key','Max','type','Integer','default',1000);

option.kernelsize = struct('key','KernelSize','type','Integer',...
                           'default',32);

%% options related to event detection

option.detect = struct('key','Detect','type','String',...
                       'choice',{{'Peaks','Valleys',0,'no','off'}},...
                       'default','Peaks','keydefault','Peaks',...
                       'when','After');

option.cthr = struct('key','Contrast','type','Integer',...
                     'default',NaN,'when','After');

option.thr = struct('key','Threshold','type','Integer',...
                    'default',0,'when','After');

option.attack = struct('key',{{'Attack','Attacks'}},'type','Boolean',...
                       'default',0,'when','After');

option.release = struct('key',{{'Release','Releases'}},'type','String',...
                        'choice',{{'Olivier','Valeri',0,'no','off'}},...
                        'default',0,'keydefault','Olivier',...
                        'when','After');

% NOTE(review): 'Gauss' is parsed here, but the code visible in this file
% does not read the resulting value afterwards.
option.gauss = struct('key','Gauss','type','Integer',...
                      'default',0,'when','After');

%% preselection

option.presel = struct('choice',{{'Scheirer','Klapuri99'}},...
                       'type','String','default',0);

%% 'Frame' option

option.frame = struct('key','Frame','type','Integer','when','Both',...
                      'number',2,'default',[0 0],'keydefault',[3 .1]);

%% dispatch

specif.option = option;

specif.eachchunk = 'Normal';
specif.combinechunk = 'Concat';
specif.extensive = 1;

specif.title = 'Onset curve'; %used for miroptions

varargout = mirfunction(@mironsets,x,varargin,nargout,specif,@init,@main);
340
341
342 %% INIT
343
function [y type] = init(x,option)
% Prepares the object on which the onset curve is computed, and reports its
% type. Depending on the input and on the selected strategy, this is an
% envelope (mirenvelope), a spectral flux (mirflux) or a novelty curve
% (mirnovelty); scalar or envelope inputs are passed through unchanged.

if iscell(x)
    x = x{1};
end

% Preselected models: both presets use a half-Hanning filter on a
% filterbank decomposition and differ only in the filterbank type (and, for
% 'Klapuri99', in the pre-decimation rate).
if ischar(option.presel)
    scheirer = strcmpi(option.presel,'Scheirer');
    klapuri = not(scheirer) && strcmpi(option.presel,'Klapuri99');
    if scheirer || klapuri
        option.filter = 'HalfHann';
        option.envmeth = 'Filter';
        if scheirer
            option.filtertype = 'Scheirer';
        else
            option.filtertype = 'Klapuri';
            option.decim = 180;
        end
    end
end

% The obsolete 'DiffEnvelope' option implies the envelope strategy.
if option.diffenv
    option.env = 1;
end
% When 'Envelope' was not explicitly set, it is the strategy by default
% unless 'SpectralFlux' or 'Pitch' was requested.
if isnan(option.env)
    if not(option.flux) && not(option.pitch)
        option.env = 1;
    else
        option.env = 0;
    end
end

% Audio input: the curve is computed from scratch.
if isamir(x,'miraudio')
    if option.env
        src = x;
        if strcmpi(option.envmeth,'Filter') && option.fb>1
            src = mirfilterbank(x,option.filtertype,...
                                'NbChannels',option.fb);
        end
        y = mirenvelope(src,option.envmeth,option.band,...
                    'Frame',option.specframe(1),option.specframe(2),...
                    'FilterType',option.filter,...
                    'Tau',option.tau,'UpSample',option.up,...
                    'PreDecim',option.decim,'PostDecim',0);
        type = 'mirenvelope';
    elseif option.flux
        x = mirframenow(x,option);
        y = mirflux(x,'Inc',option.inc,'Complex',option.complex);
        type = 'mirscalar';
    elseif option.pitch
        [pitchdata ac] = mirpitch(x,'Frame',...
                                  'Min',option.min,'Max',option.max);
        y = mirnovelty(ac,'KernelSize',option.kernelsize);
        type = 'mirscalar';
    end
    return
end

% Similarity matrix, or any non-scalar input when 'Pitch' is requested:
% novelty curve.
if (option.pitch && not(isamir(x,'mirscalar'))) ...
        || isamir(x,'mirsimatrix')
    y = mirnovelty(x,'KernelSize',option.kernelsize);
    type = 'mirscalar';
    return
end

% Scalar or envelope input: used as is.
if isamir(x,'mirscalar') || isamir(x,'mirenvelope')
    y = x;
    type = mirtype(x);
    return
end

% Any other input: spectral flux of the frame-decomposed input.
x = mirframenow(x,option);
y = mirflux(x,'Inc',option.inc,'Complex',option.complex);
type = 'mirscalar';
404
405
406 %% MAIN
407
function o = main(o,option,postoption)
% Post-processes the onset detection curve o and, if requested, detects
% onsets (peaks or valleys) and the related attack and release positions.
%   o          : curve returned by init (possibly wrapped in a cell).
%   option     : options used during the initial computation (can be
%                empty).
%   postoption : options applied after the initial computation.
% Returns the processed curve, whose title is prefixed by 'Onset curve'.

% Preselected models override part of the options.
if not(isempty(option)) && ischar(option.presel)
    if strcmpi(option.presel,'Scheirer')
        postoption.sampling = 200;
        postoption.diffhwr = 1;
        option.sum = 0;
        postoption.detect = 0;
    elseif strcmpi(option.presel,'Klapuri99')
        postoption.mu = 1;
        postoption.diffhwr = 1;
        option.sum = 0;
        postoption.ds = 0;
        o2 = o;     % unprocessed copy, used further below
    end
end
if iscell(o)
    o = o{1};
end
if not(isempty(option)) && option.diffenv
    % Obsolete 'DiffEnvelope' option, replaced by 'Diff'
    postoption.diff = 1;
end

% Resampling / down-sampling of the envelope.
if isa(o,'mirenvelope')
    if isfield(postoption,'sampling') && postoption.sampling
        o = mirenvelope(o,'Sampling',postoption.sampling);
    elseif isfield(postoption,'ds')
        if isnan(postoption.ds)
            % No down-sampling rate specified: none is applied if the
            % envelope was pre-decimated or computed with 'Spectro',
            % otherwise a rate of 16 is used.
            if option.decim || strcmpi(option.envmeth,'Spectro')
                postoption.ds = 0;
            else
                postoption.ds = 16;
            end
        end
        if postoption.ds
            o = mirenvelope(o,'Down',postoption.ds);
        end
    end
end

% Post-processing specific to the type of curve.
if isfield(postoption,'cthr')
    if isa(o,'mirenvelope')
        if postoption.mu
            o = mirenvelope(o,'Mu');
        end
        if postoption.log
            o = mirenvelope(o,'Log');
        end
        if postoption.power
            o = mirenvelope(o,'Power');
        end
        if postoption.diff
            o = mirenvelope(o,'Diff',postoption.diff,...
                              'Lambda',postoption.lambda,...
                              'Complex',postoption.complex);
        end
        if postoption.diffhwr
            o = mirenvelope(o,'HalfwaveDiff',postoption.diffhwr,...
                              'Lambda',postoption.lambda,...
                              'Complex',postoption.complex);
        end
        if postoption.aver
            o = mirenvelope(o,'Smooth',postoption.aver);
        end
        if postoption.chwr
            o = mirenvelope(o,'HalfwaveCenter');
        end
        if postoption.c
            o = mirenvelope(o,'Center');
        end
    elseif isa(o,'mirscalar') && strcmp(get(o,'Title'),'Spectral flux')
        if postoption.median
            o = mirflux(o,'Median',postoption.median(1),...
                                   postoption.median(2),...
                          'Halfwave',postoption.hw);
        else
            o = mirflux(o,'Halfwave',postoption.hw);
        end
    end
end

% Summation of the channels.
if isfield(option,'sum') && option.sum
    o = mirsum(o,'Adjacent',option.sum);
end

if isfield(option,'presel') && ...
        ischar(option.presel) && strcmpi(option.presel,'Klapuri99')
    % o, already computed, corresponds to mirenvelope(o,'Mu','HalfwaveDiff');
    % o is the relative distance function W in (Klapuri, 99);
    o2 = mirenvelope(o2,'HalfwaveDiff');
    % o2 is the absolute distance function D in (Klapuri, 99);
    p = mirpeaks(o,'Contrast',.2,'Chrono');
    p2 = mirpeaks(o2,'ScanForward',p,'Chrono');
    o = combinepeaks(p,p2,.05);
    clear o2 p p2
    filtfreq = 44*[2.^ ([ 0:2, ( 9+(0:17) )/3 ]) ];% Center frequencies of bands
    o = mirsum(o,'Weights',(filtfreq(1:end-1)+filtfreq(2:end))/2);
    o = mirenvelope(o,'Smooth',12);
end

if not(isa(o,'mirscalar'))
    o = mirframenow(o,postoption);
end

% Onset detection. This part is only run when 'Detect' is a string, so the
% contrast threshold simply falls back to .01 when it is unset (NaN) or
% null.
if isfield(postoption,'detect') && ischar(postoption.detect)
    if isnan(postoption.cthr) || not(postoption.cthr)
        postoption.cthr = .01;
    end
    if strcmpi(postoption.detect,'Peaks')
        o = mirpeaks(o,'Total',Inf,'SelectFirst',...
            'Threshold',postoption.thr,'Contrast',postoption.cthr,...
            'Order','Abscissa','NoBegin','NoEnd');
    elseif strcmpi(postoption.detect,'Valleys')
        o = mirpeaks(o,'Total',Inf,'SelectFirst',...
            'Threshold',postoption.thr,'Contrast',postoption.cthr,...
            'Valleys','Order','Abscissa','NoBegin','NoEnd');
    end
    nop = cell(size(get(o,'Data')));
    o = set(o,'AttackPos',nop,'ReleasePos',nop);
end

% Attack and release phases.
% The release option is either 0 or a string, which cannot be used as an
% operand of && or ||: both requests are therefore turned into logical
% flags first. 'no' and 'off' are treated as a disabled release detection.
wantattack = isfield(postoption,'attack') && ...
                not(isequal(postoption.attack,0));
wantrelease = isfield(postoption,'release') && ...
                ischar(postoption.release) && ...
                not(any(strcmpi(postoption.release,{'No','Off'})));
if wantattack || wantrelease
    p = get(o,'PeakPos');
    pm = get(o,'PeakMode');
    d = get(o,'Data');
    % Without attack detection, no attack positions are available:
    % endrelease accepts an empty value for them.
    % NOTE(review): assumes mircompute forwards an empty non-cell argument
    % unchanged - confirm against mircompute.
    st = [];
    if wantattack
        [st p pm] = mircompute(@startattack,d,p,pm);
    end
    if wantrelease
        [rl p pm st] = mircompute(@endrelease,d,p,pm,st,postoption.release);
        o = set(o,'ReleasePos',rl);
    end
    if wantattack
        o = set(o,'AttackPos',st,'PeakPos',p,'PeakMode',pm);
    else
        % Attack positions were not computed: only the (possibly pruned)
        % peaks are updated.
        o = set(o,'PeakPos',p,'PeakMode',pm);
    end
end

% The title is prefixed by 'Onset curve', unless it already starts with it
% (strncmp also covers a title exactly equal to 'Onset curve').
ttl = get(o,'Title');
if not(strncmp(ttl,'Onset curve',11))
    o = set(o,'Title',['Onset curve (',ttl,')']);
end
545
546
function st = startattack(d,z,pm)
% Estimates, for each peak, the position where its attack phase starts.
%   d  : onset curve (differentiated along its first dimension).
%   z  : cell whose first element lists the peak positions.
%   pm : cell whose first element lists the peak modes.
% Returns {{starts} {peaks} {modes}}: the attack start positions and the
% peak positions and modes, from which peaks sharing their attack start
% with a neighbouring peak have been pruned.

peaks = sort(z{1});
modes = pm{1};
starts = zeros(size(peaks));
d3 = diff(d,3,1);   % d''' (third-order difference of d)

k = 1;
while k <= length(peaks)
    % The start of the attack is identified to the previous peak in d''.
    anchor = peaks(k)-1;
    fall = find(d3(anchor-1:-1:1) < 0, 1);  % previous decreasing d''
    if isempty(fall)
        % No such point: the attack starts at the very beginning.
        starts(k) = 1;
        k = k+1;
        continue
    end
    rise = find(d3(anchor-fall-1:-1:1) > 0, 1);  % previous increasing d''
    if isempty(rise)
        starts(k) = 1;
    else
        starts(k) = anchor-fall-rise+2;
    end
    if k > 1 && starts(k-1) == starts(k)
        % Two successive peaks share the same attack start: only the
        % highest of the two peaks is kept.
        if d(peaks(k)) > d(peaks(k-1))
            drop = k-1;
        else
            drop = k;
        end
        starts(drop) = [];
        peaks(drop) = [];
        modes(drop) = [];
        k = k-1;
    end
    k = k+1;
end

st = {{starts} {peaks} {modes}};
582
583
function rt = endrelease(d,z,pm,st,meth)
% Estimates, for each peak, the position where its release phase ends.
%   d    : onset curve (differentiated along its first dimension).
%   z    : cell whose first element lists the peak positions.
%   pm   : cell whose first element lists the peak modes.
%   st   : cell whose first element lists the attack start positions, or
%          an empty value if attacks were not computed.
%   meth : 'Olivier' or 'Valeri'.
% Returns {{ends} {peaks} {modes} {starts}}: the release end positions and
% the peak positions, modes and attack starts, from which peaks sharing
% their release end with a neighbouring peak have been pruned. Release
% positions never exceed length(d).

z = sort(z{1});
pm = pm{1};
if not(isempty(st))
    st = st{1};
end
rt = zeros(size(z));
last = length(d);
dd = diff(d,1,1);       % d'
ddd = diff(dd,1,1);     % d''
dddd = diff(ddd,1,1);   % d'''
% Thresholds that do not depend on the peak under consideration
lowdd = min(dd)/100;
lowddd = min(ddd)/100;
highddd = max(ddd)/100;
i = 1;
while i<=length(z)
    if strcmpi(meth,'Olivier')
        % The end of the release is identified to the next (sufficiently
        % positive) peak in d''.
        l = find(ddd((z(i)-1):end)<lowddd,1);
            % next d'' sufficiently negative
        if isempty(l)
            rt(i) = last;
        else
            p = find(ddd((z(i)-1)+(l-1)+1:end)>highddd,1); % next increasing d''
            if isempty(p)
                rt(i) = last;
            else
                n = find(dddd((z(i)-1)+(l-1)+p+1:end)<0,1); % next decreasing d''
                if isempty(n)
                    rt(i) = last;
                else
                    rt(i) = ((z(i)-1)+(l-1)+p+n)+1;
                end
            end
        end
    elseif strcmpi(meth,'Valeri')
        % First point after the peak where d' rises above min(d')/100.
        p = find(dd((z(i)-1)+1:end)>lowdd,1);
        if isempty(p)
            rt(i) = last;
        elseif p<=3
            % A point that close to the peak cannot be considered as the
            % end of the release. The original author's assumption is that
            % the whole decay-sustain-release section cannot be shorter
            % than 30 ms (for a sampling rate of 100 Hz), and that no
            % successive note can be nearer than 30 ms.
            % The position is kept within the curve when the peak is less
            % than 3 samples away from its end.
            rt(i) = min(z(i)+3,last);
        else
            rt(i) = (z(i)-1)+(p-1);
        end
    end
    if i>1 && rt(i-1)==rt(i)
        % Two successive peaks share the same release end: only the
        % highest of the two peaks is kept.
        if d(z(i))>d(z(i-1))
            del = i-1;
        else
            del = i;
        end
        rt(del) = [];
        z(del) = [];
        pm(del) = [];
        if not(isempty(st))
            st(del) = [];
        end
        i = i-1;
    end
    i = i+1;
end
rt = {{rt} {z} {pm} {st}};