diff toolboxes/MIRtoolbox1.3.2/MIRToolbox/mironsets.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/MIRtoolbox1.3.2/MIRToolbox/mironsets.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,643 @@
+function varargout = mironsets(x,varargin)
+%   o = mironsets(x) returns a temporal curve whose peaks relate to the
+%       positions of note onsets, and estimates those onset positions.
+%   Optional arguments:
+%       mironsets(...,f) selects the strategy used to compute the onset
+%           detection function.
+%           f = 'Envelope': envelope of the audio signal (default choice),
+%           with two methods for envelope extraction:
+%               mironsets(...,'Spectro') (default):
+%                   mironsets(...,'SpectroFrame',fl,fh) specifies the frame
+%                       length fl (in s.) and the hop factor fh (as a value
+%                       between 0 and 1).
+%                       Default values: fl = .1 s., fh = .1
+%                   The frequency reassignment method can be specified:
+%                       'Freq' (default), 'Mel', 'Bark' or 'Cents'
+%                       (cf. mirspectrum).
+%               mironsets(...,'Filter'):
+%                   mironsets(...,'Filterbank',nc) specifies a preliminary
+%                       filterbank decomposition into nc channels. If
+%                       nc = 0, no decomposition is performed.
+%                       Default value: 40.
+%                   mironsets(...,'FilterbankType',ft) specifies the type
+%                       of filterbank (see mirfilterbank).
+%                       Default value: 'Gammatone'.
+%                   Options of the mirenvelope function can be passed here
+%                       as well (see help mirenvelope):
+%                       'FilterType', 'Tau', 'PreDecim'
+%               mironsets(...,'Sum','no') does not sum back the channels at
+%                   the end of the computation. The resulting onset curve
+%                   therefore remains decomposed into several channels.
+%               Options of the mirenvelope function can be passed here as
+%                   well (see help mirenvelope):
+%                   'HalfwaveCenter', 'Diff', 'HalfwaveDiff', 'Center',
+%                   'Smooth', 'Sampling', 'Log', 'Power', 'Mu', 'Lambda',
+%                   'PostDecim', 'UpSample'
+%           f = 'SpectralFlux': spectral flux of the audio signal.
+%               Options of the mirflux function can be passed here as well
+%               (see help mirflux):
+%                   'Inc' (toggled on by default here),
+%                   'Halfwave' (toggled on by default here),
+%                   'Complex' (toggled off by default),
+%                   'Median' (toggled on by default here)
+%           f = 'Pitch': computes a frame-decomposed autocorrelation
+%                function, with the same default characteristics as the
+%                one returned by mirpitch, but with a range of frequencies
+%                set by the following options:
+%                   'Min' (set by default to 30 Hz),
+%                   'Max' (set by default to 1000 Hz),
+%                and subsequently computes the novelty curve of the
+%                resulting similarity matrix.
+%               An option of the mirnovelty function can be passed here as
+%               well (see help mirnovelty):
+%                   'KernelSize' (set by default to 32 samples)
+%       mironsets(...,'Detect',d) toggles the onset detection, which is
+%           based on the onset detection function, on or off.
+%           d = 'Peaks' (default) or 'Valleys' selects what is picked;
+%           d = 0, 'no' or 'off' are the other accepted choices.
+%           Options of the mirpeaks function can be specified as well:
+%               'Contrast' with default value c = .01
+%               'Threshold' with default value t = 0
+%       mironsets(...,'Attack') (or 'Attacks') detects attack phases.
+%       mironsets(...,'Release') (or 'Releases') detects release phases.
+%           The accepted methods are 'Olivier' (used when the key is given
+%           without value) and 'Valeri'.
+%           mironsets(...,'Gauss',o) estimates the attack and/or release
+%               points using a gaussian envelope smoothing of order o of
+%               the onset curve.
+%       mironsets(...,'Frame',...) decomposes into frames, with default
+%           frame length 3 seconds and hop factor .1
+%   Preselected onset detection models:
+%       mironsets(...,'Scheirer') corresponds to (Scheirer, 1998):
+%           mironsets(...,'FilterBankType','Scheirer',...
+%                         'FilterType','HalfHann','Sampling',200,...
+%                         'HalfWaveDiff','Sum',0,'Detect',0)
+%       mironsets(...,'Klapuri99') corresponds to most of (Klapuri, 1999).
+        
+% Each option record is built in one struct(...) call. Cell-valued fields
+% ('key' aliases, 'choice' lists) are wrapped in an extra pair of braces so
+% that struct() stores the cell itself instead of creating a struct array.
+
+%% options related to 'Envelope':
+
+    option.env = struct('key','Envelope','type','Boolean','default',NaN);
+
+    % 'Method' is an optional keyword.
+    option.envmethod = struct('key','Method','type','Boolean');
+
+    option.envmeth = struct('type','String',...
+                            'choice',{{'Filter','Spectro'}},...
+                            'default','Spectro');
+
+%%      options related to 'Filter':
+
+        option.filter = struct('key','FilterType','type','String',...
+                               'choice',{{'IIR','HalfHann'}},...
+                               'default','IIR');
+
+        option.tau = struct('key','Tau','type','Integer','default',.02);
+
+        option.fb = struct('key',{{'Filterbank','NbChannels'}},...
+                           'type','Integer','default',40);
+
+        % No 'choice' list is declared for the filterbank type; the
+        % original author left this one commented out:
+        %   {'Gammatone','2Channels','Scheirer','Klapuri'}
+        option.filtertype = struct('key','FilterbankType',...
+                                   'type','String',...
+                                   'default','Gammatone');
+
+        option.decim = struct('key',{{'Decim','PreDecim'}},...
+                              'type','Integer','default',0);
+
+%%      options related to 'Spectro':
+
+        option.band = struct('type','String',...
+                             'choice',{{'Freq','Mel','Bark','Cents'}},...
+                             'default','Freq');
+
+        option.specframe = struct('key','SpectroFrame','type','Integer',...
+                                  'number',2,'default',[.1 .1]);
+
+    option.sum = struct('key','Sum','type','Boolean','default',1);
+
+    option.chwr = struct('key','HalfwaveCenter','type','Boolean',...
+                         'default',0,'when','After');
+
+    option.mu = struct('key','Mu','type','Boolean',...
+                       'default',0,'when','After');
+
+    option.log = struct('key','Log','type','Boolean',...
+                        'default',0,'when','After');
+
+    option.power = struct('key','Power','type','Boolean',...
+                          'default',0,'when','After');
+
+    % 'DiffEnvelope' is obsolete, replaced by 'Diff'.
+    option.diffenv = struct('key','DiffEnvelope','type','Boolean',...
+                            'default',0);
+
+    option.diff = struct('key','Diff','type','Integer',...
+                         'default',0,'keydefault',1,'when','After');
+
+    option.diffhwr = struct('key','HalfwaveDiff','type','Integer',...
+                            'default',0,'keydefault',1,'when','After');
+
+    option.lambda = struct('key','Lambda','type','Integer',...
+                           'default',1,'when','After');
+
+    option.c = struct('key','Center','type','Boolean',...
+                      'default',0,'when','After');
+
+    option.aver = struct('key','Smooth','type','Integer',...
+                         'default',0,'keydefault',30,'when','After');
+
+    % The default post-decimation depends on the input: 1 when x is already
+    % a mirenvelope, NaN otherwise (NaN is resolved later, in main).
+    if isamir(x,'mirenvelope')
+        dsdefault = 1;
+    else
+        dsdefault = NaN;
+    end
+    option.ds = struct('key',{{'Down','PostDecim'}},'type','Integer',...
+                       'default',dsdefault,'when','After',...
+                       'chunkcombine','During');
+
+    option.sampling = struct('key','Sampling','type','Integer',...
+                             'default',0,'when','After');
+
+    option.up = struct('key',{{'UpSample'}},'type','Integer',...
+                       'default',0,'keydefault',2);
+
+%% options related to 'SpectralFlux'
+
+    option.flux = struct('key','SpectralFlux','type','Boolean',...
+                         'default',0);
+
+    option.complex = struct('key','Complex','type','Boolean',...
+                            'when','Both','default',0);
+
+    option.inc = struct('key','Inc','type','Boolean','default',1);
+
+    option.median = struct('key','Median','type','Integer',...
+                           'number',2,'default',[.2 1.3],'when','After');
+
+    option.hw = struct('key','Halfwave','type','Boolean',...
+                       'default',1,'when','After');
+
+%% options related to 'Pitch':
+
+    option.pitch = struct('key','Pitch','type','Boolean','default',0);
+
+    option.min = struct('key','Min','type','Integer','default',30);
+
+    option.max = struct('key','Max','type','Integer','default',1000);
+
+    option.kernelsize = struct('key','KernelSize','type','Integer',...
+                               'default',32);
+
+%% options related to event detection
+
+    option.detect = struct('key','Detect','type','String',...
+                           'choice',{{'Peaks','Valleys',0,'no','off'}},...
+                           'default','Peaks','keydefault','Peaks',...
+                           'when','After');
+
+    % NaN means "not specified"; main replaces it by .01 when detecting.
+    option.cthr = struct('key','Contrast','type','Integer',...
+                         'default',NaN,'when','After');
+
+    option.thr = struct('key','Threshold','type','Integer',...
+                        'default',0,'when','After');
+
+    option.attack = struct('key',{{'Attack','Attacks'}},...
+                           'type','Boolean','default',0,'when','After');
+
+    option.release = struct('key',{{'Release','Releases'}},...
+                            'type','String',...
+                            'choice',{{'Olivier','Valeri',0,'no','off'}},...
+                            'default',0,'keydefault','Olivier',...
+                            'when','After');
+
+    option.gauss = struct('key','Gauss','type','Integer',...
+                          'default',0,'when','After');
+
+%% preselection
+
+    option.presel = struct('choice',{{'Scheirer','Klapuri99'}},...
+                           'type','String','default',0);
+
+%% 'Frame' option
+
+    option.frame = struct('key','Frame','type','Integer','when','Both',...
+                          'number',2,'default',[0 0],...
+                          'keydefault',[3 .1]);
+
+%% dispatch
+
+% The title is used for miroptions.
+specif = struct('option',option,...
+                'eachchunk','Normal',...
+                'combinechunk','Concat',...
+                'extensive',1,...
+                'title','Onset curve');
+
+varargout = mirfunction(@mironsets,x,varargin,nargout,specif,@init,@main);
+
+
+%% INIT
+
+function [y type] = init(x,option)
+% INIT builds the raw onset detection function from the input x.
+%   x      : input object (or a cell whose first element is that object).
+%   option : parsed options of mironsets.
+%   y      : envelope (mirenvelope), spectral flux (mirflux) or novelty
+%            curve (mirnovelty), or x itself when x is already a mirscalar
+%            or a mirenvelope.
+%   type   : class name announced for y.
+if iscell(x)
+    x = x{1};
+end
+
+% Preselected models force the filterbank and envelope settings.
+preset = '';
+if ischar(option.presel)
+    preset = option.presel;
+end
+if strcmpi(preset,'Scheirer')
+    option.filtertype = 'Scheirer';
+    option.filter = 'HalfHann';
+    option.envmeth = 'Filter';
+elseif strcmpi(preset,'Klapuri99')
+    option.filtertype = 'Klapuri';
+    option.filter = 'HalfHann';
+    option.envmeth = 'Filter';
+    option.decim = 180;
+end
+
+% The obsolete 'DiffEnvelope' keyword implies the envelope strategy.
+if option.diffenv
+    option.env = 1;
+end
+% When 'Envelope' was not specified (NaN), it is the default strategy
+% unless 'SpectralFlux' or 'Pitch' was requested.
+if isnan(option.env)
+    option.env = 1;
+    if option.flux || option.pitch
+        option.env = 0;
+    end
+end
+
+% Audio input: compute the detection function with the chosen strategy.
+if isamir(x,'miraudio')
+    if option.env
+        % Optional filterbank decomposition before envelope extraction.
+        src = x;
+        if strcmpi(option.envmeth,'Filter') && option.fb>1
+            src = mirfilterbank(x,option.filtertype,...
+                                'NbChannels',option.fb);
+        end
+        y = mirenvelope(src,option.envmeth,option.band,...
+                          'Frame',option.specframe(1),option.specframe(2),...
+                          'FilterType',option.filter,...
+                          'Tau',option.tau,'UpSample',option.up,...
+                          'PreDecim',option.decim,'PostDecim',0);
+        type = 'mirenvelope';
+    elseif option.flux
+        x = mirframenow(x,option);
+        y = mirflux(x,'Inc',option.inc,'Complex',option.complex);
+        type = 'mirscalar';
+    elseif option.pitch
+        [unused ac] = mirpitch(x,'Frame','Min',option.min,...
+                                 'Max',option.max);
+        y = mirnovelty(ac,'KernelSize',option.kernelsize);
+        type = 'mirscalar';
+    end
+    return
+end
+
+% Similarity matrix input, or 'Pitch' requested on a non-scalar input:
+% only the novelty curve remains to be computed.
+if (option.pitch && not(isamir(x,'mirscalar'))) ...
+        || isamir(x,'mirsimatrix')
+    y = mirnovelty(x,'KernelSize',option.kernelsize);
+    type = 'mirscalar';
+    return
+end
+
+% Already a detection curve: pass it through unchanged.
+if isamir(x,'mirscalar') || isamir(x,'mirenvelope')
+    y = x;
+    type = mirtype(x);
+    return
+end
+
+% Any other input: spectral flux (marked "Not used..." by the original
+% author).
+x = mirframenow(x,option);
+y = mirflux(x,'Inc',option.inc,'Complex',option.complex);
+type = 'mirscalar';
+
+
+%% MAIN
+
+function o = main(o,option,postoption)
+% MAIN post-processes the onset detection curve o produced by init.
+%   o          : detection curve (or a cell whose first element is it).
+%   option     : options used by init; may be empty, hence the isempty /
+%                isfield guards below.
+%   postoption : post-processing options; its fields are tested with
+%                isfield before use in most places.
+%   Steps, in this order: preset overrides, envelope resampling or
+%   down-sampling, envelope / spectral-flux post-processing, channel
+%   summation, Klapuri99-specific processing, frame decomposition, peak or
+%   valley picking, attack / release estimation, and finally prefixing the
+%   title with 'Onset curve'.
+
+% Preselected models override some post-processing options.
+if not(isempty(option)) && ischar(option.presel)
+    if strcmpi(option.presel,'Scheirer')
+        postoption.sampling = 200;
+        postoption.diffhwr = 1;
+        option.sum = 0;
+        postoption.detect = 0;
+    elseif strcmpi(option.presel,'Klapuri99')
+        postoption.mu = 1;
+        postoption.diffhwr = 1;
+        option.sum = 0;
+        postoption.ds = 0;
+        % Copy of the unprocessed curve, used further below to compute the
+        % absolute distance function.
+        % NOTE(review): this copy is taken before o is unwrapped from its
+        % cell just below - confirm mirenvelope accepts it in that form.
+        o2 = o;
+    end
+end
+if iscell(o)
+    o = o{1};
+end
+% The obsolete 'DiffEnvelope' keyword is mapped onto 'Diff'.
+if not(isempty(option)) && option.diffenv
+    postoption.diff = 1;
+end
+
+% Resampling ('Sampling') takes precedence over down-sampling ('Down').
+if isa(o,'mirenvelope')
+    if isfield(postoption,'sampling') && postoption.sampling
+        o = mirenvelope(o,'Sampling',postoption.sampling);
+    elseif isfield(postoption,'ds') 
+        if isnan(postoption.ds)
+            % Unspecified factor: no down-sampling if a pre-decimation was
+            % applied or if the 'Spectro' method was used, else factor 16.
+            if option.decim || strcmpi(option.envmeth,'Spectro')
+                postoption.ds = 0;
+            else
+                postoption.ds = 16;
+            end
+        end
+        if postoption.ds
+            o = mirenvelope(o,'Down',postoption.ds);
+        end
+    end
+end
+
+% Curve post-processing; the presence of the 'cthr' field is used as the
+% sign that postoption is populated.
+if isfield(postoption,'cthr')
+    if isa(o,'mirenvelope')
+        if postoption.mu
+            o = mirenvelope(o,'Mu');
+        end
+        if postoption.log
+            o = mirenvelope(o,'Log');
+        end
+        if postoption.power
+            o = mirenvelope(o,'Power');
+        end
+        if postoption.diff
+            o = mirenvelope(o,'Diff',postoption.diff,...
+                              'Lambda',postoption.lambda,...
+                              'Complex',postoption.complex);
+        end
+        if postoption.diffhwr
+            o = mirenvelope(o,'HalfwaveDiff',postoption.diffhwr,...
+                              'Lambda',postoption.lambda,...
+                              'Complex',postoption.complex);
+        end
+        if postoption.aver
+            o = mirenvelope(o,'Smooth',postoption.aver);
+        end    
+        if postoption.chwr
+            o = mirenvelope(o,'HalfwaveCenter');
+        end
+        if postoption.c
+            o = mirenvelope(o,'Center');
+        end
+    elseif isa(o,'mirscalar') && strcmp(get(o,'Title'),'Spectral flux')
+        if postoption.median
+            o = mirflux(o,'Median',postoption.median(1),postoption.median(2),...
+                          'Halfwave',postoption.hw);
+        else
+            o = mirflux(o,'Halfwave',postoption.hw);
+        end
+    end
+end
+
+% Sum the channels back, unless disabled by 'Sum' or by a preset.
+if isfield(option,'sum') && option.sum
+    o = mirsum(o,'Adjacent',option.sum);
+end
+
+if isfield(option,'presel') && ...
+        ischar(option.presel) && strcmpi(option.presel,'Klapuri99')
+    % o, already computed, corresponds to mirenvelope(o,'Mu','HalfwaveDiff');
+    % o is the relative distance function W in (Klapuri, 99);
+    o2 = mirenvelope(o2,'HalfwaveDiff');
+    % o2 is the absolute distance function D in (Klapuri, 99);
+    p = mirpeaks(o,'Contrast',.2,'Chrono');
+    p2 = mirpeaks(o2,'ScanForward',p,'Chrono');
+    % NOTE(review): combinepeaks is not defined in this file; confirm that
+    % it is available on the path.
+    o = combinepeaks(p,p2,.05);
+    clear o2 p p2
+    filtfreq = 44*[2.^ ([ 0:2, ( 9+(0:17) )/3 ]) ];% Center frequencies of bands
+    o = mirsum(o,'Weights',(filtfreq(1:end-1)+filtfreq(2:end))/2);
+    o = mirenvelope(o,'Smooth',12);
+end
+
+if not(isa(o,'mirscalar'))
+    o = mirframenow(o,postoption);
+end
+
+% Peak (or valley) picking. This block only runs when 'detect' is a
+% string, so the two inner tests on postoption.detect are always true and
+% the 'elseif' body below never changes it.
+if isfield(postoption,'detect') && ischar(postoption.detect)
+    if isnan(postoption.cthr) || not(postoption.cthr)
+        if ischar(postoption.detect) || postoption.detect
+            postoption.cthr = .01;
+        end
+    elseif postoption.cthr
+        if not(ischar(postoption.detect) || postoption.detect)
+            postoption.detect = 'Peaks';
+        end
+    end
+    if strcmpi(postoption.detect,'Peaks')
+        o = mirpeaks(o,'Total',Inf,'SelectFirst',...
+            'Threshold',postoption.thr,'Contrast',postoption.cthr,...
+            'Order','Abscissa','NoBegin','NoEnd');
+    elseif strcmpi(postoption.detect,'Valleys')
+        o = mirpeaks(o,'Total',Inf,'SelectFirst',...
+            'Threshold',postoption.thr,'Contrast',postoption.cthr,...
+            'Valleys','Order','Abscissa','NoBegin','NoEnd');
+    end
+    % Reset the attack and release positions to empty cells.
+    nop = cell(size(get(o,'Data')));
+    o = set(o,'AttackPos',nop,'ReleasePos',nop);
+end
+
+% Attack / release estimation.
+% postoption.release is either 0 or a method name. A char array cannot be
+% an operand of && (which requires logical scalars), so both requests are
+% first reduced to scalar flags. 'no' and 'off' count as "not requested".
+wantattack = isfield(postoption,'attack') && postoption.attack;
+wantrelease = isfield(postoption,'release') && ...
+              ischar(postoption.release) && ...
+              ~strcmpi(postoption.release,'No') && ...
+              ~strcmpi(postoption.release,'Off');
+if wantattack || wantrelease
+    p = get(o,'PeakPos');
+    pm = get(o,'PeakMode');
+    d = get(o,'Data');
+    if wantattack
+        [st p pm] = mircompute(@startattack,d,p,pm);
+    else
+        % No attack requested: endrelease accepts an empty st. Build an
+        % all-empty placeholder with the same two outer cell levels as p.
+        % NOTE(review): assumes mircompute indexes its cell arguments in
+        % parallel with the data - TODO confirm.
+        st = cell(size(p));
+        for h = 1:numel(p)
+            if iscell(p{h})
+                st{h} = cell(size(p{h}));
+            end
+        end
+    end
+    if wantrelease
+        [rl p pm st] = mircompute(@endrelease,d,p,pm,st,postoption.release);
+        o = set(o,'ReleasePos',rl);
+    end
+    if wantattack
+        o = set(o,'AttackPos',st,'PeakPos',p,'PeakMode',pm);
+    else
+        % Attack positions were not computed: leave 'AttackPos' untouched.
+        o = set(o,'PeakPos',p,'PeakMode',pm);
+    end
+end
+
+% Prefix the title once; titles starting with 'Onset curve' and longer
+% than 11 characters are left as they are.
+title = get(o,'Title');
+if not(length(title)>11 && strcmp(title(1:11),'Onset curve'))
+    o = set(o,'Title',['Onset curve (',title,')']);
+end
+
+
+function st = startattack(d,z,pm)
+% STARTATTACK estimates, for each peak, the sample where its attack starts.
+%   d  : curve samples; differences are taken along dimension 1.
+%   z  : cell whose first element lists the peak positions (indices in d).
+%   pm : cell whose first element lists the peak modes, aligned with z.
+%   st : {{attack starts} {remaining peaks} {remaining peak modes}}.
+%   Searching backwards from each peak in the third difference of d, the
+%   attack start follows the first positive value found before the first
+%   negative one; it is 1 when either search fails. When two consecutive
+%   peaks end up with the same attack start, the peak with the lower value
+%   in d is discarded (the current one on ties).
+peaks = sort(z{1});
+modes = pm{1};
+starts = zeros(size(peaks));
+d3 = diff(diff(diff(d,1,1),1,1),1,1);   % d'''
+k = 1;
+while k<=length(peaks)
+    base = peaks(k)-1;
+    % Backward scan: first sample where d''' is negative.
+    down = find(d3(base-1:-1:1)<0,1);
+    if isempty(down)
+        starts(k) = 1;
+    else
+        % Continue backwards: first sample where d''' is positive.
+        up = find(d3(base-down-1:-1:1)>0,1);
+        if isempty(up)
+            starts(k) = 1;
+        else
+            starts(k) = base-down-up+2;
+        end
+        % Same attack start as the previous peak: keep only the higher
+        % of the two peaks.
+        if k>1 && starts(k-1)==starts(k)
+            if d(peaks(k))>d(peaks(k-1))
+                drop = k-1;
+            else
+                drop = k;
+            end
+            starts(drop) = [];
+            peaks(drop) = [];
+            modes(drop) = [];
+            k = k-1;    % compensates the increment below after a removal
+        end
+    end
+    k = k+1;
+end
+st = {{starts} {peaks} {modes}};
+
+
+function rt = endrelease(d,z,pm,st,meth)
+% ENDRELEASE estimates, for each peak, the sample where its release ends.
+%   d    : curve samples; differences are taken along dimension 1.
+%   z    : cell whose first element lists the peak positions (indices in d).
+%   pm   : cell whose first element lists the peak modes, aligned with z.
+%   st   : cell whose first element lists the attack starts aligned with
+%          z, or an empty value when attacks were not computed.
+%   meth : 'Olivier' or 'Valeri' (case-insensitive).
+%   rt   : {{release ends} {remaining peaks} {remaining modes}
+%           {remaining attack starts}}.
+%   Release positions never exceed length(d). When two consecutive peaks
+%   end up with the same release end, the peak with the lower value in d
+%   is discarded (the current one on ties), together with its entries in
+%   pm and st.
+z = sort(z{1});
+pm = pm{1};
+if not(isempty(st))
+    st = st{1};
+end
+rt = zeros(size(z));
+nd = length(d);
+dd = diff(d,1,1);       % d'
+ddd = diff(dd,1,1);     % d''
+dddd = diff(ddd,1,1);   % d'''
+% Thresholds that do not depend on the peak: computed once.
+negcurv = min(ddd)/100;     % "sufficiently negative" d''
+poscurv = max(ddd)/100;     % "sufficiently positive" d''
+flatslope = min(dd)/100;    % slope above which the decay is over
+i = 1;
+while i<=length(z)
+    if strcmpi(meth,'Olivier')
+        % After the peak, look for the next sufficiently negative d'',
+        % then the next sufficiently positive d'', then the next sample
+        % where d''' is negative. If any search fails, the release ends
+        % with the curve.
+        l = find(ddd((z(i)-1):end)<negcurv,1); 
+        if isempty(l)
+            rt(i) = nd;
+        else
+            p = find(ddd((z(i)-1)+(l-1)+1:end)>poscurv,1);
+            if isempty(p)
+                rt(i) = nd;
+            else
+                n = find(dddd((z(i)-1)+(l-1)+p+1:end)<0,1);
+                if isempty(n)
+                    rt(i) = nd;
+                else
+                    rt(i) = ((z(i)-1)+(l-1)+p+n)+1;
+                end
+            end
+        end
+    elseif strcmpi(meth,'Valeri')
+        % First sample, from the peak on, where the slope rises above
+        % min(d')/100.
+        p = find(dd((z(i)-1)+1:end)>flatslope,1);
+        if isempty(p)
+            rt(i) = nd;
+        elseif p<=3
+            % A point at most 3 samples after the peak cannot be the end
+            % of the release. Original author's assumption: the whole
+            % decay-sustain-release section cannot be shorter than 30 ms
+            % (sampling rate is 100 Hz), and no successive note can be
+            % nearer than 30 ms. The release is therefore placed 3 samples
+            % after the peak, clamped to the end of the curve.
+            rt(i) = min(z(i)+3,nd);
+        else
+            rt(i) = (z(i)-1)+(p-1);
+        end
+    end
+    % Same release end as the previous peak: keep only the higher peak.
+    if i>1 && rt(i-1)==rt(i)
+        if d(z(i))>d(z(i-1))
+            del = i-1;
+        else
+            del = i;
+        end
+        rt(del) = [];
+        z(del) = [];
+        pm(del) = [];
+        if not(isempty(st))
+            st(del) = [];
+        end
+        i = i-1;    % compensates the increment below after a removal
+    end
+    i = i+1;
+end
+rt = {{rt} {z} {pm} {st}};
\ No newline at end of file