diff toolboxes/MIRtoolbox1.3.2/MIRToolbox/@mirpitch/mirpitch.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/MIRtoolbox1.3.2/MIRToolbox/@mirpitch/mirpitch.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,378 @@
+function varargout = mirpitch(orig,varargin)
+%   p = mirpitch(x) evaluates the pitch frequencies (in Hz).
+%   Specification of the method(s) for pitch estimation (these methods can
+%       be combined):
+%       mirpitch(...,'Autocor') computes an autocorrelation function
+%           (Default method)
+%           mirpitch(...,'Enhanced',a) computes enhanced autocorrelation
+%               (see help mirautocor)
+%              toggled on by default
+%           mirpitch(...,'Compress',k) performs magnitude compression
+%               (see help mirautocor)
+%           mirpitch(...,fb) specifies a type of filterbank.
+%               Possible values:
+%                   fb = 'NoFilterBank': no filterbank decomposition
+%                   fb = '2Channels' (default value)
+%                   fb = 'Gammatone' 
+%       mirpitch(...,'AutocorSpectrum') computes the autocorrelation of
+%           the FFT spectrum
+%       mirpitch(...,'Cepstrum') computes the cepstrum
+%       Alternatively, an autocorrelation or a cepstrum can be directly
+%           given as first argument of the mirpitch function.
+%   Peak picking options:
+%       mirpitch(...,'Total',m) selects the m best pitches.
+%           Default value: m = Inf, no limit is set concerning the number
+%           of pitches to be detected.
+%       mirpitch(...,'Mono') corresponds to mirpitch(...,'Total',1)
+%       mirpitch(...,'Min',mi) indicates the lowest frequency taken into
+%           consideration.
+%           Default value: 75 Hz. (Praat)
+%       mirpitch(...,'Max',ma) indicates the highest frequency taken into
+%           consideration. 
+%           Default value: 2400 Hz. Because there seems to be some problems
+%           with higher frequency, due probably to the absence of 
+%           pre-whitening in our implementation of Tolonen and Karjalainen
+%           approach (used by default, cf. below).
+%       mirpitch(...,'Contrast',thr) specifies a threshold value.
+%           (see help peaks)
+%           Default value: thr = .1
+%       mirpitch(...,'Order',o) specifies the ordering for the peak picking.
+%           Default value: o = 'Amplitude'.
+%       Alternatively, the result of a mirpeaks computation can be directly
+%           given as first argument of the mirpitch function.
+%   Post-processing options:
+%       mirpitch(...,'Sum','no') does not sum back the channels at the end 
+%           of the computation. The resulting pitch information remains
+%           therefore decomposed into several channels.
+%       mirpitch(...,'Median',l) performs a median filtering, of length l
+%           (in s., default: .1), of the pitch curve. When several pitches
+%           are extracted per frame, the best peak of each frame is used.
+%       mirpitch(...,'Stable',th,n) remove pitch values when the difference 
+%           (or more precisely absolute logarithmic quotient) with the
+%           n precedent frames exceeds the threshold th. 
+%           if th is not specified, the default value .1 is used
+%           if n is not specified, the default value 3 is used
+%       mirpitch(...,'Reso',r) removes peaks whose distance to one or
+%           several higher peaks is lower than a given threshold.
+%           Possible value for the threshold r:
+%               'SemiTone': ratio between the two peak positions equal to
+%                   2^(1/12)
+%       mirpitch(...,'Frame',l,h) orders a frame decomposition of window
+%           length l (in seconds) and hop factor h, expressed relatively to
+%           the window length. For instance h = 1 indicates no overlap.
+%           Default values: l = 46.4 ms and h = 10 ms (Tolonen and
+%           Karjalainen, 2000)
+%   Preset model:
+%       mirpitch(...,'Tolonen') implements (part of) the model proposed in
+%           (Tolonen & Karjalainen, 2000). It is equivalent to
+%           mirpitch(...,'Enhanced',2:10,'Generalized',.67,'2Channels')
+%   [p,a] = mirpitch(...) also displays the result of the method chosen for
+%       pitch estimation, and shows in particular the peaks corresponding
+%       to the pitch values.
+%   p = mirpitch(f,a,<r>) creates a mirpitch object based on the frequencies
+%       specified in f and the related amplitudes specified in a, using a
+%       frame sampling rate of r Hz (set by default to 100 Hz).
+%
+%   T. Tolonen, M. Karjalainen, "A Computationally Efficient Multipitch 
+%       Analysis Model", IEEE TRANSACTIONS ON SPEECH AND AUDIO PROCESSING,
+%       VOL. 8, NO. 6, NOVEMBER 2000
+
+        ac.key = 'Autocor';
+        ac.type = 'Boolean';
+        ac.default = 0;
+    option.ac = ac;
+    
+            enh.key = 'Enhanced';
+            enh.type = 'Integer';
+            enh.default = 2:10;
+        option.enh = enh;
+
+            filtertype.type = 'String';
+            filtertype.choice = {'NoFilterBank','2Channels','Gammatone'};
+            filtertype.default = '2Channels';
+        option.filtertype = filtertype;
+
+            gener.key = {'Generalized','Compress'};
+            gener.type = 'Integer';
+            gener.default = .5;
+        option.gener = gener;
+
+        as.key = 'AutocorSpectrum';
+        as.type = 'Boolean';
+        as.default = 0;
+    option.as = as;
+    
+        s.key = 'Spectrum';         % not described in the help text above
+        s.type = 'Boolean';
+        s.default = 0;
+    option.s = s;
+        
+        ce.key = 'Cepstrum';
+        ce.type = 'Boolean';
+        ce.default = 0;
+    option.ce = ce;
+        
+%% peak picking options
+
+        m.key = 'Total';
+        m.type = 'Integer';
+        m.default = Inf;
+    option.m = m;
+    
+        multi.key = 'Multi';        % turns a 'Total' of 1 back into Inf
+        multi.type = 'Boolean';
+        multi.default = 0;
+    option.multi = multi;
+
+        mono.key = 'Mono';          % turns a 'Total' of Inf into 1
+        mono.type = 'Boolean';
+        mono.default = 0;
+    option.mono = mono;
+
+        mi.key = 'Min';
+        mi.type = 'Integer';
+        mi.default = 75;
+    option.mi = mi;
+        
+        ma.key = 'Max';
+        ma.type = 'Integer';
+        ma.default = 2400;
+    option.ma = ma;
+        
+        thr.key = 'Contrast';
+        thr.type = 'Integer';
+        thr.default = .1;
+    option.thr = thr;
+    
+        order.key = 'Order';
+        order.type = 'String';
+        order.choice = {'Amplitude','Abscissa'};
+        order.default = 'Amplitude';
+    option.order = order;    
+
+        reso.key = 'Reso';
+        reso.type = 'String';
+        reso.choice = {0,'SemiTone'};
+        reso.default = 0;
+    option.reso = reso;
+        
+        track.key = 'Track';        % Not used yet
+        track.type = 'Boolean';
+        track.default = 0;
+    option.track = track;
+
+%% post-processing options
+        
+        stable.key = 'Stable';
+        stable.type = 'Integer';
+        stable.number = 2;
+        stable.default = [Inf 0];   % Inf threshold: no stability filtering
+        stable.keydefault = [.1 3];
+    option.stable = stable;
+    
+        median.key = 'Median';
+        median.type = 'Integer';
+        median.default = 0;         % 0: no median filtering
+        median.keydefault = .1;
+    option.median = median;
+    
+        frame.key = 'Frame';
+        frame.type = 'Integer';
+        frame.number = 2;
+        frame.default = [0 0];
+        frame.keydefault = [NaN NaN];   % NaN is replaced by defaults in init
+    option.frame = frame;
+
+        sum.key = 'Sum';
+        sum.type = 'Boolean';
+        sum.default = 1;
+    option.sum = sum;
+    
+%% preset model
+
+        tolo.key = 'Tolonen';
+        tolo.type = 'Boolean';
+        tolo.default = 0;
+    option.tolo = tolo;
+    
+specif.option = option;
+specif.chunkframebefore = 1;
+
+if isnumeric(orig)     % direct construction from frequencies and amplitudes
+    if nargin<2
+        error('mirpitch:noAmplitude','Usage: mirpitch(f,a,<r>)');
+    end
+    f = 100;                       % default frame sampling rate (Hz)
+    if nargin>2, f = varargin{2}; end
+    fp = (0:size(orig,1)-1)/f;     % frame start times
+    fp = [fp;fp+1/f];              % second row: frame end times
+    p.amplitude = {{varargin{1}'}};
+    s = mirscalar([],'Data',{{orig'}},'Title','Pitch','Unit','Hz',...
+                     'FramePos',{{fp}},'Sampling',f,'Name',{inputname(1)});
+    p = class(p,'mirpitch',s);
+    varargout = {p};
+else
+    varargout = mirfunction(@mirpitch,orig,varargin,nargout,specif,@init,@main);
+end
+
+
+
+function [y type] = init(orig,option)
+% INIT prepares the curve on which MAIN picks peaks: autocorrelation,
+% autocorrelation of the spectrum, cepstrum, spectrum, or the product of
+% several of them. Scalar data, cepstra and data with peaks pass unchanged.
+if option.tolo
+    option.enh = 2:10;
+    option.gener = .67;
+    option.filtertype = '2Channels';
+end
+if not(option.ac) && not(option.as) && not(option.ce) && not(option.s)
+    option.ac = 1;                 % default method: autocorrelation
+end
+if isnan(option.frame.length.val)
+    option.frame.length.val = .0464;
+end
+if isnan(option.frame.hop.val)
+    option.frame.hop.val = .01;
+    option.frame.hop.unit = 's';
+end
+band = {'Min',option.mi,'Hz','Max',option.ma,'Hz'};   % shared frequency range
+if isamir(orig,'mirscalar') || haspeaks(orig)
+    y = orig;
+elseif isamir(orig,'mirautocor')
+    y = mirautocor(orig,band{:},'Freq');
+elseif isamir(orig,'mircepstrum')
+    y = orig;
+elseif isamir(orig,'mirspectrum')
+    if not(option.as) && not(option.ce) && not(option.s)
+        option.ce = 1;             % default method for a spectrum: cepstrum
+    end
+    if option.as
+        y = mirautocor(orig,band{:});
+    end
+    if option.ce
+        ce = mircepstrum(orig,'freq',band{:});
+        if option.as
+            y = y*ce;
+        else
+            y = ce;
+        end
+    end
+    if not(option.as || option.ce), y = orig; end    % 'Spectrum' alone
+else
+    if option.ac
+        x = orig;
+        if not(strcmpi(option.filtertype,'NoFilterBank'))
+            x = mirfilterbank(x,option.filtertype);
+        end
+        x = mirframenow(x,option);
+        y = mirautocor(x,'Generalized',option.gener,band{:});
+        if option.sum
+            y = mirsummary(y);
+        end
+        y = mirautocor(y,'Enhanced',option.enh,'Freq');
+    end
+    if option.as || option.ce || option.s
+        % The spectrum gets its own variable: it must neither overwrite the
+        % autocorrelation in y nor be replaced before the cepstrum is computed.
+        sp = mirspectrum(mirframenow(orig,option));
+        if option.as
+            as = mirautocor(sp,band{:});
+            if option.ac
+                y = y*as;
+            else
+                y = as;
+            end
+        end
+        if option.ce
+            ce = mircepstrum(sp,'freq',band{:});
+            if option.ac || option.as
+                y = y*ce;
+            else
+                y = ce;
+            end
+        end
+        if not(option.ac || option.as || option.ce), y = sp; end  % 'Spectrum' alone
+    end
+end
+type = {'mirpitch',mirtype(y)};
+    
+
+function o = main(x,option,postoption)
+% MAIN picks peaks on the curve from INIT (unless X is already a mirpitch),
+% applies 'Stable' and 'Median' to the peak positions, returns {pitch, X}.
+if option.multi && option.m == 1
+    option.m = Inf;
+end
+if option.mono && option.m == Inf
+    option.m = 1;
+end
+if iscell(x)
+    x = x{1};
+end
+if not(isa(x,'mirpitch'))
+    x = mirpeaks(x,'Total',option.m,'Track',option.track,...
+                   'Contrast',option.thr,'Threshold',.4,...
+                   'Reso',option.reso,'NoBegin','NoEnd',...
+                   'Order',option.order);
+end
+if isa(x,'mirscalar')
+    pf = get(x,'Data');
+else
+    pf = get(x,'PeakPrecisePos');
+    pa = get(x,'PeakPreciseVal');
+end
+fp = get(x,'FramePos');
+if option.stable(1) < Inf
+    % Backward scan: a frame is checked against its still unpruned predecessors.
+    % NOTE(review): pa is not pruned along with pf here -- confirm intended.
+    for i = 1:length(pf)
+        for j = 1:length(pf{i})
+            for k = 1:size(pf{i}{j},3)
+                for l = size(pf{i}{j},2):-1:option.stable(2)+1
+                    for m = length(pf{i}{j}{1,l,k}):-1:1
+                        found = 0;
+                        for h = 1:option.stable(2)
+                            d = abs(log10(pf{i}{j}{1,l,k}(m) ...
+                                          ./pf{i}{j}{1,l-h,k}));
+                            found = found || any(d(:) < option.stable(1));
+                        end
+                        if not(found)
+                            pf{i}{j}{1,l,k}(m) = [];
+                        end
+                    end
+                    pf{i}{j}{1,1,k} = zeros(1,0);
+                end
+            end
+        end
+    end
+end
+if option.median
+    for i = 1:length(pf)
+        for j = 1:length(pf{i})
+            if size(fp{i}{j},2) > 1
+                npf = zeros(size(pf{i}{j}));
+                for k = 1:size(pf{i}{j},3)
+                    for l = 1:size(pf{i}{j},2)
+                        if isempty(pf{i}{j}{1,l,k})
+                            npf(1,l,k) = NaN;
+                        else
+                            npf(1,l,k) = pf{i}{j}{1,l,k}(1);
+                        end
+                    end
+                end
+                % Order = length / frame hop, at least 1 (medfilt1 needs n > 0).
+                hop = fp{i}{j}(1,2)-fp{i}{j}(1,1);
+                pf{i}{j} = medfilt1(npf,max(1,round(option.median/hop)));
+            end
+        end
+    end
+end
+if isa(x,'mirscalar')
+    p.amplitude = 0;
+else
+    p.amplitude = pa;
+end
+s = mirscalar(x,'Data',pf,'Title','Pitch','Unit','Hz');
+p = class(p,'mirpitch',s);
+o = {p,x};
\ No newline at end of file