view toolboxes/MIRtoolbox1.3.2/MIRToolbox/@mirpitch/mirpitch.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
function varargout = mirpitch(orig,varargin)
%   p = mirpitch(x) evaluates the pitch frequencies (in Hz).
%   Specification of the method(s) for pitch estimation (these methods can
%       be combined):
%       mirpitch(...,'Autocor') computes an autocorrelation function
%           (Default method)
%           mirpitch(...'Enhanced',a) computes enhanced autocorrelation
%               (see help mirautocor)
%              toggled on by default
%           mirpitch(...,'Compress',k) performs magnitude compression
%               (see help mirautocor)
%           mirpitch(...,fb) specifies a type of filterbank.
%               Possible values:
%                   fb = 'NoFilterBank': no filterbank decomposition
%                   fb = '2Channels' (default value)
%                   fb = 'Gammatone' 
%       mirpitch(...,'AutocorSpectrum') computes the autocorrelation of
%           the FFT spectrum
%       mirpitch(...,'Cepstrum') computes the cepstrum
%       Alternatively, an autocorrelation or a cepstrum can be directly
%           given as first argument of the mirpitch function.
%   Peak picking options:
%       mirpitch(...,'Total',m) selects the m best pitches.
%           Default value: m = Inf, no limit is set concerning the number
%           of pitches to be detected.
%       mirpitch(...,'Mono') corresponds to morpitch(...,'Total',1)
%       mirpitch(...,'Min',mi) indicates the lowest frequency taken into
%           consideration.
%           Default value: 75 Hz. (Praat)
%       mirpitch(...,'Max',ma) indicates the highest frequency taken into
%           consideration. 
%           Default value: 2400 Hz. Because there seems to be some problems
%           with higher frequency, due probably to the absence of 
%           pre-whitening in our implementation of Tolonen and Karjalainen
%           approach (used by default, cf. below).
%       mirpitch(...,'Contrast',thr) specifies a threshold value.
%           (see help peaks)
%           Default value: thr = .1
%       mirpitch(...,'Order',o) specifies the ordering for the peak picking.
%           Default value: o = 'Amplitude'.
%       Alternatively, the result of a mirpeaks computation can be directly
%           given as first argument of the mirpitch function.
%   Post-processing options:
%       mirpitch(...,'Sum','no') does not sum back the channels at the end 
%           of the computation. The resulting pitch information remains
%           therefore decomposed into several channels.
%       mirpitch(...,'Median') performs a median filtering of the pitch
%           curve. When several pitches are extracted in each frame, the
%           pitch curve contains the best peak of each successive frame.
%       mirpitch(...,'Stable',th,n) remove pitch values when the difference 
%           (or more precisely absolute logarithmic quotient) with the
%           n precedent frames exceeds the threshold th. 
%           if th is not specified, the default value .1 is used
%           if n is not specified, the default value 3 is used
%       mirpitch(...'Reso',r) removes peaks whose distance to one or
%           several higher peaks is lower than a given threshold.
%           Possible value for the threshold r:
%               'SemiTone': ratio between the two peak positions equal to
%                   2^(1/12)
%       mirpitch(...,'Frame',l,h) orders a frame decomposition of window
%           length l (in seconds) and hop factor h, expressed relatively to
%           the window length. For instance h = 1 indicates no overlap.
%           Default values: l = 46.4 ms and h = 10 ms (Tolonen and
%           Karjalainen, 2000)
%   Preset model:
%       mirpitch(...,'Tolonen') implements (part of) the model proposed in
%           (Tolonen & Karjalainen, 2000). It is equivalent to
%           mirpitch(...,'Enhanced',2:10,'Generalized',.67,'2Channels')
%   [p,a] = mirpitch(...) also displays the result of the method chosen for
%       pitch estimation, and shows in particular the peaks corresponding
%       to the pitch values.
%   p = mirpitch(f,a,<r>) creates a mirpitch object based on the frequencies
%       specified in f and the related amplitudes specified in a, using a
%       frame sampling rate of r Hz (set by default to 100 Hz).
%
%   T. Tolonen, M. Karjalainen, "A Computationally Efficient Multipitch 
%       Analysis Model", IEEE TRANSACTIONS ON SPEECH AND AUDIO PROCESSING,
%       VOL. 8, NO. 6, NOVEMBER 2000

        ac.key = 'Autocor';
        ac.type = 'Boolean';
        ac.default = 0;
    option.ac = ac;
    
            enh.key = 'Enhanced';
            enh.type = 'Integer';
            enh.default = 2:10;
        option.enh = enh;

            filtertype.type = 'String';
            filtertype.choice = {'NoFilterBank','2Channels','Gammatone'};
            filtertype.default = '2Channels';
        option.filtertype = filtertype;

            gener.key = {'Generalized','Compress'};
            gener.type = 'Integer';
            gener.default = .5;
        option.gener = gener;

        as.key = 'AutocorSpectrum';
        as.type = 'Boolean';
        as.default = 0;
    option.as = as;
    
        s.key = 'Spectrum';
        s.type = 'Boolean';
        s.default = 0;
    option.s = s;
        
        ce.key = 'Cepstrum';
        ce.type = 'Boolean';
        ce.default = 0;
    option.ce = ce;
        
%% peak picking options

        m.key = 'Total';
        m.type = 'Integer';
        m.default = Inf;
    option.m = m;
    
        multi.key = 'Multi';
        multi.type = 'Boolean';
        multi.default = 0;
    option.multi = multi;

        mono.key = 'Mono';
        mono.type = 'Boolean';
        mono.default = 0;
    option.mono = mono;

        mi.key = 'Min';
        mi.type = 'Integer';
        mi.default = 75;
    option.mi = mi;
        
        ma.key = 'Max';
        ma.type = 'Integer';
        ma.default = 2400;
    option.ma = ma;
        
        thr.key = 'Contrast';
        thr.type = 'Integer';
        thr.default = .1;
    option.thr = thr;
    
        order.key = 'Order';
        order.type = 'String';
        order.choice = {'Amplitude','Abscissa'};
        order.default = 'Amplitude';
    option.order = order;    

        reso.key = 'Reso';
        reso.type = 'String';
        reso.choice = {0,'SemiTone'};
        reso.default = 0;
    option.reso = reso;
        
        track.key = 'Track';        % Not used yet
        track.type = 'Boolean';
        track.default = 0;
    option.track = track;

%% post-processing options
        
        stable.key = 'Stable';
        stable.type = 'Integer';
        stable.number = 2;
        stable.default = [Inf 0];
        stable.keydefault = [.1 3];
    option.stable = stable;
    
        median.key = 'Median';
        median.type = 'Integer';
        median.default = 0;
        median.keydefault = .1;
    option.median = median;
    
        frame.key = 'Frame';
        frame.type = 'Integer';
        frame.number = 2;
        frame.default = [0 0];
        frame.keydefault = [NaN NaN];
    option.frame = frame;

        sum.key = 'Sum';
        sum.type = 'Boolean';
        sum.default = 1;
    option.sum = sum;
    
%% preset model

        tolo.key = 'Tolonen';
        tolo.type = 'Boolean';
        tolo.default = 0;
    option.tolo = tolo;
    
specif.option = option;
specif.chunkframebefore = 1;

if isnumeric(orig)
    if nargin<3
        f = 100;
    else
        f = varargin{2};
    end
    fp = (0:size(orig,1)-1)/f;
    fp = [fp;fp+1/f];
    p.amplitude = {{varargin{1}'}};
    s = mirscalar([],'Data',{{orig'}},'Title','Pitch','Unit','Hz',...
                     'FramePos',{{fp}},'Sampling',f,'Name',{inputname(1)});
    p = class(p,'mirpitch',s);
    varargout = {p};
else
    varargout = mirfunction(@mirpitch,orig,varargin,nargout,specif,@init,@main);
end



function [y type] = init(orig,option)
if option.tolo
    option.enh = 2:10;
    option.gener = .67;
    option.filtertype = '2Channels';
end
if not(option.ac) && not(option.as) && not(option.ce) && not(option.s)
    option.ac = 1;
end
if isnan(option.frame.length.val)
    option.frame.length.val = .0464;
end
if isnan(option.frame.hop.val)
    option.frame.hop.val = .01;
    option.frame.hop.unit = 's';
end
if isamir(orig,'mirscalar') || haspeaks(orig)
    y = orig;
else
    if isamir(orig,'mirautocor')
        y = mirautocor(orig,'Min',option.mi,'Hz','Max',option.ma,'Hz','Freq');
    elseif isamir(orig,'mircepstrum')
        y = orig;
    elseif isamir(orig,'mirspectrum')
        if not(option.as) && not(option.ce) && not(option.s)
            option.ce = 1;
        end
        if option.as
            y = mirautocor(orig,...
                            'Min',option.mi,'Hz','Max',option.ma,'Hz');
        end
        if option.ce
            ce = mircepstrum(orig,'freq',...
                            'Min',option.mi,'Hz','Max',option.ma,'Hz');
            if option.as
                y = y*ce;
            else
                y = ce;
            end
        end
    else
        if option.ac
            x = orig;
            if not(strcmpi(option.filtertype,'NoFilterBank'))
                x = mirfilterbank(x,option.filtertype);
            end
            x = mirframenow(x,option);
            y = mirautocor(x,'Generalized',option.gener,...
                                'Min',option.mi,'Hz','Max',option.ma,'Hz');
            if option.sum
                y = mirsummary(y);
            end
            y = mirautocor(y,'Enhanced',option.enh,'Freq');
        end
        if option.as || option.ce || option.s
            x = mirframenow(orig,option);
            y = mirspectrum(x);
            if option.as
                as = mirautocor(y,...
                                'Min',option.mi,'Hz','Max',option.ma,'Hz');
                if option.ac
                    y = y*as;
                else
                    y = as;
                end
            end
            if option.ce
                ce = mircepstrum(y,'freq',...
                                'Min',option.mi,'Hz','Max',option.ma,'Hz');
                if option.ac || option.as
                    y = y*ce;
                else
                    y = ce;
                end
            end
        end
    end
end
type = {'mirpitch',mirtype(y)};
    

function o = main(x,option,postoption)
if option.multi && option.m == 1
    option.m = Inf;
end
if option.mono && option.m == Inf
    option.m = 1;
end
if iscell(x)
    x = x{1};
end
if not(isa(x,'mirpitch'))
    x = mirpeaks(x,'Total',option.m,'Track',option.track,...
                   'Contrast',option.thr,'Threshold',.4,...
                   'Reso',option.reso,'NoBegin','NoEnd',...
                   'Order',option.order);
end
if isa(x,'mirscalar')
    pf = get(x,'Data');
else
    pf = get(x,'PeakPrecisePos');
    pa = get(x,'PeakPreciseVal');
end
fp = get(x,'FramePos');
if option.stable(1) < Inf
    for i = 1:length(pf)
        for j = 1:length(pf{i})
            for k = 1:size(pf{i}{j},3)
                for l = size(pf{i}{j},2):-1:option.stable(2)+1
                    for m = length(pf{i}{j}{1,l,k}):-1:1
                        found = 0;
                        for h = 1:option.stable(2)
                            for n = 1:length(pf{i}{j}{1,l-h,k})
                                if abs(log10(pf{i}{j}{1,l,k}(m) ...
                                            /pf{i}{j}{1,l-h,k}(n))) ...
                                       < option.stable(1)
                                    found = 1;
                                end
                            end
                        end
                        if not(found)
                            pf{i}{j}{1,l,k}(m) = [];
                        end
                    end
                    pf{i}{j}{1,1,k} = zeros(1,0);
                end
            end
        end
    end
end
if option.median
    sr = get(x,'Sampling');
    for i = 1:length(pf)
        for j = 1:length(pf{i})
            if size(fp{i}{j},2) > 1
                npf = zeros(size(pf{i}{j}));
                for k = 1:size(pf{i}{j},3)
                    for l = 1:size(pf{i}{j},2)
                        if isempty(pf{i}{j}{1,l,k})
                            npf(1,l,k) = NaN;
                        else
                            npf(1,l,k) = pf{i}{j}{1,l,k}(1);
                        end
                    end
                end
                pf{i}{j} = medfilt1(npf,...
                     round(option.median/(fp{i}{j}(1,2)-fp{i}{j}(1,1))));
            end
        end
    end
end
if isa(x,'mirscalar')
    p.amplitude = 0;
else
    p.amplitude = pa;
end
s = mirscalar(x,'Data',pf,'Title','Pitch','Unit','Hz');
p = class(p,'mirpitch',s);
o = {p,x};