Mercurial > hg > camir-aes2014
diff toolboxes/MIRtoolbox1.3.2/MIRToolbox/@mirpitch/mirpitch.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
function varargout = mirpitch(orig,varargin)
%   p = mirpitch(x) evaluates the pitch frequencies (in Hz).
%   Specification of the method(s) for pitch estimation (these methods can
%       be combined):
%       mirpitch(...,'Autocor') computes an autocorrelation function
%           (Default method)
%           mirpitch(...,'Enhanced',a) computes enhanced autocorrelation
%               (see help mirautocor)
%               toggled on by default (a = 2:10)
%           mirpitch(...,'Compress',k) performs magnitude compression
%               (see help mirautocor)
%               Default value: k = .5
%           mirpitch(...,fb) specifies a type of filterbank.
%               Possible values:
%                   fb = 'NoFilterBank': no filterbank decomposition
%                   fb = '2Channels' (default value)
%                   fb = 'Gammatone'
%       mirpitch(...,'AutocorSpectrum') computes the autocorrelation of
%           the FFT spectrum
%       mirpitch(...,'Cepstrum') computes the cepstrum
%       Alternatively, an autocorrelation or a cepstrum can be directly
%           given as first argument of the mirpitch function.
%   Peak picking options:
%       mirpitch(...,'Total',m) selects the m best pitches.
%           Default value: m = Inf, no limit is set concerning the number
%           of pitches to be detected.
%       mirpitch(...,'Mono') corresponds to mirpitch(...,'Total',1)
%       mirpitch(...,'Min',mi) indicates the lowest frequency taken into
%           consideration.
%           Default value: 75 Hz. (Praat)
%       mirpitch(...,'Max',ma) indicates the highest frequency taken into
%           consideration.
%           Default value: 2400 Hz, because there seem to be some problems
%           with higher frequencies, probably due to the absence of
%           pre-whitening in our implementation of Tolonen and Karjalainen
%           approach (used by default, cf. below).
%       mirpitch(...,'Contrast',thr) specifies a threshold value.
%           (see help mirpeaks)
%           Default value: thr = .1
%       mirpitch(...,'Order',o) specifies the ordering for the peak picking.
%           Possible values: o = 'Amplitude' or 'Abscissa'.
%           Default value: o = 'Amplitude'.
%       Alternatively, the result of a mirpeaks computation can be directly
%           given as first argument of the mirpitch function.
%   Post-processing options:
%       mirpitch(...,'Sum','no') does not sum back the channels at the end
%           of the computation. The resulting pitch information remains
%           therefore decomposed into several channels.
%       mirpitch(...,'Median',l) performs a median filtering of the pitch
%           curve. When several pitches are extracted in each frame, the
%           pitch curve contains the best peak of each successive frame.
%           The filter order is l divided by the spacing between two
%           successive frame positions.
%           if l is not specified, the default value .1 is used
%       mirpitch(...,'Stable',th,n) removes pitch values when the
%           difference (or more precisely absolute logarithmic quotient)
%           with the n preceding frames exceeds the threshold th.
%           if th is not specified, the default value .1 is used
%           if n is not specified, the default value 3 is used
%       mirpitch(...,'Reso',r) removes peaks whose distance to one or
%           several higher peaks is lower than a given threshold.
%           Possible value for the threshold r:
%               'SemiTone': ratio between the two peak positions equal to
%                   2^(1/12)
%       mirpitch(...,'Frame',l,h) orders a frame decomposition of window
%           length l (in seconds) and hop factor h, expressed relatively to
%           the window length. For instance h = 1 indicates no overlap.
%           Default values: l = 46.4 ms and h = 10 ms (Tolonen and
%           Karjalainen, 2000)
%   Preset model:
%       mirpitch(...,'Tolonen') implements (part of) the model proposed in
%           (Tolonen & Karjalainen, 2000). It is equivalent to
%           mirpitch(...,'Enhanced',2:10,'Generalized',.67,'2Channels')
%   [p,a] = mirpitch(...) also returns the result of the method chosen for
%       pitch estimation, and shows in particular the peaks corresponding
%       to the pitch values.
%   p = mirpitch(f,a,<r>) creates a mirpitch object based on the frequencies
%       specified in f and the related amplitudes specified in a, using a
%       frame sampling rate of r Hz (set by default to 100 Hz).
%
%   T. Tolonen, M. Karjalainen, "A Computationally Efficient Multipitch
%       Analysis Model", IEEE TRANSACTIONS ON SPEECH AND AUDIO PROCESSING,
%       VOL. 8, NO. 6, NOVEMBER 2000

%% pitch estimation method options

        ac.key = 'Autocor';
        ac.type = 'Boolean';
        ac.default = 0;
    option.ac = ac;

            enh.key = 'Enhanced';
            enh.type = 'Integer';
            enh.default = 2:10;
        option.enh = enh;

            filtertype.type = 'String';
            filtertype.choice = {'NoFilterBank','2Channels','Gammatone'};
            filtertype.default = '2Channels';
        option.filtertype = filtertype;

            gener.key = {'Generalized','Compress'};
            gener.type = 'Integer';
            gener.default = .5;
        option.gener = gener;

        as.key = 'AutocorSpectrum';
        as.type = 'Boolean';
        as.default = 0;
    option.as = as;

        % Option descriptor kept in its own variable (sp) so that it is not
        % confused with the mirscalar object s built further down.
        sp.key = 'Spectrum';
        sp.type = 'Boolean';
        sp.default = 0;
    option.s = sp;

        ce.key = 'Cepstrum';
        ce.type = 'Boolean';
        ce.default = 0;
    option.ce = ce;

%% peak picking options

        m.key = 'Total';
        m.type = 'Integer';
        m.default = Inf;
    option.m = m;

        multi.key = 'Multi';
        multi.type = 'Boolean';
        multi.default = 0;
    option.multi = multi;

        mono.key = 'Mono';
        mono.type = 'Boolean';
        mono.default = 0;
    option.mono = mono;

        mi.key = 'Min';
        mi.type = 'Integer';
        mi.default = 75;
    option.mi = mi;

        ma.key = 'Max';
        ma.type = 'Integer';
        ma.default = 2400;
    option.ma = ma;

        thr.key = 'Contrast';
        thr.type = 'Integer';
        thr.default = .1;
    option.thr = thr;

        order.key = 'Order';
        order.type = 'String';
        order.choice = {'Amplitude','Abscissa'};
        order.default = 'Amplitude';
    option.order = order;

        reso.key = 'Reso';
        reso.type = 'String';
        reso.choice = {0,'SemiTone'};
        reso.default = 0;
    option.reso = reso;

        % Forwarded to mirpeaks in main (flagged "Not used yet" in the
        % original source).
        track.key = 'Track';
        track.type = 'Boolean';
        track.default = 0;
    option.track = track;

%% post-processing options

        stable.key = 'Stable';
        stable.type = 'Integer';
        stable.number = 2;
        stable.default = [Inf 0];
        stable.keydefault = [.1 3];
    option.stable = stable;

        % Local descriptor named medopt (not "median") so that the MATLAB
        % built-in is not shadowed; the option field name is unchanged.
        medopt.key = 'Median';
        medopt.type = 'Integer';
        medopt.default = 0;
        medopt.keydefault = .1;
    option.median = medopt;

        frame.key = 'Frame';
        frame.type = 'Integer';
        frame.number = 2;
        frame.default = [0 0];
        frame.keydefault = [NaN NaN];
    option.frame = frame;

        % Local descriptor named sumopt (not "sum") so that the MATLAB
        % built-in is not shadowed; the option field name is unchanged.
        sumopt.key = 'Sum';
        sumopt.type = 'Boolean';
        sumopt.default = 1;
    option.sum = sumopt;

%% preset model

        tolo.key = 'Tolonen';
        tolo.type = 'Boolean';
        tolo.default = 0;
    option.tolo = tolo;

specif.option = option;
specif.chunkframebefore = 1;

if isnumeric(orig)
    % p = mirpitch(f,a,<r>): build a mirpitch object directly from the
    % frequencies (orig), the amplitudes (varargin{1}) and the optional
    % frame rate (varargin{2}, 100 Hz when omitted).
    if nargin<3
        f = 100;
    else
        f = varargin{2};
    end
    % Frame positions: row 1 holds the start of each frame, row 2 its end,
    % one frame every 1/f.
    fp = (0:size(orig,1)-1)/f;
    fp = [fp;fp+1/f];
    p.amplitude = {{varargin{1}'}};
    s = mirscalar([],'Data',{{orig'}},'Title','Pitch','Unit','Hz',...
                     'FramePos',{{fp}},'Sampling',f,'Name',{inputname(1)});
    p = class(p,'mirpitch',s);
    varargout = {p};
else
    varargout = mirfunction(@mirpitch,orig,varargin,nargout,specif,@init,@main);
end



function [y type] = init(orig,option)
% Builds the representation y on which the pitch peaks will be picked
% (autocorrelation, autocorrelation of the spectrum, cepstrum, spectrum, or
% a product of several of them) and declares the types of the outputs.
if option.tolo
    option.enh = 2:10;
    option.gener = .67;
    option.filtertype = '2Channels';
end
if not(option.ac) && not(option.as) && not(option.ce) && not(option.s)
    % No method requested: autocorrelation is the default.
    option.ac = 1;
end
if isnan(option.frame.length.val)
    option.frame.length.val = .0464;
end
if isnan(option.frame.hop.val)
    option.frame.hop.val = .01;
    option.frame.hop.unit = 's';
end
if isamir(orig,'mirscalar') || haspeaks(orig)
    y = orig;
else
    if isamir(orig,'mirautocor')
        y = mirautocor(orig,'Min',option.mi,'Hz','Max',option.ma,'Hz','Freq');
    elseif isamir(orig,'mircepstrum')
        y = orig;
    elseif isamir(orig,'mirspectrum')
        if not(option.as) && not(option.ce) && not(option.s)
            option.ce = 1;
        end
        % Start from the spectrum itself so that y is defined when only
        % 'Spectrum' is requested (it used to be left unassigned, which
        % made mirtype(y) below fail).
        y = orig;
        if option.as
            y = mirautocor(orig,...
                            'Min',option.mi,'Hz','Max',option.ma,'Hz');
        end
        if option.ce
            ce = mircepstrum(orig,'freq',...
                            'Min',option.mi,'Hz','Max',option.ma,'Hz');
            if option.as
                y = y*ce;
            else
                y = ce;
            end
        end
    else
        if option.ac
            x = orig;
            if not(strcmpi(option.filtertype,'NoFilterBank'))
                x = mirfilterbank(x,option.filtertype);
            end
            x = mirframenow(x,option);
            y = mirautocor(x,'Generalized',option.gener,...
                             'Min',option.mi,'Hz','Max',option.ma,'Hz');
            if option.sum
                y = mirsummary(y);
            end
            y = mirautocor(y,'Enhanced',option.enh,'Freq');
        end
        if option.as || option.ce || option.s
            x = mirframenow(orig,option);
            % The spectrum is kept in its own variable: storing it in y
            % used to overwrite the autocorrelation computed above, and
            % made the cepstrum be computed from the running product
            % instead of the spectrum.
            spec = mirspectrum(x);
            if option.s
                % As in the original code, an explicitly requested
                % 'Spectrum' replaces whatever has been computed so far.
                y = spec;
            end
            if option.as
                as = mirautocor(spec,...
                                'Min',option.mi,'Hz','Max',option.ma,'Hz');
                if option.ac
                    y = y*as;
                else
                    y = as;
                end
            end
            if option.ce
                ce = mircepstrum(spec,'freq',...
                                'Min',option.mi,'Hz','Max',option.ma,'Hz');
                if option.ac || option.as
                    y = y*ce;
                else
                    y = ce;
                end
            end
        end
    end
end
type = {'mirpitch',mirtype(y)};


function o = main(x,option,postoption)
% Picks the peaks of x (unless x is already a mirpitch object), applies the
% 'Stable' and 'Median' post-processing, and returns {p,x} where p is the
% mirpitch object and x the peak-picked representation.
if option.multi && option.m == 1
    option.m = Inf;
end
if option.mono && option.m == Inf
    option.m = 1;
end
if iscell(x)
    x = x{1};
end
if not(isa(x,'mirpitch'))
    x = mirpeaks(x,'Total',option.m,'Track',option.track,...
                   'Contrast',option.thr,'Threshold',.4,...
                   'Reso',option.reso,'NoBegin','NoEnd',...
                   'Order',option.order);
end
% Amplitudes are only available when the pitches come from peak data.
hasamp = not(isa(x,'mirscalar'));
if hasamp
    pf = get(x,'PeakPrecisePos');
    pa = get(x,'PeakPreciseVal');
else
    pf = get(x,'Data');
end
fp = get(x,'FramePos');
if option.stable(1) < Inf
    for i = 1:length(pf)
        for j = 1:length(pf{i})
            for k = 1:size(pf{i}{j},3)
                % Frames are scanned backwards; a pitch is kept only if one
                % of the option.stable(2) preceding frames contains a pitch
                % whose log10 ratio to it is below option.stable(1).
                for l = size(pf{i}{j},2):-1:option.stable(2)+1
                    for m = length(pf{i}{j}{1,l,k}):-1:1
                        found = 0;
                        for h = 1:option.stable(2)
                            for n = 1:length(pf{i}{j}{1,l-h,k})
                                if abs(log10(pf{i}{j}{1,l,k}(m) ...
                                            /pf{i}{j}{1,l-h,k}(n))) ...
                                       < option.stable(1)
                                    found = 1;
                                end
                            end
                        end
                        if not(found)
                            pf{i}{j}{1,l,k}(m) = [];
                            if hasamp
                                % Keep the amplitudes aligned with the
                                % remaining pitches (assumes PeakPreciseVal
                                % mirrors the layout of PeakPrecisePos).
                                pa{i}{j}{1,l,k}(m) = [];
                            end
                        end
                    end
                    % Statement position inside the frame loop is kept from
                    % the original code: the first frame is emptied as soon
                    % as one frame has been processed.
                    pf{i}{j}{1,1,k} = zeros(1,0);
                    if hasamp
                        pa{i}{j}{1,1,k} = zeros(1,0);
                    end
                end
            end
        end
    end
end
if option.median
    for i = 1:length(pf)
        for j = 1:length(pf{i})
            if size(fp{i}{j},2) > 1
                % One value per frame: the first pitch of the frame, or NaN
                % when the frame holds no pitch.
                npf = zeros(size(pf{i}{j}));
                for k = 1:size(pf{i}{j},3)
                    for l = 1:size(pf{i}{j},2)
                        if isempty(pf{i}{j}{1,l,k})
                            npf(1,l,k) = NaN;
                        else
                            npf(1,l,k) = pf{i}{j}{1,l,k}(1);
                        end
                    end
                end
                % Filter order: option.median divided by the spacing
                % between the first two frame positions.
                pf{i}{j} = medfilt1(npf,...
                      round(option.median/(fp{i}{j}(1,2)-fp{i}{j}(1,1))));
            end
        end
    end
end
if hasamp
    p.amplitude = pa;
else
    p.amplitude = 0;
end
s = mirscalar(x,'Data',pf,'Title','Pitch','Unit','Hz');
p = class(p,'mirpitch',s);
o = {p,x};