comparison toolboxes/MIRtoolbox1.3.2/MIRToolbox/@mirpitch/mirpitch.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
function varargout = mirpitch(orig,varargin)
%   p = mirpitch(x) evaluates the pitch frequencies (in Hz).
%   Specification of the method(s) for pitch estimation (these methods can
%       be combined):
%       mirpitch(...,'Autocor') computes an autocorrelation function
%           (Default method)
%           mirpitch(...,'Enhanced',a) computes enhanced autocorrelation
%               (see help mirautocor)
%              toggled on by default
%           mirpitch(...,'Compress',k) performs magnitude compression
%               (see help mirautocor)
%           mirpitch(...,fb) specifies a type of filterbank.
%               Possible values:
%                   fb = 'NoFilterBank': no filterbank decomposition
%                   fb = '2Channels' (default value)
%                   fb = 'Gammatone'
%       mirpitch(...,'AutocorSpectrum') computes the autocorrelation of
%           the FFT spectrum
%       mirpitch(...,'Cepstrum') computes the cepstrum
%       Alternatively, an autocorrelation or a cepstrum can be directly
%           given as first argument of the mirpitch function.
%   Peak picking options:
%       mirpitch(...,'Total',m) selects the m best pitches.
%           Default value: m = Inf, no limit is set concerning the number
%           of pitches to be detected.
%       mirpitch(...,'Mono') corresponds to mirpitch(...,'Total',1)
%       mirpitch(...,'Min',mi) indicates the lowest frequency taken into
%           consideration.
%           Default value: 75 Hz. (Praat)
%       mirpitch(...,'Max',ma) indicates the highest frequency taken into
%           consideration.
%           Default value: 2400 Hz. Because there seem to be some problems
%           with higher frequencies, probably due to the absence of
%           pre-whitening in our implementation of the Tolonen and
%           Karjalainen approach (used by default, cf. below).
%       mirpitch(...,'Contrast',thr) specifies a threshold value.
%           (see help peaks)
%           Default value: thr = .1
%       mirpitch(...,'Order',o) specifies the ordering for the peak picking.
%           Default value: o = 'Amplitude'.
%       Alternatively, the result of a mirpeaks computation can be directly
%           given as first argument of the mirpitch function.
%   Post-processing options:
%       mirpitch(...,'Sum','no') does not sum back the channels at the end
%           of the computation. The resulting pitch information remains
%           therefore decomposed into several channels.
%       mirpitch(...,'Median') performs a median filtering of the pitch
%           curve. When several pitches are extracted in each frame, the
%           pitch curve contains the best peak of each successive frame.
%       mirpitch(...,'Stable',th,n) removes pitch values when the difference
%           (or more precisely absolute logarithmic quotient) with the
%           n preceding frames exceeds the threshold th.
%           if th is not specified, the default value .1 is used
%           if n is not specified, the default value 3 is used
%       mirpitch(...,'Reso',r) removes peaks whose distance to one or
%           several higher peaks is lower than a given threshold.
%           Possible value for the threshold r:
%               'SemiTone': ratio between the two peak positions equal to
%                   2^(1/12)
%       mirpitch(...,'Frame',l,h) orders a frame decomposition of window
%           length l (in seconds) and hop factor h, expressed relatively to
%           the window length. For instance h = 1 indicates no overlap.
%           Default values: l = 46.4 ms and h = 10 ms (Tolonen and
%           Karjalainen, 2000)
%   Preset model:
%       mirpitch(...,'Tolonen') implements (part of) the model proposed in
%           (Tolonen & Karjalainen, 2000). It is equivalent to
%           mirpitch(...,'Enhanced',2:10,'Generalized',.67,'2Channels')
%   [p,a] = mirpitch(...) also displays the result of the method chosen for
%       pitch estimation, and shows in particular the peaks corresponding
%       to the pitch values.
%   p = mirpitch(f,a,<r>) creates a mirpitch object based on the frequencies
%       specified in f and the related amplitudes specified in a, using a
%       frame sampling rate of r Hz (set by default to 100 Hz).
%
%   T. Tolonen, M. Karjalainen, "A Computationally Efficient Multipitch
%       Analysis Model", IEEE TRANSACTIONS ON SPEECH AND AUDIO PROCESSING,
%       VOL. 8, NO. 6, NOVEMBER 2000

% Each option is described by a small struct (key/type/default, plus
% choice/number/keydefault where relevant). The structs are built directly
% with struct(); cell-valued fields are wrapped in an extra pair of braces
% so that struct() stores the cell itself instead of creating a struct
% array. The field order of 'option' is kept as in the original code.

%% pitch estimation methods

option.ac = struct('key','Autocor','type','Boolean','default',0);

option.enh = struct('key','Enhanced','type','Integer','default',2:10);

% No key here: the filterbank is selected by giving one of the choices.
option.filtertype = struct('type','String',...
    'choice',{{'NoFilterBank','2Channels','Gammatone'}},...
    'default','2Channels');

option.gener = struct('key',{{'Generalized','Compress'}},...
    'type','Integer','default',.5);

option.as = struct('key','AutocorSpectrum','type','Boolean','default',0);

option.s = struct('key','Spectrum','type','Boolean','default',0);

option.ce = struct('key','Cepstrum','type','Boolean','default',0);

%% peak picking options

option.m = struct('key','Total','type','Integer','default',Inf);

option.multi = struct('key','Multi','type','Boolean','default',0);

option.mono = struct('key','Mono','type','Boolean','default',0);

option.mi = struct('key','Min','type','Integer','default',75);

option.ma = struct('key','Max','type','Integer','default',2400);

option.thr = struct('key','Contrast','type','Integer','default',.1);

option.order = struct('key','Order','type','String',...
    'choice',{{'Amplitude','Abscissa'}},'default','Amplitude');

option.reso = struct('key','Reso','type','String',...
    'choice',{{0,'SemiTone'}},'default',0);

option.track = struct('key','Track','type','Boolean','default',0); % Not used yet

%% post-processing options

option.stable = struct('key','Stable','type','Integer','number',2,...
    'default',[Inf 0],'keydefault',[.1 3]);

option.median = struct('key','Median','type','Integer',...
    'default',0,'keydefault',.1);

option.frame = struct('key','Frame','type','Integer','number',2,...
    'default',[0 0],'keydefault',[NaN NaN]);

option.sum = struct('key','Sum','type','Boolean','default',1);

%% preset model

option.tolo = struct('key','Tolonen','type','Boolean','default',0);

specif = struct('option',option,'chunkframebefore',1);

%% dispatch

if not(isnumeric(orig))
    % Regular case: hand the computation over to the generic mirfunction
    % driver, with init and main as the two processing stages.
    varargout = mirfunction(@mirpitch,orig,varargin,nargout,specif,@init,@main);
    return
end

% Numeric case, p = mirpitch(f,a,<r>): wrap the given frequencies and
% amplitudes into a mirpitch object.
rate = 100;                 % default frame sampling rate (Hz)
if nargin >= 3
    rate = varargin{2};
end
starts = (0:size(orig,1)-1)/rate;
framepos = [starts; starts+1/rate];     % one column [start;end] per frame
obj.amplitude = {{varargin{1}'}};
parent = mirscalar([],'Data',{{orig'}},'Title','Pitch','Unit','Hz',...
    'FramePos',{{framepos}},'Sampling',rate,'Name',{inputname(1)});
varargout = {class(obj,'mirpitch',parent)};



function [y, type] = init(orig,option)
% INIT  Prepares the representation on which the pitch peaks are picked.
%   orig   : input object (audio, spectrum, autocorrelation, cepstrum,
%            scalar data, or data that already carries peaks).
%   option : option values parsed from the mirpitch specification.
%   y      : representation passed on to the main stage.
%   type   : {'mirpitch', mirtype(y)}.
%
%   When several estimation methods are requested, their results are
%   combined by multiplication (overloaded * on the resulting objects).

if option.tolo
    % 'Tolonen' preset.
    option.enh = 2:10;
    option.gener = .67;
    option.filtertype = '2Channels';
end
if not(option.ac) && not(option.as) && not(option.ce) && not(option.s)
    % No method requested: autocorrelation is the default.
    option.ac = 1;
end
if isnan(option.frame.length.val)
    option.frame.length.val = .0464;
end
if isnan(option.frame.hop.val)
    option.frame.hop.val = .01;
    option.frame.hop.unit = 's';
end

if isamir(orig,'mirscalar') || haspeaks(orig)
    % Nothing to compute: the input is already pitch-like data or already
    % has its peaks picked.
    y = orig;
elseif isamir(orig,'mirautocor')
    y = mirautocor(orig,'Min',option.mi,'Hz','Max',option.ma,'Hz','Freq');
elseif isamir(orig,'mircepstrum')
    y = orig;
elseif isamir(orig,'mirspectrum')
    if not(option.as) && not(option.ce) && not(option.s)
        option.ce = 1;
    end
    % Fix: start from the input spectrum, so that 'Spectrum' used alone
    % returns it instead of leaving the output y unassigned.
    y = orig;
    if option.as
        y = mirautocor(orig,...
                       'Min',option.mi,'Hz','Max',option.ma,'Hz');
    end
    if option.ce
        ce = mircepstrum(orig,'freq',...
                         'Min',option.mi,'Hz','Max',option.ma,'Hz');
        if option.as
            y = y*ce;
        else
            y = ce;
        end
    end
else
    if option.ac
        x = orig;
        if not(strcmpi(option.filtertype,'NoFilterBank'))
            x = mirfilterbank(x,option.filtertype);
        end
        x = mirframenow(x,option);
        y = mirautocor(x,'Generalized',option.gener,...
                         'Min',option.mi,'Hz','Max',option.ma,'Hz');
        if option.sum
            y = mirsummary(y);
        end
        y = mirautocor(y,'Enhanced',option.enh,'Freq');
    end
    if option.as || option.ce || option.s
        x = mirframenow(orig,option);
        % Fix: the spectrum is kept in its own variable. It used to be
        % stored in y, which discarded the autocorrelation computed above
        % and made the cepstrum be computed from whatever y held at that
        % point rather than from the spectrum.
        spec = mirspectrum(x);
        if not(option.as) && not(option.ce)
            % Only 'Spectrum' was requested here: the spectrum itself is
            % the result, as in the original code.
            % NOTE(review): this also replaces the autocorrelation when
            % 'Autocor' and 'Spectrum' are both given; kept unchanged
            % because the intended combination is not visible here.
            y = spec;
        end
        if option.as
            as = mirautocor(spec,...
                            'Min',option.mi,'Hz','Max',option.ma,'Hz');
            if option.ac
                y = y*as;
            else
                y = as;
            end
        end
        if option.ce
            ce = mircepstrum(spec,'freq',...
                             'Min',option.mi,'Hz','Max',option.ma,'Hz');
            if option.ac || option.as
                y = y*ce;
            else
                y = ce;
            end
        end
    end
end
type = {'mirpitch',mirtype(y)};


function o = main(x,option,postoption)
% MAIN  Picks the pitch peaks and applies the post-processing options.
%   x          : representation produced by init (possibly wrapped in a
%                cell, in which case its first element is used).
%   option     : parsed option values.
%   postoption : not used in this function.
%   o          : {p,x}, where p is the mirpitch object and x the
%                representation with its peaks.

if option.multi && option.m == 1
    option.m = Inf;
end
if option.mono && option.m == Inf
    option.m = 1;
end
if iscell(x)
    x = x{1};
end
if not(isa(x,'mirpitch'))
    x = mirpeaks(x,'Total',option.m,'Track',option.track,...
                   'Contrast',option.thr,'Threshold',.4,...
                   'Reso',option.reso,'NoBegin','NoEnd',...
                   'Order',option.order);
end

% Amplitudes are only available when the pitches come from picked peaks.
hasamp = not(isa(x,'mirscalar'));
if hasamp
    pf = get(x,'PeakPrecisePos');
    pa = get(x,'PeakPreciseVal');
else
    pf = get(x,'Data');
end
fp = get(x,'FramePos');

%% 'Stable': drop pitches with no close match in the preceding frames
th = option.stable(1);
if th < Inf
    nprev = option.stable(2);
    for i = 1:length(pf)
        for j = 1:length(pf{i})
            for k = 1:size(pf{i}{j},3)
                % Frames are visited from last to first, so each frame is
                % compared with preceding frames that are still unfiltered.
                for l = size(pf{i}{j},2):-1:nprev+1
                    % Candidates are visited backwards so that deletions do
                    % not shift the indices still to be visited.
                    for m = length(pf{i}{j}{1,l,k}):-1:1
                        found = false;
                        for h = 1:nprev
                            prev = pf{i}{j}{1,l-h,k};
                            ratio = pf{i}{j}{1,l,k}(m)./prev(:);
                            if any(abs(log10(ratio)) < th)
                                found = true;
                                break
                            end
                        end
                        if not(found)
                            pf{i}{j}{1,l,k}(m) = [];
                            % Fix: remove the matching amplitude as well,
                            % so that amplitudes stay aligned with pitches.
                            if hasamp && m <= numel(pa{i}{j}{1,l,k})
                                pa{i}{j}{1,l,k}(m) = [];
                            end
                        end
                    end
                    % The first frame is emptied at the end of every
                    % iteration, as in the original code; the position of
                    % this statement affects later comparisons with frame 1.
                    % NOTE(review): frames 2..n are left untouched although
                    % they cannot be checked either - confirm the intent.
                    pf{i}{j}{1,1,k} = zeros(1,0);
                    if hasamp
                        pa{i}{j}{1,1,k} = zeros(1,0);
                    end
                end
            end
        end
    end
end

%% 'Median': median filtering of the best pitch of each frame
if option.median
    for i = 1:length(pf)
        for j = 1:length(pf{i})
            if size(fp{i}{j},2) > 1
                npf = zeros(size(pf{i}{j}));
                for k = 1:size(pf{i}{j},3)
                    for l = 1:size(pf{i}{j},2)
                        if isempty(pf{i}{j}{1,l,k})
                            npf(1,l,k) = NaN;
                        else
                            npf(1,l,k) = pf{i}{j}{1,l,k}(1);
                        end
                    end
                end
                % Filter order: option.median divided by the distance
                % between the start positions of the first two frames.
                hop = fp{i}{j}(1,2)-fp{i}{j}(1,1);
                pf{i}{j} = medfilt1(npf,round(option.median/hop));
            end
        end
    end
end

%% build the mirpitch object
if hasamp
    p.amplitude = pa;
else
    p.amplitude = 0;
end
s = mirscalar(x,'Data',pf,'Title','Pitch','Unit','Hz');
p = class(p,'mirpitch',s);
o = {p,x};