function varargout = mirpitch(orig,varargin)
%   p = mirpitch(x) evaluates the pitch frequencies (in Hz).
%   Specification of the method(s) for pitch estimation (these methods can
%           be combined):
%       mirpitch(...,'Autocor') computes an autocorrelation function
%           (Default method)
%           mirpitch(...,'Enhanced',a) computes enhanced autocorrelation
%               (see help mirautocor)
%              toggled on by default
%           mirpitch(...,'Compress',k) performs magnitude compression
%               (see help mirautocor)
%           mirpitch(...,fb) specifies a type of filterbank.
%               Possible values:
%                   fb = 'NoFilterBank': no filterbank decomposition
%                   fb = '2Channels' (default value)
%                   fb = 'Gammatone'
%       mirpitch(...,'AutocorSpectrum') computes the autocorrelation of
%           the FFT spectrum
%       mirpitch(...,'Cepstrum') computes the cepstrum
%       mirpitch(...,'Spectrum') picks the peaks on the FFT spectrum
%           itself.
%       Alternatively, an autocorrelation or a cepstrum can be directly
%           given as first argument of the mirpitch function.
%   Peak picking options:
%       mirpitch(...,'Total',m) selects the m best pitches.
%           Default value: m = Inf, no limit is set concerning the number
%           of pitches to be detected.
%       mirpitch(...,'Mono') corresponds to mirpitch(...,'Total',1)
%       mirpitch(...,'Multi') resets 'Total' to Inf when it was set to 1.
%       mirpitch(...,'Min',mi) indicates the lowest frequency taken into
%           consideration.
%           Default value: 75 Hz. (Praat)
%       mirpitch(...,'Max',ma) indicates the highest frequency taken into
%           consideration.
%           Default value: 2400 Hz. Because there seems to be some problems
%           with higher frequency, due probably to the absence of
%           pre-whitening in our implementation of Tolonen and Karjalainen
%           approach (used by default, cf. below).
%       mirpitch(...,'Contrast',thr) specifies a threshold value.
%           (see help peaks)
%           Default value: thr = .1
%       mirpitch(...,'Order',o) specifies the ordering for the peak picking.
%           Possible values: o = 'Amplitude' or 'Abscissa'.
%           Default value: o = 'Amplitude'.
%       Alternatively, the result of a mirpeaks computation can be directly
%           given as first argument of the mirpitch function.
%   Post-processing options:
%       mirpitch(...,'Sum','no') does not sum back the channels at the end
%           of the computation. The resulting pitch information remains
%           therefore decomposed into several channels.
%       mirpitch(...,'Median',l) performs a median filtering of the pitch
%           curve. When several pitches are extracted in each frame, the
%           pitch curve contains the best peak of each successive frame.
%           if l is not specified, the default value .1 is used
%           (l is divided by the frame hop expressed in seconds, so it is
%           presumably a duration in seconds).
%       mirpitch(...,'Stable',th,n) removes pitch values when the difference
%           (or more precisely absolute logarithmic quotient) with the
%           n preceding frames exceeds the threshold th.
%           if th is not specified, the default value .1 is used
%           if n is not specified, the default value 3 is used
%       mirpitch(...,'Reso',r) removes peaks whose distance to one or
%           several higher peaks is lower than a given threshold.
%           Possible value for the threshold r:
%               'SemiTone': ratio between the two peak positions equal to
%                   2^(1/12)
%       mirpitch(...,'Frame',l,h) orders a frame decomposition of window
%           length l (in seconds) and hop factor h, expressed relatively to
%           the window length. For instance h = 1 indicates no overlap.
%           Default values: l = 46.4 ms and h = 10 ms (Tolonen and
%           Karjalainen, 2000)
%   Preset model:
%       mirpitch(...,'Tolonen') implements (part of) the model proposed in
%           (Tolonen & Karjalainen, 2000). It is equivalent to
%           mirpitch(...,'Enhanced',2:10,'Generalized',.67,'2Channels')
%   [p,a] = mirpitch(...) also displays the result of the method chosen for
%       pitch estimation, and shows in particular the peaks corresponding
%       to the pitch values.
%   p = mirpitch(f,a,<r>) creates a mirpitch object based on the frequencies
%       specified in f and the related amplitudes specified in a, using a
%       frame sampling rate of r Hz (set by default to 100 Hz).
%
%   T. Tolonen, M. Karjalainen, "A Computationally Efficient Multipitch
%       Analysis Model", IEEE TRANSACTIONS ON SPEECH AND AUDIO PROCESSING,
%       VOL. 8, NO. 6, NOVEMBER 2000

% Option specifications are written straight into the 'option' structure
% (same fields, same order as before) so that no temporary variable
% shadows a MATLAB builtin such as sum or median.

%% pitch estimation methods

option.ac.key = 'Autocor';
option.ac.type = 'Boolean';
option.ac.default = 0;

option.enh.key = 'Enhanced';
option.enh.type = 'Integer';
option.enh.default = 2:10;

% Keyless option: the filterbank name itself is the keyword.
option.filtertype.type = 'String';
option.filtertype.choice = {'NoFilterBank','2Channels','Gammatone'};
option.filtertype.default = '2Channels';

option.gener.key = {'Generalized','Compress'};
option.gener.type = 'Integer';
option.gener.default = .5;

option.as.key = 'AutocorSpectrum';
option.as.type = 'Boolean';
option.as.default = 0;

option.s.key = 'Spectrum';
option.s.type = 'Boolean';
option.s.default = 0;

option.ce.key = 'Cepstrum';
option.ce.type = 'Boolean';
option.ce.default = 0;

%% peak picking options

option.m.key = 'Total';
option.m.type = 'Integer';
option.m.default = Inf;

option.multi.key = 'Multi';
option.multi.type = 'Boolean';
option.multi.default = 0;

option.mono.key = 'Mono';
option.mono.type = 'Boolean';
option.mono.default = 0;

option.mi.key = 'Min';
option.mi.type = 'Integer';
option.mi.default = 75;

option.ma.key = 'Max';
option.ma.type = 'Integer';
option.ma.default = 2400;

option.thr.key = 'Contrast';
option.thr.type = 'Integer';
option.thr.default = .1;

option.order.key = 'Order';
option.order.type = 'String';
option.order.choice = {'Amplitude','Abscissa'};
option.order.default = 'Amplitude';

option.reso.key = 'Reso';
option.reso.type = 'String';
option.reso.choice = {0,'SemiTone'};
option.reso.default = 0;

option.track.key = 'Track';        % Not used yet
option.track.type = 'Boolean';
option.track.default = 0;

%% post-processing options

option.stable.key = 'Stable';
option.stable.type = 'Integer';
option.stable.number = 2;
option.stable.default = [Inf 0];       % Inf threshold: filtering disabled
option.stable.keydefault = [.1 3];

option.median.key = 'Median';
option.median.type = 'Integer';
option.median.default = 0;
option.median.keydefault = .1;

option.frame.key = 'Frame';
option.frame.type = 'Integer';
option.frame.number = 2;
option.frame.default = [0 0];
option.frame.keydefault = [NaN NaN];   % NaN is replaced by defaults in init

option.sum.key = 'Sum';
option.sum.type = 'Boolean';
option.sum.default = 1;

%% preset model

option.tolo.key = 'Tolonen';
option.tolo.type = 'Boolean';
option.tolo.default = 0;

specif.option = option;
specif.chunkframebefore = 1;

if isnumeric(orig)
    % p = mirpitch(f,a,<r>): build a mirpitch object from raw numbers.
    if isempty(varargin)
        % The amplitudes are mandatory in this syntax; fail with a clear
        % message instead of an index-out-of-range error on varargin{1}.
        error('mirpitch:MissingAmplitude',...
              ['mirpitch(f,a,<r>): the amplitudes a related to the ',...
               'frequencies f must be given as second argument.']);
    end
    if nargin<3
        f = 100;
    else
        f = varargin{2};
    end
    % One frame per row of orig, each lasting 1/f: first row of fp holds
    % the frame starts, second row the frame ends.
    fp = (0:size(orig,1)-1)/f;
    fp = [fp;fp+1/f];
    p.amplitude = {{varargin{1}'}};
    s = mirscalar([],'Data',{{orig'}},'Title','Pitch','Unit','Hz',...
                  'FramePos',{{fp}},'Sampling',f,'Name',{inputname(1)});
    p = class(p,'mirpitch',s);
    varargout = {p};
else
    varargout = mirfunction(@mirpitch,orig,varargin,nargout,specif,@init,@main);
end



function [y type] = init(orig,option)
% INIT prepares the representation on which the pitch peaks are picked.
%   orig   - input object handed over by mirfunction.
%   option - option structure decoded from the mirpitch specification.
%   y      - orig itself (mirscalar, data with peaks, cepstrum, or spectrum
%            when only 'Spectrum' is requested), or an autocorrelation,
%            cepstrum, spectrum, or a product of these computed from orig.
%   type   - {'mirpitch', mirtype(y)}.

if option.tolo
    % Preset of Tolonen & Karjalainen (2000).
    option.enh = 2:10;
    option.gener = .67;
    option.filtertype = '2Channels';
end
if not(option.ac) && not(option.as) && not(option.ce) && not(option.s)
    % No method requested: autocorrelation is the default one.
    option.ac = 1;
end
if isnan(option.frame.length.val)
    option.frame.length.val = .0464;
end
if isnan(option.frame.hop.val)
    option.frame.hop.val = .01;
    option.frame.hop.unit = 's';
end

if isamir(orig,'mirscalar') || haspeaks(orig)
    y = orig;
elseif isamir(orig,'mirautocor')
    y = mirautocor(orig,'Min',option.mi,'Hz','Max',option.ma,'Hz','Freq');
elseif isamir(orig,'mircepstrum')
    y = orig;
elseif isamir(orig,'mirspectrum')
    if not(option.as) && not(option.ce) && not(option.s)
        % Only 'Autocor' is set (explicitly or by default), which cannot
        % be computed from a spectrum: fall back to the cepstrum.
        option.ce = 1;
    end
    if option.as
        y = mirautocor(orig,...
                       'Min',option.mi,'Hz','Max',option.ma,'Hz');
    end
    if option.ce
        ce = mircepstrum(orig,'freq',...
                         'Min',option.mi,'Hz','Max',option.ma,'Hz');
        if option.as
            y = y*ce;
        else
            y = ce;
        end
    end
    if not(option.as) && not(option.ce)
        % Only 'Spectrum' was requested: peaks are picked on the input
        % spectrum itself. (Previously y was left undefined here and
        % mirtype(y) below failed.)
        y = orig;
    end
else
    if option.ac
        x = orig;
        if not(strcmpi(option.filtertype,'NoFilterBank'))
            x = mirfilterbank(x,option.filtertype);
        end
        x = mirframenow(x,option);
        y = mirautocor(x,'Generalized',option.gener,...
                       'Min',option.mi,'Hz','Max',option.ma,'Hz');
        if option.sum
            y = mirsummary(y);
        end
        y = mirautocor(y,'Enhanced',option.enh,'Freq');
    end
    if option.as || option.ce || option.s
        x = mirframenow(orig,option);
        % The spectrum is kept in its own variable: storing it in y used
        % to overwrite the autocorrelation computed above, and made the
        % cepstrum be computed from the spectrum autocorrelation when
        % both 'AutocorSpectrum' and 'Cepstrum' were requested.
        sp = mirspectrum(x);
        if option.as
            as = mirautocor(sp,...
                            'Min',option.mi,'Hz','Max',option.ma,'Hz');
            if option.ac
                y = y*as;
            else
                y = as;
            end
        end
        if option.ce
            ce = mircepstrum(sp,'freq',...
                             'Min',option.mi,'Hz','Max',option.ma,'Hz');
            if option.ac || option.as
                y = y*ce;
            else
                y = ce;
            end
        end
        if not(option.as) && not(option.ce)
            % 'Spectrum' without 'AutocorSpectrum' nor 'Cepstrum': the
            % spectrum is the output representation.
            % NOTE(review): as before, it also replaces the result of
            % 'Autocor' when both are requested - confirm this is intended.
            y = sp;
        end
    end
end
type = {'mirpitch',mirtype(y)};


function o = main(x,option,postoption)
% MAIN picks the pitch peaks and wraps them into a mirpitch object.
%   x          - representation produced by init (or a cell array whose
%                first element is that representation).
%   option     - option structure decoded from the mirpitch specification.
%   postoption - not used in this function.
%   o          - {p,x}: the mirpitch object and the representation with
%                its picked peaks.

% 'Multi' and 'Mono' only adjust 'Total' when it holds the opposite
% extreme value.
if option.multi && option.m == 1
    option.m = Inf;
end
if option.mono && option.m == Inf
    option.m = 1;
end
if iscell(x)
    x = x{1};
end
if not(isa(x,'mirpitch'))
    x = mirpeaks(x,'Total',option.m,'Track',option.track,...
                   'Contrast',option.thr,'Threshold',.4,...
                   'Reso',option.reso,'NoBegin','NoEnd',...
                   'Order',option.order);
end

% Amplitudes are only available when the pitches come from picked peaks.
hasamp = not(isa(x,'mirscalar'));
if hasamp
    pf = get(x,'PeakPrecisePos');
    pa = get(x,'PeakPreciseVal');
else
    pf = get(x,'Data');
end
fp = get(x,'FramePos');

if option.stable(1) < Inf
    % Remove each pitch that has no close counterpart (absolute log10
    % ratio below option.stable(1)) in any of the option.stable(2)
    % preceding frames. Frames and pitches are scanned backwards so that
    % deletions do not shift the entries still to be examined.
    for i = 1:length(pf)
        for j = 1:length(pf{i})
            for k = 1:size(pf{i}{j},3)
                for l = size(pf{i}{j},2):-1:option.stable(2)+1
                    for m = length(pf{i}{j}{1,l,k}):-1:1
                        found = 0;
                        for h = 1:option.stable(2)
                            for n = 1:length(pf{i}{j}{1,l-h,k})
                                if abs(log10(pf{i}{j}{1,l,k}(m) ...
                                            /pf{i}{j}{1,l-h,k}(n))) ...
                                       < option.stable(1)
                                    found = 1;
                                end
                            end
                        end
                        if not(found)
                            pf{i}{j}{1,l,k}(m) = [];
                            % Keep the amplitudes aligned with the pitches
                            % they describe.
                            if hasamp && length(pa{i}{j}{1,l,k}) >= m
                                pa{i}{j}{1,l,k}(m) = [];
                            end
                        end
                    end
                    % The first frame has no predecessor to be compared
                    % with: it is emptied.
                    pf{i}{j}{1,1,k} = zeros(1,0);
                    if hasamp
                        pa{i}{j}{1,1,k} = zeros(1,0);
                    end
                end
            end
        end
    end
end

if option.median
    for i = 1:length(pf)
        for j = 1:length(pf{i})
            if size(fp{i}{j},2) > 1
                % Pitch curve: first pitch of each frame, NaN when the
                % frame has none.
                npf = zeros(size(pf{i}{j}));
                for k = 1:size(pf{i}{j},3)
                    for l = 1:size(pf{i}{j},2)
                        if isempty(pf{i}{j}{1,l,k})
                            npf(1,l,k) = NaN;
                        else
                            npf(1,l,k) = pf{i}{j}{1,l,k}(1);
                        end
                    end
                end
                % Filter order: median length divided by the frame hop,
                % at least 1 since medfilt1 requires a positive order.
                hop = fp{i}{j}(1,2)-fp{i}{j}(1,1);
                win = max(1,round(option.median/hop));
                % NOTE(review): the amplitudes keep one entry per picked
                % peak whereas the pitch curve now holds a single value
                % per frame.
                pf{i}{j} = medfilt1(npf,win);
            end
        end
    end
end

if hasamp
    p.amplitude = pa;
else
    p.amplitude = 0;
end
s = mirscalar(x,'Data',pf,'Title','Pitch','Unit','Hz');
p = class(p,'mirpitch',s);
o = {p,x};