Mercurial > hg > camir-aes2014
comparison toolboxes/MIRtoolbox1.3.2/MIRToolbox/@mirpitch/mirpitch.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
function varargout = mirpitch(orig,varargin)
%   p = mirpitch(x) evaluates the pitch frequencies (in Hz).
%   Specification of the method(s) for pitch estimation (these methods can
%       be combined):
%       mirpitch(...,'Autocor') computes an autocorrelation function
%           (Default method: selected automatically when no method is
%           specified)
%           mirpitch(...,'Enhanced',a) computes enhanced autocorrelation
%               (see help mirautocor)
%              toggled on by default (a = 2:10)
%           mirpitch(...,'Compress',k) performs magnitude compression
%               (see help mirautocor)
%               Default value: k = .5
%               'Generalized' is accepted as a synonym of 'Compress'.
%           mirpitch(...,fb) specifies a type of filterbank.
%               Possible values:
%                   fb = 'NoFilterBank': no filterbank decomposition
%                   fb = '2Channels' (default value)
%                   fb = 'Gammatone'
%       mirpitch(...,'AutocorSpectrum') computes the autocorrelation of
%           the FFT spectrum
%       mirpitch(...,'Cepstrum') computes the cepstrum
%       mirpitch(...,'Spectrum') requests the FFT spectrum itself.
%       Alternatively, an autocorrelation or a cepstrum can be directly
%           given as first argument of the mirpitch function.
%   Peak picking options:
%       mirpitch(...,'Total',m) selects the m best pitches.
%           Default value: m = Inf, no limit is set concerning the number
%           of pitches to be detected.
%       mirpitch(...,'Mono') corresponds to mirpitch(...,'Total',1)
%       mirpitch(...,'Multi') resets 'Total' to Inf when it was set to 1.
%       mirpitch(...,'Min',mi) indicates the lowest frequency taken into
%           consideration.
%           Default value: 75 Hz. (Praat)
%       mirpitch(...,'Max',ma) indicates the highest frequency taken into
%           consideration.
%           Default value: 2400 Hz. Because there seem to be some problems
%           with higher frequencies, probably due to the absence of
%           pre-whitening in our implementation of Tolonen and Karjalainen
%           approach (used by default, cf. below).
%       mirpitch(...,'Contrast',thr) specifies a threshold value.
%           (see help peaks)
%           Default value: thr = .1
%       mirpitch(...,'Order',o) specifies the ordering for the peak picking.
%           Possible values: o = 'Amplitude' (default) or 'Abscissa'.
%       mirpitch(...,'Track') is forwarded to mirpeaks (not used yet).
%       Alternatively, the result of a mirpeaks computation can be directly
%           given as first argument of the mirpitch function.
%   Post-processing options:
%       mirpitch(...,'Sum','no') does not sum back the channels at the end
%           of the computation. The resulting pitch information remains
%           therefore decomposed into several channels.
%       mirpitch(...,'Median',l) performs a median filtering of the pitch
%           curve. When several pitches are extracted in each frame, the
%           pitch curve contains the best peak of each successive frame.
%           The filter order is l divided by the spacing between the
%           start positions of the first two frames.
%           if l is not specified, the default value .1 is used
%       mirpitch(...,'Stable',th,n) removes pitch values when the difference
%           (or more precisely absolute logarithmic quotient) with the
%           n precedent frames exceeds the threshold th.
%           if th is not specified, the default value .1 is used
%           if n is not specified, the default value 3 is used
%       mirpitch(...,'Reso',r) removes peaks whose distance to one or
%           several higher peaks is lower than a given threshold.
%           Possible value for the threshold r:
%               'SemiTone': ratio between the two peak positions equal to
%                   2^(1/12)
%       mirpitch(...,'Frame',l,h) orders a frame decomposition of window
%           length l (in seconds) and hop factor h, expressed relatively to
%           the window length. For instance h = 1 indicates no overlap.
%           Default values: l = 46.4 ms and h = 10 ms (Tolonen and
%           Karjalainen, 2000). Note that the default hop is expressed in
%           seconds, not as a ratio of the window length.
%   Preset model:
%       mirpitch(...,'Tolonen') implements (part of) the model proposed in
%           (Tolonen & Karjalainen, 2000). It is equivalent to
%           mirpitch(...,'Enhanced',2:10,'Generalized',.67,'2Channels')
%   [p,a] = mirpitch(...) also displays the result of the method chosen for
%       pitch estimation, and shows in particular the peaks corresponding
%       to the pitch values.
%   p = mirpitch(f,a,<r>) creates a mirpitch object based on the frequencies
%       specified in f and the related amplitudes specified in a, using a
%       frame sampling rate of r Hz (set by default to 100 Hz).
%       The amplitudes a are mandatory in this syntax.
%
%   T. Tolonen, M. Karjalainen, "A Computationally Efficient Multipitch
%       Analysis Model", IEEE TRANSACTIONS ON SPEECH AND AUDIO PROCESSING,
%       VOL. 8, NO. 6, NOVEMBER 2000

%% pitch estimation method options

        ac.key = 'Autocor';
        ac.type = 'Boolean';
        ac.default = 0;     % switched on later in init if no method is chosen
    option.ac = ac;

            enh.key = 'Enhanced';
            enh.type = 'Integer';
            enh.default = 2:10;
        option.enh = enh;

            filtertype.type = 'String';
            filtertype.choice = {'NoFilterBank','2Channels','Gammatone'};
            filtertype.default = '2Channels';
        option.filtertype = filtertype;

            gener.key = {'Generalized','Compress'};
            gener.type = 'Integer';
            gener.default = .5;
        option.gener = gener;

        as.key = 'AutocorSpectrum';
        as.type = 'Boolean';
        as.default = 0;
    option.as = as;

        % Named spectrumopt (not s) so that it is not confused with the
        % mirscalar object built in the numeric branch below.
        spectrumopt.key = 'Spectrum';
        spectrumopt.type = 'Boolean';
        spectrumopt.default = 0;
    option.s = spectrumopt;

        ce.key = 'Cepstrum';
        ce.type = 'Boolean';
        ce.default = 0;
    option.ce = ce;

%% peak picking options

        m.key = 'Total';
        m.type = 'Integer';
        m.default = Inf;
    option.m = m;

        multi.key = 'Multi';
        multi.type = 'Boolean';
        multi.default = 0;
    option.multi = multi;

        mono.key = 'Mono';
        mono.type = 'Boolean';
        mono.default = 0;
    option.mono = mono;

        mi.key = 'Min';
        mi.type = 'Integer';
        mi.default = 75;
    option.mi = mi;

        ma.key = 'Max';
        ma.type = 'Integer';
        ma.default = 2400;
    option.ma = ma;

        thr.key = 'Contrast';
        thr.type = 'Integer';
        thr.default = .1;
    option.thr = thr;

        order.key = 'Order';
        order.type = 'String';
        order.choice = {'Amplitude','Abscissa'};
        order.default = 'Amplitude';
    option.order = order;

        reso.key = 'Reso';
        reso.type = 'String';
        reso.choice = {0,'SemiTone'};
        reso.default = 0;
    option.reso = reso;

        track.key = 'Track';        % Not used yet
        track.type = 'Boolean';
        track.default = 0;
    option.track = track;

%% post-processing options

        stable.key = 'Stable';
        stable.type = 'Integer';
        stable.number = 2;
        stable.default = [Inf 0];   % Inf threshold: stability filter disabled
        stable.keydefault = [.1 3];
    option.stable = stable;

        % Local names medianopt / sumopt avoid shadowing the built-in
        % median and sum functions; the option field names are unchanged.
        medianopt.key = 'Median';
        medianopt.type = 'Integer';
        medianopt.default = 0;
        medianopt.keydefault = .1;
    option.median = medianopt;

        frame.key = 'Frame';
        frame.type = 'Integer';
        frame.number = 2;
        frame.default = [0 0];
        frame.keydefault = [NaN NaN];   % NaN is replaced by defaults in init
    option.frame = frame;

        sumopt.key = 'Sum';
        sumopt.type = 'Boolean';
        sumopt.default = 1;
    option.sum = sumopt;

%% preset model

        tolo.key = 'Tolonen';
        tolo.type = 'Boolean';
        tolo.default = 0;
    option.tolo = tolo;

specif.option = option;
specif.chunkframebefore = 1;

if isnumeric(orig)
    % Syntax p = mirpitch(f,a,<r>): build the object directly from data.
    if isempty(varargin)
        % Without this check the code below fails on varargin{1} with an
        % uninformative indexing error.
        error('mirpitch:missingAmplitude',...
              'mirpitch(f,a,<r>): the amplitudes a must be given along with the frequencies f.');
    end
    if nargin<3
        rate = 100;
    else
        rate = varargin{2};
    end
    % One frame per row of orig: start times on the first row of fp,
    % end times (start + one frame period) on the second row.
    fp = (0:size(orig,1)-1)/rate;
    fp = [fp;fp+1/rate];
    p.amplitude = {{varargin{1}'}};
    scal = mirscalar([],'Data',{{orig'}},'Title','Pitch','Unit','Hz',...
                        'FramePos',{{fp}},'Sampling',rate,'Name',{inputname(1)});
    p = class(p,'mirpitch',scal);
    varargout = {p};
else
    varargout = mirfunction(@mirpitch,orig,varargin,nargout,specif,@init,@main);
end
217 | |
218 | |
219 | |
function [y, type] = init(orig,option)
% INIT prepares the representation y from which pitch peaks are picked.
%   orig:   input handed over by mirfunction.
%   option: option values parsed according to the specification declared
%           in mirpitch.
%   y:      orig itself when it is a mirscalar or already has peaks;
%           otherwise an autocorrelation, spectrum autocorrelation,
%           cepstrum, spectrum, or a product of several of these.
%   type:   {'mirpitch', mirtype(y)}.
if option.tolo
    % 'Tolonen' preset overrides the related individual options.
    option.enh = 2:10;
    option.gener = .67;
    option.filtertype = '2Channels';
end
if not(option.ac) && not(option.as) && not(option.ce) && not(option.s)
    % No method requested: fall back to autocorrelation.
    option.ac = 1;
end
if isnan(option.frame.length.val)
    option.frame.length.val = .0464;
end
if isnan(option.frame.hop.val)
    % Default hop is an absolute duration, hence the explicit unit.
    option.frame.hop.val = .01;
    option.frame.hop.unit = 's';
end
if isamir(orig,'mirscalar') || haspeaks(orig)
    y = orig;
elseif isamir(orig,'mirautocor')
    y = mirautocor(orig,'Min',option.mi,'Hz','Max',option.ma,'Hz','Freq');
elseif isamir(orig,'mircepstrum')
    y = orig;
elseif isamir(orig,'mirspectrum')
    if not(option.as) && not(option.ce) && not(option.s)
        % Only 'Autocor' is active, which is not applied to a spectrum
        % input here: use the cepstrum instead.
        option.ce = 1;
    end
    if option.as
        y = mirautocor(orig,...
                        'Min',option.mi,'Hz','Max',option.ma,'Hz');
    end
    if option.ce
        ce = mircepstrum(orig,'freq',...
                        'Min',option.mi,'Hz','Max',option.ma,'Hz');
        if option.as
            y = y*ce;
        else
            y = ce;
        end
    end
    if not(option.as) && not(option.ce)
        % Only 'Spectrum' was requested: pass the input spectrum through,
        % as the waveform branch below does with its computed spectrum.
        % Previously y was left unassigned here and mirtype(y) failed.
        y = orig;
    end
else
    if option.ac
        x = orig;
        if not(strcmpi(option.filtertype,'NoFilterBank'))
            x = mirfilterbank(x,option.filtertype);
        end
        x = mirframenow(x,option);
        y = mirautocor(x,'Generalized',option.gener,...
                         'Min',option.mi,'Hz','Max',option.ma,'Hz');
        if option.sum
            y = mirsummary(y);
        end
        y = mirautocor(y,'Enhanced',option.enh,'Freq');
    end
    if option.as || option.ce || option.s
        x = mirframenow(orig,option);
        % The spectrum is kept in its own variable so that it neither
        % overwrites the autocorrelation held in y nor gets replaced
        % before the cepstrum is computed from it.
        spec = mirspectrum(x);
        if option.as
            specac = mirautocor(spec,...
                        'Min',option.mi,'Hz','Max',option.ma,'Hz');
            if option.ac
                y = y*specac;
            else
                y = specac;
            end
        end
        if option.ce
            ce = mircepstrum(spec,'freq',...
                        'Min',option.mi,'Hz','Max',option.ma,'Hz');
            if option.ac || option.as
                y = y*ce;
            else
                y = ce;
            end
        end
        if not(option.as) && not(option.ce)
            % Only 'Spectrum' among the spectral methods: the result is
            % the spectrum itself, as before.
            % NOTE(review): when 'Autocor' is also set, the
            % autocorrelation is discarded here (unchanged behaviour) --
            % confirm whether a combination was intended.
            y = spec;
        end
    end
end
type = {'mirpitch',mirtype(y)};
299 | |
300 | |
function o = main(x,option,postoption)
% MAIN picks the pitch peaks and applies the post-processing options.
%   x:          representation produced by init (first cell element if a
%               cell array is received).
%   option:     parsed option values.
%   postoption: not used in this function.
%   o:          {p,x}, where p is the mirpitch object and x the (peak
%               picked) representation it was derived from.
if option.multi && option.m == 1
    option.m = Inf;
end
if option.mono && option.m == Inf
    option.m = 1;
end
if iscell(x)
    x = x{1};
end
if not(isa(x,'mirpitch'))
    x = mirpeaks(x,'Total',option.m,'Track',option.track,...
                   'Contrast',option.thr,'Threshold',.4,...
                   'Reso',option.reso,'NoBegin','NoEnd',...
                   'Order',option.order);
end
if isa(x,'mirscalar')
    pf = get(x,'Data');
else
    pf = get(x,'PeakPrecisePos');
    pa = get(x,'PeakPreciseVal');
end
fp = get(x,'FramePos');
if option.stable(1) < Inf
    % 'Stable': keep a pitch only if one of the option.stable(2) preceding
    % frames holds a pitch whose absolute log10 ratio with it is below
    % option.stable(1).
    % NOTE(review): pa is not pruned alongside pf, so amplitudes may no
    % longer line up with the remaining pitches -- confirm with callers.
    for i = 1:length(pf)
        for j = 1:length(pf{i})
            for k = 1:size(pf{i}{j},3)
                % Frames and candidates are scanned backwards so that
                % deletions do not affect the indices still to be visited,
                % and each frame is compared with unfiltered predecessors.
                for l = size(pf{i}{j},2):-1:option.stable(2)+1
                    for m = length(pf{i}{j}{1,l,k}):-1:1
                        found = 0;
                        for h = 1:option.stable(2)
                            previous = pf{i}{j}{1,l-h,k};
                            for n = 1:length(previous)
                                if abs(log10(pf{i}{j}{1,l,k}(m) ...
                                            /previous(n))) ...
                                       < option.stable(1)
                                    found = 1;
                                    break   % one match is enough
                                end
                            end
                            if found
                                break
                            end
                        end
                        if not(found)
                            pf{i}{j}{1,l,k}(m) = [];
                        end
                    end
                    % The first frame has no predecessor: it is emptied.
                    pf{i}{j}{1,1,k} = zeros(1,0);
                end
            end
        end
    end
end
if option.median
    for i = 1:length(pf)
        for j = 1:length(pf{i})
            if size(fp{i}{j},2) > 1
                % Reduce each frame to its first pitch (NaN if none).
                npf = zeros(size(pf{i}{j}));
                for k = 1:size(pf{i}{j},3)
                    for l = 1:size(pf{i}{j},2)
                        if isempty(pf{i}{j}{1,l,k})
                            npf(1,l,k) = NaN;
                        else
                            npf(1,l,k) = pf{i}{j}{1,l,k}(1);
                        end
                    end
                end
                % Filter order = requested length / spacing between the
                % first two frame start positions. Clamped to 1 so that a
                % length shorter than half that spacing does not hand an
                % order of 0 to medfilt1.
                filtorder = max(1,...
                    round(option.median/(fp{i}{j}(1,2)-fp{i}{j}(1,1))));
                pf{i}{j} = medfilt1(npf,filtorder);
            end
        end
    end
end
if isa(x,'mirscalar')
    p.amplitude = 0;
else
    p.amplitude = pa;
end
s = mirscalar(x,'Data',pf,'Title','Pitch','Unit','Hz');
p = class(p,'mirpitch',s);
o = {p,x};