Mercurial > hg > camir-aes2014
comparison toolboxes/MIRtoolbox1.3.2/MIRToolbox/@mirspectrum/mirspectrum.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
1 function varargout = mirspectrum(orig,varargin) | |
2 % s = mirspectrum(x) computes the spectrum of the audio signal x, showing | |
3 % the distribution of the energy along the frequencies. | |
4 % (x can be the name of an audio file as well.) | |
5 % Optional argument: | |
6 % mirspectrum(...,'Frame',l,h) computes spectrogram, using frames of | |
7 % length l seconds and a hop rate h. | |
8 % Default values: l = .05 s, h = .5. | |
9 % mirspectrum(...,'Min',mi) indicates the lowest frequency taken into | |
10 % consideration, expressed in Hz. | |
11 % Default value: 0 Hz. | |
12 % mirspectrum(...,'Max',ma) indicates the highest frequency taken into | |
13 % consideration, expressed in Hz. | |
14 % Default value: the maximal possible frequency, corresponding to | |
15 % the sampling rate of x divided by 2. | |
16 % mirspectrum(...,'Window',w): windowing method: | |
17 % either w = 0 (no windowing) or any windowing function proposed | |
18 % in the Signal Processing Toolbox (help window). | |
19 % default value: w = 'hamming' | |
20 % | |
21 % mirspectrum(...,'Cents'): decomposes the energy in cents. | |
22 % mirspectrum(...,'Collapsed'): collapses the spectrum into one | |
23 % octave divided into 1200 cents. | |
24 % Redistribution of the frequencies into bands: | |
25 % mirspectrum(...,'Mel'): Mel bands. | |
26 % (Requires the Auditory Toolbox.) | |
27 % mirspectrum(...,'Bark'): Bark bands. | |
28 % (Code based on Pampalk's MA toolbox). | |
29 % If the audio signal was frame decomposed, the output s is a | |
30 % band-decomposed spectrogram. It is then possible to compute | |
31 % the spectrum of the temporal signal in each band, | |
32 % using the following syntax: | |
33 % mirspectrum(s,'AlongBands') | |
34 % This corresponds to fluctuation (cf. mirfluctuation). | |
35 % mirspectrum(...,'Mask'): Models masking phenomena in each band. | |
36 % (Code based on Pampalk's MA toolbox). | |
37 % mirspectrum(...,'Normal'): normalizes with respect to energy. | |
38 % mirspectrum(...,'NormalInput'): input signal is normalized from 0 to 1. | |
39 % mirspectrum(...,'Power'): squares the energy. | |
40 % mirspectrum(...,'dB'): represents the spectrum energy in decibel scale. | |
41 % mirspectrum(...,'dB',th): keeps only the highest energy over a | |
42 % range of th dB. | |
43 % mirspectrum(...,'Terhardt'): modulates the energy following | |
44 % (Terhardt, 1979) outer ear model. (Code based on Pampalk's MA | |
45 % toolbox). | |
46 % mirspectrum(...,'Resonance',r): multiplies the spectrum with a | |
47 % resonance curve. Possible value for r: | |
48 % 'ToiviainenSnyder': highlights best perceived meter | |
49 % (Toiviainen & Snyder 2003) | |
50 % (default value for spectrum of envelopes) | |
51 % 'Fluctuation': fluctuation strength (Fastl 1982) | |
52 % (default value for spectrum of band channels) | |
53 % mirspectrum(...,'Prod',m): Enhances components that have harmonics | |
54 % located at multiples of range(s) m of the signal's fundamental | |
55 % frequency. Computed by compressing the signal by the list of | |
56 % factors m, and by multiplying all the results with the original | |
57 % signa (Alonso et al, 2003). | |
58 % mirspectrum(...,'Sum',s): Similar option using summation instead of | |
59 % product. | |
60 % | |
61 % mirspectrum(...,'MinRes',mr): Indicates a minimal accepted | |
62 % frequency resolution, in Hz. The audio signal is zero-padded in | |
63 % order to reach the desired resolution. | |
64 % If the 'Mel' option is toggled on, 'MinRes' is set by default | |
65 % to 66 Hz. | |
66 % mirspectrum(...,'MinRes',mr,'OctaveRatio',tol): Indicates the | |
67 % minimal accepted resolution in terms of number of divisions of | |
68 % the octave. Low frequencies are ignored in order to reach the | |
69 % desired resolution. | |
70 % The corresponding required frequency resolution is equal to | |
71 % the difference between the first frequency bins, multiplied | |
72 % by the constraining multiplicative factor tol (set by | |
73 % default to .75). | |
74 % mirspectrum(...,'Res',r): Indicates the required precise frequency | |
75 % resolution, in Hz. The audio signal is zero-padded in order to | |
76 % reach the desired resolution. | |
77 % mirspectrum(...,'Length',l): Specifies the length of the FFT, | |
78 % overriding the FFT length initially planned. | |
79 % mirspectrum(...,'ZeroPad',s): Zero-padding of s samples. | |
80 % mirspectrum(...,'WarningRes',s): Indicates a required frequency | |
81 % resolution, in Hz, for the input signal. If the resolution does | |
82 % not reach that prerequisite, a warning is displayed. | |
83 % mirspectrum(...,'ConstantQ',nb): Carries out a Constant Q Transform | |
84 % instead of a FFT, with a number of bins per octave fixed to nb. | |
85 % Default value for nb: 12 bins per octave. | |
86 % | |
87 % mirspectrum(...,'Smooth',o): smooths the envelope using a movering | |
88 % average of order o. | |
89 % Default value when the option is toggled on: o=10 | |
90 % mirspectrum(...,'Gauss',o): smooths the envelope using a gaussian | |
91 % of standard deviation o samples. | |
92 % Default value when the option is toggled on: o=10 | |
93 % mirspectrum(...,'Phase',0): do not compute the FFT phase. | |
94 | |
95 win.key = 'Window'; | |
96 win.type = 'String'; | |
97 win.default = 'hamming'; | |
98 option.win = win; | |
99 | |
100 min.key = 'Min'; | |
101 min.type = 'Integer'; | |
102 min.default = 0; | |
103 option.min = min; | |
104 | |
105 max.key = 'Max'; | |
106 max.type = 'Integer'; | |
107 max.default = Inf; | |
108 option.max = max; | |
109 | |
110 mr.key = 'MinRes'; | |
111 mr.type = 'Integer'; | |
112 mr.default = 0; | |
113 option.mr = mr; | |
114 | |
115 res.key = 'Res'; | |
116 res.type = 'Integer'; | |
117 res.default = NaN; | |
118 option.res = res; | |
119 | |
120 length.key = 'Length'; | |
121 length.type = 'Integer'; | |
122 length.default = NaN; | |
123 option.length = length; | |
124 | |
125 zp.key = 'ZeroPad'; | |
126 zp.type = 'Integer'; | |
127 zp.default = 0; | |
128 zp.keydefault = Inf; | |
129 option.zp = zp; | |
130 | |
131 wr.key = 'WarningRes'; | |
132 wr.type = 'Integer'; | |
133 wr.default = 0; | |
134 option.wr = wr; | |
135 | |
136 octave.key = 'OctaveRatio'; | |
137 octave.type = 'Boolean'; | |
138 octave.default = 0; | |
139 option.octave = octave; | |
140 | |
141 constq.key = 'ConstantQ'; | |
142 constq.type = 'Integer'; | |
143 constq.default = 0; | |
144 constq.keydefault = 12; | |
145 option.constq = constq; | |
146 | |
147 alongbands.key = 'AlongBands'; | |
148 alongbands.type = 'Boolean'; | |
149 alongbands.default = 0; | |
150 option.alongbands = alongbands; | |
151 | |
152 ni.key = 'NormalInput'; | |
153 ni.type = 'Boolean'; | |
154 ni.default = 0; | |
155 option.ni = ni; | |
156 | |
157 norm.key = 'Normal'; | |
158 norm.type = 'Integer'; | |
159 norm.default = 0; | |
160 norm.keydefault = 1; | |
161 norm.when = 'After'; | |
162 option.norm = norm; | |
163 | |
164 band.type = 'String'; | |
165 band.choice = {'Freq','Mel','Bark','Cents'}; | |
166 band.default = 'Freq'; | |
167 band.when = 'Both'; | |
168 option.band = band; | |
169 | |
170 nbbands.key = 'Bands'; | |
171 nbbands.type = 'Integer'; | |
172 nbbands.default = 0; | |
173 nbbands.when = 'After'; | |
174 option.nbbands = nbbands; | |
175 | |
176 mprod.key = 'Prod'; | |
177 mprod.type = 'Integers'; | |
178 mprod.default = []; | |
179 mprod.keydefault = 2:6; | |
180 mprod.when = 'After'; | |
181 option.mprod = mprod; | |
182 | |
183 msum.key = 'Sum'; | |
184 msum.type = 'Integers'; | |
185 msum.default = []; | |
186 msum.keydefault = 2:6; | |
187 msum.when = 'After'; | |
188 option.msum = msum; | |
189 | |
190 reso.key = 'Resonance'; | |
191 reso.type = 'String'; | |
192 reso.choice = {'ToiviainenSnyder','Fluctuation','Meter',0,'no','off'}; | |
193 reso.default = 0; | |
194 reso.when = 'After'; | |
195 option.reso = reso; | |
196 | |
197 log.key = 'log'; | |
198 log.type = 'Boolean'; | |
199 log.default = 0; | |
200 log.when = 'After'; | |
201 option.log = log; | |
202 | |
203 db.key = 'dB'; | |
204 db.type = 'Integer'; | |
205 db.default = 0; | |
206 db.keydefault = Inf; | |
207 db.when = 'After'; | |
208 option.db = db; | |
209 | |
210 pow.key = 'Power'; | |
211 pow.type = 'Boolean'; | |
212 pow.default = 0; | |
213 pow.when = 'After'; | |
214 option.pow = pow; | |
215 | |
216 terhardt.key = 'Terhardt'; | |
217 terhardt.type = 'Boolean'; | |
218 terhardt.default = 0; | |
219 terhardt.when = 'After'; | |
220 option.terhardt = terhardt; | |
221 | |
222 mask.key = 'Mask'; | |
223 mask.type = 'Boolean'; | |
224 mask.default = 0; | |
225 mask.when = 'After'; | |
226 option.mask = mask; | |
227 | |
228 % e.key = 'Enhanced'; | |
229 % e.type = 'Integer'; | |
230 % e.default = []; | |
231 % e.keydefault = 2:10; | |
232 % e.when = 'After'; | |
233 %option.e = e; | |
234 | |
235 collapsed.key = 'Collapsed'; | |
236 collapsed.type = 'Boolean'; | |
237 collapsed.default = 0; | |
238 collapsed.when = 'Both'; | |
239 option.collapsed = collapsed; | |
240 | |
241 aver.key = 'Smooth'; | |
242 aver.type = 'Integer'; | |
243 aver.default = 0; | |
244 aver.keydefault = 10; | |
245 aver.when = 'After'; | |
246 option.aver = aver; | |
247 | |
248 gauss.key = 'Gauss'; | |
249 gauss.type = 'Integer'; | |
250 gauss.default = 0; | |
251 gauss.keydefault = 10; | |
252 gauss.when = 'After'; | |
253 option.gauss = gauss; | |
254 | |
255 rapid.key = 'Rapid'; | |
256 rapid.type = 'Boolean'; | |
257 rapid.default = 0; | |
258 option.rapid = rapid; | |
259 | |
260 phase.key = 'Phase'; | |
261 phase.type = 'Boolean'; | |
262 phase.default = 1; | |
263 option.phase = phase; | |
264 | |
265 specif.option = option; | |
266 | |
267 specif.defaultframelength = 0.05; | |
268 specif.defaultframehop = 0.5; | |
269 | |
270 specif.eachchunk = @eachchunk; | |
271 specif.combinechunk = @combinechunk; | |
272 | |
273 if isamir(orig,'mirscalar') || isamir(orig,'mirenvelope') | |
274 specif.nochunk = 1; | |
275 end | |
276 | |
277 varargout = mirfunction(@mirspectrum,orig,varargin,nargout,specif,@init,@main); | |
278 | |
279 | |
280 function [x type] = init(x,option) | |
281 type = 'mirspectrum'; | |
282 | |
283 | |
284 function s = main(orig,option,postoption) | |
285 if isstruct(option) | |
286 if option.collapsed | |
287 option.band = 'Cents'; | |
288 end | |
289 if isnan(option.res) && strcmpi(option.band,'Cents') && option.min | |
290 option.res = option.min *(2^(1/1200)-1)*.9; | |
291 end | |
292 end | |
293 if not(isempty(postoption)) | |
294 if not(strcmpi(postoption.band,'Freq') && isempty(postoption.msum) ... | |
295 || isempty(postoption.mprod)) || postoption.log || postoption.db ... | |
296 || postoption.pow || postoption.mask || postoption.collapsed ... | |
297 || postoption.aver || postoption.gauss | |
298 option.phase = 0; | |
299 end | |
300 end | |
301 if iscell(orig) | |
302 orig = orig{1}; | |
303 end | |
304 if isa(orig,'mirspectrum') && ... | |
305 not(isfield(option,'alongbands') && option.alongbands) | |
306 s = orig; | |
307 if isfield(option,'min') && ... | |
308 (option.min || iscell(option.max) || option.max < Inf) | |
309 magn = get(s,'Magnitude'); | |
310 freq = get(s,'Frequency'); | |
311 for k = 1:length(magn) | |
312 m = magn{k}; | |
313 f = freq{k}; | |
314 if iscell(option.max) | |
315 mi = option.min{k}; | |
316 ma = option.max{k}; | |
317 else | |
318 mi = option.min; | |
319 ma = option.max; | |
320 end | |
321 if not(iscell(m)) | |
322 m = {m}; | |
323 f = {f}; | |
324 end | |
325 for l = 1:length(m) | |
326 range = find(and(f{l}(:,1) >= mi,f{l}(:,1) <= ma)); | |
327 m{l} = m{l}(range,:,:); | |
328 f{l} = f{l}(range,:,:); | |
329 end | |
330 magn{k} = m; | |
331 freq{k} = f; | |
332 end | |
333 s = set(s,'Magnitude',magn,'Frequency',freq); | |
334 end | |
335 if not(isempty(postoption)) && not(isequal(postoption,0)) | |
336 s = post(s,postoption); | |
337 end | |
338 elseif ischar(orig) | |
339 s = mirspectrum(miraudio(orig),option,postoption); | |
340 else | |
341 if nargin == 0 | |
342 orig = []; | |
343 end | |
344 s.phase = []; | |
345 s.log = 0; | |
346 s.xscale = 'Freq'; | |
347 s.pow = 1; | |
348 s = class(s,'mirspectrum',mirdata(orig)); | |
349 s = purgedata(s); | |
350 s = set(s,'Title','Spectrum','Abs','frequency (Hz)','Ord','magnitude'); | |
351 %disp('Computing spectrum...') | |
352 sig = get(orig,'Data'); | |
353 t = get(orig,'Pos'); | |
354 if isempty(t) | |
355 t = get(orig,'FramePos'); | |
356 for k = 1:length(sig) | |
357 for l = 1:length(sig{k}) | |
358 sig{k}{l} = sig{k}{l}'; | |
359 t{k}{l} = t{k}{l}(1,:,:)'; | |
360 end | |
361 end | |
362 end | |
363 fs = get(orig,'Sampling'); | |
364 fp = get(orig,'FramePos'); | |
365 m = cell(1,length(sig)); | |
366 p = cell(1,length(sig)); | |
367 f = cell(1,length(sig)); | |
368 for i = 1:length(sig) | |
369 d = sig{i}; | |
370 fpi = fp{i}; | |
371 if not(iscell(d)) | |
372 d = {d}; | |
373 end | |
374 if option.alongbands | |
375 fsi = 1 / (fpi{1}(1,2) - fpi{1}(1,1)); | |
376 else | |
377 fsi = fs{i}; | |
378 end | |
379 mi = cell(1,length(d)); | |
380 phi = cell(1,length(d)); | |
381 fi = cell(1,length(d)); | |
382 for J = 1:length(d) | |
383 dj = d{J}; | |
384 if option.ni | |
385 mxdj = repmat(max(dj),[size(dj,1),1,1]); | |
386 mndj = repmat(min(dj),[size(dj,1),1,1]); | |
387 dj = (dj-mndj)./(mxdj-mndj); | |
388 end | |
389 if option.alongbands | |
390 if size(dj,1)>1 | |
391 error('ERROR IN MIRSPECTRUM: ''AlongBands'' is restricted to spectrum decomposed into bands, such as ''Mel'' and ''Bark''.') | |
392 end | |
393 dj = reshape(dj,[size(dj,2),1,size(dj,3)]); | |
394 fp{i}{J} = fp{i}{J}([1;end]); | |
395 end | |
396 | |
397 if option.constq | |
398 % Constant Q Transform | |
399 r = 2^(1/option.constq); | |
400 Q = 1 / (r - 1); | |
401 f_max = min(fsi/2,option.max); | |
402 f_min = option.min; | |
403 if not(f_min) | |
404 f_min = 16.3516; | |
405 end | |
406 B = floor(log(f_max/f_min) / log(r)); % number of bins | |
407 N0 = round(Q*fsi/f_min); % maximum Nkcq | |
408 j2piQn = -j*2*pi*Q*(0:N0-1)'; | |
409 | |
410 fj = f_min * r.^(0:B-1)'; | |
411 transf = NaN(B,size(dj,2),size(dj,3)); | |
412 for kcq = 1:B | |
413 Nkcq = round(Q*fsi/fj(kcq)); | |
414 win = mirwindow(dj,option.win,Nkcq); | |
415 exq = repmat(exp(j2piQn(1:Nkcq)/Nkcq),... | |
416 [1,size(win,2),size(win,3)]); | |
417 transf(kcq,:) = sum(win.* exq) / Nkcq; | |
418 end | |
419 else | |
420 % FFT | |
421 dj = mirwindow(dj,option.win); | |
422 if option.zp | |
423 if option.zp < Inf | |
424 dj = [dj;zeros(option.zp,size(dj,2),size(dj,3))]; | |
425 else | |
426 dj = [dj;zeros(size(dj))]; | |
427 end | |
428 end | |
429 if isstruct(postoption) | |
430 if strcmpi(postoption.band,'Mel') && ... | |
431 (not(option.mr) || option.mr > 66) | |
432 option.mr = 66; | |
433 end | |
434 else | |
435 %warning('WARNING in MIRSPECTRUM (for debugging purposes): By default, minimum resolution specified.') | |
436 if not(option.mr) | |
437 option.mr = 66; | |
438 end | |
439 end | |
440 if option.octave | |
441 N = size(dj,1); | |
442 res = (2.^(1/option.mr)-1)*option.octave; | |
443 % Minimal freq ratio between 2 first bins. | |
444 % freq resolution should be > option.min * res | |
445 Nrec = fsi/(option.min*res); | |
446 % Recommended minimal sample length. | |
447 if Nrec > N | |
448 % If actual sample length is too small. | |
449 option.min = fsi/N / res; | |
450 warning('WARNING IN MIRSPECTRUM: The input signal is too short to obtain the desired octave resolution. Lowest frequencies will be ignored.'); | |
451 display(['(The recommended minimal input signal length would be ' num2str(Nrec/fsi) ' s.)']); | |
452 display(['New low frequency range: ' num2str(option.min) ' Hz.']); | |
453 end | |
454 N = 2^nextpow2(N); | |
455 elseif isnan(option.length) | |
456 if isnan(option.res) | |
457 N = size(dj,1); | |
458 if option.mr && N < fsi/option.mr | |
459 if option.wr && N < fsi/option.wr | |
460 warning('WARNING IN MIRSPECTRUM: The input signal is too short to obtain the desired frequency resolution. Performed zero-padding will not guarantee the quality of the results.'); | |
461 end | |
462 N = max(N,fsi/option.mr); | |
463 end | |
464 N = 2^nextpow2(N); | |
465 else | |
466 N = ceil(fsi/option.res); | |
467 end | |
468 else | |
469 N = option.length; | |
470 end | |
471 | |
472 % Here is the spectrum computation itself | |
473 transf = fft(dj,N); | |
474 | |
475 len = ceil(size(transf,1)/2); | |
476 fj = fsi/2 * linspace(0,1,len)'; | |
477 if option.max | |
478 maxf = find(fj>=option.max,1); | |
479 if isempty(maxf) | |
480 maxf = len; | |
481 end | |
482 else | |
483 maxf = len; | |
484 end | |
485 if option.min | |
486 minf = find(fj>=option.min,1); | |
487 if isempty(minf) | |
488 maxf = len; | |
489 end | |
490 else | |
491 minf = 1; | |
492 end | |
493 | |
494 transf = transf(minf:maxf,:,:); | |
495 fj = fj(minf:maxf); | |
496 end | |
497 | |
498 mi{J} = abs(transf); | |
499 if option.phase | |
500 phi{J} = angle(transf); | |
501 end | |
502 fi{J} = repmat(fj,[1,size(transf,2),size(transf,3)]); | |
503 end | |
504 if iscell(sig{i}) | |
505 m{i} = mi; | |
506 p{i} = phi; | |
507 f{i} = fi; | |
508 else | |
509 m{i} = mi{1}; | |
510 p{i} = phi{1}; | |
511 f{i} = fi{1}; | |
512 end | |
513 end | |
514 s = set(s,'Frequency',f,'Magnitude',m,'Phase',p,'FramePos',fp); | |
515 if not(isempty(postoption)) && isstruct(postoption) | |
516 s = post(s,postoption); | |
517 end | |
518 end | |
519 | |
520 | |
521 function s = post(s,option) | |
522 if option.collapsed | |
523 option.band = 'Cents'; | |
524 end | |
525 m = get(s,'Magnitude'); | |
526 f = get(s,'Frequency'); | |
527 for k = 1:length(m) | |
528 if not(iscell(m{k})) | |
529 m{k} = {m{k}}; | |
530 f{k} = {f{k}}; | |
531 end | |
532 end | |
533 if get(s,'Power') == 1 && ... | |
534 (option.pow || any(option.mprod) || any(option.msum)) | |
535 for h = 1:length(m) | |
536 for l = 1:length(m{k}) | |
537 m{h}{l} = m{h}{l}.^2; | |
538 end | |
539 end | |
540 s = set(s,'Power',2,'Title',['Power ',get(s,'Title')]); | |
541 end | |
542 if any(option.mprod) | |
543 for h = 1:length(m) | |
544 for l = 1:length(m{k}) | |
545 z0 = m{h}{l}; | |
546 z1 = z0; | |
547 for k = 1:length(option.mprod) | |
548 mpr = option.mprod(k); | |
549 if mpr | |
550 zi = ones(size(z0)); | |
551 zi(1:floor(end/mpr),:,:) = z0(mpr:mpr:end,:,:); | |
552 z1 = z1.*zi; | |
553 end | |
554 end | |
555 m{h}{l} = z1; | |
556 end | |
557 end | |
558 s = set(s,'Title','Spectral product'); | |
559 end | |
560 if any(option.msum) | |
561 for h = 1:length(m) | |
562 for l = 1:length(m{k}) | |
563 z0 = m{h}{l}; | |
564 z1 = z0; | |
565 for k = 1:length(option.msum) | |
566 mpr = option.msum(k); | |
567 if mpr | |
568 zi = ones(size(z0)); | |
569 zi(1:floor(end/mpr),:,:) = z0(mpr:mpr:end,:,:); | |
570 z1 = z1+zi; | |
571 end | |
572 end | |
573 m{h}{l} = z1; | |
574 end | |
575 end | |
576 s = set(s,'Title','Spectral sum'); | |
577 end | |
578 %for i = option.e % Enhancing procedure %%%% TO CHECK, t IS NOT DEFINED! | |
579 % for h = 1:length(m) | |
580 % for l = 1:length(m{k}) | |
581 % c = m{h}{l}; | |
582 % % option.e is the list of scaling factors | |
583 % % i is the scaling factor | |
584 % if i | |
585 % ic = zeros(size(c)); | |
586 % for g = 1:size(c,2) | |
587 % for h2 = 1:size(c,3) | |
588 % beg = find(t(:,g,h2)/i >= t(1,g,h2)) | |
589 % if not(isempty(beg)) | |
590 % ic(:,g,h2) = interp1(t(:,g,h2),c(:,g,h2),t(:,g,h2)/i); %,'cubic'); | |
591 % The scaled autocorrelation | |
592 % ic(1:beg(1)-1,g,h2) = 0; | |
593 % end | |
594 % end | |
595 % end | |
596 % ic(find(isnan(ic))) = Inf; % All the NaN values are changed into 0 in the resulting curve | |
597 % ic = max(ic,0); | |
598 % c = c - ic; % The scaled autocorrelation is substracted to the initial one | |
599 % c = max(c,0); % Half-wave rectification | |
600 %figure | |
601 %plot(c) | |
602 % end | |
603 % end | |
604 % end | |
605 %end | |
606 if option.norm | |
607 for k = 1:length(m) | |
608 for l = 1:length(m{k}) | |
609 mkl = m{k}{l}; | |
610 nkl = zeros(1,size(mkl,2),size(mkl,3)); | |
611 for kk = 1:size(mkl,2) | |
612 for ll = 1:size(mkl,3) | |
613 nkl(1,kk,l) = norm(mkl(:,kk,ll)); | |
614 end | |
615 end | |
616 m{k}{l} = mkl./repmat(nkl,[size(m{k}{k},1),1,1]); | |
617 end | |
618 end | |
619 end | |
620 if option.terhardt && not(isempty(find(f{1}{1}))) % This excludes the case where spectrum already along bands | |
621 % Code taken from Pampalk's MA Toolbox | |
622 for h = 1:length(m) | |
623 for l = 1:length(m{k}) | |
624 W_Adb = zeros(size(f{k}{l})); | |
625 W_Adb(2:size(f{k}{l},1),:,:) = ... | |
626 + 10.^((-3.64*(f{k}{l}(2:end,:,:)/1000).^-0.8 ... | |
627 + 6.5 * exp(-0.6 * (f{k}{l}(2:end,:,:)/1000 - 3.3).^2) ... | |
628 - 0.001*(f{k}{l}(2:end,:,:)/1000).^4)/20); | |
629 W_Adb = W_Adb.^2; | |
630 m{h}{l} = m{h}{l}.*W_Adb; | |
631 end | |
632 end | |
633 end | |
634 if option.reso | |
635 if not(ischar(option.reso)) | |
636 if strcmp(get(orig,'XScale'),'Mel') | |
637 option.reso = 'Fluctuation'; | |
638 else | |
639 option.reso = 'ToiviainenSnyder'; | |
640 end | |
641 end | |
642 %if strcmpi(option.reso,'ToiviainenSnyder') || ... | |
643 % strcmpi(option.reso,'Meter') || ... | |
644 % strcmpi(option.reso,'Fluctuation') | |
645 for k = 1:length(m) | |
646 for l = 1:length(m{k}) | |
647 if strcmpi(option.reso,'ToiviainenSnyder') || strcmpi(option.reso,'Meter') | |
648 w = max(0,... | |
649 1 - 0.25*(log2(max(1./max(f{k}{l},1e-12),1e-12)/0.5)).^2); | |
650 elseif strcmpi(option.reso,'Fluctuation') | |
651 w1 = f{k}{l} / 4; % ascending part of the fluctuation curve; | |
652 w2 = 1 - 0.3 * (f{k}{l} - 4)/6; % descending part; | |
653 w = min(w1,w2); | |
654 end | |
655 if max(w) == 0 | |
656 warning('The resonance curve, not defined for this range of delays, will not be applied.') | |
657 else | |
658 m{k}{l} = m{k}{l}.* w; | |
659 end | |
660 end | |
661 end | |
662 end | |
663 if strcmp(s.xscale,'Freq') | |
664 if strcmpi(option.band,'Mel') | |
665 % The following is largely based on the source code from Auditory Toolbox | |
666 % (A part that I could not call directly from MIRtoolbox) | |
667 | |
668 % (Malcolm Slaney, August 1993, (c) 1998 Interval Research Corporation) | |
669 | |
670 try | |
671 MakeERBFilters(1,1,1); % Just to be sure that the Auditory Toolbox is installed | |
672 catch | |
673 error('ERROR IN MIRFILTERBANK: Auditory Toolbox needs to be installed.'); | |
674 end | |
675 | |
676 %disp('Computing Mel-frequency spectral representation...') | |
677 lowestFrequency = 133.3333; | |
678 if not(option.nbbands) | |
679 option.nbbands = 40; | |
680 end | |
681 linearFilters = min(13,option.nbbands); | |
682 linearSpacing = 66.66666666; | |
683 logFilters = option.nbbands - linearFilters; | |
684 logSpacing = 1.0711703; | |
685 totalFilters = option.nbbands; | |
686 | |
687 % Figure the band edges. Interesting frequencies are spaced | |
688 % by linearSpacing for a while, then go logarithmic. First figure | |
689 % all the interesting frequencies. Lower, center, and upper band | |
690 % edges are all consequtive interesting frequencies. | |
691 freqs = lowestFrequency + (0:linearFilters-1)*linearSpacing; | |
692 freqs(linearFilters+1:totalFilters+2) = ... | |
693 freqs(linearFilters) * logSpacing.^(1:logFilters+2); | |
694 lower = freqs(1:totalFilters); | |
695 center = freqs(2:totalFilters+1); | |
696 upper = freqs(3:totalFilters+2); | |
697 | |
698 % Figure out the height of the triangle so that each filter has | |
699 % unit weight, assuming a triangular weighting function. | |
700 triangleHeight = 2./(upper-lower); | |
701 | |
702 e = cell(1,length(m)); | |
703 nch = cell(1,length(m)); | |
704 for h = 1:length(m) | |
705 e{h} = cell(1,length(m{h})); | |
706 for i = 1:length(m{h}) | |
707 mi = m{h}{i}; | |
708 fi = f{h}{i}(:,1,1); | |
709 fftSize = size(fi,1); | |
710 | |
711 % We now want to combine FFT bins and figure out | |
712 % each frequencies contribution | |
713 mfccFilterWeights = zeros(totalFilters,fftSize); | |
714 for chan=1:totalFilters | |
715 mfccFilterWeights(chan,:) = triangleHeight(chan).*... | |
716 ((fi > lower(chan) & fi <= center(chan)).* ... | |
717 (fi-lower(chan))/(center(chan)-lower(chan)) + ... | |
718 (fi > center(chan) & fi < upper(chan)).* ... | |
719 (upper(chan)-fi)/(upper(chan)-center(chan))); | |
720 end | |
721 if find(diff(not(sum(mfccFilterWeights,2)))==-1) | |
722 % If one channel has no weight whereas the higher one | |
723 % has a positive weight. | |
724 warning('WARNING in MIRSPECTRUM: The frequency resolution of the spectrum is too low for the Mel transformation. Some Mel components are undefined.') | |
725 display('Recommended frequency resolution: at least 66 Hz.') | |
726 end | |
727 e{h}{i} = zeros(1,size(mi,2),totalFilters); | |
728 for J = 1:size(mi,2) | |
729 if max(mi(:,J)) > 0 | |
730 fftData = zeros(fftSize,1); % Zero-padding ? | |
731 fftData(1:size(mi,1)) = mi(:,J); | |
732 p = mfccFilterWeights * fftData + 1e-16; | |
733 e{h}{i}(1,J,:) = reshape(p,[1,1,length(p)]); | |
734 end | |
735 end | |
736 f{h}{i} = zeros(1,size(mi,2),totalFilters); | |
737 end | |
738 nch{h} = (1:totalFilters)'; | |
739 end | |
740 m = e; | |
741 s = set(s,'XScale','Mel','Title','Mel-Spectrum','Abs','Mel bands','Channels',nch); | |
742 elseif strcmpi(option.band,'Bark') | |
743 sr = get(s,'Sampling'); | |
744 % Code taken from Pampalk's MA Toolbox. | |
745 %% zwicker & fastl: psychoacoustics 1999, page 159 | |
746 bark_upper = [10 20 30 40 51 63 77 92 108 127 148 172 200 232 270 315 370 440 530 640 770 950 1200 1550]*10; %% Hz | |
747 e = cell(1,length(m)); | |
748 nch = cell(1,length(m)); | |
749 for h = 1:length(m) | |
750 %% ignore critical bands outside of sampling rate range | |
751 cb = min(min([find(bark_upper>sr{h}/2),length(bark_upper)]),length(bark_upper)); | |
752 e{h} = cell(1,length(m{h})); | |
753 for i = 1:length(m{h}) | |
754 mi = sum(m{h}{i},3); | |
755 e{h}{i} = zeros(1,size(mi,2),cb); | |
756 k = 1; | |
757 for J=1:cb, %% group into bark bands | |
758 idx = find(f{h}{i}(k:end,1,1)<=bark_upper(J)); | |
759 idx = idx + k-1; | |
760 e{h}{i}(1,:,J) = sum(mi(idx,:,:),1); | |
761 k = max(idx)+1; | |
762 end | |
763 f{h}{i} = zeros(1,size(mi,2),cb); | |
764 end | |
765 nch{h} = (1:length(bark_upper))'; | |
766 end | |
767 m = e; | |
768 s = set(s,'XScale','Bark','Title','Bark-Spectrum','Abs','Bark bands','Channels',nch); | |
769 elseif strcmpi(option.band,'Cents') || option.collapsed | |
770 for h = 1:length(m) | |
771 for i = 1:length(m{h}) | |
772 mi = m{h}{i}; | |
773 fi = f{h}{i}; | |
774 isgood = fi(:,1,1)*(2^(1/1200)-1) >= fi(2,1,1)-fi(1,1,1); | |
775 good = find(isgood); | |
776 if isempty(good) | |
777 mirerror('mirspectrum',... | |
778 'The frequency resolution of the spectrum is too low to be decomposed into cents.'); | |
779 display('Hint: if you specify a minimum value for the frequency range (''Min'' option)'); | |
780 display(' and if you do not specify any frequency resolution (''Res'' option), '); | |
781 display(' the correct frequency resolution will be automatically chosen.'); | |
782 end | |
783 if good>1 | |
784 warning(['MIRSPECTRUM: Frequency resolution in cent is achieved for frequencies over ',... | |
785 num2str(floor(fi(good(1)))),' Hz. Lower frequencies are ignored.']) | |
786 display('Hint: if you specify a minimum value for the frequency range (''Min'' option)'); | |
787 display(' and if you do not specify any frequency resolution (''Res'' option), '); | |
788 display(' the correct frequency resolution will be automatically chosen.'); | |
789 end | |
790 fi = fi(good,:,:); | |
791 mi = mi(good,:,:); | |
792 f2cents = 440*2.^(1/1200*(-1200*10:1200*10-1)'); | |
793 % The frequencies corresponding to the cent range | |
794 cents = repmat((0:1199)',[20,size(fi,2),size(fi,3)]); | |
795 % The cent range is for the moment centered to A440 | |
796 octaves = ones(1200,1)*(-10:10); | |
797 octaves = repmat(octaves(:),[1,size(fi,2),size(fi,3)]); | |
798 select = find(f2cents>fi(1) & f2cents<fi(end)); | |
799 % Cent range actually used in the current spectrum | |
800 f2cents = repmat(f2cents(select),[1,size(fi,2),size(fi,3)]); | |
801 cents = repmat(cents(select),[1,size(fi,2),size(fi,3)]); | |
802 octaves = repmat(octaves(select),[1,size(fi,2),size(fi,3)]); | |
803 m{h}{i} = interp1(fi(:,1,1),mi,f2cents(:,1,1)); | |
804 f{h}{i} = octaves*1200+cents + 6900; | |
805 % Now the cent range is expressed in midicent | |
806 end | |
807 end | |
808 s = set(s,'Abs','pitch (in midicents)','XScale','Cents'); | |
809 end | |
810 end | |
811 if option.mask | |
812 if strcmp(s.xscale,'Freq') | |
813 warning('WARNING IN MIRSPECTRUM: ''Mask'' option available only for Mel-spectrum and Bark-spectrum.'); | |
814 disp('''Mask'' option ignored'); | |
815 else | |
816 nch = get(s,'Channels'); | |
817 for h = 1:length(m) | |
818 % Code taken from Pampalk's MA Toolbox. | |
819 %% spreading function: schroeder et al., 1979, JASA, optimizing | |
820 %% digital speech coders by exploiting masking properties of the human ear | |
821 cb = length(nch{h}); % Number of bands. | |
822 for i = 1:cb, | |
823 spread(i,:) = 10.^((15.81+7.5*((i-(1:cb))+0.474)-17.5*(1+((i-(1:cb))+0.474).^2).^0.5)/10); | |
824 end | |
825 for i = 1:length(m{h}) | |
826 for J = 1:size(m{h}{i},2) | |
827 mj = m{h}{i}(1,J,:); | |
828 mj = spread(1:length(mj),1:length(mj))*mj(:); | |
829 m{h}{i}(1,J,:) = reshape(mj,1,1,length(mj)); | |
830 end | |
831 end | |
832 end | |
833 end | |
834 end | |
835 if option.collapsed | |
836 for h = 1:length(m) | |
837 for i = 1:length(m{h}) | |
838 mi = m{h}{i}; | |
839 fi = f{h}{i}; | |
840 centclass = rem(fi(:,1,1),1200); | |
841 %neg = find(centclass<0); | |
842 %centclass(neg) = 1200 + centclass(neg); | |
843 m{h}{i} = NaN(1200,size(mi,2),size(mi,3)); | |
844 for k = 0:1199 | |
845 m{h}{i}(k+1,:,:) = sum(mi(find(centclass==k),:,:),1); | |
846 end | |
847 f{h}{i} = repmat((0:1199)',[1,size(fi,2),size(fi,3)]); | |
848 end | |
849 end | |
850 s = set(s,'Abs','Cents','XScale','Cents(Collapsed)'); | |
851 end | |
852 if option.log || option.db | |
853 if not(isa(s,'mirspectrum') && s.log) | |
854 if option.db | |
855 s = set(s,'log',10); | |
856 else | |
857 s = set(s,'log',1); | |
858 end | |
859 for k = 1:length(m) | |
860 if not(iscell(m{k})) | |
861 m{k} = {m{k}}; | |
862 f{k} = {f{k}}; | |
863 end | |
864 for l = 1:length(m{k}) | |
865 m{k}{l} = log10(m{k}{l}+1e-16); | |
866 if option.db | |
867 m{k}{l} = 10*m{k}{l}; | |
868 end | |
869 end | |
870 end | |
871 elseif isa(s,'mirspectrum') && s.log<10 | |
872 for k = 1:length(m) | |
873 for l = 1:length(m{k}) | |
874 m{k}{l} = 10*m{k}{l}; | |
875 end | |
876 end | |
877 end | |
878 if option.db>0 && option.db < Inf | |
879 for k = 1:length(m) | |
880 for l = 1:length(m{k}) | |
881 m{k}{l} = m{k}{l}-repmat(max(m{k}{l}),[size(m{k}{l},1) 1 1]); | |
882 m{k}{l} = option.db + max(-option.db,m{k}{l}); | |
883 end | |
884 end | |
885 end | |
886 end | |
887 if option.aver | |
888 for k = 1:length(m) | |
889 for i = 1:length(m{k}) | |
890 m{k}{i} = filter(ones(1,option.aver),1,m{k}{i}); | |
891 end | |
892 end | |
893 end | |
894 if option.gauss | |
895 for k = 1:length(m) | |
896 for i = 1:length(m{k}) | |
897 sigma = option.gauss; | |
898 gauss = 1/sigma/2/pi... | |
899 *exp(- (-4*sigma:4*sigma).^2 /2/sigma^2); | |
900 y = filter(gauss,1,[m{k}{i};zeros(4*sigma,1)]); | |
901 y = y(4*sigma:end,:,:); | |
902 m{k}{i} = y(1:size(m{k}{i},1),:,:); | |
903 end | |
904 end | |
905 end | |
906 s = set(s,'Magnitude',m,'Frequency',f); | |
907 | |
908 | |
909 function dj = mirwindow(dj,win,N) | |
910 if nargin<3 | |
911 N = size(dj,1); | |
912 elseif size(dj,1)<N | |
913 dj(N,1,1) = 0; | |
914 end | |
915 if not(win == 0) | |
916 winf = str2func(win); | |
917 try | |
918 w = window(winf,N); | |
919 catch | |
920 if strcmpi(win,'hamming') | |
921 disp('Signal Processing Toolbox does not seem to be installed. Recompute the hamming window manually.'); | |
922 w = 0.54 - 0.46 * cos(2*pi*(0:N-1)'/(N-1)); | |
923 else | |
924 error(['ERROR in MIRSPECTRUM: Unknown windowing function ',win,' (maybe Signal Processing Toolbox is not installed).']); | |
925 end | |
926 end | |
927 kw = repmat(w,[1,size(dj,2),size(dj,3)]); | |
928 dj = dj(1:N,:,:).* kw; | |
929 end | |
930 | |
931 | |
932 function [y orig] = eachchunk(orig,option,missing,postchunk) | |
933 option.zp = option.zp+missing; | |
934 y = mirspectrum(orig,option); | |
935 | |
936 | |
937 function y = combinechunk(old,new) | |
938 do = get(old,'Data'); | |
939 do = do{1}{1}; | |
940 dn = get(new,'Data'); | |
941 dn = dn{1}{1}; | |
942 y = set(old,'ChunkData',do+dn); |