comparison toolboxes/MIRtoolbox1.3.2/MIRToolbox/@mirspectrum/mirspectrum.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
1 function varargout = mirspectrum(orig,varargin)
2 % s = mirspectrum(x) computes the spectrum of the audio signal x, showing
3 % the distribution of the energy along the frequencies.
4 % (x can be the name of an audio file as well.)
5 % Optional argument:
6 % mirspectrum(...,'Frame',l,h) computes spectrogram, using frames of
7 % length l seconds and a hop rate h.
8 % Default values: l = .05 s, h = .5.
9 % mirspectrum(...,'Min',mi) indicates the lowest frequency taken into
10 % consideration, expressed in Hz.
11 % Default value: 0 Hz.
12 % mirspectrum(...,'Max',ma) indicates the highest frequency taken into
13 % consideration, expressed in Hz.
14 % Default value: the maximal possible frequency, corresponding to
15 % the sampling rate of x divided by 2.
16 % mirspectrum(...,'Window',w): windowing method:
17 % either w = 0 (no windowing) or any windowing function proposed
18 % in the Signal Processing Toolbox (help window).
19 % default value: w = 'hamming'
20 %
21 % mirspectrum(...,'Cents'): decomposes the energy in cents.
22 % mirspectrum(...,'Collapsed'): collapses the spectrum into one
23 % octave divided into 1200 cents.
24 % Redistribution of the frequencies into bands:
25 % mirspectrum(...,'Mel'): Mel bands.
26 % (Requires the Auditory Toolbox.)
27 % mirspectrum(...,'Bark'): Bark bands.
28 % (Code based on Pampalk's MA toolbox).
29 % If the audio signal was frame decomposed, the output s is a
30 % band-decomposed spectrogram. It is then possible to compute
31 % the spectrum of the temporal signal in each band,
32 % using the following syntax:
33 % mirspectrum(s,'AlongBands')
34 % This corresponds to fluctuation (cf. mirfluctuation).
35 % mirspectrum(...,'Mask'): Models masking phenomena in each band.
36 % (Code based on Pampalk's MA toolbox).
37 % mirspectrum(...,'Normal'): normalizes with respect to energy.
38 % mirspectrum(...,'NormalInput'): input signal is normalized from 0 to 1.
39 % mirspectrum(...,'Power'): squares the energy.
40 % mirspectrum(...,'dB'): represents the spectrum energy in decibel scale.
41 % mirspectrum(...,'dB',th): keeps only the highest energy over a
42 % range of th dB.
43 % mirspectrum(...,'Terhardt'): modulates the energy following
44 % (Terhardt, 1979) outer ear model. (Code based on Pampalk's MA
45 % toolbox).
46 % mirspectrum(...,'Resonance',r): multiplies the spectrum with a
47 % resonance curve. Possible value for r:
48 % 'ToiviainenSnyder': highlights best perceived meter
49 % (Toiviainen & Snyder 2003)
50 % (default value for spectrum of envelopes)
51 % 'Fluctuation': fluctuation strength (Fastl 1982)
52 % (default value for spectrum of band channels)
53 % mirspectrum(...,'Prod',m): Enhances components that have harmonics
54 % located at multiples of range(s) m of the signal's fundamental
55 % frequency. Computed by compressing the signal by the list of
56 % factors m, and by multiplying all the results with the original
57 % signa (Alonso et al, 2003).
58 % mirspectrum(...,'Sum',s): Similar option using summation instead of
59 % product.
60 %
61 % mirspectrum(...,'MinRes',mr): Indicates a minimal accepted
62 % frequency resolution, in Hz. The audio signal is zero-padded in
63 % order to reach the desired resolution.
64 % If the 'Mel' option is toggled on, 'MinRes' is set by default
65 % to 66 Hz.
66 % mirspectrum(...,'MinRes',mr,'OctaveRatio',tol): Indicates the
67 % minimal accepted resolution in terms of number of divisions of
68 % the octave. Low frequencies are ignored in order to reach the
69 % desired resolution.
70 % The corresponding required frequency resolution is equal to
71 % the difference between the first frequency bins, multiplied
72 % by the constraining multiplicative factor tol (set by
73 % default to .75).
74 % mirspectrum(...,'Res',r): Indicates the required precise frequency
75 % resolution, in Hz. The audio signal is zero-padded in order to
76 % reach the desired resolution.
77 % mirspectrum(...,'Length',l): Specifies the length of the FFT,
78 % overriding the FFT length initially planned.
79 % mirspectrum(...,'ZeroPad',s): Zero-padding of s samples.
80 % mirspectrum(...,'WarningRes',s): Indicates a required frequency
81 % resolution, in Hz, for the input signal. If the resolution does
82 % not reach that prerequisite, a warning is displayed.
83 % mirspectrum(...,'ConstantQ',nb): Carries out a Constant Q Transform
84 % instead of a FFT, with a number of bins per octave fixed to nb.
85 % Default value for nb: 12 bins per octave.
86 %
87 % mirspectrum(...,'Smooth',o): smooths the envelope using a movering
88 % average of order o.
89 % Default value when the option is toggled on: o=10
90 % mirspectrum(...,'Gauss',o): smooths the envelope using a gaussian
91 % of standard deviation o samples.
92 % Default value when the option is toggled on: o=10
93 % mirspectrum(...,'Phase',0): do not compute the FFT phase.
94
95 win.key = 'Window';
96 win.type = 'String';
97 win.default = 'hamming';
98 option.win = win;
99
100 min.key = 'Min';
101 min.type = 'Integer';
102 min.default = 0;
103 option.min = min;
104
105 max.key = 'Max';
106 max.type = 'Integer';
107 max.default = Inf;
108 option.max = max;
109
110 mr.key = 'MinRes';
111 mr.type = 'Integer';
112 mr.default = 0;
113 option.mr = mr;
114
115 res.key = 'Res';
116 res.type = 'Integer';
117 res.default = NaN;
118 option.res = res;
119
120 length.key = 'Length';
121 length.type = 'Integer';
122 length.default = NaN;
123 option.length = length;
124
125 zp.key = 'ZeroPad';
126 zp.type = 'Integer';
127 zp.default = 0;
128 zp.keydefault = Inf;
129 option.zp = zp;
130
131 wr.key = 'WarningRes';
132 wr.type = 'Integer';
133 wr.default = 0;
134 option.wr = wr;
135
136 octave.key = 'OctaveRatio';
137 octave.type = 'Boolean';
138 octave.default = 0;
139 option.octave = octave;
140
141 constq.key = 'ConstantQ';
142 constq.type = 'Integer';
143 constq.default = 0;
144 constq.keydefault = 12;
145 option.constq = constq;
146
147 alongbands.key = 'AlongBands';
148 alongbands.type = 'Boolean';
149 alongbands.default = 0;
150 option.alongbands = alongbands;
151
152 ni.key = 'NormalInput';
153 ni.type = 'Boolean';
154 ni.default = 0;
155 option.ni = ni;
156
157 norm.key = 'Normal';
158 norm.type = 'Integer';
159 norm.default = 0;
160 norm.keydefault = 1;
161 norm.when = 'After';
162 option.norm = norm;
163
164 band.type = 'String';
165 band.choice = {'Freq','Mel','Bark','Cents'};
166 band.default = 'Freq';
167 band.when = 'Both';
168 option.band = band;
169
170 nbbands.key = 'Bands';
171 nbbands.type = 'Integer';
172 nbbands.default = 0;
173 nbbands.when = 'After';
174 option.nbbands = nbbands;
175
176 mprod.key = 'Prod';
177 mprod.type = 'Integers';
178 mprod.default = [];
179 mprod.keydefault = 2:6;
180 mprod.when = 'After';
181 option.mprod = mprod;
182
183 msum.key = 'Sum';
184 msum.type = 'Integers';
185 msum.default = [];
186 msum.keydefault = 2:6;
187 msum.when = 'After';
188 option.msum = msum;
189
190 reso.key = 'Resonance';
191 reso.type = 'String';
192 reso.choice = {'ToiviainenSnyder','Fluctuation','Meter',0,'no','off'};
193 reso.default = 0;
194 reso.when = 'After';
195 option.reso = reso;
196
197 log.key = 'log';
198 log.type = 'Boolean';
199 log.default = 0;
200 log.when = 'After';
201 option.log = log;
202
203 db.key = 'dB';
204 db.type = 'Integer';
205 db.default = 0;
206 db.keydefault = Inf;
207 db.when = 'After';
208 option.db = db;
209
210 pow.key = 'Power';
211 pow.type = 'Boolean';
212 pow.default = 0;
213 pow.when = 'After';
214 option.pow = pow;
215
216 terhardt.key = 'Terhardt';
217 terhardt.type = 'Boolean';
218 terhardt.default = 0;
219 terhardt.when = 'After';
220 option.terhardt = terhardt;
221
222 mask.key = 'Mask';
223 mask.type = 'Boolean';
224 mask.default = 0;
225 mask.when = 'After';
226 option.mask = mask;
227
228 % e.key = 'Enhanced';
229 % e.type = 'Integer';
230 % e.default = [];
231 % e.keydefault = 2:10;
232 % e.when = 'After';
233 %option.e = e;
234
235 collapsed.key = 'Collapsed';
236 collapsed.type = 'Boolean';
237 collapsed.default = 0;
238 collapsed.when = 'Both';
239 option.collapsed = collapsed;
240
241 aver.key = 'Smooth';
242 aver.type = 'Integer';
243 aver.default = 0;
244 aver.keydefault = 10;
245 aver.when = 'After';
246 option.aver = aver;
247
248 gauss.key = 'Gauss';
249 gauss.type = 'Integer';
250 gauss.default = 0;
251 gauss.keydefault = 10;
252 gauss.when = 'After';
253 option.gauss = gauss;
254
255 rapid.key = 'Rapid';
256 rapid.type = 'Boolean';
257 rapid.default = 0;
258 option.rapid = rapid;
259
260 phase.key = 'Phase';
261 phase.type = 'Boolean';
262 phase.default = 1;
263 option.phase = phase;
264
265 specif.option = option;
266
267 specif.defaultframelength = 0.05;
268 specif.defaultframehop = 0.5;
269
270 specif.eachchunk = @eachchunk;
271 specif.combinechunk = @combinechunk;
272
273 if isamir(orig,'mirscalar') || isamir(orig,'mirenvelope')
274 specif.nochunk = 1;
275 end
276
277 varargout = mirfunction(@mirspectrum,orig,varargin,nargout,specif,@init,@main);
278
279
280 function [x type] = init(x,option)
281 type = 'mirspectrum';
282
283
284 function s = main(orig,option,postoption)
285 if isstruct(option)
286 if option.collapsed
287 option.band = 'Cents';
288 end
289 if isnan(option.res) && strcmpi(option.band,'Cents') && option.min
290 option.res = option.min *(2^(1/1200)-1)*.9;
291 end
292 end
293 if not(isempty(postoption))
294 if not(strcmpi(postoption.band,'Freq') && isempty(postoption.msum) ...
295 || isempty(postoption.mprod)) || postoption.log || postoption.db ...
296 || postoption.pow || postoption.mask || postoption.collapsed ...
297 || postoption.aver || postoption.gauss
298 option.phase = 0;
299 end
300 end
301 if iscell(orig)
302 orig = orig{1};
303 end
304 if isa(orig,'mirspectrum') && ...
305 not(isfield(option,'alongbands') && option.alongbands)
306 s = orig;
307 if isfield(option,'min') && ...
308 (option.min || iscell(option.max) || option.max < Inf)
309 magn = get(s,'Magnitude');
310 freq = get(s,'Frequency');
311 for k = 1:length(magn)
312 m = magn{k};
313 f = freq{k};
314 if iscell(option.max)
315 mi = option.min{k};
316 ma = option.max{k};
317 else
318 mi = option.min;
319 ma = option.max;
320 end
321 if not(iscell(m))
322 m = {m};
323 f = {f};
324 end
325 for l = 1:length(m)
326 range = find(and(f{l}(:,1) >= mi,f{l}(:,1) <= ma));
327 m{l} = m{l}(range,:,:);
328 f{l} = f{l}(range,:,:);
329 end
330 magn{k} = m;
331 freq{k} = f;
332 end
333 s = set(s,'Magnitude',magn,'Frequency',freq);
334 end
335 if not(isempty(postoption)) && not(isequal(postoption,0))
336 s = post(s,postoption);
337 end
338 elseif ischar(orig)
339 s = mirspectrum(miraudio(orig),option,postoption);
340 else
341 if nargin == 0
342 orig = [];
343 end
344 s.phase = [];
345 s.log = 0;
346 s.xscale = 'Freq';
347 s.pow = 1;
348 s = class(s,'mirspectrum',mirdata(orig));
349 s = purgedata(s);
350 s = set(s,'Title','Spectrum','Abs','frequency (Hz)','Ord','magnitude');
351 %disp('Computing spectrum...')
352 sig = get(orig,'Data');
353 t = get(orig,'Pos');
354 if isempty(t)
355 t = get(orig,'FramePos');
356 for k = 1:length(sig)
357 for l = 1:length(sig{k})
358 sig{k}{l} = sig{k}{l}';
359 t{k}{l} = t{k}{l}(1,:,:)';
360 end
361 end
362 end
363 fs = get(orig,'Sampling');
364 fp = get(orig,'FramePos');
365 m = cell(1,length(sig));
366 p = cell(1,length(sig));
367 f = cell(1,length(sig));
368 for i = 1:length(sig)
369 d = sig{i};
370 fpi = fp{i};
371 if not(iscell(d))
372 d = {d};
373 end
374 if option.alongbands
375 fsi = 1 / (fpi{1}(1,2) - fpi{1}(1,1));
376 else
377 fsi = fs{i};
378 end
379 mi = cell(1,length(d));
380 phi = cell(1,length(d));
381 fi = cell(1,length(d));
382 for J = 1:length(d)
383 dj = d{J};
384 if option.ni
385 mxdj = repmat(max(dj),[size(dj,1),1,1]);
386 mndj = repmat(min(dj),[size(dj,1),1,1]);
387 dj = (dj-mndj)./(mxdj-mndj);
388 end
389 if option.alongbands
390 if size(dj,1)>1
391 error('ERROR IN MIRSPECTRUM: ''AlongBands'' is restricted to spectrum decomposed into bands, such as ''Mel'' and ''Bark''.')
392 end
393 dj = reshape(dj,[size(dj,2),1,size(dj,3)]);
394 fp{i}{J} = fp{i}{J}([1;end]);
395 end
396
397 if option.constq
398 % Constant Q Transform
399 r = 2^(1/option.constq);
400 Q = 1 / (r - 1);
401 f_max = min(fsi/2,option.max);
402 f_min = option.min;
403 if not(f_min)
404 f_min = 16.3516;
405 end
406 B = floor(log(f_max/f_min) / log(r)); % number of bins
407 N0 = round(Q*fsi/f_min); % maximum Nkcq
408 j2piQn = -j*2*pi*Q*(0:N0-1)';
409
410 fj = f_min * r.^(0:B-1)';
411 transf = NaN(B,size(dj,2),size(dj,3));
412 for kcq = 1:B
413 Nkcq = round(Q*fsi/fj(kcq));
414 win = mirwindow(dj,option.win,Nkcq);
415 exq = repmat(exp(j2piQn(1:Nkcq)/Nkcq),...
416 [1,size(win,2),size(win,3)]);
417 transf(kcq,:) = sum(win.* exq) / Nkcq;
418 end
419 else
420 % FFT
421 dj = mirwindow(dj,option.win);
422 if option.zp
423 if option.zp < Inf
424 dj = [dj;zeros(option.zp,size(dj,2),size(dj,3))];
425 else
426 dj = [dj;zeros(size(dj))];
427 end
428 end
429 if isstruct(postoption)
430 if strcmpi(postoption.band,'Mel') && ...
431 (not(option.mr) || option.mr > 66)
432 option.mr = 66;
433 end
434 else
435 %warning('WARNING in MIRSPECTRUM (for debugging purposes): By default, minimum resolution specified.')
436 if not(option.mr)
437 option.mr = 66;
438 end
439 end
440 if option.octave
441 N = size(dj,1);
442 res = (2.^(1/option.mr)-1)*option.octave;
443 % Minimal freq ratio between 2 first bins.
444 % freq resolution should be > option.min * res
445 Nrec = fsi/(option.min*res);
446 % Recommended minimal sample length.
447 if Nrec > N
448 % If actual sample length is too small.
449 option.min = fsi/N / res;
450 warning('WARNING IN MIRSPECTRUM: The input signal is too short to obtain the desired octave resolution. Lowest frequencies will be ignored.');
451 display(['(The recommended minimal input signal length would be ' num2str(Nrec/fsi) ' s.)']);
452 display(['New low frequency range: ' num2str(option.min) ' Hz.']);
453 end
454 N = 2^nextpow2(N);
455 elseif isnan(option.length)
456 if isnan(option.res)
457 N = size(dj,1);
458 if option.mr && N < fsi/option.mr
459 if option.wr && N < fsi/option.wr
460 warning('WARNING IN MIRSPECTRUM: The input signal is too short to obtain the desired frequency resolution. Performed zero-padding will not guarantee the quality of the results.');
461 end
462 N = max(N,fsi/option.mr);
463 end
464 N = 2^nextpow2(N);
465 else
466 N = ceil(fsi/option.res);
467 end
468 else
469 N = option.length;
470 end
471
472 % Here is the spectrum computation itself
473 transf = fft(dj,N);
474
475 len = ceil(size(transf,1)/2);
476 fj = fsi/2 * linspace(0,1,len)';
477 if option.max
478 maxf = find(fj>=option.max,1);
479 if isempty(maxf)
480 maxf = len;
481 end
482 else
483 maxf = len;
484 end
485 if option.min
486 minf = find(fj>=option.min,1);
487 if isempty(minf)
488 maxf = len;
489 end
490 else
491 minf = 1;
492 end
493
494 transf = transf(minf:maxf,:,:);
495 fj = fj(minf:maxf);
496 end
497
498 mi{J} = abs(transf);
499 if option.phase
500 phi{J} = angle(transf);
501 end
502 fi{J} = repmat(fj,[1,size(transf,2),size(transf,3)]);
503 end
504 if iscell(sig{i})
505 m{i} = mi;
506 p{i} = phi;
507 f{i} = fi;
508 else
509 m{i} = mi{1};
510 p{i} = phi{1};
511 f{i} = fi{1};
512 end
513 end
514 s = set(s,'Frequency',f,'Magnitude',m,'Phase',p,'FramePos',fp);
515 if not(isempty(postoption)) && isstruct(postoption)
516 s = post(s,postoption);
517 end
518 end
519
520
521 function s = post(s,option)
522 if option.collapsed
523 option.band = 'Cents';
524 end
525 m = get(s,'Magnitude');
526 f = get(s,'Frequency');
527 for k = 1:length(m)
528 if not(iscell(m{k}))
529 m{k} = {m{k}};
530 f{k} = {f{k}};
531 end
532 end
533 if get(s,'Power') == 1 && ...
534 (option.pow || any(option.mprod) || any(option.msum))
535 for h = 1:length(m)
536 for l = 1:length(m{k})
537 m{h}{l} = m{h}{l}.^2;
538 end
539 end
540 s = set(s,'Power',2,'Title',['Power ',get(s,'Title')]);
541 end
542 if any(option.mprod)
543 for h = 1:length(m)
544 for l = 1:length(m{k})
545 z0 = m{h}{l};
546 z1 = z0;
547 for k = 1:length(option.mprod)
548 mpr = option.mprod(k);
549 if mpr
550 zi = ones(size(z0));
551 zi(1:floor(end/mpr),:,:) = z0(mpr:mpr:end,:,:);
552 z1 = z1.*zi;
553 end
554 end
555 m{h}{l} = z1;
556 end
557 end
558 s = set(s,'Title','Spectral product');
559 end
560 if any(option.msum)
561 for h = 1:length(m)
562 for l = 1:length(m{k})
563 z0 = m{h}{l};
564 z1 = z0;
565 for k = 1:length(option.msum)
566 mpr = option.msum(k);
567 if mpr
568 zi = ones(size(z0));
569 zi(1:floor(end/mpr),:,:) = z0(mpr:mpr:end,:,:);
570 z1 = z1+zi;
571 end
572 end
573 m{h}{l} = z1;
574 end
575 end
576 s = set(s,'Title','Spectral sum');
577 end
578 %for i = option.e % Enhancing procedure %%%% TO CHECK, t IS NOT DEFINED!
579 % for h = 1:length(m)
580 % for l = 1:length(m{k})
581 % c = m{h}{l};
582 % % option.e is the list of scaling factors
583 % % i is the scaling factor
584 % if i
585 % ic = zeros(size(c));
586 % for g = 1:size(c,2)
587 % for h2 = 1:size(c,3)
588 % beg = find(t(:,g,h2)/i >= t(1,g,h2))
589 % if not(isempty(beg))
590 % ic(:,g,h2) = interp1(t(:,g,h2),c(:,g,h2),t(:,g,h2)/i); %,'cubic');
591 % The scaled autocorrelation
592 % ic(1:beg(1)-1,g,h2) = 0;
593 % end
594 % end
595 % end
596 % ic(find(isnan(ic))) = Inf; % All the NaN values are changed into 0 in the resulting curve
597 % ic = max(ic,0);
598 % c = c - ic; % The scaled autocorrelation is substracted to the initial one
599 % c = max(c,0); % Half-wave rectification
600 %figure
601 %plot(c)
602 % end
603 % end
604 % end
605 %end
606 if option.norm
607 for k = 1:length(m)
608 for l = 1:length(m{k})
609 mkl = m{k}{l};
610 nkl = zeros(1,size(mkl,2),size(mkl,3));
611 for kk = 1:size(mkl,2)
612 for ll = 1:size(mkl,3)
613 nkl(1,kk,l) = norm(mkl(:,kk,ll));
614 end
615 end
616 m{k}{l} = mkl./repmat(nkl,[size(m{k}{k},1),1,1]);
617 end
618 end
619 end
620 if option.terhardt && not(isempty(find(f{1}{1}))) % This excludes the case where spectrum already along bands
621 % Code taken from Pampalk's MA Toolbox
622 for h = 1:length(m)
623 for l = 1:length(m{k})
624 W_Adb = zeros(size(f{k}{l}));
625 W_Adb(2:size(f{k}{l},1),:,:) = ...
626 + 10.^((-3.64*(f{k}{l}(2:end,:,:)/1000).^-0.8 ...
627 + 6.5 * exp(-0.6 * (f{k}{l}(2:end,:,:)/1000 - 3.3).^2) ...
628 - 0.001*(f{k}{l}(2:end,:,:)/1000).^4)/20);
629 W_Adb = W_Adb.^2;
630 m{h}{l} = m{h}{l}.*W_Adb;
631 end
632 end
633 end
634 if option.reso
635 if not(ischar(option.reso))
636 if strcmp(get(orig,'XScale'),'Mel')
637 option.reso = 'Fluctuation';
638 else
639 option.reso = 'ToiviainenSnyder';
640 end
641 end
642 %if strcmpi(option.reso,'ToiviainenSnyder') || ...
643 % strcmpi(option.reso,'Meter') || ...
644 % strcmpi(option.reso,'Fluctuation')
645 for k = 1:length(m)
646 for l = 1:length(m{k})
647 if strcmpi(option.reso,'ToiviainenSnyder') || strcmpi(option.reso,'Meter')
648 w = max(0,...
649 1 - 0.25*(log2(max(1./max(f{k}{l},1e-12),1e-12)/0.5)).^2);
650 elseif strcmpi(option.reso,'Fluctuation')
651 w1 = f{k}{l} / 4; % ascending part of the fluctuation curve;
652 w2 = 1 - 0.3 * (f{k}{l} - 4)/6; % descending part;
653 w = min(w1,w2);
654 end
655 if max(w) == 0
656 warning('The resonance curve, not defined for this range of delays, will not be applied.')
657 else
658 m{k}{l} = m{k}{l}.* w;
659 end
660 end
661 end
662 end
663 if strcmp(s.xscale,'Freq')
664 if strcmpi(option.band,'Mel')
665 % The following is largely based on the source code from Auditory Toolbox
666 % (A part that I could not call directly from MIRtoolbox)
667
668 % (Malcolm Slaney, August 1993, (c) 1998 Interval Research Corporation)
669
670 try
671 MakeERBFilters(1,1,1); % Just to be sure that the Auditory Toolbox is installed
672 catch
673 error('ERROR IN MIRFILTERBANK: Auditory Toolbox needs to be installed.');
674 end
675
676 %disp('Computing Mel-frequency spectral representation...')
677 lowestFrequency = 133.3333;
678 if not(option.nbbands)
679 option.nbbands = 40;
680 end
681 linearFilters = min(13,option.nbbands);
682 linearSpacing = 66.66666666;
683 logFilters = option.nbbands - linearFilters;
684 logSpacing = 1.0711703;
685 totalFilters = option.nbbands;
686
687 % Figure the band edges. Interesting frequencies are spaced
688 % by linearSpacing for a while, then go logarithmic. First figure
689 % all the interesting frequencies. Lower, center, and upper band
690 % edges are all consequtive interesting frequencies.
691 freqs = lowestFrequency + (0:linearFilters-1)*linearSpacing;
692 freqs(linearFilters+1:totalFilters+2) = ...
693 freqs(linearFilters) * logSpacing.^(1:logFilters+2);
694 lower = freqs(1:totalFilters);
695 center = freqs(2:totalFilters+1);
696 upper = freqs(3:totalFilters+2);
697
698 % Figure out the height of the triangle so that each filter has
699 % unit weight, assuming a triangular weighting function.
700 triangleHeight = 2./(upper-lower);
701
702 e = cell(1,length(m));
703 nch = cell(1,length(m));
704 for h = 1:length(m)
705 e{h} = cell(1,length(m{h}));
706 for i = 1:length(m{h})
707 mi = m{h}{i};
708 fi = f{h}{i}(:,1,1);
709 fftSize = size(fi,1);
710
711 % We now want to combine FFT bins and figure out
712 % each frequencies contribution
713 mfccFilterWeights = zeros(totalFilters,fftSize);
714 for chan=1:totalFilters
715 mfccFilterWeights(chan,:) = triangleHeight(chan).*...
716 ((fi > lower(chan) & fi <= center(chan)).* ...
717 (fi-lower(chan))/(center(chan)-lower(chan)) + ...
718 (fi > center(chan) & fi < upper(chan)).* ...
719 (upper(chan)-fi)/(upper(chan)-center(chan)));
720 end
721 if find(diff(not(sum(mfccFilterWeights,2)))==-1)
722 % If one channel has no weight whereas the higher one
723 % has a positive weight.
724 warning('WARNING in MIRSPECTRUM: The frequency resolution of the spectrum is too low for the Mel transformation. Some Mel components are undefined.')
725 display('Recommended frequency resolution: at least 66 Hz.')
726 end
727 e{h}{i} = zeros(1,size(mi,2),totalFilters);
728 for J = 1:size(mi,2)
729 if max(mi(:,J)) > 0
730 fftData = zeros(fftSize,1); % Zero-padding ?
731 fftData(1:size(mi,1)) = mi(:,J);
732 p = mfccFilterWeights * fftData + 1e-16;
733 e{h}{i}(1,J,:) = reshape(p,[1,1,length(p)]);
734 end
735 end
736 f{h}{i} = zeros(1,size(mi,2),totalFilters);
737 end
738 nch{h} = (1:totalFilters)';
739 end
740 m = e;
741 s = set(s,'XScale','Mel','Title','Mel-Spectrum','Abs','Mel bands','Channels',nch);
742 elseif strcmpi(option.band,'Bark')
743 sr = get(s,'Sampling');
744 % Code taken from Pampalk's MA Toolbox.
745 %% zwicker & fastl: psychoacoustics 1999, page 159
746 bark_upper = [10 20 30 40 51 63 77 92 108 127 148 172 200 232 270 315 370 440 530 640 770 950 1200 1550]*10; %% Hz
747 e = cell(1,length(m));
748 nch = cell(1,length(m));
749 for h = 1:length(m)
750 %% ignore critical bands outside of sampling rate range
751 cb = min(min([find(bark_upper>sr{h}/2),length(bark_upper)]),length(bark_upper));
752 e{h} = cell(1,length(m{h}));
753 for i = 1:length(m{h})
754 mi = sum(m{h}{i},3);
755 e{h}{i} = zeros(1,size(mi,2),cb);
756 k = 1;
757 for J=1:cb, %% group into bark bands
758 idx = find(f{h}{i}(k:end,1,1)<=bark_upper(J));
759 idx = idx + k-1;
760 e{h}{i}(1,:,J) = sum(mi(idx,:,:),1);
761 k = max(idx)+1;
762 end
763 f{h}{i} = zeros(1,size(mi,2),cb);
764 end
765 nch{h} = (1:length(bark_upper))';
766 end
767 m = e;
768 s = set(s,'XScale','Bark','Title','Bark-Spectrum','Abs','Bark bands','Channels',nch);
769 elseif strcmpi(option.band,'Cents') || option.collapsed
770 for h = 1:length(m)
771 for i = 1:length(m{h})
772 mi = m{h}{i};
773 fi = f{h}{i};
774 isgood = fi(:,1,1)*(2^(1/1200)-1) >= fi(2,1,1)-fi(1,1,1);
775 good = find(isgood);
776 if isempty(good)
777 mirerror('mirspectrum',...
778 'The frequency resolution of the spectrum is too low to be decomposed into cents.');
779 display('Hint: if you specify a minimum value for the frequency range (''Min'' option)');
780 display(' and if you do not specify any frequency resolution (''Res'' option), ');
781 display(' the correct frequency resolution will be automatically chosen.');
782 end
783 if good>1
784 warning(['MIRSPECTRUM: Frequency resolution in cent is achieved for frequencies over ',...
785 num2str(floor(fi(good(1)))),' Hz. Lower frequencies are ignored.'])
786 display('Hint: if you specify a minimum value for the frequency range (''Min'' option)');
787 display(' and if you do not specify any frequency resolution (''Res'' option), ');
788 display(' the correct frequency resolution will be automatically chosen.');
789 end
790 fi = fi(good,:,:);
791 mi = mi(good,:,:);
792 f2cents = 440*2.^(1/1200*(-1200*10:1200*10-1)');
793 % The frequencies corresponding to the cent range
794 cents = repmat((0:1199)',[20,size(fi,2),size(fi,3)]);
795 % The cent range is for the moment centered to A440
796 octaves = ones(1200,1)*(-10:10);
797 octaves = repmat(octaves(:),[1,size(fi,2),size(fi,3)]);
798 select = find(f2cents>fi(1) & f2cents<fi(end));
799 % Cent range actually used in the current spectrum
800 f2cents = repmat(f2cents(select),[1,size(fi,2),size(fi,3)]);
801 cents = repmat(cents(select),[1,size(fi,2),size(fi,3)]);
802 octaves = repmat(octaves(select),[1,size(fi,2),size(fi,3)]);
803 m{h}{i} = interp1(fi(:,1,1),mi,f2cents(:,1,1));
804 f{h}{i} = octaves*1200+cents + 6900;
805 % Now the cent range is expressed in midicent
806 end
807 end
808 s = set(s,'Abs','pitch (in midicents)','XScale','Cents');
809 end
810 end
811 if option.mask
812 if strcmp(s.xscale,'Freq')
813 warning('WARNING IN MIRSPECTRUM: ''Mask'' option available only for Mel-spectrum and Bark-spectrum.');
814 disp('''Mask'' option ignored');
815 else
816 nch = get(s,'Channels');
817 for h = 1:length(m)
818 % Code taken from Pampalk's MA Toolbox.
819 %% spreading function: schroeder et al., 1979, JASA, optimizing
820 %% digital speech coders by exploiting masking properties of the human ear
821 cb = length(nch{h}); % Number of bands.
822 for i = 1:cb,
823 spread(i,:) = 10.^((15.81+7.5*((i-(1:cb))+0.474)-17.5*(1+((i-(1:cb))+0.474).^2).^0.5)/10);
824 end
825 for i = 1:length(m{h})
826 for J = 1:size(m{h}{i},2)
827 mj = m{h}{i}(1,J,:);
828 mj = spread(1:length(mj),1:length(mj))*mj(:);
829 m{h}{i}(1,J,:) = reshape(mj,1,1,length(mj));
830 end
831 end
832 end
833 end
834 end
835 if option.collapsed
836 for h = 1:length(m)
837 for i = 1:length(m{h})
838 mi = m{h}{i};
839 fi = f{h}{i};
840 centclass = rem(fi(:,1,1),1200);
841 %neg = find(centclass<0);
842 %centclass(neg) = 1200 + centclass(neg);
843 m{h}{i} = NaN(1200,size(mi,2),size(mi,3));
844 for k = 0:1199
845 m{h}{i}(k+1,:,:) = sum(mi(find(centclass==k),:,:),1);
846 end
847 f{h}{i} = repmat((0:1199)',[1,size(fi,2),size(fi,3)]);
848 end
849 end
850 s = set(s,'Abs','Cents','XScale','Cents(Collapsed)');
851 end
852 if option.log || option.db
853 if not(isa(s,'mirspectrum') && s.log)
854 if option.db
855 s = set(s,'log',10);
856 else
857 s = set(s,'log',1);
858 end
859 for k = 1:length(m)
860 if not(iscell(m{k}))
861 m{k} = {m{k}};
862 f{k} = {f{k}};
863 end
864 for l = 1:length(m{k})
865 m{k}{l} = log10(m{k}{l}+1e-16);
866 if option.db
867 m{k}{l} = 10*m{k}{l};
868 end
869 end
870 end
871 elseif isa(s,'mirspectrum') && s.log<10
872 for k = 1:length(m)
873 for l = 1:length(m{k})
874 m{k}{l} = 10*m{k}{l};
875 end
876 end
877 end
878 if option.db>0 && option.db < Inf
879 for k = 1:length(m)
880 for l = 1:length(m{k})
881 m{k}{l} = m{k}{l}-repmat(max(m{k}{l}),[size(m{k}{l},1) 1 1]);
882 m{k}{l} = option.db + max(-option.db,m{k}{l});
883 end
884 end
885 end
886 end
887 if option.aver
888 for k = 1:length(m)
889 for i = 1:length(m{k})
890 m{k}{i} = filter(ones(1,option.aver),1,m{k}{i});
891 end
892 end
893 end
894 if option.gauss
895 for k = 1:length(m)
896 for i = 1:length(m{k})
897 sigma = option.gauss;
898 gauss = 1/sigma/2/pi...
899 *exp(- (-4*sigma:4*sigma).^2 /2/sigma^2);
900 y = filter(gauss,1,[m{k}{i};zeros(4*sigma,1)]);
901 y = y(4*sigma:end,:,:);
902 m{k}{i} = y(1:size(m{k}{i},1),:,:);
903 end
904 end
905 end
906 s = set(s,'Magnitude',m,'Frequency',f);
907
908
909 function dj = mirwindow(dj,win,N)
910 if nargin<3
911 N = size(dj,1);
912 elseif size(dj,1)<N
913 dj(N,1,1) = 0;
914 end
915 if not(win == 0)
916 winf = str2func(win);
917 try
918 w = window(winf,N);
919 catch
920 if strcmpi(win,'hamming')
921 disp('Signal Processing Toolbox does not seem to be installed. Recompute the hamming window manually.');
922 w = 0.54 - 0.46 * cos(2*pi*(0:N-1)'/(N-1));
923 else
924 error(['ERROR in MIRSPECTRUM: Unknown windowing function ',win,' (maybe Signal Processing Toolbox is not installed).']);
925 end
926 end
927 kw = repmat(w,[1,size(dj,2),size(dj,3)]);
928 dj = dj(1:N,:,:).* kw;
929 end
930
931
932 function [y orig] = eachchunk(orig,option,missing,postchunk)
933 option.zp = option.zp+missing;
934 y = mirspectrum(orig,option);
935
936
937 function y = combinechunk(old,new)
938 do = get(old,'Data');
939 do = do{1}{1};
940 dn = get(new,'Data');
941 dn = dn{1}{1};
942 y = set(old,'ChunkData',do+dn);