idamnjanovic@8
|
1 function [p,s] = SMALL_swipe(X,fs, f, plim,dlog2p,dERBs,woverlap,sTHR)
|
idamnjanovic@8
|
2 %
|
idamnjanovic@8
|
3 %
|
idamnjanovic@24
|
4 % Centre for Digital Music, Queen Mary, University of London.
|
idamnjanovic@24
|
5 % This file copyright 2009 Ivan Damnjanovic.
|
idamnjanovic@8
|
6 %
|
idamnjanovic@24
|
7 % This program is free software; you can redistribute it and/or
|
idamnjanovic@24
|
8 % modify it under the terms of the GNU General Public License as
|
idamnjanovic@24
|
9 % published by the Free Software Foundation; either version 2 of the
|
idamnjanovic@24
|
10 % License, or (at your option) any later version. See the file
|
idamnjanovic@24
|
11 % COPYING included with this distribution for more information.
|
idamnjanovic@24
|
12 %
|
idamnjanovic@24
|
13 %
|
idamnjanovic@24
|
14 % This is modified swipep MATLAB code that is working directly in spectral
|
idamnjanovic@24
|
15 % domain and uses only one window size. The results are suboptimal
|
idamnjanovic@24
|
16 % comparing to original code. It is also converted to SWIPE which uses all
|
idamnjanovic@24
|
17 % the harmonics of the signal.
|
idamnjanovic@24
|
18 %
|
idamnjanovic@24
|
19 % SWIPEP Pitch estimation using SWIPE'.
|
idamnjanovic@8
|
20 % P = SWIPEP(X,Fs,[PMIN PMAX],DT,DLOG2P,DERBS,STHR) estimates the pitch
|
idamnjanovic@8
|
21 % of the vector signal X every DT seconds. The sampling frequency of
|
idamnjanovic@8
|
22 % the signal is Fs (in Hertz). The spectrum is computed using a Hann
|
idamnjanovic@8
|
23 % window with an overlap WOVERLAP between 0 and 1. The spectrum is
|
idamnjanovic@8
|
24 % sampled uniformly in the ERB scale with a step size of DERBS ERBs. The
|
idamnjanovic@8
|
25 % pitch is searched within the range [PMIN PMAX] (in Hertz) with samples
|
idamnjanovic@8
|
26 % distributed every DLOG2P units on a base-2 logarithmic scale of Hertz.
|
idamnjanovic@8
|
27 % The pitch is fine-tuned using parabolic interpolation with a resolution
|
idamnjanovic@8
|
28 % of 1 cent. Pitch estimates with a strength lower than STHR are treated
|
idamnjanovic@8
|
29 % as undefined.
|
idamnjanovic@8
|
30 %
|
idamnjanovic@8
|
31 % [P,T,S] = SWIPEP(X,Fs,[PMIN PMAX],DT,DLOG2P,DERBS,WOVERLAP,STHR)
|
idamnjanovic@8
|
32 % returns the times T at which the pitch was estimated and the pitch
|
idamnjanovic@8
|
33 % strength S of every pitch estimate.
|
idamnjanovic@8
|
34 %
|
idamnjanovic@8
|
35 % P = SWIPEP(X,Fs) estimates the pitch using the default settings PMIN =
|
idamnjanovic@8
|
36 % 30 Hz, PMAX = 5000 Hz, DT = 0.001 s, DLOG2P = 1/48 (48 steps per
|
idamnjanovic@8
|
37 % octave), DERBS = 0.1 ERBs, WOVERLAP = 0.5, and STHR = -Inf.
|
idamnjanovic@8
|
38 %
|
idamnjanovic@8
|
39 % P = SWIPEP(X,Fs,...,[],...) uses the default setting for the parameter
|
idamnjanovic@8
|
40 % replaced with the placeholder [].
|
idamnjanovic@8
|
41 %
|
idamnjanovic@8
|
42 % REMARKS: (1) For better results, make DLOG2P and DERBS as small as
|
idamnjanovic@8
|
43 % possible and WOVERLAP as large as possible. However, take into account
|
idamnjanovic@8
|
44 % that the computational complexity of the algorithm is inversely
|
idamnjanovic@8
|
45 % proportional to DLOG2P, DERBS and 1-WOVERLAP, and that the default
|
idamnjanovic@8
|
46 % values have been found empirically to produce good results. Consider
|
idamnjanovic@8
|
47 % also that the computational complexity is directly proportional to the
|
idamnjanovic@8
|
48 % number of octaves in the pitch search range, and therefore , it is
|
idamnjanovic@8
|
49 % recommendable to restrict the search range to the expected range of
|
idamnjanovic@8
|
50 % pitch, if any. (2) This code implements SWIPE', which uses only the
|
idamnjanovic@8
|
51 % first and prime harmonics of the signal. To convert it into SWIPE,
|
idamnjanovic@8
|
52 % which uses all the harmonics of the signal, replace the word
|
idamnjanovic@8
|
53 % PRIMES with a colon (it is located almost at the end of the code).
|
idamnjanovic@8
|
54 % However, this may not be recommendable since SWIPE' is reported to
|
idamnjanovic@8
|
55 % produce on average better results than SWIPE (Camacho and Harris,
|
idamnjanovic@8
|
56 % 2008).
|
idamnjanovic@8
|
57 %
|
idamnjanovic@8
|
58 % EXAMPLE: Estimate the pitch of the signal X every 10 ms within the
|
idamnjanovic@8
|
59 % range 75-500 Hz using the default resolution (i.e., 48 steps per
|
idamnjanovic@8
|
60 % octave), sampling the spectrum every 1/20th of ERB, using a window
|
idamnjanovic@8
|
61 % overlap factor of 50%, and discarding samples with pitch strength
|
idamnjanovic@8
|
62 % lower than 0.2. Plot the pitch trace.
|
idamnjanovic@8
|
63 % [x,Fs] = wavread(filename);
|
idamnjanovic@8
|
64 % [p,t,s] = swipep(x,Fs,[75 500],0.01,[],1/20,0.5,0.2);
|
idamnjanovic@8
|
65 % plot(1000*t,p)
|
idamnjanovic@8
|
66 % xlabel('Time (ms)')
|
idamnjanovic@8
|
67 % ylabel('Pitch (Hz)')
|
idamnjanovic@8
|
68 %
|
idamnjanovic@8
|
69 % REFERENCES: Camacho, A., Harris, J.G, (2008) "A sawtooth waveform
|
idamnjanovic@8
|
70 % inspired pitch estimator for speech and music," J. Acoust. Soc. Am.
|
idamnjanovic@8
|
71 % 124, 1638-1652.
|
idamnjanovic@24
|
72
|
idamnjanovic@8
|
73 if ~ exist( 'plim', 'var' ) || isempty(plim), plim = [30 5000]; end
|
idamnjanovic@8
|
74 %if ~ exist( 'dt', 'var' ) || isempty(dt), dt = 0.001; end
|
idamnjanovic@8
|
75 if ~ exist( 'dlog2p', 'var' ) || isempty(dlog2p), dlog2p = 1/48; end
|
idamnjanovic@8
|
76 if ~ exist( 'dERBs', 'var' ) || isempty(dERBs), dERBs = 0.05; end
|
idamnjanovic@8
|
77 % if ~ exist( 'woverlap', 'var' ) || isempty(woverlap)
|
idamnjanovic@8
|
78 % woverlap = 0.5;
|
idamnjanovic@8
|
79 % elseif woverlap>1 || woverlap<0
|
idamnjanovic@8
|
80 % error('Window overlap must be between 0 and 1.')
|
idamnjanovic@8
|
81 % end
|
idamnjanovic@8
|
82 if ~ exist( 'sTHR', 'var' ) || isempty(sTHR), sTHR = -Inf; end
|
idamnjanovic@8
|
83 %t = [ 0: dt: length(x)/fs ]'; % Times
|
idamnjanovic@8
|
84 % Define pitch candidates
|
idamnjanovic@8
|
85 log2pc = [ log2(plim(1)): dlog2p: log2(plim(2)) ]';
|
idamnjanovic@8
|
86 pc = 2 .^ log2pc;
|
idamnjanovic@8
|
87 S = zeros( length(pc), 1 ); % Pitch strength matrix
|
idamnjanovic@8
|
88 % Determine P2-WSs
|
idamnjanovic@8
|
89 %logWs = round( log2( 8*fs ./ plim ) );
|
idamnjanovic@8
|
90 ws = [2822];%2.^[ logWs(1): -1: logWs(2) ]; % P2-WSs
|
idamnjanovic@8
|
91 pO = 8 * fs ./ ws; % Optimal pitches for P2-WSs
|
idamnjanovic@8
|
92 % Determine window sizes used by each pitch candidate
|
idamnjanovic@8
|
93 d = 1 + log2pc - log2( 8*fs./ws(1) );
|
idamnjanovic@8
|
94 % Create ERB-scale uniformly-spaced frequencies (in Hertz)
|
idamnjanovic@8
|
95 fERBs = erbs2hz([ hz2erbs(min(pc)/4): dERBs: hz2erbs(fs/2) ]');
|
idamnjanovic@8
|
96 for i = 1 : length(ws)
|
idamnjanovic@8
|
97 %dn = max( 1, round( 8*(1-woverlap) * fs / pO(i) ) ); % Hop size
|
idamnjanovic@8
|
98 % Zero pad signal
|
idamnjanovic@8
|
99 %xzp = [ zeros( ws(i)/2, 1 ); x(:); zeros( dn + ws(i)/2, 1 ) ];
|
idamnjanovic@8
|
100 % Compute spectrum
|
idamnjanovic@8
|
101 %w = hanning( ws(i) ); % Hann window
|
idamnjanovic@8
|
102 %o = max( 0, round( ws(i) - dn ) ); % Window overlap
|
idamnjanovic@8
|
103 %[ X, f, ti ] = specgram( xzp, ws(i), fs, w, o );
|
idamnjanovic@8
|
104 % Select candidates that use this window size
|
idamnjanovic@8
|
105 if length(ws) == 1
|
idamnjanovic@8
|
106 j=[1:size(pc)]'; k = [];
|
idamnjanovic@8
|
107 elseif i == length(ws)
|
idamnjanovic@8
|
108 j=find(d-i>-1); k=find(d(j)-i<0);
|
idamnjanovic@8
|
109 elseif i==1
|
idamnjanovic@8
|
110 j=find(d-i<1); k=find(d(j)-i>0);
|
idamnjanovic@8
|
111 else
|
idamnjanovic@8
|
112 j=find(abs(d-i)<1); k=1:length(j);
|
idamnjanovic@8
|
113 end
|
idamnjanovic@8
|
114 % Compute loudness at ERBs uniformly-spaced frequencies
|
idamnjanovic@8
|
115 fERBs = fERBs( find( fERBs > pc(1)/4, 1, 'first' ) : end );
|
idamnjanovic@8
|
116 L = sqrt( max( 0, interp1( f, X, fERBs, 'spline', 0) ) );
|
idamnjanovic@8
|
117 % Compute pitch strength
|
idamnjanovic@8
|
118 Si = pitchStrengthAllCandidates( fERBs, L, pc );
|
idamnjanovic@8
|
119 % Interpolate pitch strength at desired times
|
idamnjanovic@8
|
120 % if size(Si,2) > 1
|
idamnjanovic@8
|
121 % warning off MATLAB:interp1:NaNinY
|
idamnjanovic@8
|
122 % Si = interp1( ti, Si', t, 'linear', NaN )';
|
idamnjanovic@8
|
123 % warning on MATLAB:interp1:NaNinY
|
idamnjanovic@8
|
124 % else
|
idamnjanovic@8
|
125 % Si = repmat( NaN, length(Si),1 );
|
idamnjanovic@8
|
126 % end
|
idamnjanovic@8
|
127 % Add pitch strength to combination
|
idamnjanovic@8
|
128 % lambda = d( j(k) ) - i;
|
idamnjanovic@8
|
129 mu = ones( size(j) );
|
idamnjanovic@8
|
130 % mu(k) = 1 - abs( lambda );
|
idamnjanovic@8
|
131 S(j,:) = S(j,:) + repmat(mu,1,size(Si,2)) .* Si;
|
idamnjanovic@8
|
132 end
|
idamnjanovic@8
|
133 % Fine tune pitch using parabolic interpolation
|
idamnjanovic@8
|
134 p = repmat( NaN, size(S,2), 1 );
|
idamnjanovic@8
|
135 s = repmat( NaN, size(S,2), 1 );
|
idamnjanovic@8
|
136 for j = 1 : size(S,2)
|
idamnjanovic@8
|
137 [ s(j), i ] = max( S(:,j), [], 1 );
|
idamnjanovic@8
|
138 if s(j) < sTHR, continue, end
|
idamnjanovic@8
|
139 if i == 1 || i == length(pc)
|
idamnjanovic@8
|
140 p(j) = pc(i);
|
idamnjanovic@8
|
141 else
|
idamnjanovic@8
|
142 I = i-1 : i+1;
|
idamnjanovic@8
|
143 tc = 1 ./ pc(I);
|
idamnjanovic@8
|
144 ntc = ( tc/tc(2) - 1 ) * 2*pi;
|
idamnjanovic@8
|
145 c = polyfit( ntc, S(I,j), 2 );
|
idamnjanovic@8
|
146 ftc = 1 ./ 2.^[ log2(pc(I(1))): 1/12/100: log2(pc(I(3))) ];
|
idamnjanovic@8
|
147 nftc = ( ftc/tc(2) - 1 ) * 2*pi;
|
idamnjanovic@8
|
148 [s(j) k] = max( polyval( c, nftc ) );
|
idamnjanovic@8
|
149 p(j) = 2 ^ ( log2(pc(I(1))) + (k-1)/12/100 );
|
idamnjanovic@8
|
150 % if (p(j)-pc(I(1)))<0.75*abs(p(j)-pc(I(2)))
|
idamnjanovic@8
|
151 % p(j)=pc(I(1));
|
idamnjanovic@8
|
152 % elseif (pc(I(3))-p(j))<0.75*abs(p(j)-pc(I(2)))
|
idamnjanovic@8
|
153 % p(j)=pc(I(3));
|
idamnjanovic@8
|
154 % else
|
idamnjanovic@8
|
155 p(j)=pc(I(2));
|
idamnjanovic@8
|
156 % end
|
idamnjanovic@8
|
157 end
|
idamnjanovic@8
|
158 end
|
idamnjanovic@8
|
159
|
idamnjanovic@8
|
160 function S = pitchStrengthAllCandidates( f, L, pc )
|
idamnjanovic@8
|
161 % Create pitch strength matrix
|
idamnjanovic@8
|
162 S = zeros( length(pc), size(L,2) );
|
idamnjanovic@8
|
163 % Define integration regions
|
idamnjanovic@8
|
164 k = ones( 1, length(pc)+1 );
|
idamnjanovic@8
|
165 for j = 1 : length(k)-1
|
idamnjanovic@8
|
166 k(j+1) = k(j) - 1 + find( f(k(j):end) > pc(j)/4, 1, 'first' );
|
idamnjanovic@8
|
167 end
|
idamnjanovic@8
|
168 k = k(2:end);
|
idamnjanovic@8
|
169 % Create loudness normalization matrix
|
idamnjanovic@8
|
170 N = sqrt( flipud( cumsum( flipud(L.*L) ) ) );
|
idamnjanovic@8
|
171 for j = 1 : length(pc)
|
idamnjanovic@8
|
172 % Normalize loudness
|
idamnjanovic@8
|
173 warning off MATLAB:divideByZero
|
idamnjanovic@8
|
174 NL = L(k(j):end,:) ./ repmat( N(k(j),:), size(L,1)-k(j)+1, 1);
|
idamnjanovic@8
|
175 warning on MATLAB:divideByZero
|
idamnjanovic@8
|
176 % Compute pitch strength
|
idamnjanovic@8
|
177
|
idamnjanovic@8
|
178 S(j,:) = pitchStrengthOneCandidate( f(k(j):end), NL, pc(j) );
|
idamnjanovic@8
|
179 end
|
idamnjanovic@8
|
180
|
idamnjanovic@8
|
181 function S = pitchStrengthOneCandidate( f, NL, pc )
|
idamnjanovic@8
|
182 n = fix( f(end)/pc - 0.75 ); % Number of harmonics
|
idamnjanovic@8
|
183 if n==0, S=NaN; return, end
|
idamnjanovic@8
|
184 k = zeros( size(f) ); % Kernel
|
idamnjanovic@8
|
185 % Normalize frequency w.r.t. candidate
|
idamnjanovic@8
|
186 q = f / pc;
|
idamnjanovic@8
|
187 % Create kernel
|
idamnjanovic@8
|
188 for i = [ 1:n] % primes(n) ]
|
idamnjanovic@8
|
189 a = abs( q - i );
|
idamnjanovic@8
|
190 % Peak's weigth
|
idamnjanovic@8
|
191 p = a < .25;
|
idamnjanovic@8
|
192 k(p) = cos( 2*pi * q(p) );
|
idamnjanovic@8
|
193 % Valleys' weights
|
idamnjanovic@8
|
194 v = .25 < a & a < .75;
|
idamnjanovic@8
|
195 k(v) = k(v) + cos( 2*pi * q(v) ) / 2;
|
idamnjanovic@8
|
196 end
|
idamnjanovic@8
|
197 % Apply envelope
|
idamnjanovic@8
|
198 k = k .* sqrt( 1./f );
|
idamnjanovic@8
|
199 % K+-normalize kernel
|
idamnjanovic@8
|
200 k = k / norm( k(k>0) );
|
idamnjanovic@8
|
201 % Compute pitch strength
|
idamnjanovic@8
|
202 S = k' * NL;
|
idamnjanovic@8
|
203
|
idamnjanovic@8
|
204 function erbs = hz2erbs(hz)
|
idamnjanovic@8
|
205 erbs = 21.4 * log10( 1 + hz/229 );
|
idamnjanovic@8
|
206
|
idamnjanovic@8
|
207 function hz = erbs2hz(erbs)
|
idamnjanovic@8
|
208 hz = ( 10 .^ (erbs./21.4) - 1 ) * 229;
|