Mercurial > hg > smallbox
comparison util/SMALL_swipe.m @ 8:33850553b702
(none)
author | idamnjanovic |
---|---|
date | Mon, 22 Mar 2010 10:56:54 +0000 |
parents | |
children | fc395272d53e |
comparison
equal
deleted
inserted
replaced
7:0151f1ea080d | 8:33850553b702 |
---|---|
1 function [p,s] = SMALL_swipe(X,fs, f, plim,dlog2p,dERBs,woverlap,sTHR) | |
2 % | |
3 % Ivan Damnjanovic 2010 | |
4 % | |
5 % This is modified swipep MATLAB code that is working directly in spectral | |
6 % domain and uses only one window size. The results are suboptimal | |
7 % comparing to original code. It is also converted to SWIPE which uses all | |
8 % the harmonics of the signal. | |
9 % | |
10 %SWIPEP Pitch estimation using SWIPE'. | |
11 % P = SWIPEP(X,Fs,[PMIN PMAX],DT,DLOG2P,DERBS,STHR) estimates the pitch | |
12 % of the vector signal X every DT seconds. The sampling frequency of | |
13 % the signal is Fs (in Hertz). The spectrum is computed using a Hann | |
14 % window with an overlap WOVERLAP between 0 and 1. The spectrum is | |
15 % sampled uniformly in the ERB scale with a step size of DERBS ERBs. The | |
16 % pitch is searched within the range [PMIN PMAX] (in Hertz) with samples | |
17 % distributed every DLOG2P units on a base-2 logarithmic scale of Hertz. | |
18 % The pitch is fine-tuned using parabolic interpolation with a resolution | |
19 % of 1 cent. Pitch estimates with a strength lower than STHR are treated | |
20 % as undefined. | |
21 % | |
22 % [P,T,S] = SWIPEP(X,Fs,[PMIN PMAX],DT,DLOG2P,DERBS,WOVERLAP,STHR) | |
23 % returns the times T at which the pitch was estimated and the pitch | |
24 % strength S of every pitch estimate. | |
25 % | |
26 % P = SWIPEP(X,Fs) estimates the pitch using the default settings PMIN = | |
27 % 30 Hz, PMAX = 5000 Hz, DT = 0.001 s, DLOG2P = 1/48 (48 steps per | |
28 % octave), DERBS = 0.1 ERBs, WOVERLAP = 0.5, and STHR = -Inf. | |
29 % | |
30 % P = SWIPEP(X,Fs,...,[],...) uses the default setting for the parameter | |
31 % replaced with the placeholder []. | |
32 % | |
33 % REMARKS: (1) For better results, make DLOG2P and DERBS as small as | |
34 % possible and WOVERLAP as large as possible. However, take into account | |
35 % that the computational complexity of the algorithm is inversely | |
36 % proportional to DLOG2P, DERBS and 1-WOVERLAP, and that the default | |
37 % values have been found empirically to produce good results. Consider | |
38 % also that the computational complexity is directly proportional to the | |
39 % number of octaves in the pitch search range, and therefore , it is | |
40 % recommendable to restrict the search range to the expected range of | |
41 % pitch, if any. (2) This code implements SWIPE', which uses only the | |
42 % first and prime harmonics of the signal. To convert it into SWIPE, | |
43 % which uses all the harmonics of the signal, replace the word | |
44 % PRIMES with a colon (it is located almost at the end of the code). | |
45 % However, this may not be recommendable since SWIPE' is reported to | |
46 % produce on average better results than SWIPE (Camacho and Harris, | |
47 % 2008). | |
48 % | |
49 % EXAMPLE: Estimate the pitch of the signal X every 10 ms within the | |
50 % range 75-500 Hz using the default resolution (i.e., 48 steps per | |
51 % octave), sampling the spectrum every 1/20th of ERB, using a window | |
52 % overlap factor of 50%, and discarding samples with pitch strength | |
53 % lower than 0.2. Plot the pitch trace. | |
54 % [x,Fs] = wavread(filename); | |
55 % [p,t,s] = swipep(x,Fs,[75 500],0.01,[],1/20,0.5,0.2); | |
56 % plot(1000*t,p) | |
57 % xlabel('Time (ms)') | |
58 % ylabel('Pitch (Hz)') | |
59 % | |
60 % REFERENCES: Camacho, A., Harris, J.G, (2008) "A sawtooth waveform | |
61 % inspired pitch estimator for speech and music," J. Acoust. Soc. Am. | |
62 % 124, 1638-1652. | |
63 if ~ exist( 'plim', 'var' ) || isempty(plim), plim = [30 5000]; end | |
64 %if ~ exist( 'dt', 'var' ) || isempty(dt), dt = 0.001; end | |
65 if ~ exist( 'dlog2p', 'var' ) || isempty(dlog2p), dlog2p = 1/48; end | |
66 if ~ exist( 'dERBs', 'var' ) || isempty(dERBs), dERBs = 0.05; end | |
67 % if ~ exist( 'woverlap', 'var' ) || isempty(woverlap) | |
68 % woverlap = 0.5; | |
69 % elseif woverlap>1 || woverlap<0 | |
70 % error('Window overlap must be between 0 and 1.') | |
71 % end | |
72 if ~ exist( 'sTHR', 'var' ) || isempty(sTHR), sTHR = -Inf; end | |
73 %t = [ 0: dt: length(x)/fs ]'; % Times | |
74 % Define pitch candidates | |
75 log2pc = [ log2(plim(1)): dlog2p: log2(plim(2)) ]'; | |
76 pc = 2 .^ log2pc; | |
77 S = zeros( length(pc), 1 ); % Pitch strength matrix | |
78 % Determine P2-WSs | |
79 %logWs = round( log2( 8*fs ./ plim ) ); | |
80 ws = [2822];%2.^[ logWs(1): -1: logWs(2) ]; % P2-WSs | |
81 pO = 8 * fs ./ ws; % Optimal pitches for P2-WSs | |
82 % Determine window sizes used by each pitch candidate | |
83 d = 1 + log2pc - log2( 8*fs./ws(1) ); | |
84 % Create ERB-scale uniformly-spaced frequencies (in Hertz) | |
85 fERBs = erbs2hz([ hz2erbs(min(pc)/4): dERBs: hz2erbs(fs/2) ]'); | |
86 for i = 1 : length(ws) | |
87 %dn = max( 1, round( 8*(1-woverlap) * fs / pO(i) ) ); % Hop size | |
88 % Zero pad signal | |
89 %xzp = [ zeros( ws(i)/2, 1 ); x(:); zeros( dn + ws(i)/2, 1 ) ]; | |
90 % Compute spectrum | |
91 %w = hanning( ws(i) ); % Hann window | |
92 %o = max( 0, round( ws(i) - dn ) ); % Window overlap | |
93 %[ X, f, ti ] = specgram( xzp, ws(i), fs, w, o ); | |
94 % Select candidates that use this window size | |
95 if length(ws) == 1 | |
96 j=[1:size(pc)]'; k = []; | |
97 elseif i == length(ws) | |
98 j=find(d-i>-1); k=find(d(j)-i<0); | |
99 elseif i==1 | |
100 j=find(d-i<1); k=find(d(j)-i>0); | |
101 else | |
102 j=find(abs(d-i)<1); k=1:length(j); | |
103 end | |
104 % Compute loudness at ERBs uniformly-spaced frequencies | |
105 fERBs = fERBs( find( fERBs > pc(1)/4, 1, 'first' ) : end ); | |
106 L = sqrt( max( 0, interp1( f, X, fERBs, 'spline', 0) ) ); | |
107 % Compute pitch strength | |
108 Si = pitchStrengthAllCandidates( fERBs, L, pc ); | |
109 % Interpolate pitch strength at desired times | |
110 % if size(Si,2) > 1 | |
111 % warning off MATLAB:interp1:NaNinY | |
112 % Si = interp1( ti, Si', t, 'linear', NaN )'; | |
113 % warning on MATLAB:interp1:NaNinY | |
114 % else | |
115 % Si = repmat( NaN, length(Si),1 ); | |
116 % end | |
117 % Add pitch strength to combination | |
118 % lambda = d( j(k) ) - i; | |
119 mu = ones( size(j) ); | |
120 % mu(k) = 1 - abs( lambda ); | |
121 S(j,:) = S(j,:) + repmat(mu,1,size(Si,2)) .* Si; | |
122 end | |
123 % Fine tune pitch using parabolic interpolation | |
124 p = repmat( NaN, size(S,2), 1 ); | |
125 s = repmat( NaN, size(S,2), 1 ); | |
126 for j = 1 : size(S,2) | |
127 [ s(j), i ] = max( S(:,j), [], 1 ); | |
128 if s(j) < sTHR, continue, end | |
129 if i == 1 || i == length(pc) | |
130 p(j) = pc(i); | |
131 else | |
132 I = i-1 : i+1; | |
133 tc = 1 ./ pc(I); | |
134 ntc = ( tc/tc(2) - 1 ) * 2*pi; | |
135 c = polyfit( ntc, S(I,j), 2 ); | |
136 ftc = 1 ./ 2.^[ log2(pc(I(1))): 1/12/100: log2(pc(I(3))) ]; | |
137 nftc = ( ftc/tc(2) - 1 ) * 2*pi; | |
138 [s(j) k] = max( polyval( c, nftc ) ); | |
139 p(j) = 2 ^ ( log2(pc(I(1))) + (k-1)/12/100 ); | |
140 % if (p(j)-pc(I(1)))<0.75*abs(p(j)-pc(I(2))) | |
141 % p(j)=pc(I(1)); | |
142 % elseif (pc(I(3))-p(j))<0.75*abs(p(j)-pc(I(2))) | |
143 % p(j)=pc(I(3)); | |
144 % else | |
145 p(j)=pc(I(2)); | |
146 % end | |
147 end | |
148 end | |
149 | |
150 function S = pitchStrengthAllCandidates( f, L, pc ) | |
151 % Create pitch strength matrix | |
152 S = zeros( length(pc), size(L,2) ); | |
153 % Define integration regions | |
154 k = ones( 1, length(pc)+1 ); | |
155 for j = 1 : length(k)-1 | |
156 k(j+1) = k(j) - 1 + find( f(k(j):end) > pc(j)/4, 1, 'first' ); | |
157 end | |
158 k = k(2:end); | |
159 % Create loudness normalization matrix | |
160 N = sqrt( flipud( cumsum( flipud(L.*L) ) ) ); | |
161 for j = 1 : length(pc) | |
162 % Normalize loudness | |
163 warning off MATLAB:divideByZero | |
164 NL = L(k(j):end,:) ./ repmat( N(k(j),:), size(L,1)-k(j)+1, 1); | |
165 warning on MATLAB:divideByZero | |
166 % Compute pitch strength | |
167 | |
168 S(j,:) = pitchStrengthOneCandidate( f(k(j):end), NL, pc(j) ); | |
169 end | |
170 | |
171 function S = pitchStrengthOneCandidate( f, NL, pc ) | |
172 n = fix( f(end)/pc - 0.75 ); % Number of harmonics | |
173 if n==0, S=NaN; return, end | |
174 k = zeros( size(f) ); % Kernel | |
175 % Normalize frequency w.r.t. candidate | |
176 q = f / pc; | |
177 % Create kernel | |
178 for i = [ 1:n] % primes(n) ] | |
179 a = abs( q - i ); | |
180 % Peak's weigth | |
181 p = a < .25; | |
182 k(p) = cos( 2*pi * q(p) ); | |
183 % Valleys' weights | |
184 v = .25 < a & a < .75; | |
185 k(v) = k(v) + cos( 2*pi * q(v) ) / 2; | |
186 end | |
187 % Apply envelope | |
188 k = k .* sqrt( 1./f ); | |
189 % K+-normalize kernel | |
190 k = k / norm( k(k>0) ); | |
191 % Compute pitch strength | |
192 S = k' * NL; | |
193 | |
194 function erbs = hz2erbs(hz) | |
195 erbs = 21.4 * log10( 1 + hz/229 ); | |
196 | |
197 function hz = erbs2hz(erbs) | |
198 hz = ( 10 .^ (erbs./21.4) - 1 ) * 229; |