Mercurial > hg > camir-aes2014
comparison toolboxes/MIRtoolbox1.3.2/AuditoryToolbox/proclpc.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
1 function [aCoeff,resid,pitch,G,parcor,stream] = proclpc(data,sr,L,fr,fs,preemp) | |
2 % USAGE: [aCoeff,resid,pitch,G,parcor,stream] = proclpc(data,sr,L,fr,fs,preemp) | |
3 % | |
4 % This function computes the LPC (linear-predictive coding) coefficients that | |
5 % describe a speech signal. The LPC coefficients are a short-time measure of | |
6 % the speech signal which describe the signal as the output of an all-pole | |
7 % filter. This all-pole filter provides a good description of the speech | |
8 % articulators; thus LPC analysis is often used in speech recognition and | |
9 % speech coding systems. The LPC parameters are recalculated, by default in | |
10 % this implementation, every 20ms. | |
11 % | |
12 % The results of LPC analysis are a new representation of the signal | |
13 % s(n) = G e(n) - sum from 1 to L a(i)s(n-i) | |
14 % where s(n) is the original data. a(i) and e(n) are the outputs of the LPC | |
15 % analysis with a(i) representing the LPC model. The e(n) term represents | |
16 % either the speech source's excitation, or the residual: the details of the | |
17 % signal that are not captured by the LPC coefficients. The G factor is a | |
18 % gain term. | |
19 % | |
20 % LPC analysis is performed on a monaural sound vector (data) which has been | |
21 % sampled at a sampling rate of "sr". The following optional parameters modify | |
22 % the behaviour of this algorithm. | |
23 % L - The order of the analysis. There are L+1 LPC coefficients in the output | |
24 % array aCoeff for each frame of data. L defaults to 13. | |
25 % fr - Frame time increment, in ms. The LPC analysis is done starting every | |
26 % fr ms in time. Defaults to 20ms (50 LPC vectors a second) | |
27 % fs - Frame size in ms. The LPC analysis is done by windowing the speech | |
28 % data with a rectangular window that is fs ms long. Defaults to 30ms | |
29 % preemp - This variable is the epsilon in a digital one-zero filter which | |
30 % serves to preemphasize the speech signal and compensate for the 6dB | |
31 % per octave rolloff in the radiation function. Defaults to .9378. | |
32 % | |
33 % The output variables from this function are | |
34 % aCoeff - The LPC analysis results, a(i). One column of L numbers for each | |
35 % frame of data | |
36 % resid - The LPC residual, e(n). One column of sr*fs samples representing | |
37 % the excitation or residual of the LPC filter. | |
38 % pitch - A frame-by-frame estimate of the pitch of the signal, calculated | |
39 % by finding the peak in the residual's autocorrelation for each frame. | |
40 % G - The LPC gain for each frame. | |
41 % parcor - The parcor coefficients. The parcor coefficients give the ratio | |
42 % between adjacent sections in a tubular model of the speech | |
43 % articulators. There are L parcor coefficients for each frame of | |
44 % speech. | |
45 % stream - The LPC analysis' residual or excitation signal as one long vector. | |
46 % Overlapping frames of the resid output combined into a new one- | |
47 % dimensional signal and post-filtered. | |
48 % | |
49 % The synlpc routine inverts this transform and returns the original speech | |
50 % signal. | |
51 % | |
52 % This code was graciously provided by: | |
53 % Delores Etter (University of Colorado, Boulder) and | |
54 % Professor Geoffrey Orsak (Southern Methodist University) | |
55 % It was first published in | |
56 % Orsak, G.C. et al. "Collaborative SP education using the Internet and | |
57 % MATLAB" IEEE SIGNAL PROCESSING MAGAZINE Nov. 1995. vol.12, no.6, pp. | |
58 % 23-32. | |
59 % Modified and debugging plots added by Kate Nguyen and Malcolm Slaney | |
60 | |
61 % A more complete set of routines for LPC analysis can be found at | |
62 % http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html | |
63 | |
64 % (c) 1998 Interval Research Corporation | |
65 | |
66 | |
67 if (nargin<3), L = 13; end | |
68 if (nargin<4), fr = 20; end | |
69 if (nargin<5), fs = 30; end | |
70 if (nargin<6), preemp = .9378; end | |
71 | |
72 [row col] = size(data); | |
73 if col==1 data=data'; end | |
74 | |
75 nframe = 0; | |
76 msfr = round(sr/1000*fr); % Convert ms to samples | |
77 msfs = round(sr/1000*fs); % Convert ms to samples | |
78 duration = length(data); | |
79 speech = filter([1 -preemp], 1, data)'; % Preemphasize speech | |
80 msoverlap = msfs - msfr; | |
81 ramp = [0:1/(msoverlap-1):1]'; % Compute part of window | |
82 | |
83 | |
84 for frameIndex=1:msfr:duration-msfs+1 % frame rate=20ms | |
85 frameData = speech(frameIndex:(frameIndex+msfs-1)); % frame size=30ms | |
86 nframe = nframe+1; | |
87 autoCor = xcorr(frameData); % Compute the cross correlation | |
88 autoCorVec = autoCor(msfs+[0:L]); | |
89 | |
90 % Levinson's method | |
91 err(1) = autoCorVec(1); | |
92 k(1) = 0; | |
93 A = []; | |
94 for index=1:L | |
95 numerator = [1 A.']*autoCorVec(index+1:-1:2); | |
96 denominator = -1*err(index); | |
97 k(index) = numerator/denominator; % PARCOR coeffs | |
98 A = [A+k(index)*flipud(A); k(index)]; | |
99 err(index+1) = (1-k(index)^2)*err(index); | |
100 end | |
101 | |
102 aCoeff(:,nframe) = [1; A]; | |
103 parcor(:,nframe) = k'; | |
104 | |
105 % Calculate the filter | |
106 % response | |
107 % by evaluating the | |
108 % z-transform | |
109 if 0 | |
110 gain=0; | |
111 cft=0:(1/255):1; | |
112 for index=1:L | |
113 gain = gain + aCoeff(index,nframe)*exp(-i*2*pi*cft).^index; | |
114 end | |
115 gain = abs(1./gain); | |
116 spec(:,nframe) = 20*log10(gain(1:128))'; | |
117 plot(20*log10(gain)); | |
118 title(nframe); | |
119 drawnow; | |
120 end | |
121 | |
122 % Calculate the filter response | |
123 % from the filter's impulse | |
124 % response (to check above). | |
125 if 0 | |
126 impulseResponse = filter(1, aCoeff(:,nframe), [1 zeros(1,255)]); | |
127 freqResp = 20*log10(abs(fft(impulseResponse))); | |
128 plot(freqResp); | |
129 end | |
130 | |
131 errSig = filter([1 A'],1,frameData); % find excitation noise | |
132 | |
133 G(nframe) = sqrt(err(L+1)); % gain | |
134 autoCorErr = xcorr(errSig); % calculate pitch & voicing | |
135 % information | |
136 [B,I] = sort(autoCorErr); | |
137 num = length(I); | |
138 if B(num-1) > .3*B(num) | |
139 pitch(nframe) = abs(I(num) - I(num-1)); | |
140 else | |
141 pitch(nframe) = 0; | |
142 end | |
143 | |
144 resid(:,nframe) = errSig/G(nframe); | |
145 if(frameIndex==1) % add residual frames using a | |
146 stream = resid(1:msfr,nframe); % trapezoidal window | |
147 else | |
148 stream = [stream; | |
149 overlap+resid(1:msoverlap,nframe).*ramp; | |
150 resid(msoverlap+1:msfr,nframe)]; | |
151 end | |
152 if(frameIndex+msfr+msfs-1 > duration) | |
153 stream = [stream; resid(msfr+1:msfs,nframe)]; | |
154 else | |
155 overlap = resid(msfr+1:msfs,nframe).*flipud(ramp); | |
156 end | |
157 end | |
158 stream = filter(1, [1 -preemp], stream)'; |