wolffd@0
|
1 function [aCoeff,resid,pitch,G,parcor,stream] = proclpc(data,sr,L,fr,fs,preemp)
|
wolffd@0
|
2 % USAGE: [aCoeff,resid,pitch,G,parcor,stream] = proclpc(data,sr,L,fr,fs,preemp)
|
wolffd@0
|
3 %
|
wolffd@0
|
4 % This function computes the LPC (linear-predictive coding) coefficients that
|
wolffd@0
|
5 % describe a speech signal. The LPC coefficients are a short-time measure of
|
wolffd@0
|
6 % the speech signal which describe the signal as the output of an all-pole
|
wolffd@0
|
7 % filter. This all-pole filter provides a good description of the speech
|
wolffd@0
|
8 % articulators; thus LPC analysis is often used in speech recognition and
|
wolffd@0
|
9 % speech coding systems. The LPC parameters are recalculated, by default in
|
wolffd@0
|
10 % this implementation, every 20ms.
|
wolffd@0
|
11 %
|
wolffd@0
|
12 % The results of LPC analysis are a new representation of the signal
|
wolffd@0
|
13 % s(n) = G e(n) - sum from 1 to L a(i)s(n-i)
|
wolffd@0
|
14 % where s(n) is the original data. a(i) and e(n) are the outputs of the LPC
|
wolffd@0
|
15 % analysis with a(i) representing the LPC model. The e(n) term represents
|
wolffd@0
|
16 % either the speech source's excitation, or the residual: the details of the
|
wolffd@0
|
17 % signal that are not captured by the LPC coefficients. The G factor is a
|
wolffd@0
|
18 % gain term.
|
wolffd@0
|
19 %
|
wolffd@0
|
20 % LPC analysis is performed on a monaural sound vector (data) which has been
|
wolffd@0
|
21 % sampled at a sampling rate of "sr". The following optional parameters modify
|
wolffd@0
|
22 % the behaviour of this algorithm.
|
wolffd@0
|
23 % L - The order of the analysis. There are L+1 LPC coefficients in the output
|
wolffd@0
|
24 % array aCoeff for each frame of data. L defaults to 13.
|
wolffd@0
|
25 % fr - Frame time increment, in ms. The LPC analysis is done starting every
|
wolffd@0
|
26 % fr ms in time. Defaults to 20ms (50 LPC vectors a second)
|
wolffd@0
|
27 % fs - Frame size in ms. The LPC analysis is done by windowing the speech
|
wolffd@0
|
28 % data with a rectangular window that is fs ms long. Defaults to 30ms
|
wolffd@0
|
29 % preemp - This variable is the epsilon in a digital one-zero filter which
|
wolffd@0
|
30 % serves to preemphasize the speech signal and compensate for the 6dB
|
wolffd@0
|
31 % per octave rolloff in the radiation function. Defaults to .9378.
|
wolffd@0
|
32 %
|
wolffd@0
|
33 % The output variables from this function are
|
wolffd@0
|
34 % aCoeff - The LPC analysis results, a(i). One column of L numbers for each
|
wolffd@0
|
35 % frame of data
|
wolffd@0
|
36 % resid - The LPC residual, e(n). One column of sr*fs samples representing
|
wolffd@0
|
37 % the excitation or residual of the LPC filter.
|
wolffd@0
|
38 % pitch - A frame-by-frame estimate of the pitch of the signal, calculated
|
wolffd@0
|
39 % by finding the peak in the residual's autocorrelation for each frame.
|
wolffd@0
|
40 % G - The LPC gain for each frame.
|
wolffd@0
|
41 % parcor - The parcor coefficients. The parcor coefficients give the ratio
|
wolffd@0
|
42 % between adjacent sections in a tubular model of the speech
|
wolffd@0
|
43 % articulators. There are L parcor coefficients for each frame of
|
wolffd@0
|
44 % speech.
|
wolffd@0
|
45 % stream - The LPC analysis' residual or excitation signal as one long vector.
|
wolffd@0
|
46 % Overlapping frames of the resid output combined into a new one-
|
wolffd@0
|
47 % dimensional signal and post-filtered.
|
wolffd@0
|
48 %
|
wolffd@0
|
49 % The synlpc routine inverts this transform and returns the original speech
|
wolffd@0
|
50 % signal.
|
wolffd@0
|
51 %
|
wolffd@0
|
52 % This code was graciously provided by:
|
wolffd@0
|
53 % Delores Etter (University of Colorado, Boulder) and
|
wolffd@0
|
54 % Professor Geoffrey Orsak (Southern Methodist University)
|
wolffd@0
|
55 % It was first published in
|
wolffd@0
|
56 % Orsak, G.C. et al. "Collaborative SP education using the Internet and
|
wolffd@0
|
57 % MATLAB" IEEE SIGNAL PROCESSING MAGAZINE Nov. 1995. vol.12, no.6, pp.
|
wolffd@0
|
58 % 23-32.
|
wolffd@0
|
59 % Modified and debugging plots added by Kate Nguyen and Malcolm Slaney
|
wolffd@0
|
60
|
wolffd@0
|
61 % A more complete set of routines for LPC analysis can be found at
|
wolffd@0
|
62 % http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
|
wolffd@0
|
63
|
wolffd@0
|
64 % (c) 1998 Interval Research Corporation
|
wolffd@0
|
65
|
wolffd@0
|
66
|
wolffd@0
|
67 if (nargin<3), L = 13; end
|
wolffd@0
|
68 if (nargin<4), fr = 20; end
|
wolffd@0
|
69 if (nargin<5), fs = 30; end
|
wolffd@0
|
70 if (nargin<6), preemp = .9378; end
|
wolffd@0
|
71
|
wolffd@0
|
72 [row col] = size(data);
|
wolffd@0
|
73 if col==1 data=data'; end
|
wolffd@0
|
74
|
wolffd@0
|
75 nframe = 0;
|
wolffd@0
|
76 msfr = round(sr/1000*fr); % Convert ms to samples
|
wolffd@0
|
77 msfs = round(sr/1000*fs); % Convert ms to samples
|
wolffd@0
|
78 duration = length(data);
|
wolffd@0
|
79 speech = filter([1 -preemp], 1, data)'; % Preemphasize speech
|
wolffd@0
|
80 msoverlap = msfs - msfr;
|
wolffd@0
|
81 ramp = [0:1/(msoverlap-1):1]'; % Compute part of window
|
wolffd@0
|
82
|
wolffd@0
|
83
|
wolffd@0
|
84 for frameIndex=1:msfr:duration-msfs+1 % frame rate=20ms
|
wolffd@0
|
85 frameData = speech(frameIndex:(frameIndex+msfs-1)); % frame size=30ms
|
wolffd@0
|
86 nframe = nframe+1;
|
wolffd@0
|
87 autoCor = xcorr(frameData); % Compute the cross correlation
|
wolffd@0
|
88 autoCorVec = autoCor(msfs+[0:L]);
|
wolffd@0
|
89
|
wolffd@0
|
90 % Levinson's method
|
wolffd@0
|
91 err(1) = autoCorVec(1);
|
wolffd@0
|
92 k(1) = 0;
|
wolffd@0
|
93 A = [];
|
wolffd@0
|
94 for index=1:L
|
wolffd@0
|
95 numerator = [1 A.']*autoCorVec(index+1:-1:2);
|
wolffd@0
|
96 denominator = -1*err(index);
|
wolffd@0
|
97 k(index) = numerator/denominator; % PARCOR coeffs
|
wolffd@0
|
98 A = [A+k(index)*flipud(A); k(index)];
|
wolffd@0
|
99 err(index+1) = (1-k(index)^2)*err(index);
|
wolffd@0
|
100 end
|
wolffd@0
|
101
|
wolffd@0
|
102 aCoeff(:,nframe) = [1; A];
|
wolffd@0
|
103 parcor(:,nframe) = k';
|
wolffd@0
|
104
|
wolffd@0
|
105 % Calculate the filter
|
wolffd@0
|
106 % response
|
wolffd@0
|
107 % by evaluating the
|
wolffd@0
|
108 % z-transform
|
wolffd@0
|
109 if 0
|
wolffd@0
|
110 gain=0;
|
wolffd@0
|
111 cft=0:(1/255):1;
|
wolffd@0
|
112 for index=1:L
|
wolffd@0
|
113 gain = gain + aCoeff(index,nframe)*exp(-i*2*pi*cft).^index;
|
wolffd@0
|
114 end
|
wolffd@0
|
115 gain = abs(1./gain);
|
wolffd@0
|
116 spec(:,nframe) = 20*log10(gain(1:128))';
|
wolffd@0
|
117 plot(20*log10(gain));
|
wolffd@0
|
118 title(nframe);
|
wolffd@0
|
119 drawnow;
|
wolffd@0
|
120 end
|
wolffd@0
|
121
|
wolffd@0
|
122 % Calculate the filter response
|
wolffd@0
|
123 % from the filter's impulse
|
wolffd@0
|
124 % response (to check above).
|
wolffd@0
|
125 if 0
|
wolffd@0
|
126 impulseResponse = filter(1, aCoeff(:,nframe), [1 zeros(1,255)]);
|
wolffd@0
|
127 freqResp = 20*log10(abs(fft(impulseResponse)));
|
wolffd@0
|
128 plot(freqResp);
|
wolffd@0
|
129 end
|
wolffd@0
|
130
|
wolffd@0
|
131 errSig = filter([1 A'],1,frameData); % find excitation noise
|
wolffd@0
|
132
|
wolffd@0
|
133 G(nframe) = sqrt(err(L+1)); % gain
|
wolffd@0
|
134 autoCorErr = xcorr(errSig); % calculate pitch & voicing
|
wolffd@0
|
135 % information
|
wolffd@0
|
136 [B,I] = sort(autoCorErr);
|
wolffd@0
|
137 num = length(I);
|
wolffd@0
|
138 if B(num-1) > .3*B(num)
|
wolffd@0
|
139 pitch(nframe) = abs(I(num) - I(num-1));
|
wolffd@0
|
140 else
|
wolffd@0
|
141 pitch(nframe) = 0;
|
wolffd@0
|
142 end
|
wolffd@0
|
143
|
wolffd@0
|
144 resid(:,nframe) = errSig/G(nframe);
|
wolffd@0
|
145 if(frameIndex==1) % add residual frames using a
|
wolffd@0
|
146 stream = resid(1:msfr,nframe); % trapezoidal window
|
wolffd@0
|
147 else
|
wolffd@0
|
148 stream = [stream;
|
wolffd@0
|
149 overlap+resid(1:msoverlap,nframe).*ramp;
|
wolffd@0
|
150 resid(msoverlap+1:msfr,nframe)];
|
wolffd@0
|
151 end
|
wolffd@0
|
152 if(frameIndex+msfr+msfs-1 > duration)
|
wolffd@0
|
153 stream = [stream; resid(msfr+1:msfs,nframe)];
|
wolffd@0
|
154 else
|
wolffd@0
|
155 overlap = resid(msfr+1:msfs,nframe).*flipud(ramp);
|
wolffd@0
|
156 end
|
wolffd@0
|
157 end
|
wolffd@0
|
158 stream = filter(1, [1 -preemp], stream)';
|