camir-aes2014: toolboxes/MIRtoolbox1.3.2/AuditoryToolbox/proclpc.m annotate

annotate toolboxes/MIRtoolbox1.3.2/AuditoryToolbox/proclpc.m @ 0:e9a9cd732c1e tip

first hg version after svn

author	wolffd
date	Tue, 10 Feb 2015 15:05:51 +0000
parents
children

rev	line source
wolffd@0	1 function [aCoeff,resid,pitch,G,parcor,stream] = proclpc(data,sr,L,fr,fs,preemp)
wolffd@0	2 % USAGE: [aCoeff,resid,pitch,G,parcor,stream] = proclpc(data,sr,L,fr,fs,preemp)
wolffd@0	3 %
wolffd@0	4 % This function computes the LPC (linear-predictive coding) coefficients that
wolffd@0	5 % describe a speech signal. The LPC coefficients are a short-time measure of
wolffd@0	6 % the speech signal which describe the signal as the output of an all-pole
wolffd@0	7 % filter. This all-pole filter provides a good description of the speech
wolffd@0	8 % articulators; thus LPC analysis is often used in speech recognition and
wolffd@0	9 % speech coding systems. The LPC parameters are recalculated, by default in
wolffd@0	10 % this implementation, every 20ms.
wolffd@0	11 %
wolffd@0	12 % The results of LPC analysis are a new representation of the signal
wolffd@0	13 % s(n) = G e(n) - sum from 1 to L a(i)s(n-i)
wolffd@0	14 % where s(n) is the original data. a(i) and e(n) are the outputs of the LPC
wolffd@0	15 % analysis with a(i) representing the LPC model. The e(n) term represents
wolffd@0	16 % either the speech source's excitation, or the residual: the details of the
wolffd@0	17 % signal that are not captured by the LPC coefficients. The G factor is a
wolffd@0	18 % gain term.
wolffd@0	19 %
wolffd@0	20 % LPC analysis is performed on a monaural sound vector (data) which has been
wolffd@0	21 % sampled at a sampling rate of "sr". The following optional parameters modify
wolffd@0	22 % the behaviour of this algorithm.
wolffd@0	23 % L - The order of the analysis. There are L+1 LPC coefficients in the output
wolffd@0	24 % array aCoeff for each frame of data. L defaults to 13.
wolffd@0	25 % fr - Frame time increment, in ms. The LPC analysis is done starting every
wolffd@0	26 % fr ms in time. Defaults to 20ms (50 LPC vectors a second)
wolffd@0	27 % fs - Frame size in ms. The LPC analysis is done by windowing the speech
wolffd@0	28 % data with a rectangular window that is fs ms long. Defaults to 30ms
wolffd@0	29 % preemp - This variable is the epsilon in a digital one-zero filter which
wolffd@0	30 % serves to preemphasize the speech signal and compensate for the 6dB
wolffd@0	31 % per octave rolloff in the radiation function. Defaults to .9378.
wolffd@0	32 %
wolffd@0	33 % The output variables from this function are
wolffd@0	34 % aCoeff - The LPC analysis results, a(i). One column of L numbers for each
wolffd@0	35 % frame of data
wolffd@0	36 % resid - The LPC residual, e(n). One column of sr*fs samples representing
wolffd@0	37 % the excitation or residual of the LPC filter.
wolffd@0	38 % pitch - A frame-by-frame estimate of the pitch of the signal, calculated
wolffd@0	39 % by finding the peak in the residual's autocorrelation for each frame.
wolffd@0	40 % G - The LPC gain for each frame.
wolffd@0	41 % parcor - The parcor coefficients. The parcor coefficients give the ratio
wolffd@0	42 % between adjacent sections in a tubular model of the speech
wolffd@0	43 % articulators. There are L parcor coefficients for each frame of
wolffd@0	44 % speech.
wolffd@0	45 % stream - The LPC analysis' residual or excitation signal as one long vector.
wolffd@0	46 % Overlapping frames of the resid output combined into a new one-
wolffd@0	47 % dimensional signal and post-filtered.
wolffd@0	48 %
wolffd@0	49 % The synlpc routine inverts this transform and returns the original speech
wolffd@0	50 % signal.
wolffd@0	51 %
wolffd@0	52 % This code was graciously provided by:
wolffd@0	53 % Delores Etter (University of Colorado, Boulder) and
wolffd@0	54 % Professor Geoffrey Orsak (Southern Methodist University)
wolffd@0	55 % It was first published in
wolffd@0	56 % Orsak, G.C. et al. "Collaborative SP education using the Internet and
wolffd@0	57 % MATLAB" IEEE SIGNAL PROCESSING MAGAZINE Nov. 1995. vol.12, no.6, pp.
wolffd@0	58 % 23-32.
wolffd@0	59 % Modified and debugging plots added by Kate Nguyen and Malcolm Slaney
wolffd@0	60
wolffd@0	61 % A more complete set of routines for LPC analysis can be found at
wolffd@0	62 % http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
wolffd@0	63
wolffd@0	64 % (c) 1998 Interval Research Corporation
wolffd@0	65
wolffd@0	66
wolffd@0	67 if (nargin<3), L = 13; end
wolffd@0	68 if (nargin<4), fr = 20; end
wolffd@0	69 if (nargin<5), fs = 30; end
wolffd@0	70 if (nargin<6), preemp = .9378; end
wolffd@0	71
wolffd@0	72 [row col] = size(data);
wolffd@0	73 if col==1 data=data'; end
wolffd@0	74
wolffd@0	75 nframe = 0;
wolffd@0	76 msfr = round(sr/1000*fr); % Convert ms to samples
wolffd@0	77 msfs = round(sr/1000*fs); % Convert ms to samples
wolffd@0	78 duration = length(data);
wolffd@0	79 speech = filter([1 -preemp], 1, data)'; % Preemphasize speech
wolffd@0	80 msoverlap = msfs - msfr;
wolffd@0	81 ramp = [0:1/(msoverlap-1):1]'; % Compute part of window
wolffd@0	82
wolffd@0	83
wolffd@0	84 for frameIndex=1:msfr:duration-msfs+1 % frame rate=20ms
wolffd@0	85 frameData = speech(frameIndex:(frameIndex+msfs-1)); % frame size=30ms
wolffd@0	86 nframe = nframe+1;
wolffd@0	87 autoCor = xcorr(frameData); % Compute the cross correlation
wolffd@0	88 autoCorVec = autoCor(msfs+[0:L]);
wolffd@0	89
wolffd@0	90 % Levinson's method
wolffd@0	91 err(1) = autoCorVec(1);
wolffd@0	92 k(1) = 0;
wolffd@0	93 A = [];
wolffd@0	94 for index=1:L
wolffd@0	95 numerator = [1 A.']*autoCorVec(index+1:-1:2);
wolffd@0	96 denominator = -1*err(index);
wolffd@0	97 k(index) = numerator/denominator; % PARCOR coeffs
wolffd@0	98 A = [A+k(index)*flipud(A); k(index)];
wolffd@0	99 err(index+1) = (1-k(index)^2)*err(index);
wolffd@0	100 end
wolffd@0	101
wolffd@0	102 aCoeff(:,nframe) = [1; A];
wolffd@0	103 parcor(:,nframe) = k';
wolffd@0	104
wolffd@0	105 % Calculate the filter
wolffd@0	106 % response
wolffd@0	107 % by evaluating the
wolffd@0	108 % z-transform
wolffd@0	109 if 0
wolffd@0	110 gain=0;
wolffd@0	111 cft=0:(1/255):1;
wolffd@0	112 for index=1:L
wolffd@0	113 gain = gain + aCoeff(index,nframe)exp(-i2picft).^index;
wolffd@0	114 end
wolffd@0	115 gain = abs(1./gain);
wolffd@0	116 spec(:,nframe) = 20*log10(gain(1:128))';
wolffd@0	117 plot(20*log10(gain));
wolffd@0	118 title(nframe);
wolffd@0	119 drawnow;
wolffd@0	120 end
wolffd@0	121
wolffd@0	122 % Calculate the filter response
wolffd@0	123 % from the filter's impulse
wolffd@0	124 % response (to check above).
wolffd@0	125 if 0
wolffd@0	126 impulseResponse = filter(1, aCoeff(:,nframe), [1 zeros(1,255)]);
wolffd@0	127 freqResp = 20*log10(abs(fft(impulseResponse)));
wolffd@0	128 plot(freqResp);
wolffd@0	129 end
wolffd@0	130
wolffd@0	131 errSig = filter([1 A'],1,frameData); % find excitation noise
wolffd@0	132
wolffd@0	133 G(nframe) = sqrt(err(L+1)); % gain
wolffd@0	134 autoCorErr = xcorr(errSig); % calculate pitch & voicing
wolffd@0	135 % information
wolffd@0	136 [B,I] = sort(autoCorErr);
wolffd@0	137 num = length(I);
wolffd@0	138 if B(num-1) > .3*B(num)
wolffd@0	139 pitch(nframe) = abs(I(num) - I(num-1));
wolffd@0	140 else
wolffd@0	141 pitch(nframe) = 0;
wolffd@0	142 end
wolffd@0	143
wolffd@0	144 resid(:,nframe) = errSig/G(nframe);
wolffd@0	145 if(frameIndex==1) % add residual frames using a
wolffd@0	146 stream = resid(1:msfr,nframe); % trapezoidal window
wolffd@0	147 else
wolffd@0	148 stream = [stream;
wolffd@0	149 overlap+resid(1:msoverlap,nframe).*ramp;
wolffd@0	150 resid(msoverlap+1:msfr,nframe)];
wolffd@0	151 end
wolffd@0	152 if(frameIndex+msfr+msfs-1 > duration)
wolffd@0	153 stream = [stream; resid(msfr+1:msfs,nframe)];
wolffd@0	154 else
wolffd@0	155 overlap = resid(msfr+1:msfs,nframe).*flipud(ramp);
wolffd@0	156 end
wolffd@0	157 end
wolffd@0	158 stream = filter(1, [1 -preemp], stream)';

Mercurial > hg > camir-aes2014

annotate toolboxes/MIRtoolbox1.3.2/AuditoryToolbox/proclpc.m @ 0:e9a9cd732c1e tip