diff ComputeFeatures.m @ 0:acfea2266c6d tip

Baseline classification system. Note that it needs the MA toolbox (for computation of MFCCs) and the pmtk3 toolbox.
author Daniele Barchiesi <daniele.barchiesi@eecs.qmul.ac.uk>
date Fri, 31 May 2013 12:25:30 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ComputeFeatures.m	Fri May 31 12:25:30 2013 +0100
@@ -0,0 +1,93 @@
+function [fea, feaNam, feaSiz] = ComputeFeatures(wav, par)
+% Computes a matrix of frame-wise audio features (MFCCs and related
+% descriptors, spectral descriptors, LPC and Gabor matching-pursuit
+% features) by stacking the selected features vertically.
+%
+% [fea, feaNam, feaSiz] = ComputeFeatures(wav, par)
+%
+% Calling the function with no arguments runs the unit test.
+%
+% Input
+% -wav: file name locating a .wav audio signal.
+% -par: struct of parameters. Fields defaulted by this function:
+%   .fs (22050): sampling frequency
+%   .fft_size (1024): length of fft
+%   .hopsize (512): step between consecutive fft frames
+%   .usePreEmphasis (false): apply a pre-emphasis (high pass) filter
+%   .feaNam ({'mfcc','dmfcc','nme','hos','zcr','sro','scn','sfl','lpc','mpf'}):
+%   features to compute; either a cell array of names or a single name
+%   given as a string
+%   The whole struct is also forwarded to ma_mfcc, LPCFeatures and
+%   GaborFeatures. Fields such as .num_ceps_coeffs, .mel_filt_bank and
+%   .use_first_coeff are NOT defaulted here.
+%   NOTE(review): presumably those helpers supply their own defaults -
+%   confirm.
+% Output
+% -fea: matrix of features; the rows of every selected feature are stacked
+%   in the order given by par.feaNam (assumes one column per frame and that
+%   all features yield the same number of columns - TODO confirm)
+% -feaNam: cell array containing the names of the computed features
+% -feaSiz: row vector with the number of rows each feature adds to fea
+%% Unit test
+if ~nargin, [fea, feaNam, feaSiz] = unitTest; return, end
+
+%% Defaults
+if ~exist('par','var') || isempty(par), par = struct; end
+
+def.fs = 22050;                         %sampling rate
+def.fft_size = 1024;                    %size of window
+def.hopsize = 512;                      %step size
+def.usePreEmphasis = false;             %use pre-emphasis (high pass filter)
+def.feaNam = {'mfcc','dmfcc','nme','hos','zcr','sro','scn','sfl','lpc','mpf'};
+
+par = setdefaultoptions(par,def);       %set default options
+
+%a single feature name may be given as a string: wrap it so that the loop
+%below can always index a cell array (a cell array is left untouched)
+if ischar(par.feaNam), par.feaNam = {par.feaNam}; end
+
+%% Compute features
+s = preprocessAudio(wav,par);                       %preprocess audio file
+
+feaSiz = [];
+fea = [];
+feaNam = par.feaNam;            %assigned up front so it is defined even for an empty list
+mfccReady = false;              %ma_mfcc is shared by 'mfcc','dmfcc','nme': run it once
+for iFea=1:length(par.feaNam)
+    name = par.feaNam{iFea};
+    switch name
+        case {'mfcc','dmfcc','nme'}             %MFCCs and related
+            if ~mfccReady
+                [mfcc,~,mel] = ma_mfcc(s,par);
+                mfccReady = true;
+            end
+            switch name
+                case 'mfcc'
+                    x = mfcc;
+                case 'dmfcc'
+                    x = derivative(mfcc);
+                case 'nme'
+                    x = mel*diag(1./sum(mel)); %energy in each mel band normalized by total energy
+            end
+        case 'hos'                                  %Higher order statistics (see Chi2003Ba)
+            %one global value replicated over fix(length(s)/hopsize)-1 columns
+            x = (kurtosis(s)/(var(s)^2))*ones(1,fix(length(s)/par.hopsize)-1);
+        case 'zcr'
+            x = zcr(s,par.fft_size,par.hopsize,par.fs)'; %zero crossing rate
+        case 'sro'
+            x = SpectralRollOff(s,par.fft_size,par.hopsize,0.80,par.fs);    %spectral roll-off
+        case 'scn'
+            x = SpectralCentroid(s,par.fft_size,par.hopsize,par.fs)';       %spectral centroid
+        case 'sfl'
+            x = SpectralFlux(s,par.fft_size,par.hopsize,par.fs)';           %spectral flux
+        case 'lpc'
+            x = LPCFeatures(s,par);                             %LPC features
+        case 'mpf'
+            x = GaborFeatures(s,par);                      %Gabor features
+        otherwise
+            %without this branch an unknown name would silently reuse the
+            %previous feature (or fail on an undefined variable)
+            error('ComputeFeatures:unknownFeature', ...
+                'Unknown feature name ''%s''.', name);
+    end
+    feaSiz = [feaSiz, size(x,1)]; %#ok<AGROW>
+    fea = [fea; x]; %#ok<AGROW>
+end
+
+function s = preprocessAudio(wav,par)
+% Reads an audio file and returns a mono, subsampled, optionally
+% pre-emphasised and peak-normalised signal.
+%
+% Input
+% -wav: file name of the audio file
+% -par: struct of parameters; fields read here:
+%   .fs: target sampling frequency (when absent, the signal is subsampled
+%   by a factor of 2, i.e. 44.1kHz -> 22.05kHz)
+%   .usePreEmphasis: apply the pre-emphasis filter when true
+% Output
+% -s: column vector containing the preprocessed signal
+
+%wavread has been removed from recent MATLAB releases: prefer audioread
+%whenever it is available
+if ~isempty(which('audioread'))
+    [s, fsIn] = audioread(wav);     %read file
+else
+    [s, fsIn] = wavread(wav);       %read file (legacy MATLAB)
+end
+
+%subsample audio by plain decimation (no anti-aliasing filter is applied);
+%the step is derived from the file's sampling rate instead of assuming a
+%44.1kHz input
+step = 2;
+if isfield(par,'fs') && ~isempty(par.fs)
+    step = max(1, round(fsIn/par.fs));
+    if abs(fsIn - step*par.fs) > 1e-6*fsIn
+        warning('ComputeFeatures:sampleRate', ...
+            'File rate %g Hz is not an integer multiple of par.fs = %g Hz; decimating by %d.', ...
+            fsIn, par.fs, step);
+    end
+end
+s = s(1:step:end,:);
+
+if size(s,2)>1, s = mean(s,2); end  %convert to mono
+if par.usePreEmphasis               %apply pre-emphasis filter that highlights high frequencies
+    h = [1, -15/16];                %see Fundamentals of speech processing (Rabiner, Juang)
+    s = filter(h,1,s);
+end
+
+peak = max(abs(s));
+if peak > 0                         %skip for silent/empty audio to avoid 0/0 = NaN
+    s = s/peak;                     %normalize audio
+end
+
+function dmfcc = derivative(mfcc)
+% First derivative of each row of mfcc along the column direction.
+% Every row is extended by repeating its first and last value, filtered
+% with the central difference kernel [1/2 0 -1/2] and cropped back to its
+% original length, so the output has the same size as the input.
+kernel = [1/2,0,-1/2];
+nRows = size(mfcc,1);
+dmfcc = zeros(size(mfcc));
+iRow = 1;
+while iRow <= nRows
+    padded = [mfcc(iRow,1) mfcc(iRow,:) mfcc(iRow,end)];    %replicate the borders
+    filtered = conv(padded,kernel,'same');
+    dmfcc(iRow,:) = filtered(2:end-1);                      %discard the two padding samples
+    iRow = iRow + 1;
+end
+
+function [fea, feaNam, feaSizes] = unitTest
+% Smoke test: computes the default feature set on the example file
+% 'bus01.wav' (which must be reachable from the current folder/path) and
+% returns the three outputs of ComputeFeatures.
+% Note: clears the command window and closes all open figures.
+clear, clc, close all
+file = 'bus01.wav';
+%all three outputs must be requested in the order ComputeFeatures returns
+%them, otherwise feaNam is left unassigned and feaSizes receives the names
+[fea, feaNam, feaSizes] = ComputeFeatures(file);
\ No newline at end of file