Mercurial > hg > asc-c
diff ComputeFeatures.m @ 0:acfea2266c6d tip
Baseline classification system. Note that it needs the ma toolbox (for computation of MFCCs) and the pmtk3 toolbox
author | Daniele Barchiesi <daniele.barchiesi@eecs.qmul.ac.uk> |
---|---|
date | Fri, 31 May 2013 12:25:30 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ComputeFeatures.m Fri May 31 12:25:30 2013 +0100
@@ -0,0 +1,121 @@
+function [fea, feaNam, feaSiz] = ComputeFeatures(wav, par)
+% Computes a frame-wise feature matrix for an audio file by stacking, row-wise,
+% the features listed in par.feaNam.
+%
+% [fea, feaNam, feaSiz] = ComputeFeatures(wav, par)
+%
+% Input
+% -wav: file name locating a .wav audio signal.
+% -par: struct of parameters with the following fields (defaults in brackets)
+%   .fs (22050): sampling frequency
+%   .fft_size (1024): length of fft
+%   .hopsize (512): step between consecutive frames
+%   .usePreEmphasis (false): apply a pre-emphasis (high pass) filter
+%   .feaNam ({'mfcc','dmfcc','nme','hos','zcr','sro','scn','sfl','lpc','mpf'}):
+%       names of the features to compute, as a cell array of strings or a
+%       single string
+%   The whole struct is also handed to ma_mfcc, LPCFeatures and
+%   GaborFeatures; fields such as .num_ceps_coeffs, .mel_filt_bank and
+%   .use_first_coeff are presumably read there (no defaults are set for
+%   them in this file - confirm against the ma toolbox).
+% Output
+% -fea: matrix of features (one column per frame)
+% -feaNam: cell array with the names of the computed features
+% -feaSiz: number of rows of fea contributed by each feature, in order
+%
+% Calling ComputeFeatures with no input arguments runs the unit test.
+
+%% Unit test
+if ~nargin, [fea, feaNam, feaSiz] = unitTest; return, end
+
+%% Defaults
+if ~exist('par','var') || isempty(par), par = struct; end
+
+def.fs = 22050; %sampling rate
+def.fft_size = 1024; %size of window
+def.hopsize = 512; %step size
+def.usePreEmphasis = false; %use pre-emphasis (high pass filter)
+def.feaNam = {'mfcc','dmfcc','nme','hos','zcr','sro','scn','sfl','lpc','mpf'};
+
+par = setdefaultoptions(par,def); %set default options
+
+%A single feature may be requested as a plain string: wrap it so that the loop
+%below always indexes a cell array. A cell array is left alone (wrapping it
+%again would make the switch below receive a cell instead of a string).
+if ischar(par.feaNam), par.feaNam = {par.feaNam}; end
+
+%% Compute features
+s = preprocessAudio(wav,par); %preprocess audio file
+
+feaNam = par.feaNam; %set before the loop so it is defined for an empty list
+feaSiz = [];
+fea = [];
+mfcc = []; %outputs of ma_mfcc, computed on first use and then shared by
+mel = []; %the 'mfcc', 'dmfcc' and 'nme' features
+for iFea=1:length(par.feaNam)
+    switch par.feaNam{iFea}
+        case {'mfcc','dmfcc','nme'} %MFCCs and related
+            if isempty(mfcc), [mfcc,~,mel] = ma_mfcc(s,par); end
+            switch par.feaNam{iFea}
+                case 'mfcc'
+                    x = mfcc;
+                case 'dmfcc'
+                    x = derivative(mfcc);
+                case 'nme'
+                    x = mel*diag(1./sum(mel)); %energy in each mel band normalized by total energy
+            end
+        case 'hos' %Higher order statistics (see Chi2003Ba)
+            x = (kurtosis(s)/(var(s)^2))*ones(1,fix(length(s)/par.hopsize)-1);
+        case 'zcr'
+            x = zcr(s,par.fft_size,par.hopsize,par.fs)'; %zero crossing rate
+        case 'sro'
+            x = SpectralRollOff(s,par.fft_size,par.hopsize,0.80,par.fs); %spectral roll-off
+        case 'scn'
+            x = SpectralCentroid(s,par.fft_size,par.hopsize,par.fs)'; %spectral centroid
+        case 'sfl'
+            x = SpectralFlux(s,par.fft_size,par.hopsize,par.fs)'; %spectral flux
+        case 'lpc'
+            x = LPCFeatures(s,par); %LPC features
+        case 'mpf'
+            x = GaborFeatures(s,par); %Gabor features
+        otherwise %without this branch x would be stale or undefined
+            error('ComputeFeatures:unknownFeature', ...
+                'Unknown feature name ''%s''.',par.feaNam{iFea});
+    end
+    feaSiz = [feaSiz, size(x,1)]; %#ok<AGROW>
+    fea = [fea; x]; %#ok<AGROW>
+end
+
+function s = preprocessAudio(wav,par)
+% Reads the audio file wav and returns it as a single-channel signal, halved in
+% sampling rate, optionally pre-emphasised, and peak-normalised.
+if exist('wavread','file') %wavread is missing from recent MATLAB releases
+    s = wavread(wav); %read file
+else
+    s = audioread(wav); %read file
+end
+%Keep every other sample (from 44.1kHz to 22.05kHz). NOTE: the factor is fixed
+%and no anti-aliasing filter is applied, so the input is assumed to be 44.1kHz.
+s = s(1:2:end,:);
+if size(s,2)>1, s = mean(s,2); end %convert to mono
+if par.usePreEmphasis %apply pre-emphasis filter that highlights high frequencies
+    h = [1, -15/16]; %see Fundamentals of speech processing (Rabiner, Juang)
+    s = filter(h,1,s);
+end
+peak = max(abs(s));
+if peak>0, s = s/peak; end %normalize audio; silent input stays all-zero (no 0/0)
+
+function dmfcc = derivative(mfcc)
+% First derivative along the rows of mfcc using the central difference
+% (x(:,t+1)-x(:,t-1))/2, with the first and last columns replicated at the
+% borders. The output has the same size as mfcc.
+dmfcc = zeros(size(mfcc)); %mfccs 1st derivative
+if isempty(mfcc), return, end
+padded = [mfcc(:,1), mfcc, mfcc(:,end)];
+dmfcc = (padded(:,3:end) - padded(:,1:end-2))/2;
+
+function [fea, feaNam, feaSizes] = unitTest
+% Runs ComputeFeatures with default parameters on the file 'bus01.wav'.
+clc, close all
+file = 'bus01.wav';
+[fea, feaNam, feaSizes] = ComputeFeatures(file);
\ No newline at end of file