annotate ComputeFeatures.m @ 0:acfea2266c6d tip

Baseline classification system. Note that it needs the MA toolbox (for computation of MFCCs) and the pmtk3 toolbox.
author Daniele Barchiesi <daniele.barchiesi@eecs.qmul.ac.uk>
date Fri, 31 May 2013 12:25:30 +0100
parents
children
rev   line source
function [fea, feaNam, feaSiz] = ComputeFeatures(wav, par)
% Computes a feature matrix for an audio file by stacking, one below the
% other, the features listed in par.feaNam (MFCCs and related quantities,
% spectral descriptors, LPC features and Gabor matching-pursuit features).
%
% [fea, feaNam, feaSiz] = ComputeFeatures(wav, par)
%
% Called without arguments, the function runs its unit test.
%
% Input
% -wav: file name locating a .wav audio signal.
% -par: (optional) struct of parameters. Missing fields are filled in by
%       setdefaultoptions with the following defaults:
%         .fs (22050): sampling frequency
%         .fft_size (1024): length of fft
%         .hopsize (512): step between consecutive frames
%         .usePreEmphasis (false): apply a pre-emphasis (high pass) filter
%         .feaNam ({'mfcc','dmfcc','nme','hos','zcr','sro','scn','sfl',
%                   'lpc','mpf'}): features to compute, given as a cell
%                   array of names or as a single name string
%       par is also passed as-is to ma_mfcc, LPCFeatures and GaborFeatures.
%       The fields below are not set in this file; they are presumably
%       defaulted by ma_mfcc - TODO confirm:
%         .num_ceps_coeffs (13): number of cepstral coefficients
%         .mel_filt_bank ([0 11025 23]): extrema and number of mel
%                                        frequency bands
%         .use_first_coeff (false): retain 1st MFCC coefficient
% Output
% -fea: matrix of features (one column per frame); the blocks belonging to
%       the individual features are concatenated vertically in the order
%       given by feaNam
% -feaNam: cell array containing the names of the computed features
% -feaSiz: row vector with the number of rows each feature contributes to
%          fea, in the same order as feaNam

%% Unit test
if ~nargin, [fea, feaNam, feaSiz] = unitTest; return, end

%% Defaults
if ~exist('par','var') || isempty(par), par = struct; end

def.fs = 22050;             %sampling rate
def.fft_size = 1024;        %size of window
def.hopsize = 512;          %step size
def.usePreEmphasis = false; %use pre-emphasis (high pass filter)
def.feaNam = {'mfcc','dmfcc','nme','hos','zcr','sro','scn','sfl','lpc','mpf'};

par = setdefaultoptions(par,def); %set default options

% Accept a single feature name passed as a string. Wrapping must be
% conditional: wrapping a cell array again would nest it and the switch
% below would receive a cell instead of a name.
if ~iscell(par.feaNam), par.feaNam = {par.feaNam}; end
feaNam = par.feaNam; %assigned up front so it is defined even for an empty list

%% Compute features
s = preprocessAudio(wav,par); %preprocess audio file

nFea = numel(feaNam);
feaSiz = zeros(1,nFea);
blocks = cell(nFea,1);
mfcc = []; mel = []; mfccReady = false; %ma_mfcc output is shared by three features
for iFea = 1:nFea
    switch feaNam{iFea}
        case {'mfcc','dmfcc','nme'} %MFCCs and related
            if ~mfccReady %compute once, reuse for the related features
                [mfcc,~,mel] = ma_mfcc(s,par);
                mfccReady = true;
            end
            switch feaNam{iFea}
                case 'mfcc'
                    x = mfcc;
                case 'dmfcc'
                    x = derivative(mfcc);
                case 'nme'
                    %energy in each mel band normalized by total energy
                    %(column-wise scaling, same result as mel*diag(1./sum(mel))
                    %without building the square diagonal matrix)
                    x = bsxfun(@times, mel, 1./sum(mel,1));
            end
        case 'hos' %Higher order statistics (see Chi2003Ba)
            %one global value replicated over fix(length(s)/hopsize)-1 columns
            x = (kurtosis(s)/(var(s)^2))*ones(1,fix(length(s)/par.hopsize)-1);
        case 'zcr'
            x = zcr(s,par.fft_size,par.hopsize,par.fs)'; %zero crossing rate
        case 'sro'
            x = SpectralRollOff(s,par.fft_size,par.hopsize,0.80,par.fs); %spectral roll-off
        case 'scn'
            x = SpectralCentroid(s,par.fft_size,par.hopsize,par.fs)'; %spectral centroid
        case 'sfl'
            x = SpectralFlux(s,par.fft_size,par.hopsize,par.fs)'; %spectral flux
        case 'lpc'
            x = LPCFeatures(s,par); %LPC features
        case 'mpf'
            x = GaborFeatures(s,par); %Gabor features
        otherwise
            %fail loudly instead of silently reusing the previous feature
            error('ComputeFeatures:unknownFeature', ...
                'Unknown feature name ''%s''.', feaNam{iFea});
    end
    feaSiz(iFea) = size(x,1);
    blocks{iFea} = x;
end
fea = vertcat(blocks{:});
daniele@0 72
function s = preprocessAudio(wav,par)
% Reads an audio file and returns a mono, peak-normalized signal.
%
% Input
% -wav: file name of the audio file
% -par: struct with fields
%         .fs: target sampling frequency
%         .usePreEmphasis: if true, apply the pre-emphasis filter
% Output
% -s: column vector containing the preprocessed signal

%read file (wavread is not available in recent MATLAB releases)
if ~isempty(which('audioread'))
    [s, fsIn] = audioread(wav);
else
    [s, fsIn] = wavread(wav);
end

%subsample audio by keeping every step-th sample. For a 44.1kHz file and
%par.fs = 22050 the step is 2; files already at the target rate are left
%untouched. NOTE(review): no anti-aliasing filter is applied before
%subsampling, and non-integer rate ratios are only approximated.
step = max(1, round(fsIn/par.fs));
s = s(1:step:end,:);

if size(s,2)>1, s = mean(s,2); end %convert to mono

if par.usePreEmphasis %apply pre-emphasis filter that highlights high frequencies
    h = [1, -15/16]; %see Fundamentals of speech processing (Rabiner, Juang)
    s = filter(h,1,s);
end

%normalize audio; skip when the signal is silent (or empty) to avoid
%dividing by zero and filling the signal with NaNs
peak = max(abs(s));
if peak>0, s = s/peak; end
daniele@0 82
function dmfcc = derivative(mfcc)
% First derivative of each row of mfcc along the columns.
%
% Every row is extended by repeating its first and last element, then
% convolved with the kernel [1/2, 0, -1/2]; the two padding columns are
% dropped again so that the output has the same size as the input.
kernel = [1/2, 0, -1/2];
padded = [mfcc(:,1), mfcc, mfcc(:,end)];  %replicate the edges of every row
filtered = conv2(padded, kernel, 'same'); %row kernel: acts on each row independently
dmfcc = filtered(:, 2:end-1);             %remove the padding columns
daniele@0 89
function [fea, feaNam, feaSizes] = unitTest
% Smoke test: runs ComputeFeatures with default parameters on a sample file
% and returns its three outputs unchanged.
%
% The command window and open figures are deliberately left alone, so that
% calling ComputeFeatures without arguments has no side effects on the
% user's session.
file = 'bus01.wav'; %sample recording, must be reachable from the current path
%request all three outputs in the order ComputeFeatures returns them
%(features, feature names, feature sizes)
[fea, feaNam, feaSizes] = ComputeFeatures(file);