view ComputeFeatures.m @ 0:acfea2266c6d tip

Baseline classification system. Note that it needs the MA toolbox (for the computation of MFCCs) and the pmtk3 toolbox.
author Daniele Barchiesi <daniele.barchiesi@eecs.qmul.ac.uk>
date Fri, 31 May 2013 12:25:30 +0100
parents
children
line wrap: on
line source
function [fea, feaNam, feaSiz] = ComputeFeatures(wav, par)
% Computes a frame-wise feature matrix for an audio file by stacking the
% requested feature groups (MFCCs, spectral descriptors, LPC, Gabor
% matching pursuit features, ...) on top of each other.
%
% [fea, feaNam, feaSiz] = ComputeFeatures(wav, par)
%
% Calling the function without arguments runs the built-in unit test.
%
% Input
% -wav: file name locating a .wav audio signal.
% -par: struct of parameters with the following fields (defaults in
%   brackets); missing fields are filled in by setdefaultoptions
%   .fs (22050): sampling frequency
%   .fft_size (1024): size of the analysis window
%   .hopsize (512): step size between consecutive windows
%   .usePreEmphasis (false): apply a pre-emphasis (high pass) filter
%   .feaNam ({'mfcc','dmfcc','nme','hos','zcr','sro','scn','sfl','lpc','mpf'}):
%   feature groups to compute; either a single name (string) or a cell
%   array of names
%   The whole struct is also handed to ma_mfcc, LPCFeatures and
%   GaborFeatures, so any option those helpers understand (presumably
%   e.g. .num_ceps_coeffs, .mel_filt_bank, .use_first_coeff for ma_mfcc -
%   TODO confirm) can be supplied here; no defaults are set for them in
%   this function.
% Output
% -fea: matrix of features (one column per frame); the rows of the
%   individual feature groups are concatenated vertically, so every group
%   must produce the same number of columns
% -feaNam: cell array with the names of the computed feature groups, in
%   the order in which they appear in fea
% -feaSiz: row vector with the number of rows of each feature group
%% Unit test
if ~nargin, [fea, feaNam, feaSiz] = unitTest; return, end

%% Defaults
if ~exist('par','var') || isempty(par), par = struct; end

def.fs = 22050;                         %sampling rate
def.fft_size = 1024;                    %size of window
def.hopsize = 512;                      %step size
def.usePreEmphasis = false;             %use pre-emphasis (high pass filter)
def.feaNam = {'mfcc','dmfcc','nme','hos','zcr','sro','scn','sfl','lpc','mpf'};

par = setdefaultoptions(par,def);       %set default options

% A single feature name may be given as a plain string. Only wrap it in
% that case: wrapping an existing cell array would nest it and make the
% switch below fail.
if ischar(par.feaNam), par.feaNam = {par.feaNam}; end
feaNam = par.feaNam;                    %assigned up front so it is defined even for an empty list

%% Compute features
s = preprocessAudio(wav,par);           %preprocess audio file

nFea = numel(feaNam);
feaSiz = zeros(1,nFea);
blocks = cell(nFea,1);
haveMfcc = false;                       %ma_mfcc output is shared by 'mfcc', 'dmfcc' and 'nme'
mfcc = [];
mel = [];
for iFea = 1:nFea
    switch feaNam{iFea}
        case {'mfcc','dmfcc','nme'}     %MFCCs and related
            if ~haveMfcc                %compute once, reuse for the related features
                [mfcc,~,mel] = ma_mfcc(s,par);
                haveMfcc = true;
            end
            switch feaNam{iFea}
                case 'mfcc'
                    x = mfcc;
                case 'dmfcc'
                    x = derivative(mfcc);
                case 'nme'
                    x = mel*diag(1./sum(mel)); %energy in each mel band normalized by total energy
            end
        case 'hos'                      %Higher order statistics (see Chi2003Ba)
            % a single value for the whole signal, replicated over the frames
            x = (kurtosis(s)/(var(s)^2))*ones(1,fix(length(s)/par.hopsize)-1);
        case 'zcr'
            x = zcr(s,par.fft_size,par.hopsize,par.fs)';                    %zero crossing rate
        case 'sro'
            x = SpectralRollOff(s,par.fft_size,par.hopsize,0.80,par.fs);    %spectral roll-off
        case 'scn'
            x = SpectralCentroid(s,par.fft_size,par.hopsize,par.fs)';       %spectral centroid
        case 'sfl'
            x = SpectralFlux(s,par.fft_size,par.hopsize,par.fs)';           %spectral flux
        case 'lpc'
            x = LPCFeatures(s,par);     %LPC features
        case 'mpf'
            x = GaborFeatures(s,par);   %Gabor features
        otherwise
            % fail loudly instead of silently reusing the previous feature
            error('ComputeFeatures:unknownFeature', ...
                'Unknown feature name ''%s''.', feaNam{iFea});
    end
    feaSiz(iFea) = size(x,1);
    blocks{iFea} = x;
end
fea = vertcat(blocks{:});               %stack feature groups row-wise

function s = preprocessAudio(wav,par)
% Reads an audio file and returns a mono, peak-normalized signal.
%
% Input
% -wav: file name of the audio file
% -par: struct with fields
%   .fs: target sampling frequency
%   .usePreEmphasis: if true, apply a first order pre-emphasis filter
% Output
% -s: column vector containing the preprocessed signal

% wavread is not available in recent MATLAB releases; prefer audioread
% when it exists and fall back to wavread otherwise.
if exist('audioread','file')
    [s, fsIn] = audioread(wav);     %read file
else
    [s, fsIn] = wavread(wav);       %read file
end

% Subsample towards the target rate. The step is derived from the file's
% own sampling rate (2 for a 44.1kHz file and a 22.05kHz target) instead
% of being hard-coded, so files already at the target rate are left
% untouched.
% NOTE(review): this is plain sample dropping without an anti-aliasing
% filter and only supports (approximately) integer rate ratios.
step = max(1, round(fsIn/par.fs));
s = s(1:step:end,:);

if size(s,2)>1, s = mean(s,2); end  %convert to mono
if par.usePreEmphasis               %apply pre-emphasis filter that highlights high frequencies
    h = [1, -15/16];                %see Fundamentals of speech processing (Rabiner, Juang)
    s = filter(h,1,s);
end

% Normalize to unit peak; skip for empty or silent signals, where the
% division would fail or turn every sample into NaN.
peak = max(abs(s));
if ~isempty(peak) && peak > 0
    s = s/peak;
end

function dmfcc = derivative(mfcc)
% First derivative of each row of mfcc along the column direction.
% Every row is extended by repeating its first and last value, then
% filtered with the kernel [1/2 0 -1/2]; the output has the same size as
% the input.
padded = [mfcc(:,1), mfcc, mfcc(:,end)];            %replicate border columns
filtered = conv2(padded, [1/2, 0, -1/2], 'same');   %row kernel: filters along each row
dmfcc = filtered(:, 2:end-1);                       %drop the two padding columns

function [fea, feaNam, feaSizes] = unitTest
% Smoke test: computes the default feature set for the example file
% 'bus01.wav', which must be reachable from the current path.
% All three outputs of ComputeFeatures are requested so that feaNam and
% feaSizes receive the feature names and the feature sizes respectively.
% The test deliberately leaves the command window and open figures alone.
file = 'bus01.wav';
[fea, feaNam, feaSizes] = ComputeFeatures(file);