Mercurial > hg > asc-c
comparison ComputeFeatures.m @ 0:acfea2266c6d tip
Baseline classification system. Note that it needs the ma toolbox (for computation of MFCCs) and the pmtk3 toolbox
author | Daniele Barchiesi <daniele.barchiesi@eecs.qmul.ac.uk> |
---|---|
date | Fri, 31 May 2013 12:25:30 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:acfea2266c6d |
---|---|
function [fea, feaNam, feaSiz] = ComputeFeatures(wav, par)
% Computes a matrix of frame-wise audio features for a .wav file. The
% features to compute are selected by name through par.feaNam and stacked
% vertically in the order in which they are listed.
%
% [fea, feaNam, feaSiz] = ComputeFeatures(wav, par)
%
% Calling the function without arguments runs the unit test.
%
% Input
%   -wav: file name locating a .wav audio signal.
%   -par: struct of parameters. Fields whose defaults are set here:
%       .fs (22050): sampling frequency
%       .fft_size (1024): size of the analysis window
%       .hopsize (512): step size between consecutive windows
%       .usePreEmphasis (false): apply a pre-emphasis (high pass) filter
%       .feaNam ({'mfcc','dmfcc','nme','hos','zcr','sro','scn','sfl',
%                 'lpc','mpf'}): a feature name, or a cell array of names
%     The whole struct is also passed on to ma_mfcc, LPCFeatures and
%     GaborFeatures. The fields below were listed in the original header;
%     their defaults are NOT set in this function (presumably they are
%     handled by ma_mfcc - TODO confirm):
%       .num_ceps_coeffs (13): number of cepstral coefficients
%       .mel_filt_bank ([0 11025 23]): extrema and number of mel frequency
%                                      bands
%       .use_first_coeff (false): retain 1st MFCC coefficient
% Output
%   -fea: matrix of features (one column per frame). Every feature must
%         yield the same number of columns, since blocks are concatenated
%         vertically.
%   -feaNam: cell array with the names of the computed features
%   -feaSiz: row vector, number of rows contributed to fea by each feature

%% Unit test
if ~nargin, [fea, feaNam, feaSiz] = unitTest; return, end

%% Defaults
if ~exist('par','var') || isempty(par), par = struct; end

def.fs = 22050;                 %sampling rate
def.fft_size = 1024;            %size of window
def.hopsize = 512;              %step size
def.usePreEmphasis = false;     %use pre-emphasis (high pass filter)
def.feaNam = {'mfcc','dmfcc','nme','hos','zcr','sro','scn','sfl','lpc','mpf'};

par = setdefaultoptions(par,def);   %set default options

% Only a bare feature name needs wrapping. A cell array (such as the
% default list) must be left as is: wrapping it again would make the loop
% below see a single element that is itself a cell, which switch rejects.
if ~iscell(par.feaNam), par.feaNam = {par.feaNam}; end
feaNam = par.feaNam;            %defined even when no feature is requested

%% Compute features
s = preprocessAudio(wav,par);   %preprocess audio file

feaSiz = [];
fea = [];
mfccReady = false;              %ma_mfcc output is shared by three features
for iFea = 1:length(par.feaNam)
    name = par.feaNam{iFea};
    switch name
        case {'mfcc','dmfcc','nme'}     %MFCCs and related
            if ~mfccReady               %s and par do not change: compute once
                [mfcc,~,mel] = ma_mfcc(s,par);
                mfccReady = true;
            end
            switch name
                case 'mfcc'
                    x = mfcc;
                case 'dmfcc'
                    x = derivative(mfcc);
                case 'nme'
                    %energy in each mel band normalized by the total energy
                    %of the frame (column-wise scaling without building a
                    %frames-by-frames diagonal matrix)
                    x = bsxfun(@times, mel, 1./sum(mel,1));
            end
        case 'hos'                      %Higher order statistics (see Chi2003Ba)
            %one global value replicated over fix(length(s)/hopsize)-1 frames
            x = (kurtosis(s)/(var(s)^2))*ones(1,fix(length(s)/par.hopsize)-1);
        case 'zcr'
            x = zcr(s,par.fft_size,par.hopsize,par.fs)';               %zero crossing rate
        case 'sro'
            x = SpectralRollOff(s,par.fft_size,par.hopsize,0.80,par.fs); %spectral roll-off
        case 'scn'
            x = SpectralCentroid(s,par.fft_size,par.hopsize,par.fs)';  %spectral centroid
        case 'sfl'
            x = SpectralFlux(s,par.fft_size,par.hopsize,par.fs)';      %spectral flux
        case 'lpc'
            x = LPCFeatures(s,par);     %LPC features
        case 'mpf'
            x = GaborFeatures(s,par);   %Gabor features
        otherwise
            %fail loudly instead of silently reusing the previous feature
            error('ComputeFeatures:unknownFeature', ...
                'Unknown feature name ''%s''.', name);
    end
    feaSiz = [feaSiz, size(x,1)];       %#ok<AGROW>
    fea = [fea; x];                     %#ok<AGROW>
end
72 | |
function s = preprocessAudio(wav,par)
% Loads an audio file and returns a mono, peak-normalized signal.
%
% Input
%   -wav: file name of the audio file to read
%   -par: struct with fields
%       .fs: target sampling frequency used to choose the decimation factor
%       .usePreEmphasis: if true, apply the pre-emphasis filter
% Output
%   -s: column vector containing the processed signal

% wavread is not available in recent MATLAB releases; prefer audioread
% when it is on the path and fall back to wavread otherwise.
if ~isempty(which('audioread'))
    [s, fsIn] = audioread(wav);     %read file
else
    [s, fsIn] = wavread(wav);       %read file
end

% Subsample by plain decimation (no anti-aliasing filter is applied). The
% factor is derived from the file's sampling rate, so 44.1kHz -> 22.05kHz
% still keeps every second sample, while a file already at par.fs is left
% untouched.
step = max(1, round(fsIn/par.fs));
s = s(1:step:end,:);

if size(s,2)>1, s = mean(s,2); end  %convert to mono

if par.usePreEmphasis   %apply pre-emphasis filter that highlights high frequencies
    h = [1, -15/16];    %see Fundamentals of speech processing (Rabiner, Juang)
    s = filter(h,1,s);
end

% Normalize to unit peak; a silent or empty signal is left unchanged so
% that the division does not produce NaNs.
peak = max(abs(s));
if ~isempty(peak) && peak>0, s = s/peak; end
82 | |
function dmfcc = derivative(mfcc)
% First derivative of the MFCCs along the frame (column) axis, computed
% independently for every row as a central difference. The first and last
% columns are replicated before filtering so that the result has the same
% size as the input.
dmfcc = zeros(size(mfcc));
if size(mfcc,1) == 0, return, end   %no rows: nothing to differentiate

kernel = [1/2, 0, -1/2];                    %central difference
padded = [mfcc(:,1), mfcc, mfcc(:,end)];    %replicate the edge columns
filtered = conv2(padded, kernel, 'same');   %row kernel: filters each row
dmfcc(:,:) = filtered(:,2:end-1);           %drop the two padding columns
89 | |
function [fea, feaNam, feaSizes] = unitTest
% Runs ComputeFeatures with its default parameters on the file 'bus01.wav'
% and returns its three outputs unchanged.
clc, close all
file = 'bus01.wav';
% All three outputs are requested, in the order in which ComputeFeatures
% returns them, so that feaNam is assigned and feaSizes receives the
% feature sizes rather than the feature names.
[fea, feaNam, feaSizes] = ComputeFeatures(file);