daniele@0
|
function [fea, feaNam, feaSiz] = ComputeFeatures(wav, par)
% Computes a matrix of audio features by vertically stacking the outputs of
% the feature extractors selected in par.feaNam.
%
% [fea, feaNam, feaSiz] = ComputeFeatures(wav, par)
%
% Calling the function with no input arguments runs the built-in unit test.
%
% Input
%   -wav: file name locating a .wav audio signal.
%   -par: struct of parameters. Fields defaulted by this function:
%       .fs (22050): sampling frequency
%       .fft_size (1024): size of the analysis window
%       .hopsize (512): step size between consecutive windows
%       .usePreEmphasis (false): apply a pre-emphasis (high pass) filter
%       .feaNam ({'mfcc','dmfcc','nme','hos','zcr','sro','scn','sfl',
%                 'lpc','mpf'}): names of the features to compute, either a
%                 cell array of strings or a single string
%     The whole struct is forwarded to ma_mfcc, LPCFeatures and
%     GaborFeatures. The original header also listed the fields below;
%     they are not set here and are presumably consumed by ma_mfcc
%     (TODO confirm where their defaults are applied):
%       .num_ceps_coeffs (13): number of cepstral coefficients
%       .mel_filt_bank ([0 11025 23]): extrema and number of mel frequency
%                                      bands
%       .use_first_coeff (false): retain 1st MFCC coefficient
% Output
%   -fea: matrix of features; the blocks returned by the individual
%         extractors are stacked row-wise (presumably one column per
%         frame, so all extractors must return the same number of columns)
%   -feaNam: cell array with the names of the computed features
%   -feaSiz: row vector with the number of rows contributed by each feature

%% Unit test
if ~nargin, [fea, feaNam, feaSiz] = unitTest; return, end

%% Defaults
if ~exist('par','var') || isempty(par), par = struct; end

def.fs = 22050;                 %sampling rate
def.fft_size = 1024;            %size of window
def.hopsize = 512;              %step size
def.usePreEmphasis = false;     %use pre-emphasis (high pass filter)
def.feaNam = {'mfcc','dmfcc','nme','hos','zcr','sro','scn','sfl','lpc','mpf'};

par = setdefaultoptions(par,def);   %set default options

% Allow a single feature name passed as a string. A cell array must NOT be
% wrapped again, otherwise the switch below receives a cell and errors.
if ischar(par.feaNam), par.feaNam = {par.feaNam}; end

% Assigned before the loop so the output exists even for an empty list.
feaNam = par.feaNam;

%% Compute features
s = preprocessAudio(wav,par);       %preprocess audio file

feaSiz = [];
fea = [];
for iFea = 1:length(feaNam)
    switch feaNam{iFea}
        case {'mfcc','dmfcc','nme'}     %MFCCs and related
            [mfcc,~,mel] = ma_mfcc(s,par);
            switch feaNam{iFea}
                case 'mfcc'
                    x = mfcc;
                case 'dmfcc'
                    x = derivative(mfcc);
                case 'nme'
                    %energy in each mel band normalized by total energy;
                    %column-wise scaling, equivalent to mel*diag(1./sum(mel))
                    %without building the dense diagonal matrix
                    x = bsxfun(@times, mel, 1./sum(mel));
            end
        case 'hos'                      %Higher order statistics (see Chi2003Ba)
            %a single global value replicated over the frames
            x = (kurtosis(s)/(var(s)^2))*ones(1,fix(length(s)/par.hopsize)-1);
        case 'zcr'
            x = zcr(s,par.fft_size,par.hopsize,par.fs)';                    %zero crossing rate
        case 'sro'
            x = SpectralRollOff(s,par.fft_size,par.hopsize,0.80,par.fs);    %spectral roll-off
        case 'scn'
            x = SpectralCentroid(s,par.fft_size,par.hopsize,par.fs)';       %spectral centroid
        case 'sfl'
            x = SpectralFlux(s,par.fft_size,par.hopsize,par.fs)';           %spectral flux
        case 'lpc'
            x = LPCFeatures(s,par);                                         %LPC features
        case 'mpf'
            x = GaborFeatures(s,par);                                       %Gabor features
        otherwise
            %fail loudly instead of silently re-using the previous feature
            if ischar(feaNam{iFea})
                error('ComputeFeatures:unknownFeature', ...
                    'Unknown feature name ''%s''.', feaNam{iFea});
            else
                error('ComputeFeatures:unknownFeature', ...
                    'Feature names must be strings (entry %d is not).', iFea);
            end
    end
    feaSiz = [feaSiz, size(x,1)]; %#ok<AGROW>
    fea = [fea; x]; %#ok<AGROW>
end
|
daniele@0
|
72
|
daniele@0
|
function s = preprocessAudio(wav,par)
% Reads an audio file and prepares it for feature extraction.
%
% Steps: read the file, subsample it towards par.fs by keeping every k-th
% sample, average the channels to mono, optionally apply a pre-emphasis
% filter, and normalize the peak amplitude to 1.
%
% Input
%   -wav: file name of the audio file
%   -par: parameter struct; reads .fs (target sampling frequency) and
%         .usePreEmphasis (logical)
% Output
%   -s: column vector with the preprocessed mono signal

% wavread was removed from recent MATLAB releases; prefer audioread when
% it is available and fall back to wavread on older installations.
if exist('audioread','file')
    [s, fs] = audioread(wav);           %read file
else
    [s, fs] = wavread(wav); %#ok<DWVRD> %read file (legacy MATLAB)
end

% Subsample by an integer step derived from the actual file rate (step 2
% for a 44.1kHz file and the default par.fs of 22.05kHz). Files already at
% par.fs are left untouched instead of being blindly halved.
% NOTE: samples are dropped without an anti-aliasing filter.
step = max(1, round(fs/par.fs));
if step > 1, s = s(1:step:end,:); end
if abs(fs/step - par.fs) > eps(par.fs)
    warning('ComputeFeatures:samplingRate', ...
        'Signal rate after subsampling is %g Hz but par.fs is %g Hz.', ...
        fs/step, par.fs);
end

if size(s,2) > 1, s = mean(s,2); end    %convert to mono

if par.usePreEmphasis   %apply pre-emphasis filter that highlights high frequencies
    h = [1, -15/16];    %see Fundamentals of speech processing (Rabiner, Juang)
    s = filter(h,1,s);
end

% Normalize audio; skip silent (all-zero) signals to avoid a 0/0 division
% that would fill the signal with NaNs.
peak = max(abs(s));
if peak > 0, s = s/peak; end
|
daniele@0
|
82
|
daniele@0
|
function dmfcc = derivative(mfcc)
% First derivative of each row of mfcc, estimated with a central
% difference: dmfcc(r,k) = (mfcc(r,k+1) - mfcc(r,k-1))/2. Each row is
% extended by repeating its first and last sample, so the output has the
% same size as the input.

dmfcc = zeros(size(mfcc));              %mfccs 1st derivative
if size(mfcc,1) == 0, return, end       %no rows: nothing to differentiate

kernel = [1/2, 0, -1/2];                        %central difference kernel
padded = [mfcc(:,1), mfcc, mfcc(:,end)];        %replicate the edge samples
filtered = conv2(padded, kernel, 'same');       %row-wise convolution of all rows at once
dmfcc(:,:) = filtered(:,2:end-1);               %drop the two padding columns
|
daniele@0
|
89
|
daniele@0
|
function [fea, feaNam, feaSizes] = unitTest
% Smoke test: computes the default feature set for the file 'bus01.wav',
% which must be reachable from the current folder or the MATLAB path.
%
% Output
%   -fea, feaNam, feaSizes: the three outputs of ComputeFeatures, in order

clc, close all      %clear the command window and close open figures
file = 'bus01.wav';
% Request all three outputs in the order ComputeFeatures returns them
% (features, names, sizes) so that every declared output gets assigned.
[fea, feaNam, feaSizes] = ComputeFeatures(file);