Mercurial > hg > asc-c
view baseline.m @ 0:acfea2266c6d tip
Baseline classification system. Note that it needs the MA toolbox (for computation of MFCCs) and the pmtk3 toolbox.
author | Daniele Barchiesi <daniele.barchiesi@eecs.qmul.ac.uk> |
---|---|
date | Fri, 31 May 2013 12:25:30 +0100 |
parents | |
children |
line wrap: on
line source
function tesCat = baseline(train_files,test_files,out_name)
% BASELINE  Baseline ASC function using mfccs and gmms.
%
% Input:
%  - train_files: text file containing the paths to the training files.
%                 It is read as whitespace-separated tokens that alternate
%                 between a file path and its category label, so neither
%                 paths nor labels may contain whitespace.
%  - test_files:  text file containing the paths to the test files (one
%                 whitespace-separated token per file)
%  - out_name:    name of the text file, inside the 'results' folder, where
%                 the results are written (one "<path><TAB><category>" line
%                 per test file)
% Output:
%  - tesCat: cell of strings containing the estimated classes of the test
%            files
%
% Depends on ComputeFeatures, mixGaussFit and mixGaussLogprob, none of
% which are defined in this file.
% NOTE(review): mixGaussFit/mixGaussLogprob presumably come from pmtk3 and
% ComputeFeatures presumably relies on the MA toolbox - confirm.

%% Input parsing and parameters
% Open files, retrieve paths and training categories.
traTok = readTokens(train_files);
if mod(numel(traTok),2) ~= 0
    error('baseline:badTrainList', ...
        'Training list "%s" must contain path/category pairs (odd number of tokens found).', ...
        train_files);
end
nTraFil = numel(traTok)/2;
if nTraFil == 0
    error('baseline:emptyTrainList','Training list "%s" is empty.',train_files);
end
traPath = cell(nTraFil,1);
traCat = cell(nTraFil,1);
for i=1:nTraFil
    % Machine-specific path fix: every occurrence of 'import' in the path
    % is replaced by 'Volumes'.
    traPath{i} = regexprep(traTok{2*i-1},'import','Volumes');
    traCat{i} = traTok{2*i};
end

tesTok = readTokens(test_files);
nTesFil = numel(tesTok);
tesPath = cell(nTesFil,1);
tesCat = cell(nTesFil,1);
for i=1:nTesFil
    tesPath{i} = regexprep(tesTok{i},'import','Volumes');
end

% Set parameters
feaNam = {'mfcc'};                          %compute mfccs
% Note: struct() unwraps the 1x1 cell, so par.feaNam is the char array
% 'mfcc' and par stays a 1x1 struct.
par = struct('fs', 22050,...                %sampling frequency
    'fft_size', 1024,...                    %size of Fourier transform
    'hopsize', 512,...                      %overlap of Fourier transform windows
    'usePreEmphasis', false,...             %do not filter sound prior to feature extraction
    'feaNam',feaNam,...                     %extract mfcc coefficients
    'nGaussCom',5,...                       %use 5 Gaussian mixture components
    'empar', struct('nrandomRestarts',10,...%restart algorithm 10 times and pick the best model
    'maxIter',50,...                        %maximum number of iterations
    'verbose',false));

%% Training phase
% Extract features from each training signal
traFea = cell(nTraFil,1);
for iFil=1:nTraFil
    fprintf('\n Extracting %s features from file %s - (%s)...',feaNam{:},traPath{iFil},traCat{iFil});
    traFea{iFil} = ComputeFeatures(traPath{iFil},par);
    fprintf(' done!');
end

% Pre-process features by subtracting mean and dividing by standard
% deviation. Features are concatenated horizontally, so statistics are
% computed per row (feature dimension) across all training columns.
allTraFea = [traFea{:}];
muFea = mean(allTraFea,2);
sdFea = std(allTraFea,[],2);
% A constant feature dimension has zero deviation; divide by 1 instead so
% the normalised value is 0 rather than NaN/Inf.
sdFea(sdFea==0) = 1;
for iFil=1:nTraFil
    traFea{iFil} = bsxfun(@rdivide,bsxfun(@minus,traFea{iFil},muFea),sdFea);
end

% Compute statistical distribution of the features belonging to a given
% category using GMMs
staMod = 'GMMs';            %statistical model (used for logging only)
uniCat = unique(traCat);    %unique categories
nUniCat = length(uniCat);
models = cell(nUniCat,1);
for iUniCat=1:nUniCat
    fprintf('\n Computing statistical distribution of features from class %s using %s ...',uniCat{iUniCat},staMod);
    % Indexes of training files belonging to the iUniCat category.
    % NOTE(review): unique() is case-sensitive but strcmpi is not, so labels
    % differing only by case form separate classes trained on the same
    % files - confirm whether that is intended.
    idx = strcmpi(traCat,uniCat{iUniCat});
    traFeaMat = [traFea{idx}]';     %transpose: one row per feature column
    models{iUniCat} = mixGaussFit(traFeaMat,par.nGaussCom,par.empar);
    fprintf(' done!');
end

%% Test phase
% Repeat processing chain and compute the features extracted from test files
for iFil=1:nTesFil
    fprintf('\n Extracting %s features from file %s...',feaNam{:},tesPath{iFil});
    tesFea = ComputeFeatures(tesPath{iFil},par);
    % pre-process features using the mean and standard deviation values
    % computed on the training set
    tesFea = bsxfun(@rdivide,bsxfun(@minus,tesFea,muFea),sdFea);
    fprintf(' done!');

    % Compute likelihood scores: one summed score per class model.
    % NOTE(review): the highest score wins, so mixGaussLogprob is assumed to
    % return log-probabilities (not negative log-likelihoods) - confirm.
    scores = nan(length(models),1);
    for iModel=1:length(models)
        scores(iModel) = sum(mixGaussLogprob(models{iModel},tesFea'));
    end
    [~, ind] = max(scores);
    tesCat{iFil} = uniCat{ind};
end

%% Write results
outDir = 'results';
if ~exist(outDir,'dir')
    mkdir(outDir);
end
outPath = fullfile(outDir,out_name);
[fId,msg] = fopen(outPath,'w');
if fId < 0
    error('baseline:cannotOpenOutput','Cannot open "%s" for writing: %s',outPath,msg);
end
closeOut = onCleanup(@() fclose(fId)); %#ok<NASGU> closes the file even on error
for iFil=1:nTesFil
    % Paths and labels are passed as data, never as the format string, so
    % '%' or '\' characters in them are written verbatim.
    fprintf(fId,'%s\t%s\n',tesPath{iFil},tesCat{iFil});
end

function tokens = readTokens(listFile)
% Read all whitespace-separated tokens of a text file into a column cell
% array of strings, closing only the handle opened here.
[fid,msg] = fopen(listFile,'r');
if fid < 0
    error('baseline:cannotOpenList','Cannot open "%s": %s',listFile,msg);
end
closeIn = onCleanup(@() fclose(fid)); %#ok<NASGU> closes the file even on error
data = textscan(fid,'%s');
tokens = data{1};