view eventdetection.m @ 0:2fadb31a9d55 tip

Import code by Vuegen et al
author Dan Stowell <dan.stowell@elec.qmul.ac.uk>
date Fri, 11 Oct 2013 12:02:43 +0100
parents
children
line wrap: on
line source
function eventdetection(dirFuncs, dirInput,nameInput,dirOutput,nameOutput,COindex)
%% Readme of the eventdetection function
% Detects events in one audio file by thresholding C0, labels every
% detected event with the best-scoring GMM class (smoothed posteriorgram)
% and hands the resulting piano roll to eventBased() together with
% nameOutput and dirOutput.
%
% Input parameters
%       dirFuncs:   should link to the directory with the toolboxes
%                   e.g. 'D:\Projects\AASP\functions'
%                   (the sub-folders 'functions/challange',
%                   'functions/fe_funcs' and 'functions/funcsMobilab'
%                   below this directory are added to the path)
%       dirInput:   should be the directory linking to the test scripts
%                   e.g. 'D:\Projects\AASP\Datasets'
%       nameInput:  is the name of the test set
%                   e.g. 'test01.wav'
%       dirOutput:  is the directory where the output file should be saved
%                   e.g. 'D:\Projects\AASP\Output'
%       nameOutput: is the name of the output files
%                   e.g. 'outputTest01.txt'.
%       COindex:    determines the loaded C0 threshold (integer 1..4; a
%                   numeric string such as '2' is accepted as well)
%                   Use 1 for Office Life
%                   Use 2 for Office Synthetic with a SNR of -6
%                   Use 3 for Office Synthetic with a SNR of 0
%                   Use 4 for Office Synthetic with a SNR of 6
%
% Side effects
%       Modifies the MATLAB path, loads 'acousticModel/GMM.mat' relative to
%       the current working directory and prints progress to the console.
%% Add paths to the functions
addpath([dirFuncs filesep 'functions' filesep 'challange']);
addpath([dirFuncs filesep 'functions' filesep 'fe_funcs']);
addpath([dirFuncs filesep 'functions' filesep 'funcsMobilab']);
%% Load the GMMs
% NOTE(review): the MAT-file is expected to provide gmm_class_mfcc_feat,
% gmm_silence_mfcc_feat and audioconf, which are used below but not
% defined in this function - confirm against the model file.
load([pwd filesep 'acousticModel' filesep 'GMM.mat']);
%% Load the thresholds
nrStages = 5;
minC0 = [-189.5 -35 -45 -95];
% Accept a numeric string (e.g. when called from a command line) and fail
% early with a clear message instead of an indexing error further down.
if ischar(COindex)
    COindex = str2double(COindex);
end
if ~isscalar(COindex) || ~(COindex >= 1 && COindex <= numel(minC0)) || COindex ~= floor(COindex)
    error('eventdetection:invalidCOindex', ...
        'COindex must be an integer between 1 and %d.', numel(minC0));
end
%% Load the development audio file
dirDataName = [dirInput filesep nameInput];
% wavread is not available in recent MATLAB releases; prefer audioread and
% only fall back to wavread on installations that predate audioread.
if exist('audioread', 'file') || exist('audioread', 'builtin')
    [x_develop, fs] = audioread(dirDataName);
else
    [x_develop, fs] = wavread(dirDataName);
end
disp(['Stage 1 of ' num2str(nrStages) ' is completed']);
%% Apply a downsampling to new_fs=16kHz
new_fs = 16000;
x_develop = resample(x_develop, new_fs, fs);
fs = new_fs;
%% Extract the features
features = feature_extraction(x_develop, audioconf);
disp(['Stage 2 of ' num2str(nrStages) ' is completed']);
%% Determine where C0 > minimum
% Row 14 of the static MFCC matrix is used as C0.
C0 = features.mfcc_static(14,:);
nrFrames = numel(C0);
frames = (C0 > minC0(COindex));
%% Do moving average filtering on the indices
minWindowFrames = 50;
% Filter coefficients
b_mov_avg = ones(minWindowFrames,1)/minWindowFrames;
frames_filt = fftfilt(b_mov_avg, double(frames));
% Seek for ones and go to doubles: a frame is 1 only when the last
% minWindowFrames frames were all above the threshold (0.999 absorbs the
% rounding noise of the FFT-based filter).
frames_filt_ones = double(frames_filt >= 0.999);
% Seek rising edges for compensating the filter delay
% Each rising edge corresponds to an event
b_edges = [1 -1];
edges = fftfilt(b_edges, frames_filt_ones);
indRisingEdges = find(edges >= 0.999);
indFallingEdges = find(edges <= -0.999);
nrEdges = numel(indRisingEdges);
% An event that is still active in the last frame has no falling edge.
% Close it at the last frame so every rising edge has a partner; without
% this the pairing below indexes past the end of indFallingEdges.
if numel(indFallingEdges) < nrEdges
    indFallingEdges(end+1) = nrFrames;
end
% Preallocation of variable eventFrames for speed
eventFrames = zeros(nrEdges,2);
for edgeNr = 1:nrEdges
    % The moving average reaches 1 only minWindowFrames-1 frames after
    % the event started, so shift the start back by the filter delay.
    firstFrame = indRisingEdges(edgeNr) - minWindowFrames + 1;
    frames_filt_ones(firstFrame:indRisingEdges(edgeNr)) = 1;
    eventFrames(edgeNr,:) = [firstFrame indFallingEdges(edgeNr)];
end
indSilence = find(~frames_filt_ones);
disp(['Stage 3 of ' num2str(nrStages) ' is completed']);
%% Compare the development script with all the GMMs (posteriorgram)
nrClasses = size(gmm_class_mfcc_feat,1);
% The silence model is stored in the row after the last event class.
silenceNr = nrClasses + 1;
% Preallocation of variables for speed (event classes + silence)
likelihood = zeros(silenceNr, nrFrames);
% Loop over the classes and determine likelihood
for classNr = 1:nrClasses
    likelihood(classNr,:) = pdf(gmm_class_mfcc_feat{classNr}, features.mfcc_d_dd');
end
% Compare with the silence class GMM
likelihood(silenceNr,:) = pdf(gmm_silence_mfcc_feat, features.mfcc_d_dd');
labelsClass{silenceNr} = 'silence';
% Go to posteriors
posterior = bsxfun(@rdivide, likelihood, sum(likelihood,1));
%% Apply a moving average filtering on the posteriorgram
% Preallocation for speed (event classes + silence)
likelihoodFilt = zeros(silenceNr, nrFrames);
% Min duration (in seconds) divided by 2 as filter lengths, one entry per
% row of the posteriorgram
minDuration = [0.325 0.3599 0.3612 0.4448 0.7662 1.026 0.725 0.4601 0.5899 0.2379 0.7102 0.243 4.2318 6.1003 0.487 0.0579 0.0579]/2;
if numel(minDuration) < silenceNr
    error('eventdetection:classCountMismatch', ...
        'The model has %d classes (incl. silence) but only %d minimum durations are defined.', ...
        silenceNr, numel(minDuration));
end
% Convert the durations to a number of frames. Clamp to at least one frame:
% a zero-length filter would be empty and make fftfilt fail.
numFrames = floor((minDuration-(audioconf.framelen_ms/1000)+(audioconf.framestep_ms/1000)) / (audioconf.framestep_ms/1000));
numFrames = max(1, numFrames);
% Loop over the posteriorgram
for classNr = 1:silenceNr
    b = ones(numFrames(classNr),1)/numFrames(classNr);
    likelihoodFilt(classNr,:) = fftfilt(b, posterior(classNr,:));
end
% Make sure no complex values occur (some have a very small imaginary value)
likelihoodFilt = abs(likelihoodFilt);
% Back to posteriorgram
posteriorFilt = bsxfun(@rdivide, likelihoodFilt, sum(likelihoodFilt,1));
disp(['Stage 4 of ' num2str(nrStages) ' is completed']);
%% Apply the thresholding on the moving averaged posteriorgram
posteriorFiltActive = posteriorFilt;
posteriorFiltActive(:,indSilence) = 0;
% Loop over the detected events: the class with the highest mean posterior
% over the event frames wins. Note that the unmasked posteriorFilt is used
% here, exactly as in the original implementation.
maxPosterior = zeros(1,nrEdges);
ID = zeros(1,nrEdges);
for detectionNr = 1:nrEdges
    eventRange = eventFrames(detectionNr,1):eventFrames(detectionNr,2);
    [maxPosterior(detectionNr), ID(detectionNr)] = max(mean(posteriorFilt(:,eventRange),2));
end
%% Go over to the AASP metrics
pianorollClassification = zeros(size(posteriorFiltActive));
for detectionNr = 1:nrEdges
    eventRange = eventFrames(detectionNr,1):eventFrames(detectionNr,2);
    pianorollClassification(ID(detectionNr),eventRange) = 1;
end
% Go to a text file complementary to the challenge requirements
eventBased(pianorollClassification,nameOutput,dirOutput, audioconf);
disp(['Stage 5 of ' num2str(nrStages) ' is completed']);
end