diff eventdetection.m @ 0:2fadb31a9d55 tip

Import code by Vuegen et al
author Dan Stowell <dan.stowell@elec.qmul.ac.uk>
date Fri, 11 Oct 2013 12:02:43 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eventdetection.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,109 @@
+function eventdetection(dirFuncs, dirInput,nameInput,dirOutput,nameOutput,COindex)
+%EVENTDETECTION Detect and classify acoustic events in one audio file.
+%   Frames whose C0 (row 14 of features.mfcc_static) exceeds a threshold
+%   are smoothed into event segments. Each segment gets the class whose
+%   smoothed GMM posterior has the highest mean over the segment, and the
+%   resulting piano roll is passed to eventBased().
+%
+% Input parameters
+%       dirFuncs:   directory that contains the 'functions' folder; its
+%                   subfolders challange, fe_funcs and funcsMobilab are
+%                   added to the MATLAB path
+%       dirInput:   directory containing the audio file to analyse
+%                   e.g. 'D:\Projects\AASP\Datasets'
+%       nameInput:  file name of the audio file
+%                   e.g. 'test01.wav'
+%       dirOutput:  directory handed to eventBased() for the output file
+%                   e.g. 'D:\Projects\AASP\Output'
+%       nameOutput: output file name handed to eventBased()
+%                   e.g. 'outputTest01.txt'
+%       COindex:    selects the C0 threshold from minC0
+%                   Use 1 for Office Life
+%                   Use 2 for Office Synthetic with a SNR of -6
+%                   Use 3 for Office Synthetic with a SNR of 0
+%                   Use 4 for Office Synthetic with a SNR of 6
+% The models are read from <pwd>/acousticModel/GMM.mat, so the function
+% must be called from the directory that holds 'acousticModel'.
+%% Add paths to the functions
+addpath([dirFuncs filesep 'functions' filesep 'challange']);
+addpath([dirFuncs filesep 'functions' filesep 'fe_funcs']);
+addpath([dirFuncs filesep 'functions' filesep 'funcsMobilab']);
+%% Load the GMMs
+% GMM.mat must supply gmm_class_mfcc_feat, gmm_silence_mfcc_feat, audioconf
+load([pwd filesep 'acousticModel' filesep 'GMM.mat']);
+%% Thresholds
+nrStages = 5;
+minC0 = [-189.5 -35 -45 -95];   % C0 threshold per COindex value
+%% Load the audio file
+dirDataName = [dirInput filesep nameInput];
+if exist('audioread','file')
+    [x_develop, fs] = audioread(dirDataName);
+else    % MATLAB releases that predate audioread
+    [x_develop, fs] = wavread(dirDataName);
+end
+disp(['Stage 1 of ' num2str(nrStages) ' is completed']);
+%% Downsample to new_fs = 16 kHz
+new_fs=16000;
+x_develop = resample(x_develop, new_fs, fs);
+%% Extract the features
+features=feature_extraction(x_develop, audioconf);
+disp(['Stage 2 of ' num2str(nrStages) ' is completed']);
+%% Determine where C0 > minimum
+C0 = features.mfcc_static(14,:);
+nrFrames = length(C0);
+frames=(C0>minC0(COindex));
+%% Moving average filtering of the activity flags
+minWindowFrames=50;
+b_mov_avg=ones(minWindowFrames,1)/minWindowFrames;
+frames_filt=fftfilt(b_mov_avg,double(frames));
+% 1 only where the last minWindowFrames frames were all active
+frames_filt_ones=double((frames_filt>=0.999));
+% First difference: +1 marks a rising edge (an event), -1 a falling edge
+edges=fftfilt([1 -1],frames_filt_ones);
+indRisingEdges=find(edges>=0.999);
+indFallingEdges=find(edges<=-0.999);
+nrEdges=numel(indRisingEdges);
+% An event still active in the last frame has no falling edge; close it
+% at the final frame instead of indexing past the end of indFallingEdges.
+indFallingEdges(end+1:nrEdges)=nrFrames;
+% Event start is moved back by the filter delay of minWindowFrames-1 frames
+eventFrames=[indRisingEdges(:)-minWindowFrames+1, indFallingEdges(:)];
+disp(['Stage 3 of ' num2str(nrStages) ' is completed']);
+%% Compare the recording with all the GMMs (posteriorgram)
+% One row per event class plus a final row for the silence model
+nrClasses = size(gmm_class_mfcc_feat,1);
+likelihood = zeros(nrClasses+1,nrFrames);
+for classNr=1:nrClasses
+    likelihood(classNr,:) = pdf(gmm_class_mfcc_feat{classNr},features.mfcc_d_dd');
+end
+likelihood(nrClasses+1,:) = pdf(gmm_silence_mfcc_feat,features.mfcc_d_dd');
+% Normalise every frame (column) so that it sums to one
+posterior=bsxfun(@rdivide,likelihood,sum(likelihood,1));
+%% Moving average filtering of the posteriorgram
+% Half of the minimum duration (in seconds) per class is the filter length
+minDuration = [0.325 0.3599 0.3612 0.4448 0.7662 1.026 0.725 0.4601 0.5899 0.2379 0.7102 0.243 4.2318 6.1003 0.487 0.0579 0.0579]/2;
+frameLen = audioconf.framelen_ms/1000;
+frameStep = audioconf.framestep_ms/1000;
+% Convert to whole frames; at least 1 so the filter kernel is never empty
+numFrames = max(floor((minDuration-frameLen+frameStep) / frameStep), 1);
+likelihoodFilt = zeros(nrClasses+1,nrFrames);
+for classNr=1:nrClasses+1
+    b=ones(numFrames(classNr),1)/numFrames(classNr);
+    likelihoodFilt(classNr,:) = fftfilt(b,posterior(classNr,:));
+end
+% Make sure no complex values occur (some have a very small imaginary part)
+likelihoodFilt = abs(likelihoodFilt);
+% Back to a posteriorgram
+posteriorFilt=bsxfun(@rdivide,likelihoodFilt,sum(likelihoodFilt,1));
+disp(['Stage 4 of ' num2str(nrStages) ' is completed']);
+%% Classify each detected event and build the piano roll
+pianorollClassification=zeros(size(posteriorFilt));
+for detectionNr=1:nrEdges
+    span = eventFrames(detectionNr,1):eventFrames(detectionNr,2);
+    [~, ID] = max(mean(posteriorFilt(:,span),2));
+    pianorollClassification(ID,span)=1;
+end
+% Hand the piano roll to eventBased together with the output name/directory
+eventBased(pianorollClassification,nameOutput,dirOutput, audioconf);
+disp(['Stage 5 of ' num2str(nrStages) ' is completed']);
+end
\ No newline at end of file