Mercurial > hg > dcase2013_ed_vuegenetal
diff eventdetection.m @ 0:2fadb31a9d55 tip
Import code by Vuegen et al
author | Dan Stowell <dan.stowell@elec.qmul.ac.uk> |
---|---|
date | Fri, 11 Oct 2013 12:02:43 +0100 |
parents | |
children |
line wrap: on
line diff
function eventdetection(dirFuncs, dirInput, nameInput, dirOutput, nameOutput, COindex)
%EVENTDETECTION Detect and classify acoustic events in a single audio file.
%
% Input parameters
%   dirFuncs:   base directory of the toolboxes. The sub-folders
%               <dirFuncs>/functions/challange, <dirFuncs>/functions/fe_funcs
%               and <dirFuncs>/functions/funcsMobilab are added to the path.
%   dirInput:   directory that contains the audio file to analyse
%                 e.g. 'D:\Projects\AASP\Datasets'
%   nameInput:  file name of the audio file
%                 e.g. 'test01.wav'
%   dirOutput:  directory where the output file should be saved
%                 e.g. 'D:\Projects\AASP\Output'
%   nameOutput: name of the output file
%                 e.g. 'outputTest01.txt'
%   COindex:    index (1..4) of the C0 activity threshold to use
%                 1 = Office Live
%                 2 = Office Synthetic with an SNR of -6
%                 3 = Office Synthetic with an SNR of 0
%                 4 = Office Synthetic with an SNR of 6
%
% The acoustic model is read from <pwd>/acousticModel/GMM.mat, which must
% provide the variables audioconf, gmm_class_mfcc_feat and
% gmm_silence_mfcc_feat.
%
% Processing stages (progress is printed after each one):
%   1. read the audio file
%   2. resample to 16 kHz and extract features
%   3. find active segments by thresholding C0 and smoothing
%   4. build a smoothed class posteriorgram from the GMMs
%   5. pick one class per detected segment and hand the piano roll to
%      eventBased (project function) for output

%% Thresholds and argument check
nrStages = 5;
minC0 = [-189.5 -35 -45 -95];
if ~isscalar(COindex) || ~isnumeric(COindex) || COindex ~= round(COindex) ...
        || COindex < 1 || COindex > numel(minC0)
    error('eventdetection:invalidCOindex', ...
        'COindex must be an integer between 1 and %d.', numel(minC0));
end

%% Add paths to the functions
addpath([dirFuncs filesep 'functions' filesep 'challange']);
addpath([dirFuncs filesep 'functions' filesep 'fe_funcs']);
addpath([dirFuncs filesep 'functions' filesep 'funcsMobilab']);

%% Load the GMMs
% Load into a struct so the variables this function depends on are explicit
% instead of being injected into the workspace by LOAD.
model = load([pwd filesep 'acousticModel' filesep 'GMM.mat']);
requiredVars = {'audioconf', 'gmm_class_mfcc_feat', 'gmm_silence_mfcc_feat'};
for varNr = 1:numel(requiredVars)
    if ~isfield(model, requiredVars{varNr})
        error('eventdetection:missingModelVariable', ...
            'GMM.mat does not contain the variable ''%s''.', requiredVars{varNr});
    end
end
audioconf = model.audioconf;
gmm_class_mfcc_feat = model.gmm_class_mfcc_feat;
gmm_silence_mfcc_feat = model.gmm_silence_mfcc_feat;

%% Load the audio file
dirDataName = [dirInput filesep nameInput];
% wavread is not available in current MATLAB releases; prefer audioread
% when it exists and fall back to wavread otherwise.
if exist('audioread', 'file')
    [x_develop, fs] = audioread(dirDataName);
else
    [x_develop, fs] = wavread(dirDataName);
end
disp(['Stage 1 of ' num2str(nrStages) ' is completed']);

%% Downsample to new_fs = 16 kHz
new_fs = 16000;
x_develop = resample(x_develop, new_fs, fs);

%% Extract the features
features = feature_extraction(x_develop, audioconf);
disp(['Stage 2 of ' num2str(nrStages) ' is completed']);

%% Determine where C0 > minimum
% Row 14 of the static MFCC matrix is used as C0.
C0 = features.mfcc_static(14,:);
nrFrames = length(C0);
frames = (C0 > minC0(COindex));

%% Moving-average filtering of the activity indicator
minWindowFrames = 50;
b_mov_avg = ones(minWindowFrames,1) / minWindowFrames;
frames_filt = fftfilt(b_mov_avg, double(frames));
% The average only reaches 1 after minWindowFrames consecutive active frames.
frames_filt_ones = double(frames_filt >= 0.999);

% Each rising edge of the smoothed indicator corresponds to one event; the
% edge positions are used below to compensate for the filter delay.
b_edges = [1 -1];
edges = fftfilt(b_edges, frames_filt_ones);
indRisingEdges = find(edges >= 0.999);
indFallingEdges = find(edges <= -0.999);
nrEdges = numel(indRisingEdges);
% An event that is still active in the last frame has no falling edge.
% Close it at the final frame instead of indexing past indFallingEdges.
if numel(indFallingEdges) < nrEdges
    indFallingEdges(end+1) = nrFrames;
end

% One row per event: [first frame, last frame]
eventFrames = zeros(nrEdges, 2);
for edgeNr = 1:nrEdges
    % Move the start back by the filter length to undo the filter delay.
    eventStart = indRisingEdges(edgeNr) - minWindowFrames + 1;
    frames_filt_ones(eventStart:indRisingEdges(edgeNr)) = 1;
    eventFrames(edgeNr,:) = [eventStart indFallingEdges(edgeNr)];
end
indSilence = find(ones(1, nrFrames) - frames_filt_ones);
disp(['Stage 3 of ' num2str(nrStages) ' is completed']);

%% Compare the recording with all the GMMs (posteriorgram)
nrClasses = size(gmm_class_mfcc_feat, 1);
nrModels = nrClasses + 1;            % event classes plus the silence model
observations = features.mfcc_d_dd';
likelihood = zeros(nrModels, nrFrames);
for classNr = 1:nrClasses
    likelihood(classNr,:) = pdf(gmm_class_mfcc_feat{classNr}, observations);
end
% The silence GMM occupies the last row.
likelihood(nrModels,:) = pdf(gmm_silence_mfcc_feat, observations);
% Go to posteriors
posterior = bsxfun(@rdivide, likelihood, sum(likelihood, 1));

%% Moving-average filtering of the posteriorgram
% Minimum duration divided by 2 is used as filter length (one entry per model).
minDuration = [0.325 0.3599 0.3612 0.4448 0.7662 1.026 0.725 0.4601 0.5899 0.2379 0.7102 0.243 4.2318 6.1003 0.487 0.0579 0.0579]/2;
if nrModels > numel(minDuration)
    error('eventdetection:tooManyClasses', ...
        'The model has %d classes (incl. silence) but only %d filter lengths are defined.', ...
        nrModels, numel(minDuration));
end
numFrames = floor((minDuration - (audioconf.framelen_ms/1000) + (audioconf.framestep_ms/1000)) / (audioconf.framestep_ms/1000));
% A filter needs at least one tap; very short durations could otherwise
% yield a length of zero (or less) and an empty filter.
numFrames = max(numFrames, 1);

likelihoodFilt = zeros(nrModels, nrFrames);
for classNr = 1:nrModels
    b = ones(numFrames(classNr),1) / numFrames(classNr);
    likelihoodFilt(classNr,:) = fftfilt(b, posterior(classNr,:));
end
% Make sure no complex values occur (some have a very small imaginary value)
likelihoodFilt = abs(likelihoodFilt);
% Back to a posteriorgram
posteriorFilt = bsxfun(@rdivide, likelihoodFilt, sum(likelihoodFilt, 1));
disp(['Stage 4 of ' num2str(nrStages) ' is completed']);

%% Apply the thresholding on the moving-averaged posteriorgram
% NOTE(review): posteriorFiltActive is only used for its size below; the
% per-event decision reads posteriorFilt, as in the original code. Confirm
% whether the masked version was meant to be used instead.
posteriorFiltActive = posteriorFilt;
posteriorFiltActive(:,indSilence) = 0;

% For every detected event pick the class with the highest mean posterior.
ID = zeros(1, nrEdges);
for detectionNr = 1:nrEdges
    frameRange = eventFrames(detectionNr,1):eventFrames(detectionNr,2);
    [unusedScore, ID(detectionNr)] = max(mean(posteriorFilt(:,frameRange), 2)); %#ok<ASGLU>
end

%% Go over to the AASP metrics
pianorollClassification = zeros(size(posteriorFiltActive));
for detectionNr = 1:nrEdges
    frameRange = eventFrames(detectionNr,1):eventFrames(detectionNr,2);
    pianorollClassification(ID(detectionNr), frameRange) = 1;
end
% Hand over to eventBased, which is expected to write the text file
% required by the challenge.
eventBased(pianorollClassification, nameOutput, dirOutput, audioconf);
disp(['Stage 5 of ' num2str(nrStages) ' is completed']);
end