Mercurial > hg > dcase2013_ed_vuegenetal
view eventdetection.m @ 0:2fadb31a9d55 tip
Import code by Vuegen et al
author | Dan Stowell <dan.stowell@elec.qmul.ac.uk> |
---|---|
date | Fri, 11 Oct 2013 12:02:43 +0100 |
parents | |
children |
line wrap: on
line source
function eventdetection(dirFuncs, dirInput,nameInput,dirOutput,nameOutput,COindex)
%% Readme of the eventdetection function
% Detects acoustic events in one audio file, assigns each detected event to
% the class with the highest averaged posterior, and hands the resulting
% piano roll to eventBased() for output.
%
% Input parameters
%   dirFuncs:   should link to the directory with the toolboxes
%               e.g. 'D:\Projects\AASP\functions'
%               (the sub-folders 'functions/challange', 'functions/fe_funcs'
%               and 'functions/funcsMobilab' below it are added to the path)
%   dirInput:   should be the directory linking to the test scripts
%               e.g. 'D:\Projects\AASP\Datasets'
%   nameInput:  is the name of the test set
%               e.g. 'test01.wav'
%   dirOutput:  is the directory where the output file should be saved
%               e.g. 'D:\Projects\AASP\Output'
%   nameOutput: is the name of the output file
%               e.g. 'outputTest01.txt'
%   COindex:    determines the loaded C0 threshold
%               Use 1 for Office Life
%               Use 2 for Office Synthetic with a SNR of -6
%               Use 3 for Office Synthetic with a SNR of 0
%               Use 4 for Office Synthetic with a SNR of 6
%
% The function returns nothing; progress is printed with disp().

%% Add paths to the functions
addpath([dirFuncs filesep 'functions' filesep 'challange']);
addpath([dirFuncs filesep 'functions' filesep 'fe_funcs']);
addpath([dirFuncs filesep 'functions' filesep 'funcsMobilab']);

%% Load the GMMs
% NOTE(review): audioconf, gmm_class_mfcc_feat and gmm_silence_mfcc_feat are
% used below without being defined in this function; presumably they come
% from this MAT-file - confirm. The file is looked up relative to pwd.
load([pwd filesep 'acousticModel' filesep 'GMM.mat']);

%% Load the thresholds
nrStages = 5;
minC0 = [-189.5 -35 -45 -95];   % one threshold per COindex value (1..4)

%% Load the development audio file
dirDataName = [dirInput filesep nameInput];
% wavread is not available in newer MATLAB releases; use audioread when it
% exists and fall back to wavread on older installations.
if exist('audioread', 'file')
    [x_develop, fs] = audioread(dirDataName);
else
    [x_develop, fs] = wavread(dirDataName);
end
disp(['Stage 1 of ' num2str(nrStages) ' is completed']);

%% Apply a downsampling to new_fs = 16 kHz
new_fs = 16000;
x_develop = resample(x_develop, new_fs, fs);

%% Extract the features
features = feature_extraction(x_develop, audioconf);
disp(['Stage 2 of ' num2str(nrStages) ' is completed']);

%% Determine where C0 > minimum
C0 = features.mfcc_static(14,:);    % row 14 is used as C0
nrFrames = numel(C0);
frames = (C0 > minC0(COindex));

%% Do moving average filtering on the activity mask
minWindowFrames = 50;
b_mov_avg = ones(minWindowFrames,1)/minWindowFrames;
frames_filt = fftfilt(b_mov_avg, double(frames));
% The average only reaches 1 when all minWindowFrames frames of the window
% are active.
frames_filt_ones = double(frames_filt >= 0.999);

% Seek the edges; each rising edge corresponds to an event.
b_edges = [1 -1];
edges = fftfilt(b_edges, frames_filt_ones);
% edges is a row vector, so the linear indices are the frame indices.
indRisingEdges  = find(edges >=  0.999);
indFallingEdges = find(edges <= -0.999);
nrEdges = numel(indRisingEdges);

% An event that is still active in the last frame has no falling edge.
% Close it at the end of the recording instead of running out of bounds.
if numel(indFallingEdges) < nrEdges
    indFallingEdges(end+1:nrEdges) = nrFrames;
end

% Each row is [firstFrame lastFrame]. The start is moved back by the window
% length to compensate for the delay of the moving average filter.
eventFrames = [indRisingEdges(:) - minWindowFrames + 1, indFallingEdges(:)];
disp(['Stage 3 of ' num2str(nrStages) ' is completed']);

%% Compare the development script with all the GMMs (posteriorgram)
nrClasses = size(gmm_class_mfcc_feat,1);
% Rows 1..nrClasses hold the event classes, row nrClasses+1 the silence class.
likelihood = zeros(nrClasses+1, nrFrames);
for classNr = 1:nrClasses
    likelihood(classNr,:) = pdf(gmm_class_mfcc_feat{classNr}, features.mfcc_d_dd');
end
likelihood(nrClasses+1,:) = pdf(gmm_silence_mfcc_feat, features.mfcc_d_dd');

% Go to posteriors: normalise every frame (column) to sum to one.
posterior = bsxfun(@rdivide, likelihood, sum(likelihood,1));

%% Apply a moving average filtering on the posteriorgram
% Min duration divided by 2 as filter lengths, one entry per row of the
% posteriorgram (presumably in seconds - confirm).
minDuration = [0.325 0.3599 0.3612 0.4448 0.7662 1.026 0.725 0.4601 0.5899 0.2379 0.7102 0.243 4.2318 6.1003 0.487 0.0579 0.0579]/2;
if nrClasses+1 > numel(minDuration)
    error('eventdetection:classCount', ...
        'Only %d filter lengths are defined but %d classes (incl. silence) were loaded.', ...
        numel(minDuration), nrClasses+1);
end
frameLen  = audioconf.framelen_ms/1000;
frameStep = audioconf.framestep_ms/1000;
numFrames = floor((minDuration - frameLen + frameStep) / frameStep);
% A filter needs at least one tap; lengths <= 0 would give an empty filter.
numFrames = max(numFrames, 1);

likelihoodFilt = zeros(nrClasses+1, nrFrames);
for classNr = 1:nrClasses+1
    b = ones(numFrames(classNr),1)/numFrames(classNr);
    likelihoodFilt(classNr,:) = fftfilt(b, posterior(classNr,:));
end
% Make sure no complex values occur (some have a very small imaginary value)
likelihoodFilt = abs(likelihoodFilt);
% Back to a posteriorgram
posteriorFilt = bsxfun(@rdivide, likelihoodFilt, sum(likelihoodFilt,1));
disp(['Stage 4 of ' num2str(nrStages) ' is completed']);

%% Classify every detected event and build the piano roll
% Each event gets the class whose filtered posterior, averaged over the
% frames of the event, is the largest.
ID = zeros(1, nrEdges);
pianorollClassification = zeros(size(posteriorFilt));
for detectionNr = 1:nrEdges
    eventRange = eventFrames(detectionNr,1):eventFrames(detectionNr,2);
    [ignore, ID(detectionNr)] = max(mean(posteriorFilt(:,eventRange),2)); %#ok<ASGLU>
    pianorollClassification(ID(detectionNr), eventRange) = 1;
end

% Go to a text file complementary to the challenge requirements
eventBased(pianorollClassification,nameOutput,dirOutput, audioconf);
disp(['Stage 5 of ' num2str(nrStages) ' is completed']);
end