annotate eventdetection.m @ 0:2fadb31a9d55 tip

Import code by Vuegen et al
author Dan Stowell <dan.stowell@elec.qmul.ac.uk>
date Fri, 11 Oct 2013 12:02:43 +0100
parents
children
rev   line source
function eventdetection(dirFuncs, dirInput, nameInput, dirOutput, nameOutput, COindex)
%EVENTDETECTION Detect and classify acoustic events in one audio file.
%
% Pipeline (progress is printed after each of the 5 stages):
%   1. read the audio file
%   2. resample to 16 kHz and extract features
%   3. find active segments by thresholding C0 and smoothing the decision
%   4. score every frame against the class GMMs and the silence GMM, and
%      smooth the resulting posteriorgram per class
%   5. label each active segment with the class that has the highest mean
%      smoothed posterior and hand the piano roll to eventBased for writing
%
% Input parameters
%   dirFuncs:   directory that CONTAINS the 'functions' folder with the
%               toolboxes (the sub-folders 'functions/challange',
%               'functions/fe_funcs' and 'functions/funcsMobilab' are added
%               to the path), e.g. 'D:\Projects\AASP'
%   dirInput:   directory holding the audio file to analyse,
%               e.g. 'D:\Projects\AASP\Datasets'
%   nameInput:  file name of the audio file, e.g. 'test01.wav'
%   dirOutput:  directory where the output file should be saved,
%               e.g. 'D:\Projects\AASP\Output'
%   nameOutput: name of the output file, e.g. 'outputTest01.txt'
%   COindex:    selects the C0 activity threshold
%               1 = Office Live                        (-189.5)
%               2 = Office Synthetic with an SNR of -6 (-35)
%               3 = Office Synthetic with an SNR of  0 (-45)
%               4 = Office Synthetic with an SNR of  6 (-95)
%
% The acoustic models are loaded from <pwd>/acousticModel/GMM.mat, so the
% function has to be called from the directory that holds 'acousticModel'.

%% Thresholds and argument check
nrStages = 5;
minC0 = [-189.5 -35 -45 -95];
if ~isnumeric(COindex) || ~isscalar(COindex) || COindex ~= floor(COindex) ...
        || COindex < 1 || COindex > numel(minC0)
    error('eventdetection:badCOindex', ...
        'COindex must be an integer between 1 and %d.', numel(minC0));
end

%% Add paths to the functions
% NOTE: 'challange' is the actual folder name; do not "fix" the spelling.
addpath(fullfile(dirFuncs, 'functions', 'challange'));
addpath(fullfile(dirFuncs, 'functions', 'fe_funcs'));
addpath(fullfile(dirFuncs, 'functions', 'funcsMobilab'));

%% Load the GMMs
gmmFile = fullfile(pwd, 'acousticModel', 'GMM.mat');
if exist(gmmFile, 'file') ~= 2
    error('eventdetection:missingModel', ...
        'Acoustic model file not found: %s', gmmFile);
end
% Loaded straight into the workspace. The code below relies on
% gmm_class_mfcc_feat and gmm_silence_mfcc_feat from this file; audioconf is
% presumably stored here as well (it is not defined anywhere in this
% function) -- TODO confirm against the .mat contents.
load(gmmFile);

%% Load the audio file
dirDataName = fullfile(dirInput, nameInput);
if exist(dirDataName, 'file') ~= 2
    error('eventdetection:missingInput', ...
        'Input audio file not found: %s', dirDataName);
end
% wavread was removed from recent MATLAB releases; prefer audioread and
% only fall back to wavread on installations that predate it.
if exist('audioread', 'file') || exist('audioread', 'builtin')
    [x_develop, fs] = audioread(dirDataName);
else
    [x_develop, fs] = wavread(dirDataName);
end
disp(['Stage 1 of ' num2str(nrStages) ' is completed']);

%% Resample to new_fs = 16 kHz
new_fs = 16000;
x_develop = resample(x_develop, new_fs, fs);

%% Extract the features
features = feature_extraction(x_develop, audioconf);
disp(['Stage 2 of ' num2str(nrStages) ' is completed']);

%% Determine where C0 > minimum
C0 = features.mfcc_static(14, :);
nrFrames = length(C0);
frames = (C0 > minC0(COindex));

%% Moving-average filtering of the frame-wise activity decision
% A frame is kept as active only if the whole preceding window of
% minWindowFrames frames was above the threshold (average == 1).
minWindowFrames = 50;
b_mov_avg = ones(minWindowFrames, 1) / minWindowFrames;
frames_filt = fftfilt(b_mov_avg, double(frames));
frames_filt_ones = double(frames_filt >= 0.999);

%% Locate rising/falling edges; each rising edge starts one event
b_edges = [1 -1];
edges = fftfilt(b_edges, frames_filt_ones);
indRisingEdges  = reshape(find(edges >=  0.999), 1, []);
indFallingEdges = reshape(find(edges <= -0.999), 1, []);
nrEdges = numel(indRisingEdges);
% An event that is still active when the recording ends has no falling
% edge; close it at the last frame instead of indexing out of bounds.
if numel(indFallingEdges) < nrEdges
    indFallingEdges(end + 1) = nrFrames;
end
indFallingEdges = indFallingEdges(1:nrEdges);
% Shift the onset back by the filter length to compensate for the delay of
% the moving average (never before the first frame).
eventFrames = [max(1, indRisingEdges(:) - minWindowFrames + 1), indFallingEdges(:)];
disp(['Stage 3 of ' num2str(nrStages) ' is completed']);

%% Compare the recording with all the GMMs (posteriorgram)
nrClasses = size(gmm_class_mfcc_feat, 1);
silenceRow = nrClasses + 1;           % the silence model goes in the last row
likelihood = zeros(silenceRow, nrFrames);
observations = features.mfcc_d_dd';
for classNr = 1:nrClasses
    likelihood(classNr, :) = pdf(gmm_class_mfcc_feat{classNr}, observations);
end
likelihood(silenceRow, :) = pdf(gmm_silence_mfcc_feat, observations);
% Normalise per frame so that every column sums to one
posterior = bsxfun(@rdivide, likelihood, sum(likelihood, 1));

%% Moving-average filtering of the posteriorgram
% Half of a per-class duration (in seconds) is used as the filter length
minDuration = [0.325 0.3599 0.3612 0.4448 0.7662 1.026 0.725 0.4601 0.5899 ...
    0.2379 0.7102 0.243 4.2318 6.1003 0.487 0.0579 0.0579] / 2;
if numel(minDuration) < silenceRow
    error('eventdetection:classCountMismatch', ...
        'The model has %d classes (incl. silence) but only %d durations are defined.', ...
        silenceRow, numel(minDuration));
end
frameLen  = audioconf.framelen_ms / 1000;
frameStep = audioconf.framestep_ms / 1000;
% Durations shorter than one frame would give a zero/negative length and an
% empty filter; use at least one tap (i.e. no smoothing) in that case.
numFrames = max(1, floor((minDuration - frameLen + frameStep) / frameStep));

likelihoodFilt = zeros(silenceRow, nrFrames);
for classNr = 1:silenceRow
    b = ones(numFrames(classNr), 1) / numFrames(classNr);
    likelihoodFilt(classNr, :) = fftfilt(b, posterior(classNr, :));
end
% FFT-based filtering can leave a tiny imaginary part; take the magnitude
likelihoodFilt = abs(likelihoodFilt);
% Back to a posteriorgram
posteriorFilt = bsxfun(@rdivide, likelihoodFilt, sum(likelihoodFilt, 1));
disp(['Stage 4 of ' num2str(nrStages) ' is completed']);

%% Classify every detected event and build the piano roll
% Each event gets the class with the highest mean smoothed posterior over
% its frames; that class row is set to 1 for the whole event.
ID = zeros(1, nrEdges);
pianorollClassification = zeros(size(posteriorFilt));
for detectionNr = 1:nrEdges
    eventSpan = eventFrames(detectionNr, 1):eventFrames(detectionNr, 2);
    [ignore, ID(detectionNr)] = max(mean(posteriorFilt(:, eventSpan), 2)); %#ok<ASGLU>
    pianorollClassification(ID(detectionNr), eventSpan) = 1;
end

%% Write the text file in the format required by the challenge
eventBased(pianorollClassification, nameOutput, dirOutput, audioconf);
disp(['Stage 5 of ' num2str(nrStages) ' is completed']);
end