Mercurial > hg > dcase2013_ed_vuegenetal
comparison eventdetection.m @ 0:2fadb31a9d55 tip
Import code by Vuegen et al
author | Dan Stowell <dan.stowell@elec.qmul.ac.uk> |
---|---|
date | Fri, 11 Oct 2013 12:02:43 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2fadb31a9d55 |
---|---|
function eventdetection(dirFuncs, dirInput,nameInput,dirOutput,nameOutput,COindex)
%EVENTDETECTION Detect and classify acoustic events in one audio file.
%
% The audio file is resampled to 16 kHz, features are extracted, active
% regions are found by thresholding one static MFCC row, every region is
% labelled with the class whose smoothed GMM posterior is highest on
% average, and the resulting piano roll is handed to eventBased().
%
% Input parameters
%   dirFuncs:   directory that CONTAINS the 'functions' folder; the
%               sub-folders 'functions/challange', 'functions/fe_funcs' and
%               'functions/funcsMobilab' below it are added to the path
%               e.g. 'D:\Projects\AASP'
%   dirInput:   directory holding the audio file to analyse
%               e.g. 'D:\Projects\AASP\Datasets'
%   nameInput:  file name of the audio file
%               e.g. 'test01.wav'
%   dirOutput:  directory where the output file should be saved
%               e.g. 'D:\Projects\AASP\Output'
%   nameOutput: name of the output file
%               e.g. 'outputTest01.txt'
%   COindex:    selects the activity threshold (index into minC0); a
%               numeric scalar or its text form (e.g. '2')
%               Use 1 for Office Live
%               Use 2 for Office Synthetic with a SNR of -6
%               Use 3 for Office Synthetic with a SNR of 0
%               Use 4 for Office Synthetic with a SNR of 6
%
% The acoustic model is read from 'acousticModel/GMM.mat' relative to the
% CURRENT working directory (pwd), not relative to dirFuncs.

%% Thresholds and argument validation
nrStages = 5;
minC0 = [-189.5 -35 -45 -95];
% Accept numeric text so the function can also be driven with string
% arguments (previously this produced an index-out-of-bounds error).
if ischar(COindex)
    COindex = str2double(COindex);
end
if ~isscalar(COindex) || ~ismember(COindex, 1:numel(minC0))
    error('eventdetection:invalidCOindex', ...
        'COindex must be an integer between 1 and %d.', numel(minC0));
end

%% Add paths to the functions
funcRoot = [dirFuncs filesep 'functions'];
addpath([funcRoot filesep 'challange']);
addpath([funcRoot filesep 'fe_funcs']);
addpath([funcRoot filesep 'funcsMobilab']);

%% Load the GMMs
% The rest of this function uses gmm_class_mfcc_feat, gmm_silence_mfcc_feat
% and audioconf without assigning them, so they are expected to be
% variables stored in this MAT-file.
load([pwd filesep 'acousticModel' filesep 'GMM.mat']);

%% Load the audio file
dirDataName = [dirInput filesep nameInput];
% wavread was removed from recent MATLAB releases; prefer audioread when it
% exists and fall back to wavread on old installations.
if exist('audioread') ~= 0 %#ok<EXIST>
    [x_develop, fs] = audioread(dirDataName);
else
    [x_develop, fs] = wavread(dirDataName);
end
disp(['Stage 1 of ' num2str(nrStages) ' is completed']);

%% Apply downsampling to new_fs = 16 kHz
new_fs = 16000;
x_develop = resample(x_develop, new_fs, fs);

%% Extract the features
features = feature_extraction(x_develop, audioconf);
disp(['Stage 2 of ' num2str(nrStages) ' is completed']);

%% Determine where C0 > minimum
% Row 14 of the static MFCC matrix is the coefficient used as C0 here.
C0 = features.mfcc_static(14,:);
nrFrames = length(C0);
frames = (C0 > minC0(COindex));

%% Moving average filtering of the activity flags
% A frame counts as active only after minWindowFrames consecutive frames
% above the threshold (the moving average then reaches 1).
minWindowFrames = 50;
b_mov_avg = ones(minWindowFrames,1)/minWindowFrames;
frames_filt = fftfilt(b_mov_avg, double(frames));
frames_filt_ones = double(frames_filt >= 0.999);

% First difference: +1 marks a rising edge, -1 a falling edge.
% Each rising edge corresponds to one event.
b_edges = [1 -1];
edges = fftfilt(b_edges, frames_filt_ones);
indRisingEdges = find(edges >= 0.999);
indFallingEdges = find(edges <= -0.999);
nrEdges = numel(indRisingEdges);

% An event that is still active at the last frame has no falling edge.
% Close it at the final frame instead of indexing past the end of
% indFallingEdges.
if numel(indFallingEdges) < nrEdges
    indFallingEdges(end+1) = nrFrames;
end

% eventFrames(k,:) = [first frame, last frame] of event k. The start is
% moved back by the filter delay (minWindowFrames-1 frames).
% NOTE(review): the end index is the falling-edge frame itself, i.e. the
% first frame flagged inactive; kept as in the original implementation.
eventFrames = zeros(nrEdges,2);
for edgeNr = 1:nrEdges
    eventFrames(edgeNr,:) = [indRisingEdges(edgeNr)-minWindowFrames+1, ...
                             indFallingEdges(edgeNr)];
end
disp(['Stage 3 of ' num2str(nrStages) ' is completed']);

%% Compare the audio with all the GMMs (posteriorgram)
nrClasses = size(gmm_class_mfcc_feat,1);
silenceRow = nrClasses + 1;
observations = features.mfcc_d_dd';
% One row per event class plus one row for the silence model.
likelihood = zeros(nrClasses+1, nrFrames);
for classNr = 1:nrClasses
    likelihood(classNr,:) = pdf(gmm_class_mfcc_feat{classNr}, observations);
end
likelihood(silenceRow,:) = pdf(gmm_silence_mfcc_feat, observations);
% Go to posteriors: normalise every frame (column) to sum to one.
posterior = bsxfun(@rdivide, likelihood, sum(likelihood,1));

%% Apply a moving average filter on the posteriorgram
% Minimum duration per class in seconds, divided by 2, used as filter
% length (last entry belongs to the silence row).
minDuration = [0.325 0.3599 0.3612 0.4448 0.7662 1.026 0.725 0.4601 0.5899 0.2379 0.7102 0.243 4.2318 6.1003 0.487 0.0579 0.0579]/2;
if numel(minDuration) < nrClasses+1
    error('eventdetection:minDurationMismatch', ...
        'The model has %d classes plus silence but only %d minimum durations are defined.', ...
        nrClasses, numel(minDuration));
end
numFrames = floor((minDuration-(audioconf.framelen_ms/1000)+(audioconf.framestep_ms/1000)) / (audioconf.framestep_ms/1000));
% Guard against a zero (or negative) filter length for very short
% durations, which would give an empty filter and a division by zero.
numFrames = max(numFrames, 1);

likelihoodFilt = zeros(nrClasses+1, nrFrames);
for classNr = 1:nrClasses+1
    b = ones(numFrames(classNr),1)/numFrames(classNr);
    likelihoodFilt(classNr,:) = fftfilt(b, posterior(classNr,:));
end
% Make sure no complex values occur (some have a very small imaginary value).
likelihoodFilt = abs(likelihoodFilt);
% Back to a posteriorgram.
posteriorFilt = bsxfun(@rdivide, likelihoodFilt, sum(likelihoodFilt,1));
disp(['Stage 4 of ' num2str(nrStages) ' is completed']);

%% Classify every detected event and build the piano roll
% Each event gets the class with the highest mean smoothed posterior over
% its frames; that class row is set to 1 for the event's frame range.
pianorollClassification = zeros(size(posteriorFilt));
for detectionNr = 1:nrEdges
    eventRange = eventFrames(detectionNr,1):eventFrames(detectionNr,2);
    [ignore, ID] = max(mean(posteriorFilt(:,eventRange),2)); %#ok<ASGLU>
    pianorollClassification(ID, eventRange) = 1;
end

%% Write the text file required by the challenge
eventBased(pianorollClassification,nameOutput,dirOutput, audioconf);
disp(['Stage 5 of ' num2str(nrStages) ' is completed']);
end