function eventdetection(dirFuncs, dirInput, nameInput, dirOutput, nameOutput, COindex)
%EVENTDETECTION Detect and classify acoustic events in one audio file.
%
%   eventdetection(dirFuncs, dirInput, nameInput, dirOutput, nameOutput, COindex)
%
%   Pipeline (five stages, progress is printed with disp):
%     1. load the audio file,
%     2. resample to 16 kHz and extract features (feature_extraction),
%     3. find event segments by thresholding C0 and smoothing the decision,
%     4. score every frame against the class GMMs and the silence GMM and
%        smooth the resulting posteriorgram,
%     5. assign one class per detected segment and pass the piano roll to
%        eventBased, which (per the original author's note) produces the
%        text file required by the challenge.
%
%   Input parameters
%     dirFuncs   : directory that contains the 'functions' folder; the code
%                  adds <dirFuncs>/functions/challange, .../fe_funcs and
%                  .../funcsMobilab to the path.
%     dirInput   : directory that holds the audio file,
%                  e.g. 'D:\Projects\AASP\Datasets'
%     nameInput  : file name of the audio file, e.g. 'test01.wav'
%     dirOutput  : directory where the output file should be saved,
%                  e.g. 'D:\Projects\AASP\Output'
%     nameOutput : name of the output file, e.g. 'outputTest01.txt'
%     COindex    : selects which C0 threshold is used
%                    1 - Office Live
%                    2 - Office Synthetic with an SNR of -6
%                    3 - Office Synthetic with an SNR of 0
%                    4 - Office Synthetic with an SNR of 6
%
%   The acoustic models are loaded from <pwd>/acousticModel/GMM.mat, so the
%   function must be started from the directory containing 'acousticModel'.
%   NOTE(review): gmm_class_mfcc_feat, gmm_silence_mfcc_feat, audioconf and
%   labelsClass are never assigned in this function before use; presumably
%   they all come from GMM.mat - confirm against the file contents.

%% Add paths to the functions
addpath(fullfile(dirFuncs, 'functions', 'challange'));
addpath(fullfile(dirFuncs, 'functions', 'fe_funcs'));
addpath(fullfile(dirFuncs, 'functions', 'funcsMobilab'));

%% Load the GMMs (and whatever else GMM.mat provides, see header note)
load(fullfile(pwd, 'acousticModel', 'GMM.mat'));

%% Thresholds
nrStages = 5;
minC0 = [-189.5 -35 -45 -95];   % one C0 threshold per COindex value

%% Load the audio file
dirDataName = fullfile(dirInput, nameInput);
if exist('audioread', 'file')
    % wavread is no longer available in current MATLAB releases.
    [x_develop, fsOrig] = audioread(dirDataName);
else
    % Fallback for old releases that predate audioread.
    [x_develop, fsOrig] = wavread(dirDataName);
end
disp(['Stage 1 of ' num2str(nrStages) ' is completed']);

%% Downsample to 16 kHz
new_fs = 16000;
x_develop = resample(x_develop, new_fs, fsOrig);

%% Extract the features
features = feature_extraction(x_develop, audioconf);
disp(['Stage 2 of ' num2str(nrStages) ' is completed']);

%% Determine where C0 exceeds the selected minimum
C0 = features.mfcc_static(14, :);
nrFrames = length(C0);
frames = (C0 > minC0(COindex));

%% Moving-average filtering of the frame decisions
minWindowFrames = 50;
b_mov_avg = ones(minWindowFrames, 1) / minWindowFrames;
frames_filt = fftfilt(b_mov_avg, double(frames));
% A frame stays active only if the whole preceding window was above the
% threshold (average of ~1; 0.999 absorbs floating-point error).
frames_filt_ones = double(frames_filt >= 0.999);

% Rising edges mark event starts (delayed by the filter length), falling
% edges mark event ends.
b_edges = [1 -1];
edges = fftfilt(b_edges, frames_filt_ones);
[unusedRows, indRisingEdges]  = find(edges >= 0.999);  %#ok<ASGLU>
[unusedRows, indFallingEdges] = find(edges <= -0.999); %#ok<ASGLU>
nrEdges = size(indRisingEdges, 2);

% An event that is still active at the last frame has no falling edge.
% Close it at the last frame instead of indexing past indFallingEdges.
indFallingEdges(end+1:nrEdges) = nrFrames;

% One row per event: [firstFrame lastFrame]
eventFrames = zeros(nrEdges, 2);
for edgeNr = 1:nrEdges
    % Compensate the filter delay: the event really started
    % minWindowFrames-1 frames before the rising edge.
    firstFrame = indRisingEdges(edgeNr) - minWindowFrames + 1;
    frames_filt_ones(firstFrame:indRisingEdges(edgeNr)) = 1;
    eventFrames(edgeNr, :) = [firstFrame indFallingEdges(edgeNr)];
end
indSilence = find(ones(1, nrFrames) - frames_filt_ones);
disp(['Stage 3 of ' num2str(nrStages) ' is completed']);

%% Compare the recording with all the GMMs (posteriorgram)
nrClasses = size(gmm_class_mfcc_feat, 1);
silenceRow = nrClasses + 1;                 % extra row for the silence model
likelihood = zeros(silenceRow, nrFrames);
for classNr = 1:nrClasses
    likelihood(classNr, :) = pdf(gmm_class_mfcc_feat{classNr}, features.mfcc_d_dd');
end
likelihood(silenceRow, :) = pdf(gmm_silence_mfcc_feat, features.mfcc_d_dd');
labelsClass{silenceRow} = 'silence'; %#ok<NASGU>
% Normalise per frame to obtain posteriors
posterior = bsxfun(@rdivide, likelihood, sum(likelihood, 1));

%% Moving-average filtering of the posteriorgram
likelihoodFilt = zeros(silenceRow, nrFrames);
% Half of the minimum duration (seconds) per class is used as filter length
minDuration = [0.325 0.3599 0.3612 0.4448 0.7662 1.026 0.725 0.4601 ...
               0.5899 0.2379 0.7102 0.243 4.2318 6.1003 0.487 0.0579 0.0579] / 2;
frameLenSec  = audioconf.framelen_ms / 1000;
frameStepSec = audioconf.framestep_ms / 1000;
numFrames = floor((minDuration - frameLenSec + frameStepSec) / frameStepSec);
% Very short durations can floor to 0 (or below), which would give an empty
% filter and a 0/0 division; fall back to a 1-tap (identity) filter.
numFrames = max(numFrames, 1);
for classNr = 1:silenceRow
    b = ones(numFrames(classNr), 1) / numFrames(classNr);
    likelihoodFilt(classNr, :) = fftfilt(b, posterior(classNr, :));
end
% Make sure no complex values occur (some have a very small imaginary part)
likelihoodFilt = abs(likelihoodFilt);
% Back to a posteriorgram
posteriorFilt = bsxfun(@rdivide, likelihoodFilt, sum(likelihoodFilt, 1));
disp(['Stage 4 of ' num2str(nrStages) ' is completed']);

%% Classify each detected event
posteriorFiltActive = posteriorFilt;
posteriorFiltActive(:, indSilence) = 0;
% NOTE(review): the class decision below reads posteriorFilt, not the masked
% copy; the masked copy is only used for the piano-roll size. Kept as in the
% original - confirm this is intended.
ID = zeros(1, nrEdges);
for detectionNr = 1:nrEdges
    span = eventFrames(detectionNr, 1):eventFrames(detectionNr, 2);
    % Winning class = highest mean smoothed posterior over the event
    [unusedMax, ID(detectionNr)] = max(mean(posteriorFilt(:, span), 2)); %#ok<ASGLU>
end

%% Go over to the AASP metrics
pianorollClassification = zeros(size(posteriorFiltActive));
for detectionNr = 1:nrEdges
    span = eventFrames(detectionNr, 1):eventFrames(detectionNr, 2);
    pianorollClassification(ID(detectionNr), span) = 1;
end
% Hand over to eventBased for the challenge-format text output
eventBased(pianorollClassification, nameOutput, dirOutput, audioconf);
disp(['Stage 5 of ' num2str(nrStages) ' is completed']);
end