changeset 0:2fadb31a9d55 tip

Import code by Vuegen et al
author Dan Stowell <dan.stowell@elec.qmul.ac.uk>
date Fri, 11 Oct 2013 12:02:43 +0100
parents
children
files abstract/AbstractAASP.pdf acousticModel/GMM.mat eventdetection.m functions/challange/convertEventListToEventRoll.m functions/challange/eventBased.m functions/challange/eventDetectionMetrics_classWiseEventBased.m functions/challange/eventDetectionMetrics_eventBased.m functions/challange/eventDetectionMetrics_frameBased.m functions/challange/loadEventsList.m functions/fe_funcs/FE.m functions/fe_funcs/mel_matrix.m functions/fe_funcs/stacklp.m functions/fe_funcs/vec2featmat.m functions/funcsMobilab/feature_extraction.m functions/funcsMobilab/find_format.m functions/funcsMobilab/gmm_classification.m functions/funcsMobilab/readCSV.m functions/funcsMobilab/read_annotation_develop.m functions/funcsMobilab/read_data_develop.m readme/Readme.docx readme/Readme.pdf
diffstat 21 files changed, 949 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file abstract/AbstractAASP.pdf has changed
Binary file acousticModel/GMM.mat has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eventdetection.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,109 @@
+function eventdetection(dirFuncs, dirInput,nameInput,dirOutput,nameOutput,COindex)
+%% Readme of the eventdetection function
+% Input parameters
+%       dirFuncs:   should link to the directory with the toolboxes
+%                   e.g. 'D:\Projects\AASP\functions'
+%       dirInput:   should be the directory linking to the test scripts
+%                   e.g. 'D:\Projects\AASP\Datasets'
+%       nameInput:  is the name of the test set
+%                   e.g. 'test01.wav'
+%       dirOutput:  Is the directory where the output file shoud be saved
+%                   e.g. 'D:\Projects\AASP\Output'
+%       nameOutput: Is the name of the output files
+%                   e.g. 'outputTest01.txt'.
+%       C0index:    Determines the loaded threshold
+%                   Use 1 for Office Life
+%                   Use 2 for Office Synthetic with a SNR of -6
+%                   Use 3 for Office Synthetic with a SNR of 0
+%                   Use 4 for Office Synthetic with a SNR of 6%       
+%% add paths to the functions
+addpath([dirFuncs filesep 'functions' filesep 'challange']);
+addpath([dirFuncs filesep 'functions' filesep 'fe_funcs']);
+addpath([dirFuncs filesep 'functions' filesep 'funcsMobilab']);
+%% load the GMMs
+load([pwd filesep 'acousticModel' filesep 'GMM.mat']);
+%% Load the threholds
+nrStages = 5;
+minC0 = [-189.5 -35 -45 -95];
+%% Load the development audio file
+dirDataName = [dirInput filesep nameInput];
+[x_develop fs] = wavread(dirDataName);
+disp(['Stage 1 of ' num2str(nrStages) ' is completed']);
+%% Apply a downssampling to new_fs=16kHz
+new_fs=16000;
+x_develop = resample(x_develop, new_fs, fs);
+fs = new_fs;
+%% Extract the features
+features=feature_extraction(x_develop, audioconf);
+disp(['Stage 2 of ' num2str(nrStages) ' is completed']);
+%% Determine where C0 > minimum
+C0 = features.mfcc_static(14,:);
+frames=(C0>minC0(COindex));
+%% Do moving average filtering on the indices
+minWindowFrames=50;
+% Filter coefficients
+b_mov_avg=ones(minWindowFrames,1)/minWindowFrames;
+frames_filt=fftfilt(b_mov_avg,double(frames));
+% Seek for ones and go to doubles
+frames_filt_ones=double((frames_filt>=0.999));
+% Seek risig edges for compensating with the filter delay
+% Each rising edge corresponds to an event
+b_edges=[1 -1];
+edges=fftfilt(b_edges,frames_filt_ones);
+[ignore indRisingEdges]=find(edges>=0.999);
+[ignore indFallingEdges]=find(edges<=-0.999);
+ nrEdges=size(indRisingEdges,2);
+% Preallocation of variable eventFrames for speed
+eventFrames=zeros(nrEdges,2);
+    for(edgeNr=1:nrEdges)
+        frames_filt_ones(indRisingEdges(edgeNr)-minWindowFrames+1:indRisingEdges(edgeNr))=1;
+        eventFrames(edgeNr,:)=[indRisingEdges(edgeNr)-minWindowFrames+1 indFallingEdges(edgeNr)];
+    end,
+indEvents=find(frames_filt_ones);
+indSilence=find(ones(1,length(C0))-frames_filt_ones);
+disp(['Stage 3 of ' num2str(nrStages) ' is completed']);
+%% Compare the developpment script with all the GMMs (posteriorgram)
+% Preallcoation of variables for speed
+nrClasses = size(gmm_class_mfcc_feat,1);
+likelihood = zeros(nrClasses,length(C0));
+% Loop over the classes and determine likelihood
+    for(classNr=1:nrClasses)
+        likelihood(classNr,:) = pdf(gmm_class_mfcc_feat{classNr},features.mfcc_d_dd');
+    end,
+% Compare with the silence class GMM    
+likelihood(classNr+1,:) = pdf(gmm_silence_mfcc_feat,features.mfcc_d_dd');
+labelsClass{classNr+1}='silence';
+% Go to posteriors
+posterior=bsxfun(@rdivide,likelihood,sum(likelihood,1));
+%% Apply an moving average filtering on the posteriorgram
+% Preallocation for speed
+likelihoodFilt = zeros(nrClasses,length(C0));
+% Min duration devided by 2 as filterlengths
+minDuration = [0.325 0.3599 0.3612 0.4448 0.7662 1.026 0.725 0.4601 0.5899 0.2379 0.7102 0.243 4.2318 6.1003 0.487 0.0579 0.0579]/2;
+numFrames = floor((minDuration-(audioconf.framelen_ms/1000)+(audioconf.framestep_ms/1000)) / (audioconf.framestep_ms/1000));
+% Loop over the posteriorgram
+    for(classNr=1:nrClasses+1)
+        b=(ones(numFrames(classNr),1)/numFrames(classNr));
+        likelihoodFilt(classNr,:) = fftfilt(b,posterior(classNr,:));             
+    end,
+% Make sure no complex values occur (some have a very small imaginary valuee
+likelihoodFilt = abs(likelihoodFilt);
+% Back to posteriogram
+posteriorFilt=bsxfun(@rdivide,likelihoodFilt,sum(likelihoodFilt,1));
+disp(['Stage 4 of ' num2str(nrStages) ' is completed']);
+%% Apply the thresholding on the moving averaged posteriorgram
+posteriorFiltActive=posteriorFilt;
+posteriorFiltActive(:,indSilence)=0;
+% Loop over the detected events
+    for(detectionNr=1:nrEdges)
+        [ignore(detectionNr) ID(detectionNr)] = max(mean(posteriorFilt(:,eventFrames(detectionNr,1):eventFrames(detectionNr,2)),2));
+    end,
+%% Go over to the AASP metrics    
+pianorollClassification=zeros(size(posteriorFiltActive));
+for(detectionNr=1:nrEdges)
+    pianorollClassification(ID(detectionNr),eventFrames(detectionNr,1):eventFrames(detectionNr,2))=ones;
+end,
+% Go to a text file complementary to the challange requirements
+eventBased(pianorollClassification,nameOutput,dirOutput, audioconf);
+disp(['Stage 5 of ' num2str(nrStages) ' is completed']);
+end
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/challange/convertEventListToEventRoll.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,17 @@
+function [eventRoll] = convertEventListToEventRoll(onset,offset,classNames)
+
+
+% Initialize
+eventRoll = zeros(ceil(offset(length(offset))*100),16);
+eventID = {'alert','clearthroat','cough','doorslam','drawer','keyboard','keys',...
+    'doorknock','laughter','mouse','pageturn','pendrop','phone','printer','speech','switch'};
+
+
+% Fill-in eventRoll
+for i=1:length(onset)
+    
+    pos = strmatch(classNames{i}, eventID);
+    
+    eventRoll(floor(onset(i)*100):ceil(offset(i)*100),pos) = 1;
+    
+end;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/challange/eventBased.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,58 @@
+function eventBased(script, fileName, directory, conf)
+%%
+%MOBILAB 2012 - 2013
+%
+%   Input parameters
+%       -script:        Each row is a representation of a certain class
+%                       Each column contains the value (ones) of wich class is active at
+%                       that certain moment.
+%                       [N x M] matrix:
+%                           -N: nr classes
+%                           -M: nr of frames in the script
+%       -fileName:      Is the name of the output text file.
+%       -directory:     Is the location where the output text file must be written
+%       -conf:          Is a struct and contains information about the time of the
+%                       differrent frames
+%%
+fileID = fopen([directory filesep fileName],'a');
+
+%Determine the number of classes and the number of framees
+[nr_classes nr_frames] = size(script(1:16,:));
+%classnames
+eventID = {'alert','clearthroat','cough','doorslam','drawer','keyboard','keys',...
+            'doorknock','laughter','mouse','pageturn','pendrop','phone','printer','speech','switch'};
+k=1;
+
+for class=1:nr_classes
+    b = [1 -1];
+    changes = conv(script(class,:),b);                                      %Find transitions
+    changes = changes(1:length(script));                                    %Apply same lenght as script
+    
+    onset = find(changes==1);                                               %Find the 0-1 transitions // onset
+    offset = find(changes==-1);                                             %Find the 1-0 transitions // offset
+    
+    for(nr_transtion=1 : length(onset))
+        onsetOffset(k,1) = onset(nr_transtion)*(conf.framestep_ms/1000);                            
+        onsetOffset(k,2) = offset(nr_transtion)*(conf.framestep_ms/1000);                           
+        classifiedEvent{k,1} = eventID{1,class};
+        k=k+1;
+    end,    
+end,
+
+% Sort the data with respect to the onset times
+[data_sorted(:,1) I] = sort(onsetOffset(:,1));
+data_sorted(:,2) = onsetOffset(I,2);
+
+for (nr=1:length(data_sorted(:,1)))    
+fprintf(fileID,'%f',data_sorted(nr,1));                                    %Print onset time
+fprintf(fileID, '\t');                                                     %Print space
+fprintf(fileID,'%f',data_sorted(nr,2));                                    %Print offset time
+fprintf(fileID, '\t');                                                     %Print space
+fprintf(fileID,'%s',classifiedEvent{I(nr),1});
+fprintf(fileID,'\n');                                                      %Print newline
+end,    
+
+fclose(fileID);
+
+end
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/challange/eventDetectionMetrics_classWiseEventBased.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,80 @@
+function [results] = eventDetectionMetrics_classWiseEventBased(outputFile,GTFile)
+
+% Class-wise event-based evaluation for event detection task
+% outputFile: the output of the event detection system
+% GTFile: the ground truth list of events
+
+% Initialize
+eventID = {'alert','clearthroat','cough','doorslam','drawer','keyboard','keys',...
+    'knock','laughter','mouse','pageturn','pendrop','phone','printer','speech','switch'};
+
+
+% Load event list from output and ground-truth
+[onset,offset,classNames] = loadEventsList(outputFile);
+[onsetGT,offsetGT,classNamesGT] = loadEventsList(GTFile);
+
+
+% Total number of detected and reference events per class
+Ntot = zeros(16,1);
+for i=1:length(onset)
+    pos = strmatch(classNames{i}, eventID);
+    Ntot(pos) = Ntot(pos)+1;
+end;
+
+Nref = zeros(16,1);
+for i=1:length(onsetGT)
+    pos = strmatch(classNamesGT{i}, eventID);
+    Nref(pos) = Nref(pos)+1;
+end;
+I = find(Nref>0); % index for classes present in ground-truth
+
+
+% Number of correctly transcribed events per class, onset within a +/-100 ms range
+Ncorr = zeros(16,1);
+NcorrOff = zeros(16,1);
+for j=1:length(onsetGT)
+    for i=1:length(onset)
+        
+        if( strcmp(classNames{i},classNamesGT{j}) && (abs(onsetGT(j)-onset(i))<=0.1) )
+            pos = strmatch(classNames{i}, eventID);
+            Ncorr(pos) = Ncorr(pos)+1;
+            
+            % If offset within a +/-100 ms range or within 50% of ground-truth event's duration
+            if abs(offsetGT(j) - offset(i)) <= max(0.1, 0.5 * (offsetGT(j) - onsetGT(j)))
+                pos = strmatch(classNames{i}, eventID);
+                NcorrOff(pos) = NcorrOff(pos) +1;
+            end;
+            
+            break; % In order to not evaluate duplicates
+            
+        end;
+    end;
+end;
+
+
+% Compute onset-only class-wise event-based metrics
+Nfp = Ntot-Ncorr;
+Nfn = Nref-Ncorr;
+Nsubs = min(Nfp,Nfn);
+tempRec = Ncorr(I)./(Nref(I)+eps);
+tempPre = Ncorr(I)./(Ntot(I)+eps);
+results.Rec = mean(tempRec);
+results.Pre = mean(tempPre);
+tempF =  2*((tempPre.*tempRec)./(tempPre+tempRec+eps));
+results.F = mean(tempF);
+tempAEER = (Nfn(I)+Nfp(I)+Nsubs(I))./(Nref(I)+eps);
+results.AEER = mean(tempAEER);
+
+
+% Compute onset-offset class-wise event-based metrics
+NfpOff = Ntot-NcorrOff;
+NfnOff = Nref-NcorrOff;
+NsubsOff = min(NfpOff,NfnOff);
+tempRecOff = NcorrOff(I)./(Nref(I)+eps);
+tempPreOff = NcorrOff(I)./(Ntot(I)+eps);
+results.RecOff = mean(tempRecOff);
+results.PreOff = mean(tempPreOff);
+tempFOff =  2*((tempPreOff.*tempRecOff)./(tempPreOff+tempRecOff+eps));
+results.FOff = mean(tempFOff);
+tempAEEROff = (NfnOff(I)+NfpOff(I)+NsubsOff(I))./(Nref(I)+eps);
+results.AEEROff = mean(tempAEEROff);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/challange/eventDetectionMetrics_eventBased.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,56 @@
+function [results] = eventDetectionMetrics_eventBased(outputFile,GTFile)
+
+% Event-based evaluation for event detection task
+% outputFile: the output of the event detection system
+% GTFile: the ground truth list of events
+
+% Load event list from output and ground-truth
+[onset,offset,classNames] = loadEventsList(outputFile);
+[onsetGT,offsetGT,classNamesGT] = loadEventsList(GTFile);
+
+
+% Total number of detected and reference events
+Ntot = length(onset);
+Nref = length(onsetGT);
+
+
+% Number of correctly transcribed events, onset within a +/-100 ms range
+Ncorr = 0;
+NcorrOff = 0;
+for j=1:length(onsetGT)
+    for i=1:length(onset)
+        
+        if( strcmp(classNames{i},classNamesGT{j}) && (abs(onsetGT(j)-onset(i))<=0.1) )
+            Ncorr = Ncorr+1; 
+            
+            % If offset within a +/-100 ms range or within 50% of ground-truth event's duration
+            if abs(offsetGT(j) - offset(i)) <= max(0.1, 0.5 * (offsetGT(j) - onsetGT(j)))
+                NcorrOff = NcorrOff +1;
+            end;
+            
+            break; % In order to not evaluate duplicates
+            
+        end;
+    end;
+    
+end;
+
+
+% Compute onset-only event-based metrics
+Nfp = Ntot-Ncorr;
+Nfn = Nref-Ncorr;
+Nsubs = min(Nfp,Nfn);
+results.Rec = Ncorr/(Nref+eps);
+results.Pre = Ncorr/(Ntot+eps);
+results.F = 2*((results.Pre*results.Rec)/(results.Pre+results.Rec+eps));
+results.AEER= (Nfn+Nfp+Nsubs)/(Nref+eps);
+
+
+% Compute onset-offset event-based metrics
+NfpOff = Ntot-NcorrOff;
+NfnOff = Nref-NcorrOff;
+NsubsOff = min(NfpOff,NfnOff);
+results.RecOff = NcorrOff/(Nref+eps);
+results.PreOff = NcorrOff/(Ntot+eps);
+results.FOff = 2*((results.PreOff*results.RecOff)/(results.PreOff+results.RecOff+eps));
+results.AEEROff= (NfnOff+NfpOff+NsubsOff)/(Nref+eps);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/challange/eventDetectionMetrics_frameBased.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,34 @@
+function [results] = eventDetectionMetrics_frameBased(outputFile,GTFile)
+
+% Frame-based evaluation for event detection task
+% outputFile: the output of the event detection system
+% GTFile: the ground truth list of events
+
+% Load event list from output and ground-truth
+[onset,offset,classNames] = loadEventsList(outputFile);
+[onsetGT,offsetGT,classNamesGT] = loadEventsList(GTFile);
+
+
+% Convert event list into frame-based representation (10msec resolution)
+[eventRoll] = convertEventListToEventRoll(onset,offset,classNames);
+[eventRollGT] = convertEventListToEventRoll(onsetGT,offsetGT,classNamesGT);
+
+
+% Fix durations of eventRolls
+if (size(eventRollGT,1) > size(eventRoll,1)) eventRoll = [eventRoll; zeros(size(eventRollGT,1)-size(eventRoll,1),16)]; end;
+if (size(eventRoll,1) > size(eventRollGT,1)) eventRollGT = [eventRollGT; zeros(size(eventRoll,1)-size(eventRollGT,1),16)]; end;
+
+
+% Compute frame-based metrics
+Nref = sum(sum(eventRollGT));
+Ntot = sum(sum(eventRoll));
+Ntp = sum(sum(eventRoll+eventRollGT > 1));
+Nfp = sum(sum(eventRoll-eventRollGT > 0));
+Nfn = sum(sum(eventRollGT-eventRoll > 0));
+Nsubs = min(Nfp,Nfn);
+
+
+results.Rec = Ntp/(Nref+eps);
+results.Pre = Ntp/(Ntot+eps);
+results.F = 2*((results.Pre*results.Rec)/(results.Pre+results.Rec+eps));
+results.AEER = (Nfn+Nfp+Nsubs)/(Nref+eps);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/challange/loadEventsList.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,27 @@
+function [onset,offset,classNames] = loadEventsList(filename)
+
+% Open raw file
+fid = fopen(filename,'r+');
+
+% Read 1st line
+tline = fgetl(fid);
+onset_offset(:,1) = sscanf(tline, '%f\t%f\t%*s');
+classNames{1} = char(sscanf(tline, '%*f\t%*f\t%s')');
+
+% Read rest of the lines
+i=1;
+while ischar(tline)
+    i = i+1;
+    tline = fgetl(fid);
+    if (ischar(tline))
+        onset_offset(:,i) = sscanf(tline, '%f\t%f\t%*s');
+        classNames{i} = char(sscanf(tline, '%*f\t%*f\t%s')');
+    end;
+end
+
+% Split onset_offset
+onset = onset_offset(1,:)';
+offset = onset_offset(2,:)';
+
+% Close file
+fclose(fid);
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/fe_funcs/FE.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,217 @@
+% Yet another feature extractor, this time with CHiME in mind.
+% Some cleanup, more support for customised audio parameters.
+% Updated 9th Aug 2011
+%
+% Outputs have been changed. Currently no logarithms are taken here any 
+% more. 
+%
+% Input:
+% - 'sam' is a the audio, either as column or row channels vectors. 
+%   (Longer dimension is treated as time, shorter as channel count.)
+% - 'audioconf' is as defined in getconfigs.m . All of its parameters
+%   ARE respected now, so pass a temporary, edited copy if you want
+%   to change the behaviour.
+%
+% Output:
+% - 'feats' is a [bands x frames x featchannels] array of mel features.
+%   If audioconf.melbands is zero, FFT magnitudes are returned instead.
+% - 'energies' is a [frames x audiochannels] matrix of frame energies
+% - 'frameaudio' is a [framelen x frames x audiochannels] array of chopped
+%   audio data (with preprocessing but without the window function).
+% - 'frameFFT' is an [FFTlow x frames x audiochannels] array of frame FFTs.
+%   The windowing function has been applied, and the result has been
+%   truncated to Nfft/2 + 1 bands. However, no abs is taken. You can
+%   do this in the calling function, or pick the abs values from 'feats'
+%   by using zero melbands.
+% 
+% The main feature output respects audioconf.featchannels, which should
+% be either the same as audioconf.channels (the number of input streams)
+% or 1 (downmixed to mono by taking the mean of feature channels). Other 
+% outputs use original audio channels, because their averaging is not as 
+% well defined. Note that there is a significant difference between
+% averaging the audio (causing waveform level phase attenuation) and the
+% abs-FFT or Mel features (phase-invariant energy mean). If the former is
+% what you need, downmix the audio in the calling function.
+%
+% Some warnings are shown if audio parameters are missing or they do not
+% match with the data.
+
+function [feats, energies, frameaudio, frameFFT] = FE(sam, audioconf)
+
+verbose = 0;
+
+% Default config. To guarantee intended operation, you should always
+% pass your own, though.
+
+defconf.channels = 2;         % input channels, in CHiME always 2
+defconf.featchannels = 1;     % feature level channels
+defconf.fs = 16000;           % sampling rate for internal processing
+defconf.maxf = 8000;          % maximum frequency to be considered
+defconf.minf = 64;            % maximum frequency to be considered
+defconf.melbands = 26;        % mel band count (0 to disable)
+defconf.framelen_ms = 25;     % millisecond length of each frame
+defconf.framestep_ms = 10;    % millisecond step between frames
+defconf.windowfunc = 'hamming';  % window function name
+defconf.preemphasis = 0.97;   % 0 to disable
+defconf.dcremoval = true;     % DC removal in the feature extractor
+defconf.Nfft = 0;             % Number of FFT bands (0 to calculate from framelength)
+
+if nargin < 2
+    if verbose
+        disp('No audioconf given, using defaults.')
+    end
+    audioconf = defconf;
+else
+    fldnames = fieldnames(defconf);
+    for fl = 1:length(fldnames)
+        if ~isfield(audioconf, fldnames{fl})
+            if verbose
+                fprintf('Field %s missing, copying from defaults.\n', fldnames{fl})
+            end
+            audioconf.(f)=defconf.(f);
+        end
+    end
+end
+
+% Fetch the shorthand variables.
+featbands = audioconf.melbands;
+featchans = audioconf.featchannels;
+fs = audioconf.fs;
+fhigh = audioconf.maxf;
+flow = audioconf.minf;
+
+framelen = ceil(fs * audioconf.framelen_ms / 1000);
+frameshift = ceil(fs * audioconf.framestep_ms / 1000);
+% framelen = (fs * audioconf.framelen_ms / 1000);
+% frameshift = (fs * audioconf.framestep_ms / 1000);
+
+if audioconf.Nfft == 0
+    Nfft = 2^nextpow2(framelen);
+else
+    Nfft = audioconf.Nfft;
+end
+
+winfunc = str2func(audioconf.windowfunc);
+win = winfunc(framelen);
+
+if featbands == 0
+    melmode = false;
+else
+    melmode = true;
+end
+
+% Switch audio to columns.
+if size(sam, 1) < size(sam,2)
+    sam = sam';
+end
+
+samlen = size(sam, 1);
+samchans = size(sam, 2);
+
+if samchans ~= audioconf.channels
+    if verbose
+        fprintf('Warning: Audio has %i channels, config states %i.\n', samchans, audioconf.channels);
+    end
+end
+
+if melmode
+    melmat = mel_matrix(fs, featbands, Nfft, 1, fhigh, flow)';
+    if size(melmat, 1) ~= featbands
+        fprintf('Mel matrix has %i bands (config: %i).\n', size(melmat, 1), featbands);
+    end
+    if size(melmat, 2) ~= (Nfft/2 + 1)
+        fprintf('Mel matrix has %i FFT coeffs (expected: %i).\n', size(melmat, 2), Nfft/2 + 1);
+    end
+end
+
+% Truncate to full frames, get the number.
+numframes = floor((samlen-framelen+frameshift) / frameshift);
+sam = sam(1:(numframes*frameshift+framelen-frameshift), :);
+
+% DC removal - introduces a 1-unit filter delay, thus we discard the
+% first sample. Note that this behaviour has changed from earlier
+% versions of FE.
+if audioconf.dcremoval
+    samf = filter([1;-1], [1;-0.999], [zeros(1,samchans);sam]);
+    sam = samf(2:end, :);
+end
+samtrlen = size(sam, 1); % trimmed length
+
+% Pre-emphasis if nonzero. Can be done for the whole audio at once.
+if (audioconf.preemphasis > 0)
+    sam = [zeros(1, samchans); sam(2:samtrlen, :) - audioconf.preemphasis * sam(1:(end-1), :)];
+end
+
+if melmode
+    tmpfeats = zeros(featbands, numframes, samchans);
+else
+    tmpfeats = zeros(Nfft/2 + 1, numframes, samchans);
+end
+
+energies = zeros(numframes, samchans);
+frameaudio = zeros(framelen, numframes, samchans);
+frameFFT = zeros(Nfft/2+1, numframes, samchans);
+
+
+% Process channels one by one. Trying to perform these ops simultaneously
+% for all channels might be possible but tricky.
+for c = 1:samchans
+    
+    % starting sample numbers of each frame
+    ind1 = 1:frameshift:samtrlen-1-framelen+frameshift;
+    % linear 1-step vector (1...frame length)
+    ind2 = (1:framelen)';
+    
+    % Pick frame audio. The index matrix (framelen x numframes) consists 
+    % of four summed parts:
+    % 1) Constant column vectors, each denoting the frame's start sample.
+    % 2) Increasing sample index column vectors
+    % 3) Scalar jump to get into the correct channel in linear indexing
+    % 4) -1 because the first two indices are both one-based.
+    %
+    %       [start1  start2 ]   [ 1    1  ]
+    %  sam( [ ...     ...   ] + [...  ... ] + channel jump - 1) =
+    %       [start1  start2 ]   [frl  frl ]
+    %
+    %       [ start1+1   start2+1  ]
+    %  sam( [   ...        ...     ] + channel jump - 1)
+    %       [start1+frl start2+frl ]
+    %
+    % Thus we get an index matrix, where each frame column picks the 
+    % samples belonging to it. These samples are then fetched to 'fra'.
+    
+    fra = sam(ind1(ones(framelen,1),:) + ind2(:,ones(1,numframes)) + (c-1)*samtrlen - 1);
+    frameaudio(:,:,c) = fra;
+
+    % Calculate the energies.
+    energies(:,c) = sum(fra.^2,1)';            
+        
+    % Apply window function, take FFT.
+    fFFT = fft(win(:,ones(1,numframes)) .* fra, Nfft);
+    % Truncate and reset constant factor, but do not take abs yet.
+    fFFT(1,:) = 0;
+    fFFT = fFFT(1:Nfft/2+1,:);
+    
+    % Store the returned FFTs with phase.
+    frameFFT(:,:,c) = fFFT;
+    
+    if melmode
+        tmpfeats(:,:,c) = melmat * abs(fFFT);
+    else
+        tmpfeats(:,:,c) = abs(fFFT);
+    end
+end
+
+% Flatten the features if downmixing to 1 is defined.
+if featchans == 1 
+    if samchans > 1
+        feats = mean(tmpfeats, 3);
+    else
+        feats = tmpfeats;
+    end
+else
+    if samchans ~= featchans
+        fprintf('Requested %i feature channels for %i audio - not defined. Returning %i.\n', featchans, samchans, samchans)
+    end
+    feats = tmpfeats;
+end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/fe_funcs/mel_matrix.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,39 @@
+function [M,fCen,Mfull]=mel_matrix(fs,NbCh,Nfft,warp,fhigh,flow)
+% [M,fCen,Mfull]=mel_matrix(fs,NbCh,Nfft,warp,fhigh)
+% returns Nfft/2+1 - by - NbCh matrix of triangular weights
+% NbCh filterbanks linearly spaced on a MEL scale till fhigh
+% warp: default 1.0
+% fhigh: default=fs/2
+% Mfull: matrix without truncation to Nfft/2+1 points
+
+if nargin<4,
+    warp=1;
+end
+if nargin<5,
+    fhigh=fs/2;
+end
+
+LowMel=2595 * log10(1+flow/700);
+NyqMel=2595 * log10(1+fhigh/700);
+
+StartMel=LowMel + (0:NbCh-1)/(NbCh+1)*(NyqMel-LowMel);
+fCen=warp*700*(10.^(StartMel/2595)-1);
+StartBin=round(Nfft/fs*fCen)+1;
+
+EndMel=LowMel + (2:NbCh+1)/(NbCh+1)*(NyqMel-LowMel);
+EndBin=round(warp*Nfft/fs*700*(10.^(EndMel/2595)-1))+1;
+
+TotLen=EndBin-StartBin+1;
+
+LowLen=[StartBin(2:NbCh) EndBin(NbCh-1)]-StartBin+1;
+HiLen=TotLen-LowLen+1;
+
+M=sparse([],[],[],ceil(warp*Nfft/2+1),NbCh,sum(TotLen));
+for k=1:NbCh,
+    M(StartBin(k):StartBin(k)+LowLen(k)-1,k)=(1:LowLen(k))'/LowLen(k);
+    M(EndBin(k)-HiLen(k)+1:EndBin(k),k)=(HiLen(k):-1:1)'/HiLen(k);
+end
+%TotWeight=sum(M,1);
+Mfull=M;
+M=M(1:Nfft/2+1,:);
+%WeightSum=full([sum(M,1);TotWeight]);
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/fe_funcs/stacklp.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,14 @@
+function y=stacklp(x,N)
+
+if nargin<2,
+  N=5;
+end
+
+[D,T]=size(x);
+xext=x(:,[ones(1,N) 1:T T*ones(1,N)]);
+
+y=zeros((2*N+1)*D,T);
+if islogical(x), y=logical(y);end % copy logical property
+for k=0:2*N,
+  y(k*D+(1:D),:)=xext(:,k+1:k+T);
+end
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/fe_funcs/vec2featmat.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,9 @@
+function [M,Mdct]=vec2featmat(D,cep_ord)
+  
+b0=[0 0 0 0 1  0  0  0 0];
+b1=[0 0 2 1 0 -1 -2 0 0]/10;
+b2=conv(b1(3:7),[2 1 0 -1 -2]/10);
+
+Mdct=[cos(pi*(1:cep_ord)'*((1:D)-0.5)/D);ones(1,D)];
+
+M=kron([b0;-b1;b2],Mdct);
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/funcsMobilab/feature_extraction.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,51 @@
+function  [features] = feature_extraction(segment,config,startStop,minC0)
+%%
+%   Mobilab 2012-2013  
+%
+%%
+if nargin < 4, minC0 = []; end,                                            %minC0 will be placed empty                                   
+%if nargin < 3, startStop = [0 size(segment,1)/config.fs]; end,            %Default no segmentation
+if nargin < 3, startStop = []; end,                                        %startSop is left empty, no segmentation occurs
+%%
+% Calculate the features
+mel_feat = FE(segment, config);
+logmel_feat = log(mel_feat);
+[MM,Mstatic] = vec2featmat(config.melbands, config.cepOrder);
+mfcc_static = Mstatic*logmel_feat;  
+mfcc_d_dd = MM*stacklp(logmel_feat,4);
+
+%Features
+features.mel_feat=mel_feat;
+features.logmel_feat=logmel_feat;
+features.mfcc_static=mfcc_static;
+features.mfcc_d_dd=mfcc_d_dd;
+
+% Standard no segmentation and threholding
+indicesEvent = [1:size(mel_feat,2)];
+indicesSilence = [];
+indicesAboveThreshold = [1:size(mel_feat,2)];
+indicesBelowThreshold = [];
+
+%Do a segmentation
+if (~isempty(startStop))
+    %Determine start and stop frame
+    startFrame = 1 + floor((startStop(1)/(config.framestep_ms/1000)));
+    stopFrame = startFrame + ceil((startStop(2)-startStop(1))/(config.framestep_ms/1000));
+    if(stopFrame > size(mel_feat,2)),stopFrame = size(mel_feat,2); end,
+    %These frame indices correspond to the event
+    indicesEvent = [startFrame : stopFrame];
+    %These frame indices correspond to the silence
+    indicesSilence = [1:startFrame-1 stopFrame+1:size(mel_feat,2)];
+    %Take only those event frames where C0 > minC0
+    %Take only those silence frames where C0 <= minC0
+    if (~isempty(minC0))
+       indicesAboveThreshold=find(mfcc_static(end,:)>minC0);
+       indicesBelowThreshold=find(mfcc_static(end,:)<minC0); 
+    end,
+end,
+features.indicesEvent=indicesEvent;
+features.indicesSilence=indicesSilence;
+features.indicesAboveThreshold=indicesAboveThreshold;
+features.indicesBelowThreshold=indicesBelowThreshold;
+end
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/funcsMobilab/find_format.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,29 @@
+function [files] = find_format(DIR,format)
+
+i = 1;
+inhoud_DIR = dir(DIR);
+aantal_bestanden = length(inhoud_DIR);
+for j = 1:aantal_bestanden
+    bestandsnaam = inhoud_DIR(j,1).name;
+    if strcmp(format,'map')
+        if inhoud_DIR(j,1).isdir ==true
+            if bestandsnaam(1) ~= '.'
+                files.labels(i) = {bestandsnaam(1:(length(bestandsnaam)-length('01.wav')))};
+                files.names(i) = {bestandsnaam};
+                i = i+1;
+            end
+        end  
+    else
+        if length(bestandsnaam)>=length(format),
+            formaatBestandsnaam = bestandsnaam(end-length(format)+1:end);
+            if strcmp(formaatBestandsnaam, char(ones(1,0)));
+                formaatBestandsnaam = '';
+            end
+            if strcmp(formaatBestandsnaam, format),
+                files.labels(i) = {bestandsnaam(1:(length(bestandsnaam)-length('01.wav')))};
+                files.names(i) = {bestandsnaam};
+                i = i+1;
+            end
+        end
+    end
+end
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/funcsMobilab/gmm_classification.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,34 @@
+function [ loglik y_classified] = gmm_classification(features, gmmParam)
+%%
+%   Mobilab 2012-2013  
+%   
+%   Input arguments:
+%       - features is a matrix structure:: 
+%               *) rows: different coefficients
+%               *) columns: different frames
+%       - gmmParem is a cellStructure:
+%            As many cells as GMM models:
+%       - Config:
+%               *) windowClassification_ms: the size of the for classification
+%               *) samplfeFrequency: the sample frequency of the .wav files
+%               *) framelen_ms: is the size of the frames where the MFCC are
+%               on computed
+%   Output arguments
+%       - probs is a matrix structure:
+%               *) rows: the prob of the corresponding event
+%               %) columns: the corresponding window
+%%
+%Prealloction for speed
+nrEvents=length(features);
+nrClasses=length(gmmParam);
+loglik=zeros(nrClasses, nrEvents);
+%Do a classification
+for(classNr=1:nrClasses)
+    for(eventNr=1:nrEvents)
+         loglik(classNr,eventNr) = mean(log(pdf(gmmParam{classNr},features{eventNr,1}')));         
+    end,
+end,
+[ignore y_classified] = max(loglik);
+end
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/funcsMobilab/readCSV.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,64 @@
+function [txt_struct] = readCSV(file)
+
+    % text openen om volledig in te lezen
+    fid = fopen(file);
+    txt = '';
+    if fid ~= -1
+        while 1
+            line = fgetl(fid);
+            if ~ischar(line), break, end
+            txt = [txt line ' \n '];
+        end
+    end
+    fclose(fid);
+    
+    % text aanpassen
+    txt_struct = {};
+    l = 1;
+    k = 1;
+    voorlopigVak = '';
+    volgendeOverslaan = false;
+    for i = 1:length(txt)
+        switch txt(i)
+            case ';'
+                txt_struct{l,k} = voorlopigVak;
+                voorlopigVak = '';
+                k = k+1;
+            case '\'
+                if txt(i+1) == 'n'
+                    txt_struct{l,k} = voorlopigVak;
+                    voorlopigVak = '';
+                    l = l+1;
+                    k = 1;
+                    volgendeOverslaan = true;
+                else
+                    if volgendeOverslaan == false
+                        voorlopigVak = [voorlopigVak txt(i)];
+                    else
+                        volgendeOverslaan = false;
+                    end
+                end
+            otherwise
+                if volgendeOverslaan == false
+                    voorlopigVak = [voorlopigVak txt(i)];
+                else
+                    volgendeOverslaan = false;
+                end
+        end
+    end
+    
+    % lege lijnen (of met spaties) onderaan wissen
+    ok = false;
+    while ok == false
+        for i = 1:size(txt_struct,2);
+            for j = 1:length(txt_struct{end,i})
+                if not(strcmp(txt_struct{end,i}(j), ' '))
+                    ok = true;
+                end
+            end
+        end
+        if ok == false
+            txt_struct = txt_struct(1:end-1,:);
+        end
+    end
+end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/funcsMobilab/read_annotation_develop.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,31 @@
+function [startStop labels] = read_annotation_develop(directory, fileID)
+%% Open corresponding textfile
+fid=fopen([directory filesep fileID], 'r'); 
+text = fread(fid, '*char'); 
+fclose(fid);
+%% Preallocation
+% Find rows (separated by enter) and is equal by the nr of Events
+posEnter = strfind(text', char(10));
+nrEvents = length(posEnter);
+% Preallocation for speed
+startStop = zeros(nrEvents,2);
+labels = cell(nrEvents,1);
+%% Extract data from the script
+%First event
+subText = text(1:posEnter(1,1)-1)';
+%Find tab positions
+tabs = strfind(subText, char(9));
+startStop(1,1) = str2num(subText(1:tabs(1)-1));
+startStop(1,2) = str2num(subText(tabs(1)+1:tabs(2)-1));
+labels{1,1} = subText(tabs(2)+1:length(subText));
+
+%All remaining events
+for(eventNr=2:nrEvents)
+    subText = text(posEnter(1,eventNr-1)+1:posEnter(1,eventNr)-1)';
+    %Find tab positions
+    tabs = strfind(subText, char(9));
+    startStop(eventNr,1) = str2num(subText(1:tabs(1)-1));
+    startStop(eventNr,2) = str2num(subText(tabs(1)+1:tabs(2)-1));
+    labels{eventNr,1} = subText(tabs(2)+1:length(subText));
+end,
+end
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/functions/funcsMobilab/read_data_develop.m	Fri Oct 11 12:02:43 2013 +0100
@@ -0,0 +1,80 @@
+function [segments,samples,labels] = read_data_develop(directoryAudio,directoryAnnotation,new_fs)
+%%
+%   Mobilab 2012-2013  
+%   
+%   Input arguments:
+%       - directoryAudi: 
+%            Contains the path to the folder where the data is located
+%       - directoryAnnotation:
+%            Contains the path to the folders where the annotations are located
+%       - new_fs:
+%             Resample the .wav files to this sample frequency. If left
+%             blank, no resampling occur
+%   Output arguments
+%       - segments is a cell structure:
+%             Columns: # number of development examples
+%             Rows: # numer of annotators
+%             Each cell contains the segmented files from corresponding
+%             development file and annotator
+%       - samples is a cell structure:
+%             Same structure as segments. Each cell contains the start and
+%             stop time of the event.
+%       - labels is a cell structure:
+%             Same structure as segments. Each cell contains the labels of
+%             the corresponding events
+%% Defaults
+down=1; if nargin < 3; down=0; end
+%% Annotation
+addpath(directoryAudio);
+files=find_format(directoryAudio, '.wav');
+names=files.names;
+nrScripts = length(names);
+
+% Preallocation for speed
+segments=cell(1,nrScripts);
+samples=cell(1,nrScripts);
+labels=cell(1,nrScripts);
+
+for(scriptNr=1:nrScripts)
+   
+   [x_temp fs] = wavread(names{1,scriptNr});
+   if(down==1)
+       x_temp = resample(x_temp,new_fs,fs);
+       fs=new_fs;
+   end,
+       
+
+        addpath(directoryAnnotation);
+        files2=find_format(directoryAnnotation, 'txt');
+        annotation=textread(files2.names{1,i}, '%s' ,'delimiter', '\t');
+        startStop=zeros(length(annotation)/3,2);
+        id=cell(length(annotation)/3,1);
+        u=1;
+        for(v=1:3:length(annotation))
+            startStop(u,1)=str2num(annotation{v,1});
+            startStop(u,2)=str2num(annotation{v+1,1});
+            x{u,2}=fs;
+            x{u,1}=x_temp(ceil(startStop(u,1)*x{u,2})+1:ceil(startStop(u,2)*x{u,2}));
+            id{u,1}=annotation{v+2,1};
+            u=u+1;
+        end,
+        segments{k,i}=x;
+        samples{k,i}=startStop;
+        labels{k,i}=id;
+        rmpath(directoryAnnotation{k,1});
+        clear x;
+    end,
+end
+
+
+    
+    
+
+    
+    
+
+    
+
+  
+
+
Binary file readme/Readme.docx has changed
Binary file readme/Readme.pdf has changed