# HG changeset patch # User Dan Stowell # Date 1381489363 -3600 # Node ID 2fadb31a9d5576f4b9e94f900743e621deca8464 Import code by Vuegen et al diff -r 000000000000 -r 2fadb31a9d55 abstract/AbstractAASP.pdf Binary file abstract/AbstractAASP.pdf has changed diff -r 000000000000 -r 2fadb31a9d55 acousticModel/GMM.mat Binary file acousticModel/GMM.mat has changed diff -r 000000000000 -r 2fadb31a9d55 eventdetection.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/eventdetection.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,109 @@ +function eventdetection(dirFuncs, dirInput,nameInput,dirOutput,nameOutput,COindex) +%% Readme of the eventdetection function +% Input parameters +% dirFuncs: should link to the directory with the toolboxes +% e.g. 'D:\Projects\AASP\functions' +% dirInput: should be the directory linking to the test scripts +% e.g. 'D:\Projects\AASP\Datasets' +% nameInput: is the name of the test set +% e.g. 'test01.wav' +% dirOutput: Is the directory where the output file shoud be saved +% e.g. 'D:\Projects\AASP\Output' +% nameOutput: Is the name of the output files +% e.g. 'outputTest01.txt'. +% C0index: Determines the loaded threshold +% Use 1 for Office Life +% Use 2 for Office Synthetic with a SNR of -6 +% Use 3 for Office Synthetic with a SNR of 0 +% Use 4 for Office Synthetic with a SNR of 6% +%% add paths to the functions +addpath([dirFuncs filesep 'functions' filesep 'challange']); +addpath([dirFuncs filesep 'functions' filesep 'fe_funcs']); +addpath([dirFuncs filesep 'functions' filesep 'funcsMobilab']); +%% load the GMMs +load([pwd filesep 'acousticModel' filesep 'GMM.mat']); +%% Load the threholds +nrStages = 5; +minC0 = [-189.5 -35 -45 -95]; +%% Load the development audio file +dirDataName = [dirInput filesep nameInput]; +[x_develop fs] = wavread(dirDataName); +disp(['Stage 1 of ' num2str(nrStages) ' is completed']); +%% Apply a downssampling to new_fs=16kHz +new_fs=16000; +x_develop = resample(x_develop, new_fs, fs); +fs = new_fs; +%% Extract the features +features=feature_extraction(x_develop, audioconf); +disp(['Stage 2 of ' num2str(nrStages) ' is completed']); +%% Determine where C0 > minimum +C0 = features.mfcc_static(14,:); +frames=(C0>minC0(COindex)); +%% Do moving average filtering on the indices +minWindowFrames=50; +% Filter coefficients +b_mov_avg=ones(minWindowFrames,1)/minWindowFrames; +frames_filt=fftfilt(b_mov_avg,double(frames)); +% Seek for ones and go to doubles +frames_filt_ones=double((frames_filt>=0.999)); +% Seek risig edges for compensating with the filter delay +% Each rising edge corresponds to an event +b_edges=[1 -1]; +edges=fftfilt(b_edges,frames_filt_ones); +[ignore indRisingEdges]=find(edges>=0.999); +[ignore indFallingEdges]=find(edges<=-0.999); + nrEdges=size(indRisingEdges,2); +% Preallocation of variable eventFrames for speed +eventFrames=zeros(nrEdges,2); + for(edgeNr=1:nrEdges) + frames_filt_ones(indRisingEdges(edgeNr)-minWindowFrames+1:indRisingEdges(edgeNr))=1; + eventFrames(edgeNr,:)=[indRisingEdges(edgeNr)-minWindowFrames+1 indFallingEdges(edgeNr)]; + end, +indEvents=find(frames_filt_ones); +indSilence=find(ones(1,length(C0))-frames_filt_ones); +disp(['Stage 3 of ' num2str(nrStages) ' is completed']); +%% Compare the developpment script with all the GMMs (posteriorgram) +% Preallcoation of variables for speed +nrClasses = size(gmm_class_mfcc_feat,1); +likelihood = zeros(nrClasses,length(C0)); +% Loop over the classes and determine likelihood + for(classNr=1:nrClasses) + likelihood(classNr,:) = pdf(gmm_class_mfcc_feat{classNr},features.mfcc_d_dd'); + end, +% Compare with the silence class GMM +likelihood(classNr+1,:) = pdf(gmm_silence_mfcc_feat,features.mfcc_d_dd'); +labelsClass{classNr+1}='silence'; +% Go to posteriors +posterior=bsxfun(@rdivide,likelihood,sum(likelihood,1)); +%% Apply an moving average filtering on the posteriorgram +% Preallocation for speed +likelihoodFilt = zeros(nrClasses,length(C0)); +% Min duration devided by 2 as filterlengths +minDuration = [0.325 0.3599 0.3612 0.4448 0.7662 1.026 0.725 0.4601 0.5899 0.2379 0.7102 0.243 4.2318 6.1003 0.487 0.0579 0.0579]/2; +numFrames = floor((minDuration-(audioconf.framelen_ms/1000)+(audioconf.framestep_ms/1000)) / (audioconf.framestep_ms/1000)); +% Loop over the posteriorgram + for(classNr=1:nrClasses+1) + b=(ones(numFrames(classNr),1)/numFrames(classNr)); + likelihoodFilt(classNr,:) = fftfilt(b,posterior(classNr,:)); + end, +% Make sure no complex values occur (some have a very small imaginary valuee +likelihoodFilt = abs(likelihoodFilt); +% Back to posteriogram +posteriorFilt=bsxfun(@rdivide,likelihoodFilt,sum(likelihoodFilt,1)); +disp(['Stage 4 of ' num2str(nrStages) ' is completed']); +%% Apply the thresholding on the moving averaged posteriorgram +posteriorFiltActive=posteriorFilt; +posteriorFiltActive(:,indSilence)=0; +% Loop over the detected events + for(detectionNr=1:nrEdges) + [ignore(detectionNr) ID(detectionNr)] = max(mean(posteriorFilt(:,eventFrames(detectionNr,1):eventFrames(detectionNr,2)),2)); + end, +%% Go over to the AASP metrics +pianorollClassification=zeros(size(posteriorFiltActive)); +for(detectionNr=1:nrEdges) + pianorollClassification(ID(detectionNr),eventFrames(detectionNr,1):eventFrames(detectionNr,2))=ones; +end, +% Go to a text file complementary to the challange requirements +eventBased(pianorollClassification,nameOutput,dirOutput, audioconf); +disp(['Stage 5 of ' num2str(nrStages) ' is completed']); +end \ No newline at end of file diff -r 000000000000 -r 2fadb31a9d55 functions/challange/convertEventListToEventRoll.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/challange/convertEventListToEventRoll.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,17 @@ +function [eventRoll] = convertEventListToEventRoll(onset,offset,classNames) + + +% Initialize +eventRoll = zeros(ceil(offset(length(offset))*100),16); +eventID = {'alert','clearthroat','cough','doorslam','drawer','keyboard','keys',... + 'doorknock','laughter','mouse','pageturn','pendrop','phone','printer','speech','switch'}; + + +% Fill-in eventRoll +for i=1:length(onset) + + pos = strmatch(classNames{i}, eventID); + + eventRoll(floor(onset(i)*100):ceil(offset(i)*100),pos) = 1; + +end; diff -r 000000000000 -r 2fadb31a9d55 functions/challange/eventBased.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/challange/eventBased.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,58 @@ +function eventBased(script, fileName, directory, conf) +%% +%MOBILAB 2012 - 2013 +% +% Input parameters +% -script: Each row is a representation of a certain class +% Each column contains the value (ones) of wich class is active at +% that certain moment. +% [N x M] matrix: +% -N: nr classes +% -M: nr of frames in the script +% -fileName: Is the name of the output text file. +% -directory: Is the location where the output text file must be written +% -conf: Is a struct and contains information about the time of the +% differrent frames +%% +fileID = fopen([directory filesep fileName],'a'); + +%Determine the number of classes and the number of framees +[nr_classes nr_frames] = size(script(1:16,:)); +%classnames +eventID = {'alert','clearthroat','cough','doorslam','drawer','keyboard','keys',... + 'doorknock','laughter','mouse','pageturn','pendrop','phone','printer','speech','switch'}; +k=1; + +for class=1:nr_classes + b = [1 -1]; + changes = conv(script(class,:),b); %Find transitions + changes = changes(1:length(script)); %Apply same lenght as script + + onset = find(changes==1); %Find the 0-1 transitions // onset + offset = find(changes==-1); %Find the 1-0 transitions // offset + + for(nr_transtion=1 : length(onset)) + onsetOffset(k,1) = onset(nr_transtion)*(conf.framestep_ms/1000); + onsetOffset(k,2) = offset(nr_transtion)*(conf.framestep_ms/1000); + classifiedEvent{k,1} = eventID{1,class}; + k=k+1; + end, +end, + +% Sort the data with respect to the onset times +[data_sorted(:,1) I] = sort(onsetOffset(:,1)); +data_sorted(:,2) = onsetOffset(I,2); + +for (nr=1:length(data_sorted(:,1))) +fprintf(fileID,'%f',data_sorted(nr,1)); %Print onset time +fprintf(fileID, '\t'); %Print space +fprintf(fileID,'%f',data_sorted(nr,2)); %Print offset time +fprintf(fileID, '\t'); %Print space +fprintf(fileID,'%s',classifiedEvent{I(nr),1}); +fprintf(fileID,'\n'); %Print newline +end, + +fclose(fileID); + +end + diff -r 000000000000 -r 2fadb31a9d55 functions/challange/eventDetectionMetrics_classWiseEventBased.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/challange/eventDetectionMetrics_classWiseEventBased.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,80 @@ +function [results] = eventDetectionMetrics_classWiseEventBased(outputFile,GTFile) + +% Class-wise event-based evaluation for event detection task +% outputFile: the output of the event detection system +% GTFile: the ground truth list of events + +% Initialize +eventID = {'alert','clearthroat','cough','doorslam','drawer','keyboard','keys',... + 'knock','laughter','mouse','pageturn','pendrop','phone','printer','speech','switch'}; + + +% Load event list from output and ground-truth +[onset,offset,classNames] = loadEventsList(outputFile); +[onsetGT,offsetGT,classNamesGT] = loadEventsList(GTFile); + + +% Total number of detected and reference events per class +Ntot = zeros(16,1); +for i=1:length(onset) + pos = strmatch(classNames{i}, eventID); + Ntot(pos) = Ntot(pos)+1; +end; + +Nref = zeros(16,1); +for i=1:length(onsetGT) + pos = strmatch(classNamesGT{i}, eventID); + Nref(pos) = Nref(pos)+1; +end; +I = find(Nref>0); % index for classes present in ground-truth + + +% Number of correctly transcribed events per class, onset within a +/-100 ms range +Ncorr = zeros(16,1); +NcorrOff = zeros(16,1); +for j=1:length(onsetGT) + for i=1:length(onset) + + if( strcmp(classNames{i},classNamesGT{j}) && (abs(onsetGT(j)-onset(i))<=0.1) ) + pos = strmatch(classNames{i}, eventID); + Ncorr(pos) = Ncorr(pos)+1; + + % If offset within a +/-100 ms range or within 50% of ground-truth event's duration + if abs(offsetGT(j) - offset(i)) <= max(0.1, 0.5 * (offsetGT(j) - onsetGT(j))) + pos = strmatch(classNames{i}, eventID); + NcorrOff(pos) = NcorrOff(pos) +1; + end; + + break; % In order to not evaluate duplicates + + end; + end; +end; + + +% Compute onset-only class-wise event-based metrics +Nfp = Ntot-Ncorr; +Nfn = Nref-Ncorr; +Nsubs = min(Nfp,Nfn); +tempRec = Ncorr(I)./(Nref(I)+eps); +tempPre = Ncorr(I)./(Ntot(I)+eps); +results.Rec = mean(tempRec); +results.Pre = mean(tempPre); +tempF = 2*((tempPre.*tempRec)./(tempPre+tempRec+eps)); +results.F = mean(tempF); +tempAEER = (Nfn(I)+Nfp(I)+Nsubs(I))./(Nref(I)+eps); +results.AEER = mean(tempAEER); + + +% Compute onset-offset class-wise event-based metrics +NfpOff = Ntot-NcorrOff; +NfnOff = Nref-NcorrOff; +NsubsOff = min(NfpOff,NfnOff); +tempRecOff = NcorrOff(I)./(Nref(I)+eps); +tempPreOff = NcorrOff(I)./(Ntot(I)+eps); +results.RecOff = mean(tempRecOff); +results.PreOff = mean(tempPreOff); +tempFOff = 2*((tempPreOff.*tempRecOff)./(tempPreOff+tempRecOff+eps)); +results.FOff = mean(tempFOff); +tempAEEROff = (NfnOff(I)+NfpOff(I)+NsubsOff(I))./(Nref(I)+eps); +results.AEEROff = mean(tempAEEROff); diff -r 000000000000 -r 2fadb31a9d55 functions/challange/eventDetectionMetrics_eventBased.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/challange/eventDetectionMetrics_eventBased.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,56 @@ +function [results] = eventDetectionMetrics_eventBased(outputFile,GTFile) + +% Event-based evaluation for event detection task +% outputFile: the output of the event detection system +% GTFile: the ground truth list of events + +% Load event list from output and ground-truth +[onset,offset,classNames] = loadEventsList(outputFile); +[onsetGT,offsetGT,classNamesGT] = loadEventsList(GTFile); + + +% Total number of detected and reference events +Ntot = length(onset); +Nref = length(onsetGT); + + +% Number of correctly transcribed events, onset within a +/-100 ms range +Ncorr = 0; +NcorrOff = 0; +for j=1:length(onsetGT) + for i=1:length(onset) + + if( strcmp(classNames{i},classNamesGT{j}) && (abs(onsetGT(j)-onset(i))<=0.1) ) + Ncorr = Ncorr+1; + + % If offset within a +/-100 ms range or within 50% of ground-truth event's duration + if abs(offsetGT(j) - offset(i)) <= max(0.1, 0.5 * (offsetGT(j) - onsetGT(j))) + NcorrOff = NcorrOff +1; + end; + + break; % In order to not evaluate duplicates + + end; + end; + +end; + + +% Compute onset-only event-based metrics +Nfp = Ntot-Ncorr; +Nfn = Nref-Ncorr; +Nsubs = min(Nfp,Nfn); +results.Rec = Ncorr/(Nref+eps); +results.Pre = Ncorr/(Ntot+eps); +results.F = 2*((results.Pre*results.Rec)/(results.Pre+results.Rec+eps)); +results.AEER= (Nfn+Nfp+Nsubs)/(Nref+eps); + + +% Compute onset-offset event-based metrics +NfpOff = Ntot-NcorrOff; +NfnOff = Nref-NcorrOff; +NsubsOff = min(NfpOff,NfnOff); +results.RecOff = NcorrOff/(Nref+eps); +results.PreOff = NcorrOff/(Ntot+eps); +results.FOff = 2*((results.PreOff*results.RecOff)/(results.PreOff+results.RecOff+eps)); +results.AEEROff= (NfnOff+NfpOff+NsubsOff)/(Nref+eps); diff -r 000000000000 -r 2fadb31a9d55 functions/challange/eventDetectionMetrics_frameBased.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/challange/eventDetectionMetrics_frameBased.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,34 @@ +function [results] = eventDetectionMetrics_frameBased(outputFile,GTFile) + +% Frame-based evaluation for event detection task +% outputFile: the output of the event detection system +% GTFile: the ground truth list of events + +% Load event list from output and ground-truth +[onset,offset,classNames] = loadEventsList(outputFile); +[onsetGT,offsetGT,classNamesGT] = loadEventsList(GTFile); + + +% Convert event list into frame-based representation (10msec resolution) +[eventRoll] = convertEventListToEventRoll(onset,offset,classNames); +[eventRollGT] = convertEventListToEventRoll(onsetGT,offsetGT,classNamesGT); + + +% Fix durations of eventRolls +if (size(eventRollGT,1) > size(eventRoll,1)) eventRoll = [eventRoll; zeros(size(eventRollGT,1)-size(eventRoll,1),16)]; end; +if (size(eventRoll,1) > size(eventRollGT,1)) eventRollGT = [eventRollGT; zeros(size(eventRoll,1)-size(eventRollGT,1),16)]; end; + + +% Compute frame-based metrics +Nref = sum(sum(eventRollGT)); +Ntot = sum(sum(eventRoll)); +Ntp = sum(sum(eventRoll+eventRollGT > 1)); +Nfp = sum(sum(eventRoll-eventRollGT > 0)); +Nfn = sum(sum(eventRollGT-eventRoll > 0)); +Nsubs = min(Nfp,Nfn); + + +results.Rec = Ntp/(Nref+eps); +results.Pre = Ntp/(Ntot+eps); +results.F = 2*((results.Pre*results.Rec)/(results.Pre+results.Rec+eps)); +results.AEER = (Nfn+Nfp+Nsubs)/(Nref+eps); diff -r 000000000000 -r 2fadb31a9d55 functions/challange/loadEventsList.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/challange/loadEventsList.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,27 @@ +function [onset,offset,classNames] = loadEventsList(filename) + +% Open raw file +fid = fopen(filename,'r+'); + +% Read 1st line +tline = fgetl(fid); +onset_offset(:,1) = sscanf(tline, '%f\t%f\t%*s'); +classNames{1} = char(sscanf(tline, '%*f\t%*f\t%s')'); + +% Read rest of the lines +i=1; +while ischar(tline) + i = i+1; + tline = fgetl(fid); + if (ischar(tline)) + onset_offset(:,i) = sscanf(tline, '%f\t%f\t%*s'); + classNames{i} = char(sscanf(tline, '%*f\t%*f\t%s')'); + end; +end + +% Split onset_offset +onset = onset_offset(1,:)'; +offset = onset_offset(2,:)'; + +% Close file +fclose(fid); \ No newline at end of file diff -r 000000000000 -r 2fadb31a9d55 functions/fe_funcs/FE.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/fe_funcs/FE.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,217 @@ +% Yet another feature extractor, this time with CHiME in mind. +% Some cleanup, more support for customised audio parameters. +% Updated 9th Aug 2011 +% +% Outputs have been changed. Currently no logarithms are taken here any +% more. +% +% Input: +% - 'sam' is a the audio, either as column or row channels vectors. +% (Longer dimension is treated as time, shorter as channel count.) +% - 'audioconf' is as defined in getconfigs.m . All of its parameters +% ARE respected now, so pass a temporary, edited copy if you want +% to change the behaviour. +% +% Output: +% - 'feats' is a [bands x frames x featchannels] array of mel features. +% If audioconf.melbands is zero, FFT magnitudes are returned instead. +% - 'energies' is a [frames x audiochannels] matrix of frame energies +% - 'frameaudio' is a [framelen x frames x audiochannels] array of chopped +% audio data (with preprocessing but without the window function). +% - 'frameFFT' is an [FFTlow x frames x audiochannels] array of frame FFTs. +% The windowing function has been applied, and the result has been +% truncated to Nfft/2 + 1 bands. However, no abs is taken. You can +% do this in the calling function, or pick the abs values from 'feats' +% by using zero melbands. +% +% The main feature output respects audioconf.featchannels, which should +% be either the same as audioconf.channels (the number of input streams) +% or 1 (downmixed to mono by taking the mean of feature channels). Other +% outputs use original audio channels, because their averaging is not as +% well defined. Note that there is a significant difference between +% averaging the audio (causing waveform level phase attenuation) and the +% abs-FFT or Mel features (phase-invariant energy mean). If the former is +% what you need, downmix the audio in the calling function. +% +% Some warnings are shown if audio parameters are missing or they do not +% match with the data. + +function [feats, energies, frameaudio, frameFFT] = FE(sam, audioconf) + +verbose = 0; + +% Default config. To guarantee intended operation, you should always +% pass your own, though. + +defconf.channels = 2; % input channels, in CHiME always 2 +defconf.featchannels = 1; % feature level channels +defconf.fs = 16000; % sampling rate for internal processing +defconf.maxf = 8000; % maximum frequency to be considered +defconf.minf = 64; % maximum frequency to be considered +defconf.melbands = 26; % mel band count (0 to disable) +defconf.framelen_ms = 25; % millisecond length of each frame +defconf.framestep_ms = 10; % millisecond step between frames +defconf.windowfunc = 'hamming'; % window function name +defconf.preemphasis = 0.97; % 0 to disable +defconf.dcremoval = true; % DC removal in the feature extractor +defconf.Nfft = 0; % Number of FFT bands (0 to calculate from framelength) + +if nargin < 2 + if verbose + disp('No audioconf given, using defaults.') + end + audioconf = defconf; +else + fldnames = fieldnames(defconf); + for fl = 1:length(fldnames) + if ~isfield(audioconf, fldnames{fl}) + if verbose + fprintf('Field %s missing, copying from defaults.\n', fldnames{fl}) + end + audioconf.(f)=defconf.(f); + end + end +end + +% Fetch the shorthand variables. +featbands = audioconf.melbands; +featchans = audioconf.featchannels; +fs = audioconf.fs; +fhigh = audioconf.maxf; +flow = audioconf.minf; + +framelen = ceil(fs * audioconf.framelen_ms / 1000); +frameshift = ceil(fs * audioconf.framestep_ms / 1000); +% framelen = (fs * audioconf.framelen_ms / 1000); +% frameshift = (fs * audioconf.framestep_ms / 1000); + +if audioconf.Nfft == 0 + Nfft = 2^nextpow2(framelen); +else + Nfft = audioconf.Nfft; +end + +winfunc = str2func(audioconf.windowfunc); +win = winfunc(framelen); + +if featbands == 0 + melmode = false; +else + melmode = true; +end + +% Switch audio to columns. +if size(sam, 1) < size(sam,2) + sam = sam'; +end + +samlen = size(sam, 1); +samchans = size(sam, 2); + +if samchans ~= audioconf.channels + if verbose + fprintf('Warning: Audio has %i channels, config states %i.\n', samchans, audioconf.channels); + end +end + +if melmode + melmat = mel_matrix(fs, featbands, Nfft, 1, fhigh, flow)'; + if size(melmat, 1) ~= featbands + fprintf('Mel matrix has %i bands (config: %i).\n', size(melmat, 1), featbands); + end + if size(melmat, 2) ~= (Nfft/2 + 1) + fprintf('Mel matrix has %i FFT coeffs (expected: %i).\n', size(melmat, 2), Nfft/2 + 1); + end +end + +% Truncate to full frames, get the number. +numframes = floor((samlen-framelen+frameshift) / frameshift); +sam = sam(1:(numframes*frameshift+framelen-frameshift), :); + +% DC removal - introduces a 1-unit filter delay, thus we discard the +% first sample. Note that this behaviour has changed from earlier +% versions of FE. +if audioconf.dcremoval + samf = filter([1;-1], [1;-0.999], [zeros(1,samchans);sam]); + sam = samf(2:end, :); +end +samtrlen = size(sam, 1); % trimmed length + +% Pre-emphasis if nonzero. Can be done for the whole audio at once. +if (audioconf.preemphasis > 0) + sam = [zeros(1, samchans); sam(2:samtrlen, :) - audioconf.preemphasis * sam(1:(end-1), :)]; +end + +if melmode + tmpfeats = zeros(featbands, numframes, samchans); +else + tmpfeats = zeros(Nfft/2 + 1, numframes, samchans); +end + +energies = zeros(numframes, samchans); +frameaudio = zeros(framelen, numframes, samchans); +frameFFT = zeros(Nfft/2+1, numframes, samchans); + + +% Process channels one by one. Trying to perform these ops simultaneously +% for all channels might be possible but tricky. +for c = 1:samchans + + % starting sample numbers of each frame + ind1 = 1:frameshift:samtrlen-1-framelen+frameshift; + % linear 1-step vector (1...frame length) + ind2 = (1:framelen)'; + + % Pick frame audio. The index matrix (framelen x numframes) consists + % of four summed parts: + % 1) Constant column vectors, each denoting the frame's start sample. + % 2) Increasing sample index column vectors + % 3) Scalar jump to get into the correct channel in linear indexing + % 4) -1 because the first two indices are both one-based. + % + % [start1 start2 ] [ 1 1 ] + % sam( [ ... ... ] + [... ... ] + channel jump - 1) = + % [start1 start2 ] [frl frl ] + % + % [ start1+1 start2+1 ] + % sam( [ ... ... ] + channel jump - 1) + % [start1+frl start2+frl ] + % + % Thus we get an index matrix, where each frame column picks the + % samples belonging to it. These samples are then fetched to 'fra'. + + fra = sam(ind1(ones(framelen,1),:) + ind2(:,ones(1,numframes)) + (c-1)*samtrlen - 1); + frameaudio(:,:,c) = fra; + + % Calculate the energies. + energies(:,c) = sum(fra.^2,1)'; + + % Apply window function, take FFT. + fFFT = fft(win(:,ones(1,numframes)) .* fra, Nfft); + % Truncate and reset constant factor, but do not take abs yet. + fFFT(1,:) = 0; + fFFT = fFFT(1:Nfft/2+1,:); + + % Store the returned FFTs with phase. + frameFFT(:,:,c) = fFFT; + + if melmode + tmpfeats(:,:,c) = melmat * abs(fFFT); + else + tmpfeats(:,:,c) = abs(fFFT); + end +end + +% Flatten the features if downmixing to 1 is defined. +if featchans == 1 + if samchans > 1 + feats = mean(tmpfeats, 3); + else + feats = tmpfeats; + end +else + if samchans ~= featchans + fprintf('Requested %i feature channels for %i audio - not defined. Returning %i.\n', featchans, samchans, samchans) + end + feats = tmpfeats; +end diff -r 000000000000 -r 2fadb31a9d55 functions/fe_funcs/mel_matrix.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/fe_funcs/mel_matrix.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,39 @@ +function [M,fCen,Mfull]=mel_matrix(fs,NbCh,Nfft,warp,fhigh,flow) +% [M,fCen,Mfull]=mel_matrix(fs,NbCh,Nfft,warp,fhigh) +% returns Nfft/2+1 - by - NbCh matrix of triangular weights +% NbCh filterbanks linearly spaced on a MEL scale till fhigh +% warp: default 1.0 +% fhigh: default=fs/2 +% Mfull: matrix without truncation to Nfft/2+1 points + +if nargin<4, + warp=1; +end +if nargin<5, + fhigh=fs/2; +end + +LowMel=2595 * log10(1+flow/700); +NyqMel=2595 * log10(1+fhigh/700); + +StartMel=LowMel + (0:NbCh-1)/(NbCh+1)*(NyqMel-LowMel); +fCen=warp*700*(10.^(StartMel/2595)-1); +StartBin=round(Nfft/fs*fCen)+1; + +EndMel=LowMel + (2:NbCh+1)/(NbCh+1)*(NyqMel-LowMel); +EndBin=round(warp*Nfft/fs*700*(10.^(EndMel/2595)-1))+1; + +TotLen=EndBin-StartBin+1; + +LowLen=[StartBin(2:NbCh) EndBin(NbCh-1)]-StartBin+1; +HiLen=TotLen-LowLen+1; + +M=sparse([],[],[],ceil(warp*Nfft/2+1),NbCh,sum(TotLen)); +for k=1:NbCh, + M(StartBin(k):StartBin(k)+LowLen(k)-1,k)=(1:LowLen(k))'/LowLen(k); + M(EndBin(k)-HiLen(k)+1:EndBin(k),k)=(HiLen(k):-1:1)'/HiLen(k); +end +%TotWeight=sum(M,1); +Mfull=M; +M=M(1:Nfft/2+1,:); +%WeightSum=full([sum(M,1);TotWeight]); \ No newline at end of file diff -r 000000000000 -r 2fadb31a9d55 functions/fe_funcs/stacklp.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/fe_funcs/stacklp.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,14 @@ +function y=stacklp(x,N) + +if nargin<2, + N=5; +end + +[D,T]=size(x); +xext=x(:,[ones(1,N) 1:T T*ones(1,N)]); + +y=zeros((2*N+1)*D,T); +if islogical(x), y=logical(y);end % copy logical property +for k=0:2*N, + y(k*D+(1:D),:)=xext(:,k+1:k+T); +end \ No newline at end of file diff -r 000000000000 -r 2fadb31a9d55 functions/fe_funcs/vec2featmat.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/fe_funcs/vec2featmat.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,9 @@ +function [M,Mdct]=vec2featmat(D,cep_ord) + +b0=[0 0 0 0 1 0 0 0 0]; +b1=[0 0 2 1 0 -1 -2 0 0]/10; +b2=conv(b1(3:7),[2 1 0 -1 -2]/10); + +Mdct=[cos(pi*(1:cep_ord)'*((1:D)-0.5)/D);ones(1,D)]; + +M=kron([b0;-b1;b2],Mdct); \ No newline at end of file diff -r 000000000000 -r 2fadb31a9d55 functions/funcsMobilab/feature_extraction.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/funcsMobilab/feature_extraction.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,51 @@ +function [features] = feature_extraction(segment,config,startStop,minC0) +%% +% Mobilab 2012-2013 +% +%% +if nargin < 4, minC0 = []; end, %minC0 will be placed empty +%if nargin < 3, startStop = [0 size(segment,1)/config.fs]; end, %Default no segmentation +if nargin < 3, startStop = []; end, %startSop is left empty, no segmentation occurs +%% +% Calculate the features +mel_feat = FE(segment, config); +logmel_feat = log(mel_feat); +[MM,Mstatic] = vec2featmat(config.melbands, config.cepOrder); +mfcc_static = Mstatic*logmel_feat; +mfcc_d_dd = MM*stacklp(logmel_feat,4); + +%Features +features.mel_feat=mel_feat; +features.logmel_feat=logmel_feat; +features.mfcc_static=mfcc_static; +features.mfcc_d_dd=mfcc_d_dd; + +% Standard no segmentation and threholding +indicesEvent = [1:size(mel_feat,2)]; +indicesSilence = []; +indicesAboveThreshold = [1:size(mel_feat,2)]; +indicesBelowThreshold = []; + +%Do a segmentation +if (~isempty(startStop)) + %Determine start and stop frame + startFrame = 1 + floor((startStop(1)/(config.framestep_ms/1000))); + stopFrame = startFrame + ceil((startStop(2)-startStop(1))/(config.framestep_ms/1000)); + if(stopFrame > size(mel_feat,2)),stopFrame = size(mel_feat,2); end, + %These frame indices correspond to the event + indicesEvent = [startFrame : stopFrame]; + %These frame indices correspond to the silence + indicesSilence = [1:startFrame-1 stopFrame+1:size(mel_feat,2)]; + %Take only those event frames where C0 > minC0 + %Take only those silence frames where C0 <= minC0 + if (~isempty(minC0)) + indicesAboveThreshold=find(mfcc_static(end,:)>minC0); + indicesBelowThreshold=find(mfcc_static(end,:)=length(format), + formaatBestandsnaam = bestandsnaam(end-length(format)+1:end); + if strcmp(formaatBestandsnaam, char(ones(1,0))); + formaatBestandsnaam = ''; + end + if strcmp(formaatBestandsnaam, format), + files.labels(i) = {bestandsnaam(1:(length(bestandsnaam)-length('01.wav')))}; + files.names(i) = {bestandsnaam}; + i = i+1; + end + end + end +end \ No newline at end of file diff -r 000000000000 -r 2fadb31a9d55 functions/funcsMobilab/gmm_classification.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/funcsMobilab/gmm_classification.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,34 @@ +function [ loglik y_classified] = gmm_classification(features, gmmParam) +%% +% Mobilab 2012-2013 +% +% Input arguments: +% - features is a matrix structure:: +% *) rows: different coefficients +% *) columns: different frames +% - gmmParem is a cellStructure: +% As many cells as GMM models: +% - Config: +% *) windowClassification_ms: the size of the for classification +% *) samplfeFrequency: the sample frequency of the .wav files +% *) framelen_ms: is the size of the frames where the MFCC are +% on computed +% Output arguments +% - probs is a matrix structure: +% *) rows: the prob of the corresponding event +% %) columns: the corresponding window +%% +%Prealloction for speed +nrEvents=length(features); +nrClasses=length(gmmParam); +loglik=zeros(nrClasses, nrEvents); +%Do a classification +for(classNr=1:nrClasses) + for(eventNr=1:nrEvents) + loglik(classNr,eventNr) = mean(log(pdf(gmmParam{classNr},features{eventNr,1}'))); + end, +end, +[ignore y_classified] = max(loglik); +end + + diff -r 000000000000 -r 2fadb31a9d55 functions/funcsMobilab/readCSV.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/funcsMobilab/readCSV.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,64 @@ +function [txt_struct] = readCSV(file) + + % text openen om volledig in te lezen + fid = fopen(file); + txt = ''; + if fid ~= -1 + while 1 + line = fgetl(fid); + if ~ischar(line), break, end + txt = [txt line ' \n ']; + end + end + fclose(fid); + + % text aanpassen + txt_struct = {}; + l = 1; + k = 1; + voorlopigVak = ''; + volgendeOverslaan = false; + for i = 1:length(txt) + switch txt(i) + case ';' + txt_struct{l,k} = voorlopigVak; + voorlopigVak = ''; + k = k+1; + case '\' + if txt(i+1) == 'n' + txt_struct{l,k} = voorlopigVak; + voorlopigVak = ''; + l = l+1; + k = 1; + volgendeOverslaan = true; + else + if volgendeOverslaan == false + voorlopigVak = [voorlopigVak txt(i)]; + else + volgendeOverslaan = false; + end + end + otherwise + if volgendeOverslaan == false + voorlopigVak = [voorlopigVak txt(i)]; + else + volgendeOverslaan = false; + end + end + end + + % lege lijnen (of met spaties) onderaan wissen + ok = false; + while ok == false + for i = 1:size(txt_struct,2); + for j = 1:length(txt_struct{end,i}) + if not(strcmp(txt_struct{end,i}(j), ' ')) + ok = true; + end + end + end + if ok == false + txt_struct = txt_struct(1:end-1,:); + end + end +end diff -r 000000000000 -r 2fadb31a9d55 functions/funcsMobilab/read_annotation_develop.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/funcsMobilab/read_annotation_develop.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,31 @@ +function [startStop labels] = read_annotation_develop(directory, fileID) +%% Open corresponding textfile +fid=fopen([directory filesep fileID], 'r'); +text = fread(fid, '*char'); +fclose(fid); +%% Preallocation +% Find rows (separated by enter) and is equal by the nr of Events +posEnter = strfind(text', char(10)); +nrEvents = length(posEnter); +% Preallocation for speed +startStop = zeros(nrEvents,2); +labels = cell(nrEvents,1); +%% Extract data from the script +%First event +subText = text(1:posEnter(1,1)-1)'; +%Find tab positions +tabs = strfind(subText, char(9)); +startStop(1,1) = str2num(subText(1:tabs(1)-1)); +startStop(1,2) = str2num(subText(tabs(1)+1:tabs(2)-1)); +labels{1,1} = subText(tabs(2)+1:length(subText)); + +%All remaining events +for(eventNr=2:nrEvents) + subText = text(posEnter(1,eventNr-1)+1:posEnter(1,eventNr)-1)'; + %Find tab positions + tabs = strfind(subText, char(9)); + startStop(eventNr,1) = str2num(subText(1:tabs(1)-1)); + startStop(eventNr,2) = str2num(subText(tabs(1)+1:tabs(2)-1)); + labels{eventNr,1} = subText(tabs(2)+1:length(subText)); +end, +end \ No newline at end of file diff -r 000000000000 -r 2fadb31a9d55 functions/funcsMobilab/read_data_develop.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/functions/funcsMobilab/read_data_develop.m Fri Oct 11 12:02:43 2013 +0100 @@ -0,0 +1,80 @@ +function [segments,samples,labels] = read_data_develop(directoryAudio,directoryAnnotation,new_fs) +%% +% Mobilab 2012-2013 +% +% Input arguments: +% - directoryAudi: +% Contains the path to the folder where the data is located +% - directoryAnnotation: +% Contains the path to the folders where the annotations are located +% - new_fs: +% Resample the .wav files to this sample frequency. If left +% blank, no resampling occur +% Output arguments +% - segments is a cell structure: +% Columns: # number of development examples +% Rows: # numer of annotators +% Each cell contains the segmented files from corresponding +% development file and annotator +% - samples is a cell structure: +% Same structure as segments. Each cell contains the start and +% stop time of the event. +% - labels is a cell structure: +% Same structure as segments. Each cell contains the labels of +% the corresponding events +%% Defaults +down=1; if nargin < 3; down=0; end +%% Annotation +addpath(directoryAudio); +files=find_format(directoryAudio, '.wav'); +names=files.names; +nrScripts = length(names); + +% Preallocation for speed +segments=cell(1,nrScripts); +samples=cell(1,nrScripts); +labels=cell(1,nrScripts); + +for(scriptNr=1:nrScripts) + + [x_temp fs] = wavread(names{1,scriptNr}); + if(down==1) + x_temp = resample(x_temp,new_fs,fs); + fs=new_fs; + end, + + + addpath(directoryAnnotation); + files2=find_format(directoryAnnotation, 'txt'); + annotation=textread(files2.names{1,i}, '%s' ,'delimiter', '\t'); + startStop=zeros(length(annotation)/3,2); + id=cell(length(annotation)/3,1); + u=1; + for(v=1:3:length(annotation)) + startStop(u,1)=str2num(annotation{v,1}); + startStop(u,2)=str2num(annotation{v+1,1}); + x{u,2}=fs; + x{u,1}=x_temp(ceil(startStop(u,1)*x{u,2})+1:ceil(startStop(u,2)*x{u,2})); + id{u,1}=annotation{v+2,1}; + u=u+1; + end, + segments{k,i}=x; + samples{k,i}=startStop; + labels{k,i}=id; + rmpath(directoryAnnotation{k,1}); + clear x; + end, +end + + + + + + + + + + + + + diff -r 000000000000 -r 2fadb31a9d55 readme/Readme.docx Binary file readme/Readme.docx has changed diff -r 000000000000 -r 2fadb31a9d55 readme/Readme.pdf Binary file readme/Readme.pdf has changed