view training.m @ 1:3ea8ed09af0f tip

additional clarifications
author Dimitrios Giannoulis
date Wed, 13 Mar 2013 11:57:24 +0000
parents 22b10c5b72e8
children
line wrap: on
line source
function [] = training(numBases, ANOT_FLAG, save_flag)
% TRAINING  Learn a per-class dictionary of bases for the Event Detection Task.
%
%   training(numBases, ANOT_FLAG, save_flag) collates all training
%   recordings of each event class into a single audio stream, computes a
%   time-frequency representation of every stream with computeCQT, learns
%   numBases bases per class with nmf_beta and concatenates the bases of
%   all classes column-wise into the matrix Dict.
%
% PARAMETERS:
%
% numBases  (required)
%       Number of bases learnt per class (positive integer scalar).
%       suggestions: 5, 8, 10, 12, 15, 20
% ANOT_FLAG (optional, default 1)
%       Choose Annotation: 1 or 2
% save_flag (optional, default 1)
%       Flag for saving the output Dictionary
%       1: ON, 0: OFF
%
% OUTPUT:
%       None. When save_flag == 1 the dictionary is written to
%       Dictionaries<annotator suffix>/Dict<numBases>.mat
%
% The function expects to be run from the folder that contains the
% 'Training_Set' directory (all paths below are relative).

% PARAMETER DEFAULTS:
%
if ~exist('save_flag','var') || isempty(save_flag), save_flag = 1; end
if ~exist('ANOT_FLAG','var') || isempty(ANOT_FLAG), ANOT_FLAG = 1; end

% PARAMETER CHECKS

% numBases has no default: fail with a readable message instead of an
% "undefined variable" error further down.
if ~exist('numBases','var') || isempty(numBases)
    error('numBases must be given (suggested values: 5, 8, 10, 12, 15, 20)');
end
if ~isnumeric(numBases) || ~isscalar(numBases) || numBases < 1 || numBases ~= round(numBases)
    error('numBases must be a positive integer scalar');
end
if ~any(numBases == [5, 8, 10, 12, 15, 20])
    warning('Chosen number of Bases different than suggested values')
end

if ~isscalar(ANOT_FLAG) || ~any(ANOT_FLAG == [1, 2])
    error('ANOT_FLAG can be either 1 or 2 (depending on chosen annotation)')
end

% INITIALISATIONS

% File-name suffix of the annotation files / output folder, indexed by
% ANOT_FLAG.
Annotators = {'_bdm', '_sid'};

% Paths to the audio data and to the chosen annotation set
datapath = 'Training_Set/singlesounds_stereo';
anotpath = ['./Training_Set/Annotation' num2str(ANOT_FLAG) '/'];

Classes = {'alert','clearthroat','cough','doorslam','drawer','keyboard','keyes',...
           'knock','laughter','mouse','pageturn','pendrop','phone','printer',...
           'speech','switch'};
numClasses   = length(Classes);
numInstances = 20;      % training instances per class

% List of all the Audio files.
% NOTE(review): the loop below assumes this listing is grouped by class, in
% the same order as Classes, with numInstances consecutive files per class.
% dir() ordering is OS dependent - confirm on the target platform.
AudioList = dir([datapath '/*wav']);

if numel(AudioList) < numClasses*numInstances
    error('Expected at least %d audio files in "%s" but found %d', ...
          numClasses*numInstances, datapath, numel(AudioList));
end

% Get the sampling frequency from the 1st recording. One sample is enough,
% there is no need to decode the whole file.
[~,Fs] = wavread([datapath '/' AudioList(1).name], 1);

% Initialise Audio stream for all event classes
xin = cell(numClasses,1);

% Loading signals for each class
for i = 1 : numClasses

    % Take all train instances of the class
    for k = 1 : numInstances
        AudioName = AudioList((i-1)*numInstances + k).name;
        AudioFile = [datapath '/' AudioName];
        % Annotation file: audio name without its 4-character extension,
        % plus the annotator suffix.
        AnotFile  = [anotpath AudioName(1:end-4) Annotators{ANOT_FLAG} '.txt'];

        % Read the annotation from the text file.
        % beg / fin: beginning and end of the event. They are multiplied
        % by Fs below, i.e. they are treated as times in seconds.
        [beg,fin] = textread(AnotFile,'%f%f');

        % Number of samples in the file, queried from the header only
        % (avoids reading the whole recording just to get its length).
        siz = wavread(AudioFile,'size');
        Max_sample = siz(1);

        % Read only the annotated event, clamped to the valid sample range
        [xnow,fs] = wavread(AudioFile, [max(round(beg*Fs),1) min(round(fin*Fs),Max_sample)]);
        if fs ~= Fs
            error('The sampling frequency is not the same for all recordings!');
        end

        % Down-mix the two channels to mono
        xnow = sum(xnow,2)/2;

        % Normalize individual segments to avoid over-energetic transients
        % in the audio streams per class. A constant (e.g. all-zero) segment
        % has zero standard deviation; dividing would fill the stream with
        % NaN/Inf, so such a segment is appended unscaled.
        sigma = std(xnow);
        if sigma > 0
            xnow = xnow./sigma;
        else
            warning('Segment of "%s" has zero variance and was not normalised', AudioName);
        end

        xin{i} = [xin{i} ; xnow];
    end
end
% Clear unneeded variables
clear xnow;

% Per-class normalisation of the collated streams is disabled; only the
% per-segment normalisation above is applied.
% for i = 1 : numClasses
%    xin{i} = xin{i}./std(xin{i});
% end

% LEARNING
% Learn Bases for every Class

% Initialise Dictionary
Dict = [];

% Calculate the CQT for each training audio stream for all different
% Classes and learn numBases bases from it.
for i = 1 : size(xin,1)
    fprintf('Learning bases for class %d of %d (%s)\n', i, size(xin,1), Classes{i});

    intCQT = computeCQT(xin{i});
    % Keep one column out of every ~7.1128 (rounded to the nearest index).
    % NOTE(review): presumably a time-decimation to a target frame rate -
    % confirm where the constant comes from.
    cqt_rep = intCQT(:,round(1:7.1128:size(intCQT,2)));

    % Only the basis matrix W is needed; the remaining outputs are still
    % requested (and discarded) so nmf_beta is called exactly as before.
    [W,~,~,~] = nmf_beta(cqt_rep,numBases,'beta',1);
    Dict = [Dict W];
end

% figure; imagesc(cqt_rep'); axis xy;

if save_flag == 1
    outdir = ['Dictionaries' Annotators{ANOT_FLAG}];
    % save() does not create missing folders
    if ~exist(outdir,'dir')
        mkdir(outdir);
    end
    savefile = [outdir '/Dict' num2str(numBases) '.mat'];
    save(savefile,'Dict');
end