function [] = training(numBases, ANOT_FLAG, save_flag)
% TRAINING  Training algorithm for the Event Detection task.
%
%   training(numBases, ANOT_FLAG, save_flag) collates the annotated event
%   segments of all training recordings of each class into one audio stream
%   per class, computes a CQT representation of every stream (computeCQT),
%   learns numBases spectral bases per class with nmf_beta (beta = 1) and
%   concatenates the bases of all classes column-wise into the dictionary
%   Dict, which is optionally saved to disk.
%
% PARAMETERS:
%
%   numBases
%       Number of bases learnt per class (required, positive integer).
%       Suggestions: 5, 8, 10, 12, 15, 20 (other values only raise a warning).
%   ANOT_FLAG
%       Choose annotation: 1 or 2 (default 1). Selects both the folder
%       'Training_Set/Annotation<ANOT_FLAG>/' and the annotator suffix
%       ('_bdm' for 1, '_sid' for 2).
%   save_flag
%       Flag for saving the output dictionary (default 1).
%       1: ON, 0: OFF. When ON, Dict is written to
%       'Dictionaries<suffix>/Dict<numBases>.mat'.
%
% ERRORS:
%   Raised when numBases is missing or not a positive integer, when
%   ANOT_FLAG is not 1 or 2, when fewer recordings than expected are found,
%   when the recordings do not share one sampling frequency, or when a
%   class ends up without any usable audio.

% PARAMETER DEFAULTS
if ~exist('save_flag','var') || isempty(save_flag), save_flag = 1; end
if ~exist('ANOT_FLAG','var') || isempty(ANOT_FLAG), ANOT_FLAG = 1; end

% INPUT VALIDATION
if ~exist('numBases','var') || isempty(numBases)
    error('numBases must be specified (suggested values: 5, 8, 10, 12, 15, 20)');
end
if ~isscalar(numBases) || ~isnumeric(numBases) || numBases < 1 || numBases ~= round(numBases)
    error('numBases must be a positive integer scalar');
end
if ~any(numBases == [5, 8, 10, 12, 15, 20])
    warning('Chosen number of Bases different than suggested values')
end
% The scalar check matters: a vector such as [1 2] would otherwise pass the
% membership test and only fail later while building the paths.
if ~isscalar(ANOT_FLAG) || ~any(ANOT_FLAG == [1, 2])
    error('ANOT_FLAG can be either 1 or 2 (depending on chosen annotation)')
end

% INITIALISATIONS

% Annotator suffixes, indexed by ANOT_FLAG
Annotators = {'_bdm', '_sid'};

% Paths to the audio data and to the chosen annotation
datapath = 'Training_Set/singlesounds_stereo';
anotpath = ['Training_Set/Annotation' num2str(ANOT_FLAG) '/'];

Classes = {'alert','clearthroat','cough','doorslam','drawer','keyboard','keyes',...
           'knock','laughter','mouse','pageturn','pendrop','phone','printer',...
           'speech','switch'};
numClasses  = numel(Classes);
numPerClass = 20;    % training instances per class

% List of all the audio files.
% NOTE(review): the class of a file is inferred purely from its position in
% this listing (numPerClass consecutive files per class), so the listing
% order returned by dir is assumed to group the files by class - confirm.
AudioList = dir([datapath '/*wav']);
numNeeded = numClasses * numPerClass;
if numel(AudioList) < numNeeded
    error('Expected at least %d wav files in %s but found %d', ...
          numNeeded, datapath, numel(AudioList));
end

% Audio stream for every event class
xin = cell(numClasses, 1);

% Sampling frequency, taken from the first recording that is read
Fs = [];

% LOADING
for i = 1 : numClasses
    for k = 1 : numPerClass
        fileName = AudioList((i-1)*numPerClass + k).name;
        [~, baseName] = fileparts(fileName);

        % Read the annotation from the text file:
        %   beg: onset of the event, fin: offset of the event.
        % Both are multiplied by Fs below to obtain sample indices.
        anotFile = [anotpath baseName Annotators{ANOT_FLAG} '.txt'];
        [beg, fin] = textread(anotFile, '%f%f');

        % Read the recording once; the event is sliced out of it below
        [x, fs] = readAudio([datapath '/' fileName]);
        if isempty(Fs)
            Fs = fs;
        elseif fs ~= Fs
            error('The sampling frequency is not the same for all recordings!');
        end
        Max_sample = size(x, 1);

        % One iteration per annotated event (normally a single row)
        for e = 1 : numel(beg)
            % Clamp the segment to the recording so no overflow occurs
            first = max(round(beg(e)*Fs), 1);
            last  = min(round(fin(e)*Fs), Max_sample);
            if first > last
                warning('Skipping empty annotated segment in %s', fileName);
                continue;
            end

            % Down-mix the (stereo) channels
            xnow = sum(x(first:last, :), 2) / 2;

            % Normalize individual segments to avoid over-energetic
            % transients in the audio streams per class. A segment with zero
            % deviation would turn into NaN/Inf and is skipped.
            sigma = std(xnow);
            if sigma == 0 || ~isfinite(sigma)
                warning('Skipping silent annotated segment in %s', fileName);
                continue;
            end
            xin{i} = [xin{i}; xnow ./ sigma];
        end
    end
end

% LEARNING
% Learn bases for every class

% NOTE(review): step used to subsample the CQT frames; the value is kept
% from the original implementation, presumably matching the frame rate used
% at detection time - confirm against computeCQT.
frameStep = 7.1128;

% Bases of each class, concatenated once after the loop
bases = cell(1, numClasses);
for i = 1 : numClasses
    if isempty(xin{i})
        error('No usable training audio was collected for class %d', i);
    end
    fprintf('Learning bases for class %d of %d\n', i, numClasses);

    % CQT of the audio stream of this class, subsampled along the frames
    intCQT  = computeCQT(xin{i});
    cqt_rep = intCQT(:, round(1:frameStep:size(intCQT,2)));

    % Four outputs are requested, as before, in case nmf_beta depends on nargout
    [W, ~, ~, ~] = nmf_beta(cqt_rep, numBases, 'beta', 1);
    bases{i} = W;
end
Dict = [bases{:}];

% SAVING
if save_flag == 1
    saveDir = ['Dictionaries' Annotators{ANOT_FLAG}];
    if ~exist(saveDir, 'dir')
        mkdir(saveDir);
    end
    savefile = [saveDir '/Dict' num2str(numBases) '.mat'];
    save(savefile, 'Dict');
end


function [x, fs] = readAudio(filename)
% READAUDIO  Read a whole audio file and its sampling frequency.
%   Uses audioread when it is available and falls back to wavread, which is
%   missing from recent MATLAB releases.
if exist('audioread', 'file') || exist('audioread', 'builtin')
    [x, fs] = audioread(filename);
else
    [x, fs] = wavread(filename);
end