comparison core/magnatagatune/tests_evals/test_generic_features_parameters_crossval.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
function out = test_generic_features_parameters_crossval...
    (fparams_all, trainparams_all, trainfun, featuretype)
% out = test_generic_features_parameters_crossval...
%     (fparams_all, trainparams_all, trainfun, featuretype)
%
% this is a special function designed for the ISMIR 2012
% publication. A fixed and partitioned similarity dataset is used,
% and the clip set as well as the features are SPECIFICALLY SELECTED
% to each combination of training and test sets.
%
% Every combination of feature parameters is paired with every
% combination of training parameters. For each pair the training
% function is run through do_test_rounds and the result is collected.
%
% Parameters:
%   fparams_all     - feature parameter description, expanded via
%                     param_combinations
%   trainparams_all - training parameter description, expanded via
%                     param_combinations. The entries must provide a
%                     'dataset' field naming a file readable by load();
%                     the optional fields 'inctrain' and 'notintrain'
%                     are evaluated if present.
%   trainfun        - training function, handed on to do_test_rounds
%   featuretype     - feature type; selects the feature database through
%                     MTTAudioFeatureDBgen.db_name(featuretype)
%
% Returns:
%   out - struct array with one entry per successful
%         (feature, training) parameter pair; [] if no pair succeeded.
%
% Side effects (files written to the current directory):
%   runlog_<hash>_param.xml / runlog_<hash>_err.xml  on feature failures
%   runlog_<fhash>_<thash>_finalresults.mat          after every pair
%   runlog_<fhash>_<thash>_params.mat                after every pair

% svn hook
my_revision = str2double(substr('$Rev$', 5, -1));
ST = dbstack();
testscript = ST(end).name;

global globalvars;
global comparison_ids;
global comparison_invids;

eval(sprintf('global %s', MTTAudioFeatureDBgen.db_name(featuretype)));

% ---
% get all combinations resulting from the above parameter set
% descriptions
% ---
fparams = param_combinations(fparams_all);

trainparams = param_combinations(trainparams_all);

% ---
% the hashes over the complete parameter sets name the result files.
% They do not change during the run, so compute them only once.
% ---
fparams_hash = hash(xml_format(fparams),'MD5');
trainparams_hash = hash(xml_format(trainparams),'MD5');

% ---
% the clips: sorted by comparison_id
%
% Now the clip type can be specified in the first training set
% ---
tmp_simdata = load(trainparams(1).dataset);
if isfield(tmp_simdata, 'comparison_ids')
    comparison_ids = tmp_simdata.comparison_ids;
    %comparison_invids = tmp_simdata.comparison_invids;
    clip_type = tmp_simdata.clip_type;
else
    % comparison_ids is declared global above, so this load fills the
    % global variable
    load ('db.mat', 'comparison','comparison_ids','comparison_names');

    clip_type = 'MTTClip';
end
comparison_invids = sparse(comparison_ids,1,[1:numel(comparison_ids)]);
clips = feval(clip_type,comparison_ids);

% ---
% provide some timing information
% ---
nruns = numel(fparams);
tatic = [];
tatoc = [];
ftoc = [];
mlrtic = [];
mlrtoc = [];
runs = 0;

res = [];

% make sure the output is defined even if no run succeeds
out = res;

% TEST
for i = 1:numel(fparams)

    % TIMING start
    runs = runs + 1;
    tatic(end+1) = cputime();

    % ---
    % forget the hash of the previous run, otherwise a failure in this
    % run would be logged under the previous feature configuration
    % ---
    paramhash = '';

    % warning ('disabled feature cache reset');
    MTTAudioFeatureDBgen.reset_feature_dbs('exclude',{'db_magnaaudiofeat'});

    % NOTE: the command has to be built with sprintf. Without it, eval
    % receives a string starting with '%', which is a comment, and the
    % reset is silently skipped.
    eval(sprintf('%s.reset;', MTTAudioFeatureDBgen.db_name(featuretype)));

    % extract features
    try
        % ---
        % try loading old features with the same parameter hash.
        % we use the md5 hash to distinguish between features
        % ---
        %paramhash = MTTAudioFeature.param_hash(featuretype, fparams(i));
        %featfile = sprintf('runlog_%s_feat.mat', paramhash);
        %if exist(featfile,'file') == 2
        %    eval(sprintf('%s.import(featfile);', MTTAudioFeatureDBgen.db_name(featuretype)));

        % load db
        paramhash=eval(sprintf('%s.load(featuretype, fparams(i), clips);', MTTAudioFeatureDBgen.db_name(featuretype)));
        paramhash=substr(paramhash, 0, -4);
        if ~isempty(paramhash)
            % ---
            % Here, we make sure the clips are
            % associated to the feature values
            % ---
            features = clips.features(featuretype, fparams(i));
            X = features.vector();

        else
            % ---
            % we extract the mixed features, but leave the option of
            % using no tags
            % ---
            features = clips.features(featuretype, fparams(i));
            features.define_global_transform();

            % get the feature vector
            X = features.vector();

            % % save features with specific filename to disc
            % xml_save(sprintf('runlog_%s_param.xml', paramhash), fparams(i));
            % features.saveto(featfile);

            paramhash=eval(sprintf('%s.save();', MTTAudioFeatureDBgen.db_name(featuretype)));
            paramhash=substr(paramhash, 0, -4);
        end

        % TIMING
        ftoc(end+1) = cputime - tatic(end);
        fprintf('Got features. took %2.2f minutes\n', ftoc(end) / 60);
    catch err

        print_error(err);

        % ---
        % TODO: save feature configuration and indicate failure
        % ---
        if isempty(paramhash)
            % no hash was obtained from the feature db in this run:
            % derive one from the feature parameters
            paramhash = hash(xml_format(fparams(i)),'MD5');
        end
        xml_save(sprintf('runlog_%s_param.xml', paramhash), fparams(i));
        xml_save(sprintf('runlog_%s_err.xml', paramhash), print_error(err));

        continue;
    end

    % skip empty feature set;
    if isempty(X)
        continue;
    end

    % iterate over trainparams
    for j = 1:numel(trainparams)

        % TIMING
        mlrtic(end+1) = cputime;
        paramhash_mlr = hash(xml_format(trainparams(j)),'MD5');

        % ---
        % Note: here we load the similarity data.
        % this data is truncated if inctrain is switched off:
        % only the last column of the training partitions is kept
        % ---
        simdata = load(trainparams(j).dataset);
        if isfield(trainparams(j),'inctrain') && (trainparams(j).inctrain == 0)

            simdata.partBinTrn = simdata.partBinTrn(:,end);

            if isfield(simdata, 'partBinNoTrn')
                simdata.partBinNoTrn = simdata.partBinNoTrn(:,end);
            end
        end

        % drop the 'partBinNoTrn' partitions if notintrain is switched off
        if isfield(trainparams(j),'notintrain') && (trainparams(j).notintrain == 0)

            if isfield(simdata, 'partBinNoTrn')
                simdata = rmfield(simdata, 'partBinNoTrn');
            end
        end

        % ---
        % NOTE: THIS IS TRAINING
        % call training function
        % ---
        [tmp] = do_test_rounds(trainfun, X, simdata, trainparams(j), fparams(i),...
            paramhash, paramhash_mlr, clips);

        % TIMING
        mlrtoc(end+1) = cputime - mlrtic(end);

        % ---
        % save result to result struct.
        % The emptiness test has to happen BEFORE any field is assigned:
        % assigning a field to [] would create a non-empty struct.
        % ---
        if ~isempty(tmp)

            tmp.finfo = features(1).data.info;
            tmp.fparams = features(1).my_params;
            tmp.fparamhash = paramhash;
            tmp.script = testscript;
            tmp.timeused = mlrtoc(end);

            if isempty(res)

                res = tmp;
            else

                res(end+1) = tmp;
            end

            % if mean_ok_test has two rows, only the first one is reported
            if size(tmp.mean_ok_test,1) == 2
                maxperf = max(max(tmp.mean_ok_test(1,:)));
            else
                maxperf = max(max(tmp.mean_ok_test));
            end
            fprintf('Learned something: %2.2f perc. Took %2.2f minutes \n',...
                maxperf * 100, mlrtoc(end) / 60 );
            pause(0.5);
        else
            warning('Learned nothing');
        end

        % save output
        out = res;

        % ---
        % save final results. This is repeated after every training
        % run, so the files always hold the results collected so far
        % ---
        save(sprintf('runlog_%s_%s_finalresults.mat',...
            fparams_hash,...
            trainparams_hash...
            ), 'out');

        % NOTE(review): this file is written by xml_save but carries a
        % .mat extension - confirm whether readers rely on this name
        xml_save(sprintf('runlog_%s_%s_params.mat',...
            fparams_hash,...
            trainparams_hash...
            ), struct('fparams', fparams_all, 'trainparams', trainparams_all) );

    end

    % TIMING
    clc;
    tatoc(end+1) = cputime - tatic(end);
    cprint(0,'%3.2f percent done, %2.2fh spent, %2.2fh to go. \n mlr / feature: %3.3f \n',...
        (runs / nruns) *100 , sum(tatoc) / 3600,...
        ((sum(tatoc) / runs) * (nruns - runs)) / 3600, mean(mlrtoc) / mean(ftoc) );
    cprint(0,'\nGo get a coffee, the next round will take %3.0f minutes \n', ...
        (mean(mlrtoc) * numel(trainparams) + mean(ftoc)) / 60);
end
end
227
228