annotate core/magnatagatune/tests_evals/test_generic_features_parameters_crossval.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 function out = test_generic_features_parameters_crossval...
wolffd@0 2     (fparams_all, trainparams_all, trainfun, featuretype)
wolffd@0 3 % out = test_generic_features_parameters_crossval(fparams_all, ...
wolffd@0 4 %           trainparams_all, trainfun, featuretype)
wolffd@0 5 %
wolffd@0 6 % this is a special function designed for the ISMIR 2012
wolffd@0 7 % publication. A fixed and partitioned similarity dataset is used,
wolffd@0 8 % and the clip set as well as the features are SPECIFICALLY SELECTED
wolffd@0 9 % to each combination of training and test sets.
wolffd@0 10 %
wolffd@0 11 % For every feature parameter combination the features are loaded from
wolffd@0 12 % the feature database (or extracted and saved if loading returns no
wolffd@0 13 % hash); do_test_rounds is then called once per training parameter
wolffd@0 14 % combination.
wolffd@0 15 %
wolffd@0 16 % Inputs:
wolffd@0 17 %   fparams_all     - feature parameter description; expanded with
wolffd@0 18 %                     param_combinations
wolffd@0 19 %   trainparams_all - training parameter description; expanded with
wolffd@0 20 %                     param_combinations. Each combination needs a
wolffd@0 21 %                     'dataset' field naming a file readable by load;
wolffd@0 22 %                     the optional fields 'inctrain' and 'notintrain'
wolffd@0 23 %                     are evaluated below
wolffd@0 24 %   trainfun        - handed on to do_test_rounds unchanged
wolffd@0 25 %   featuretype     - feature type, used to look up the feature
wolffd@0 26 %                     database and to request features from the clips
wolffd@0 27 %
wolffd@0 28 % Output:
wolffd@0 29 %   out - struct array with one entry per completed (feature, training)
wolffd@0 30 %         combination, [] if no combination completed
wolffd@0 31 %
wolffd@0 32 % Side effects: sets the globals comparison_ids and comparison_invids,
wolffd@0 33 % writes runlog_* files into the current directory and clears the
wolffd@0 34 % command window after each feature combination.
wolffd@0 35
wolffd@0 36 % legacy svn keyword hook; my_revision is not used further below
wolffd@0 37 my_revision = str2double(substr('$Rev$', 5, -1));
wolffd@0 38 ST = dbstack();
wolffd@0 39 testscript = ST(end).name; % last entry of the call stack
wolffd@0 40
wolffd@0 41 global globalvars;
wolffd@0 42 global comparison_ids;
wolffd@0 43 global comparison_invids;
wolffd@0 44
wolffd@0 45 % the feature database is a global whose name depends on the feature
wolffd@0 46 % type, so it has to be declared (and later addressed) through eval
wolffd@0 47 featdb = MTTAudioFeatureDBgen.db_name(featuretype);
wolffd@0 48 eval(sprintf('global %s', featdb));
wolffd@0 49
wolffd@0 50 % make sure the output exists even if no combination completes
wolffd@0 51 res = [];
wolffd@0 52 out = res;
wolffd@0 53
wolffd@0 54 % ---
wolffd@0 55 % get all combinations resulting from the above parameter set
wolffd@0 56 % descriptions
wolffd@0 57 % ---
wolffd@0 58 fparams = param_combinations(fparams_all);
wolffd@0 59
wolffd@0 60 trainparams = param_combinations(trainparams_all);
wolffd@0 61
wolffd@0 62 % common stem of the result file names; it only depends on the complete
wolffd@0 63 % parameter sets and is therefore computed once
wolffd@0 64 runstem = sprintf('runlog_%s_%s', hash(xml_format(fparams),'MD5'),...
wolffd@0 65     hash(xml_format(trainparams),'MD5'));
wolffd@0 66
wolffd@0 67 % ---
wolffd@0 68 % the clips: sorted by comparison_id
wolffd@0 69 %
wolffd@0 70 % Now the clip type can be specified in the first training set
wolffd@0 71 % ---
wolffd@0 72 tmp_simdata = load(trainparams(1).dataset);
wolffd@0 73 if isfield(tmp_simdata, 'comparison_ids')
wolffd@0 74     comparison_ids = tmp_simdata.comparison_ids;
wolffd@0 75     %comparison_invids = tmp_simdata.comparison_invids;
wolffd@0 76     clip_type = tmp_simdata.clip_type;
wolffd@0 77 else
wolffd@0 78     load ('db.mat', 'comparison','comparison_ids','comparison_names');
wolffd@0 79
wolffd@0 80     clip_type = 'MTTClip';
wolffd@0 81 end
wolffd@0 82 % inverse lookup: comparison id -> position in comparison_ids
wolffd@0 83 comparison_invids = sparse(comparison_ids,1,1:numel(comparison_ids));
wolffd@0 84 clips = feval(clip_type,comparison_ids);
wolffd@0 85
wolffd@0 86 % ---
wolffd@0 87 % provide some timing information
wolffd@0 88 % ---
wolffd@0 89 nruns = numel(fparams);
wolffd@0 90 tatic = [];
wolffd@0 91 tatoc = [];
wolffd@0 92 ftoc = [];
wolffd@0 93 mlrtic = [];
wolffd@0 94 mlrtoc = [];
wolffd@0 95 runs = 0;
wolffd@0 96
wolffd@0 97 % TEST
wolffd@0 98 for i = 1:numel(fparams)
wolffd@0 99
wolffd@0 100     % TIMING start
wolffd@0 101     runs = runs + 1;
wolffd@0 102     tatic(end+1) = cputime();
wolffd@0 103
wolffd@0 104     % forget the hash of the previous combination, so that an error
wolffd@0 105     % below is never logged under a stale hash
wolffd@0 106     paramhash = '';
wolffd@0 107
wolffd@0 108     % warning ('disabled feature cache reset');
wolffd@0 109     MTTAudioFeatureDBgen.reset_feature_dbs('exclude',{'db_magnaaudiofeat'});
wolffd@0 110     % the command has to be built with sprintf: a bare '%s.reset;'
wolffd@0 111     % is parsed as a comment and eval would silently do nothing
wolffd@0 112     eval(sprintf('%s.reset;', featdb));
wolffd@0 113
wolffd@0 114     % extract features
wolffd@0 115     try
wolffd@0 116         % ---
wolffd@0 117         % try loading old features with the same parameter hash.
wolffd@0 118         % we use the md5 hash to distinguish between features
wolffd@0 119         % ---
wolffd@0 120         %paramhash = MTTAudioFeature.param_hash(featuretype, fparams(i));
wolffd@0 121         %featfile = sprintf('runlog_%s_feat.mat', paramhash);
wolffd@0 122         %if exist(featfile,'file') == 2
wolffd@0 123         % eval(sprintf('%s.import(featfile);', MTTAudioFeatureDBgen.db_name(featuretype)));
wolffd@0 124
wolffd@0 125         % load db
wolffd@0 126         paramhash=eval(sprintf('%s.load(featuretype, fparams(i), clips);', featdb));
wolffd@0 127         paramhash=substr(paramhash, 0, -4);
wolffd@0 128         if ~isempty(paramhash)
wolffd@0 129             % ---
wolffd@0 130             % Here, we make sure the clips are
wolffd@0 131             % associated to the feature values
wolffd@0 132             % ---
wolffd@0 133             features = clips.features(featuretype, fparams(i));
wolffd@0 134             X = features.vector();
wolffd@0 135
wolffd@0 136         else
wolffd@0 137             % ---
wolffd@0 138             % we extract the mixed features, but leave the option of
wolffd@0 139             % using no tags
wolffd@0 140             % ---
wolffd@0 141             features = clips.features(featuretype, fparams(i));
wolffd@0 142             features.define_global_transform();
wolffd@0 143
wolffd@0 144             % get the feature vector
wolffd@0 145             X = features.vector();
wolffd@0 146
wolffd@0 147             % % save features with specific filename to disc
wolffd@0 148             % xml_save(sprintf('runlog_%s_param.xml', paramhash), fparams(i));
wolffd@0 149             % features.saveto(featfile);
wolffd@0 150
wolffd@0 151             paramhash=eval(sprintf('%s.save();', featdb));
wolffd@0 152             paramhash=substr(paramhash, 0, -4);
wolffd@0 153         end
wolffd@0 154
wolffd@0 155
wolffd@0 156         % TIMING
wolffd@0 157         ftoc(end+1) = cputime - tatic(end);
wolffd@0 158         fprintf('Got features. took %2.2f minutes\n', ftoc(end) / 60);
wolffd@0 159     catch err
wolffd@0 160
wolffd@0 161         print_error(err);
wolffd@0 162
wolffd@0 163         % ---
wolffd@0 164         % TODO: save feature configuration and indicate failure
wolffd@0 165         % ---
wolffd@0 166         % no hash from the database for this combination: fall back
wolffd@0 167         % to a hash of the feature parameters
wolffd@0 168         if isempty(paramhash)
wolffd@0 169             paramhash = hash(xml_format(fparams(i)),'MD5');
wolffd@0 170         end
wolffd@0 171         xml_save(sprintf('runlog_%s_param.xml', paramhash), fparams(i));
wolffd@0 172         xml_save(sprintf('runlog_%s_err.xml', paramhash), print_error(err));
wolffd@0 173
wolffd@0 174         continue;
wolffd@0 175     end
wolffd@0 176
wolffd@0 177
wolffd@0 178     % skip empty feature set;
wolffd@0 179     if isempty(X)
wolffd@0 180         continue;
wolffd@0 181     end
wolffd@0 182     % iterate over trainparams
wolffd@0 183     for j = 1:numel(trainparams)
wolffd@0 184
wolffd@0 185         % TIMING
wolffd@0 186         mlrtic(end+1) = cputime;
wolffd@0 187         paramhash_mlr = hash(xml_format(trainparams(j)),'MD5');
wolffd@0 188
wolffd@0 189         % ---
wolffd@0 190         % Note: here we load the similarity data.
wolffd@0 191         % this data is truncated if inctrain == 0: only the last
wolffd@0 192         % column of the partition fields is kept
wolffd@0 193         % ---
wolffd@0 194         simdata = load(trainparams(j).dataset);
wolffd@0 195         if isfield(trainparams(j),'inctrain') && (trainparams(j).inctrain == 0)
wolffd@0 196
wolffd@0 197             simdata.partBinTrn = simdata.partBinTrn(:,end);
wolffd@0 198
wolffd@0 199             if isfield(simdata, 'partBinNoTrn')
wolffd@0 200                 simdata.partBinNoTrn = simdata.partBinNoTrn(:,end);
wolffd@0 201             end
wolffd@0 202         end
wolffd@0 203
wolffd@0 204         % notintrain == 0: drop the partBinNoTrn field altogether
wolffd@0 205         if isfield(trainparams(j),'notintrain') && (trainparams(j).notintrain == 0)
wolffd@0 206
wolffd@0 207             if isfield(simdata, 'partBinNoTrn')
wolffd@0 208                 simdata = rmfield(simdata, 'partBinNoTrn');
wolffd@0 209             end
wolffd@0 210         end
wolffd@0 211
wolffd@0 212         % ---
wolffd@0 213         % NOTE: THIS IS TRAINING
wolffd@0 214         % call training function
wolffd@0 215         % ---
wolffd@0 216         tmp = do_test_rounds(trainfun, X, simdata, trainparams(j), fparams(i),...
wolffd@0 217             paramhash, paramhash_mlr, clips);
wolffd@0 218
wolffd@0 219         % TIMING
wolffd@0 220         mlrtoc(end+1) = cputime - mlrtic(end);
wolffd@0 221
wolffd@0 222         % the emptiness check has to precede the field assignments,
wolffd@0 223         % because assigning a field turns an empty result into a struct
wolffd@0 224         if ~isempty(tmp)
wolffd@0 225             tmp.finfo = features(1).data.info;
wolffd@0 226             tmp.fparams = features(1).my_params;
wolffd@0 227             tmp.fparamhash = paramhash;
wolffd@0 228             tmp.script = testscript;
wolffd@0 229             tmp.timeused = mlrtoc(end);
wolffd@0 230
wolffd@0 231             % save result to result struct;
wolffd@0 232             if isempty(res)
wolffd@0 233
wolffd@0 234                 res = tmp;
wolffd@0 235             else
wolffd@0 236
wolffd@0 237                 res(end+1) = tmp;
wolffd@0 238             end
wolffd@0 239
wolffd@0 240             % with two rows in mean_ok_test only the first row is
wolffd@0 241             % used for the reported maximum
wolffd@0 242             if size(tmp.mean_ok_test,1) == 2
wolffd@0 243                 maxperf = max(max(tmp.mean_ok_test(1,:)));
wolffd@0 244             else
wolffd@0 245                 maxperf = max(max(tmp.mean_ok_test));
wolffd@0 246             end
wolffd@0 247             fprintf('Learned something: %2.2f perc. Took %2.2f minutes \n',...
wolffd@0 248                 maxperf * 100, mlrtoc(end) / 60 );
wolffd@0 249             pause(0.5);
wolffd@0 250         else
wolffd@0 251             % single-argument warning does not expand escape sequences,
wolffd@0 252             % so the message carries no '\n'
wolffd@0 253             warning('Learned nothing');
wolffd@0 254         end
wolffd@0 255
wolffd@0 256         % save output
wolffd@0 257         out = res;
wolffd@0 258
wolffd@0 259         % save final results (rewritten after every training run, so
wolffd@0 260         % the files always hold the results collected so far)
wolffd@0 261         save(sprintf('%s_finalresults.mat', runstem), 'out');
wolffd@0 262
wolffd@0 263         % NOTE(review): this writes XML into a file named *.mat; the
wolffd@0 264         % name is kept because other scripts may rely on it
wolffd@0 265         xml_save(sprintf('%s_params.mat', runstem),...
wolffd@0 266             struct('fparams', fparams_all, 'trainparams', trainparams_all) );
wolffd@0 267
wolffd@0 268     end
wolffd@0 269
wolffd@0 270     % TIMING
wolffd@0 271     clc;
wolffd@0 272     tatoc(end+1) = cputime - tatic(end);
wolffd@0 273     cprint(0,'%3.2f percent done, %2.2fh spent, %2.2fh to go. \n mlr / feature: %3.3f \n',...
wolffd@0 274         (runs / nruns) *100 , sum(tatoc) / 3600,...
wolffd@0 275         ((sum(tatoc) / runs) * (nruns - runs)) / 3600, mean(mlrtoc) / mean(ftoc) );
wolffd@0 276     cprint(0,'\nGo get a coffee, the next round will take %3.0f minutes \n', ...
wolffd@0 277         (mean(mlrtoc) * numel(trainparams) + mean(ftoc)) / 60);
wolffd@0 278 end
wolffd@0 279 end
wolffd@0 280