Mercurial > hg > camir-aes2014
diff core/magnatagatune/tests_evals/test_generic_features_parameters_crossval.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
function out = test_generic_features_parameters_crossval...
    (fparams_all, trainparams_all, trainfun, featuretype)
% out = test_generic_features_parameters_crossval...
%           (fparams_all, trainparams_all, trainfun, featuretype)
%
% this is a special function designed for the ISMIR 2012
% publication. A fixed and partitioned similarity dataset is used,
% and the clip set as well as the features are SPECIFICALLY SELECTED
% to each combination of training and test sets.
%
% For every feature parameter combination the features are loaded (or
% extracted and saved), and for every training parameter combination
% do_test_rounds is called with the resulting feature matrix.
%
% Input:
%   fparams_all      feature parameter description, expanded into single
%                    configurations by param_combinations
%   trainparams_all  training parameter description, expanded by
%                    param_combinations. Each resulting configuration must
%                    have a field 'dataset' naming a file readable by
%                    load(); the optional fields 'inctrain' and
%                    'notintrain' (when == 0) truncate / remove the
%                    partBinTrn / partBinNoTrn partitions of that dataset
%   trainfun         training function, passed through to do_test_rounds
%   featuretype      feature class name, used for the feature database
%                    lookup (MTTAudioFeatureDBgen.db_name) and for
%                    clips.features
%
% Output:
%   out              struct array with one entry per successfully
%                    evaluated (feature, training) configuration: the
%                    result of do_test_rounds extended by the fields
%                    finfo, fparams, fparamhash, script and timeused.
%                    Empty ([]) if no configuration produced a result.
%
% Side effects: writes runlog_*_param.xml / runlog_*_err.xml on feature
% failures, and runlog_*_finalresults.mat / runlog_*_params.mat after
% every training configuration (incremental checkpoint).

% svn hook
my_revision = str2double(substr('$Rev$', 5, -1));
ST = dbstack();
testscript = ST(end).name;

global globalvars;
global comparison_ids;
global comparison_invids;

eval(sprintf('global %s', MTTAudioFeatureDBgen.db_name(featuretype)));

% ---
% get all combinations resulting from the above parameter set
% descriptions
% ---
fparams = param_combinations(fparams_all);

trainparams = param_combinations(trainparams_all);

% ---
% the clips: sorted by comparison_id
%
% Now the clip type can be specified in the first training set
% ---
tmp_simdata = load(trainparams(1).dataset);
if isfield(tmp_simdata, 'comparison_ids')
    comparison_ids = tmp_simdata.comparison_ids;
    %comparison_invids = tmp_simdata.comparison_invids;
    clip_type = tmp_simdata.clip_type;
else
    load ('db.mat', 'comparison','comparison_ids','comparison_names');

    clip_type = 'MTTClip';
end
comparison_invids = sparse(comparison_ids,1,1:numel(comparison_ids));
clips = feval(clip_type,comparison_ids);

% ---
% provide some timing information
% ---
nruns = numel(fparams);
tatic = [];
tatoc = [];
ftoc = [];
mlrtic = [];
mlrtoc = [];
runs = 0;

res = [];

% make sure the output is defined even if every configuration is skipped
out = res;

% TEST
for i = 1:numel(fparams)

    % TIMING start
    runs = runs + 1;
    tatic(end+1) = cputime();

    % the hash has to belong to THIS configuration: forget the one of
    % the previous iteration, otherwise a failure below would be logged
    % under the wrong hash
    paramhash = '';

    % warning ('disabled feature cache reset');
    MTTAudioFeatureDBgen.reset_feature_dbs('exclude',{'db_magnaaudiofeat'});

    % reset the database of the current feature type.
    % NOTE: the command string has to be built with sprintf; passing the
    % format and the name directly to eval evaluates '%s.reset;', which
    % is a comment and therefore a silent no-op
    eval(sprintf('%s.reset;', MTTAudioFeatureDBgen.db_name(featuretype)));

    % extract features
    try
        % ---
        % try loading old features with the same parameter hash.
        % we use the md5 hash to distinguish between features
        % ---
        %paramhash = MTTAudioFeature.param_hash(featuretype, fparams(i));
        %featfile = sprintf('runlog_%s_feat.mat', paramhash);
        %if exist(featfile,'file') == 2
        %    eval(sprintf('%s.import(featfile);', MTTAudioFeatureDBgen.db_name(featuretype)));

        % load db
        paramhash = eval(sprintf('%s.load(featuretype, fparams(i), clips);', ...
            MTTAudioFeatureDBgen.db_name(featuretype)));
        paramhash = substr(paramhash, 0, -4);

        if ~isempty(paramhash)
            % ---
            % Here, we make sure the clips are
            % associated to the feature values
            % ---
            features = clips.features(featuretype, fparams(i));
            X = features.vector();

        else
            % ---
            % we extract the mixed features, but leave the option of
            % using no tags
            % ---
            features = clips.features(featuretype, fparams(i));
            features.define_global_transform();

            % get the feature vector
            X = features.vector();

            % % save features with specific filename to disc
            % xml_save(sprintf('runlog_%s_param.xml', paramhash), fparams(i));
            % features.saveto(featfile);

            paramhash = eval(sprintf('%s.save();', ...
                MTTAudioFeatureDBgen.db_name(featuretype)));
            paramhash = substr(paramhash, 0, -4);
        end

        % TIMING
        ftoc(end+1) = cputime - tatic(end);
        fprintf('Got features. took %2.2f minutes\n', ftoc(end) / 60);
    catch err

        print_error(err);

        % ---
        % TODO: save feature configuration and indicate failure
        % ---
        if isempty(paramhash)
            % no database hash available for this configuration:
            % fall back to a hash of the parameter set itself
            paramhash = hash(xml_format(fparams(i)),'MD5');
        end
        xml_save(sprintf('runlog_%s_param.xml', paramhash), fparams(i));
        xml_save(sprintf('runlog_%s_err.xml', paramhash), print_error(err));

        continue;
    end

    % skip empty feature set;
    if isempty(X)
        continue;
    end

    % iterate over trainparams
    for j = 1:numel(trainparams)

        % TIMING
        mlrtic(end+1) = cputime;
        paramhash_mlr = hash(xml_format(trainparams(j)),'MD5');

        % ---
        % Note: here we load the similarity data.
        % this data is truncated if inctrain == 0
        % ---
        simdata = load(trainparams(j).dataset);
        if isfield(trainparams(j),'inctrain') && (trainparams(j).inctrain == 0)

            % keep only the last column of the training partitions
            simdata.partBinTrn = simdata.partBinTrn(:,end);

            if isfield(simdata, 'partBinNoTrn')
                simdata.partBinNoTrn = simdata.partBinNoTrn(:,end);
            end
        end

        if isfield(trainparams(j),'notintrain') && (trainparams(j).notintrain == 0)

            if isfield(simdata, 'partBinNoTrn')
                simdata = rmfield(simdata, 'partBinNoTrn');
            end
        end

        % ---
        % NOTE: THIS IS TRAINING
        % call training function
        % ---
        tmp = do_test_rounds(trainfun, X, simdata, trainparams(j), fparams(i),...
            paramhash, paramhash_mlr, clips);

        % TIMING
        mlrtoc(end+1) = cputime - mlrtic(end);

        % ---
        % the emptiness check has to happen BEFORE any field is assigned:
        % assigning a field to an empty result turns it into a non-empty
        % struct, which then lacks mean_ok_test
        % ---
        if ~isempty(tmp)

            tmp.finfo = features(1).data.info;
            tmp.fparams = features(1).my_params;
            tmp.fparamhash = paramhash;
            tmp.script = testscript;
            tmp.timeused = mlrtoc(end);

            % save result to result struct;
            if isempty(res)

                res = tmp;
            else

                res(end+1) = tmp;
            end

            if size(tmp.mean_ok_test,1) == 2
                maxperf = max(max(tmp.mean_ok_test(1,:)));
            else
                maxperf = max(max(tmp.mean_ok_test));
            end
            fprintf('Learned something: %2.2f perc. Took %2.2f minutes \n',...
                maxperf * 100, mlrtoc(end) / 60 );
            pause(0.5);
        else
            % single-argument warning() does not convert escape
            % sequences, so no trailing '\n' here
            warning('Learned nothing');
        end

        % save output
        out = res;

        % ---
        % save final results. This is repeated after every training
        % configuration, so intermediate results survive an interruption
        % ---
        fhash_all = hash(xml_format(fparams),'MD5');
        thash_all = hash(xml_format(trainparams),'MD5');

        save(sprintf('runlog_%s_%s_finalresults.mat', fhash_all, thash_all), 'out');

        % NOTE(review): this file is written by xml_save but carries a
        % .mat extension - name kept for compatibility with existing
        % run logs; confirm before renaming
        xml_save(sprintf('runlog_%s_%s_params.mat', fhash_all, thash_all), ...
            struct('fparams', fparams_all, 'trainparams', trainparams_all) );

    end

    % TIMING
    clc;
    tatoc(end+1) = cputime - tatic(end);
    cprint(0,'%3.2f percent done, %2.2fh spent, %2.2fh to go. \n mlr / feature: %3.3f \n',...
        (runs / nruns) *100 , sum(tatoc) / 3600,...
        ((sum(tatoc) / runs) * (nruns - runs)) / 3600, mean(mlrtoc) / mean(ftoc) );
    cprint(0,'\nGo get a coffee, the next round will take %3.0f minutes \n', ...
        (mean(mlrtoc) * numel(trainparams) + mean(ftoc)) / 60);
end
end