diff core/magnatagatune/tests_evals/test_generic_features_parameters_crossval.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/magnatagatune/tests_evals/test_generic_features_parameters_crossval.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,272 @@
+function out = test_generic_features_parameters_crossval...
+    (fparams_all, trainparams_all, trainfun, featuretype)
+% OUT = TEST_GENERIC_FEATURES_PARAMETERS_CROSSVAL(FPARAMS_ALL, ...
+%           TRAINPARAMS_ALL, TRAINFUN, FEATURETYPE)
+%
+% this is a special function designed for the ISMIR 2012
+% publication. A fixed and partitioned similarity dataset is used,
+% and the clip set as well as the features are SPECIFICALLY SELECTED
+% to each combination of training and test sets.
+%
+% For every feature parameter combination, the features of type
+% FEATURETYPE are loaded from the feature database, or extracted and
+% saved if loading yields no hash. Then do_test_rounds is called with
+% TRAINFUN for every training parameter combination.
+%
+% fparams_all:     feature parameter description, expanded by
+%                  param_combinations
+% trainparams_all: training parameter description, expanded by
+%                  param_combinations. The 'dataset' field of each
+%                  combination names the similarity data to load
+% trainfun:        training function, handed on to do_test_rounds
+% featuretype:     feature type, selects the feature database and
+%                  the features requested from the clips
+%
+% out:             struct array holding the non-empty results of
+%                  do_test_rounds, [] if there are none. It is also
+%                  saved to disk after every training run.
+
+% svn hook
+my_revision = str2double(substr('$Rev$',  5, -1));
+ST = dbstack();
+testscript = ST(end).name;
+
+global globalvars;
+global comparison_ids;
+global comparison_invids;
+
+% name of the global feature database variable for this feature type
+dbname = MTTAudioFeatureDBgen.db_name(featuretype);
+eval(sprintf('global %s', dbname));
+
+% ---
+% get all combinations resulting from the above parameter set
+% descriptions
+% ---
+fparams = param_combinations(fparams_all);
+
+trainparams = param_combinations(trainparams_all);
+
+% ---
+% the clips: sorted by comparison_id
+%
+% Now the clip type can be specified in the first training set
+% ---
+tmp_simdata = load(trainparams(1).dataset);
+if isfield(tmp_simdata, 'comparison_ids')
+    comparison_ids = tmp_simdata.comparison_ids;
+    %comparison_invids = tmp_simdata.comparison_invids;
+    clip_type = tmp_simdata.clip_type;
+else
+    load ('db.mat', 'comparison','comparison_ids','comparison_names');
+
+    clip_type = 'MTTClip';
+end
+comparison_invids = sparse(comparison_ids,1,[1:numel(comparison_ids)]);
+clips = feval(clip_type,comparison_ids);
+
+% ---
+% provide some timing information
+% ---
+nruns = numel(fparams);
+tatic = [];
+tatoc = [];
+ftoc = [];
+mlrtic = [];
+mlrtoc = [];
+runs = 0;
+
+res = [];
+
+% make sure the output is defined even if no run yields a result
+out = res;
+
+% TEST
+for i = 1:numel(fparams)
+
+    % TIMING start
+    runs = runs + 1;
+    tatic(end+1) = cputime();
+
+    % forget the hash of the previous run, so that a failure below
+    % is never logged under the hash of another parameter set
+    paramhash = '';
+
+    % warning ('disabled feature cache reset');
+    MTTAudioFeatureDBgen.reset_feature_dbs('exclude',{'db_magnaaudiofeat'});
+
+    % the command has to be built by sprintf: passed to eval as is,
+    % the string '%s.reset;' is a mere comment and resets nothing
+    eval(sprintf('%s.reset;', dbname));
+
+    % extract features
+    try
+        % ---
+        % try loading old features with the same parameter hash.
+        % we use the md5 hash to distinguish between features
+        % ---
+        %paramhash = MTTAudioFeature.param_hash(featuretype, fparams(i));
+        %featfile = sprintf('runlog_%s_feat.mat', paramhash);
+        %if exist(featfile,'file') == 2
+        %   eval(sprintf('%s.import(featfile);', MTTAudioFeatureDBgen.db_name(featuretype)));
+
+        % load db
+        paramhash = eval(sprintf('%s.load(featuretype, fparams(i), clips);', dbname));
+        paramhash = substr(paramhash,  0, -4);
+        if ~isempty(paramhash)
+            % ---
+            % Here, we make sure the clips are
+            % associated to the feature values
+            % ---
+            features = clips.features(featuretype, fparams(i));
+            X = features.vector();
+
+        else
+            % ---
+            % we extract the mixed features, but leave the option of
+            % using no tags
+            % ---
+            features = clips.features(featuretype, fparams(i));
+            features.define_global_transform();
+
+            % get the feature vector
+            X = features.vector();
+
+%             % save features with specific filename to disc
+%             xml_save(sprintf('runlog_%s_param.xml', paramhash), fparams(i));
+%             features.saveto(featfile);
+
+            paramhash = eval(sprintf('%s.save();', dbname));
+            paramhash = substr(paramhash,  0, -4);
+        end
+
+        % TIMING
+        ftoc(end+1) = cputime - tatic(end);
+        fprintf('Got features. took %2.2f minutes\n', ftoc(end) / 60);
+    catch err
+
+        print_error(err);
+
+        % ---
+        % save the feature configuration and the error report.
+        % if the feature database did not provide a hash, the
+        % MD5 hash of the parameter set itself names the files
+        % ---
+        if isempty(paramhash)
+            paramhash = hash(xml_format(fparams(i)),'MD5');
+        end
+        xml_save(sprintf('runlog_%s_param.xml', paramhash), fparams(i));
+        xml_save(sprintf('runlog_%s_err.xml', paramhash), print_error(err));
+
+        continue;
+    end
+
+    % skip empty feature set;
+    if isempty(X)
+        continue;
+    end
+
+    % iterate over trainparams
+    for j = 1:numel(trainparams)
+
+        % TIMING
+        mlrtic(end+1) = cputime;
+        paramhash_mlr = hash(xml_format(trainparams(j)),'MD5');
+
+        % ---
+        % Note: here we load the similarity data.
+        %       this data is truncated if inctrain == 0: only the
+        %       last column of the partitions is kept
+        % ---
+        simdata = load(trainparams(j).dataset);
+        if isfield(trainparams(j),'inctrain') && (trainparams(j).inctrain == 0)
+
+            simdata.partBinTrn = simdata.partBinTrn(:,end);
+
+            if isfield(simdata, 'partBinNoTrn')
+                simdata.partBinNoTrn = simdata.partBinNoTrn(:,end);
+            end
+        end
+
+        % notintrain == 0: the partBinNoTrn partition is dropped
+        if isfield(trainparams(j),'notintrain') && (trainparams(j).notintrain == 0)
+
+            if isfield(simdata, 'partBinNoTrn')
+                simdata = rmfield(simdata, 'partBinNoTrn');
+            end
+        end
+
+        % ---
+        % NOTE: THIS IS TRAINING
+        % call training function
+        % ---
+        [tmp] = do_test_rounds(trainfun, X, simdata, trainparams(j), fparams(i),...
+            paramhash, paramhash_mlr, clips);
+
+        % TIMING
+        mlrtoc(end+1) = cputime - mlrtic(end);
+
+        % ---
+        % save result to result struct.
+        % the result has to be checked before any field is added:
+        % afterwards, tmp is never empty
+        % ---
+        if ~isempty(tmp)
+            tmp.finfo = features(1).data.info;
+            tmp.fparams = features(1).my_params;
+            tmp.fparamhash = paramhash;
+            tmp.script = testscript;
+            tmp.timeused = mlrtoc(end);
+
+            if isempty(res)
+
+                res = tmp;
+            else
+
+                res(end+1) = tmp;
+            end
+
+            if size(tmp.mean_ok_test,1) == 2
+                maxperf = max(max(tmp.mean_ok_test(1,:)));
+            else
+                maxperf = max(max(tmp.mean_ok_test));
+            end
+            fprintf('Learned something: %2.2f perc. Took %2.2f minutes \n',...
+                 maxperf * 100, mlrtoc(end) / 60 );
+            pause(0.5);
+        else
+            % a single-argument warning prints '\n' literally
+            warning('Learned nothing');
+        end
+
+        % save output
+        out = res;
+
+        % both files below share one tag, built from the hashes of
+        % all feature and all training parameter combinations
+        runtag = sprintf('%s_%s', hash(xml_format(fparams),'MD5'),...
+                hash(xml_format(trainparams),'MD5'));
+
+        % save final results
+        save(sprintf('runlog_%s_finalresults.mat', runtag), 'out');
+
+        % NOTE: this file is written by xml_save although its name
+        %       ends in '.mat'. The name is kept unchanged, as other
+        %       code may rely on it
+        xml_save(sprintf('runlog_%s_params.mat', runtag),...
+                struct('fparams', fparams_all, 'trainparams', trainparams_all) );
+
+    end
+
+    % TIMING
+    clc;
+    tatoc(end+1) = cputime - tatic(end);
+    cprint(0,'%3.2f percent done, %2.2fh spent, %2.2fh to go. \n mlr / feature: %3.3f \n',...
+        (runs / nruns) *100 , sum(tatoc) / 3600,...
+        ((sum(tatoc) / runs) * (nruns - runs)) / 3600, mean(mlrtoc) / mean(ftoc) );
+     cprint(0,'\nGo get a coffee, the next round will take %3.0f minutes \n', ...
+                (mean(mlrtoc) * numel(trainparams) + mean(ftoc)) / 60);
+end
+end
+
+