view core/magnatagatune/tests_evals/test_generic_features_parameters_crossval.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
function out = test_generic_features_parameters_crossval...
    (fparams_all, trainparams_all, trainfun, featuretype)
% out = test_generic_features_parameters_crossval...
%     (fparams_all, trainparams_all, trainfun, featuretype)
%
% this is a special function designed for the ISMIR 2012 
% publication. A fixed and partitioned similarity dataset is used,
% and the clip set as well as the features are SPECIFICALLY SELECTED
% to each combination of training and test sets.
%
% For every feature parameter combination the features are loaded
% (or extracted and saved), and for every training parameter 
% combination do_test_rounds is called with the resulting feature 
% vectors. Results are accumulated and written to disk after each
% training run.
%
% fparams_all:     feature parameter description, expanded with
%                  param_combinations
% trainparams_all: training parameter description, expanded with
%                  param_combinations. Every combination needs a 
%                  'dataset' field naming a file readable by load(). 
%                  The optional fields 'inctrain' and 'notintrain' 
%                  (when == 0) truncate / remove partitions of the 
%                  loaded similarity data.
% trainfun:        training function, handed on to do_test_rounds
% featuretype:     feature type, used to pick the feature database via
%                  MTTAudioFeatureDBgen.db_name and to request the
%                  features from the clips
%
% out:             struct array with one entry per successful
%                  (feature, training) parameter combination; empty if
%                  no combination produced a result
%
% Side effects: sets the globals comparison_ids and comparison_invids,
% clears the command window after each feature run, and writes
% runlog_*.mat / runlog_*.xml files into the current directory.

% name of the outermost calling script, stored with every result
ST = dbstack();
testscript = ST(end).name;

global globalvars;
global comparison_ids;
global comparison_invids;

% the feature database lives in a global variable named after the type
eval(sprintf('global %s', MTTAudioFeatureDBgen.db_name(featuretype)));

% ---
% get all combinations resulting from the above parameter set 
% descriptions
% ---
fparams = param_combinations(fparams_all);

trainparams = param_combinations(trainparams_all);

% ---
% the clips: sorted by comparison_id
%
% Now the clip type can be specified in the first training set
% ---
tmp_simdata = load(trainparams(1).dataset);
if isfield(tmp_simdata, 'comparison_ids')
    comparison_ids = tmp_simdata.comparison_ids;
    clip_type = tmp_simdata.clip_type;
else
    % fall back to the ids stored in the default database file
    load ('db.mat', 'comparison','comparison_ids','comparison_names');
    
    clip_type = 'MTTClip';
end
% inverse lookup: clip id -> position within comparison_ids
comparison_invids = sparse(comparison_ids,1,1:numel(comparison_ids));
clips = feval(clip_type,comparison_ids);

% ---
% provide some timing information
% ---
nruns = numel(fparams);
tatic = [];
tatoc = [];
ftoc = [];
mlrtic = [];
mlrtoc = [];
runs = 0;

res = [];

% make sure the output is defined even if no run succeeds
out = res;

% file name tag built from the hashes of the complete parameter sets.
% It does not change during the run, so it is computed only once,
% right before the first result is saved.
runtag = '';

% TEST
% NOTE: the loop variable i, fparams, clips and featuretype are 
% referenced by name inside the eval strings below
for i = 1:numel(fparams)

    % TIMING start
    runs = runs + 1;
    tatic(end+1) = cputime();
    
    % forget the hash of the previous run, so that a failure below is
    % never logged under another parameter set's hash
    paramhash = '';
    
    MTTAudioFeatureDBgen.reset_feature_dbs('exclude',{'db_magnaaudiofeat'});
    % the command string has to be built with sprintf first: passed 
    % directly to eval, '%s.reset;' is just a comment (it starts with 
    % '%') and the db name is taken as eval's catch expression, so the
    % reset never happens
    eval(sprintf('%s.reset;', MTTAudioFeatureDBgen.db_name(featuretype)));
    
    % extract features 
    try
        % ---
        % try loading old features with the same parameters from 
        % the feature database
        % ---
        paramhash=eval(sprintf('%s.load(featuretype, fparams(i), clips);', MTTAudioFeatureDBgen.db_name(featuretype)));
        paramhash=substr(paramhash,  0, -4);
        if ~isempty(paramhash)
            % ---
            % Here, we make sure the clips are 
            % associated to the feature values
            % ---
            features = clips.features(featuretype, fparams(i));
            X = features.vector();
            
        else
            % ---
            % we extract the mixed features, but leave the option of 
            % using no tags
            % ---
            features = clips.features(featuretype, fparams(i));
            features.define_global_transform();
            
            % get the feature vector
            X = features.vector();

            % store the new features in the database
            paramhash=eval(sprintf('%s.save();', MTTAudioFeatureDBgen.db_name(featuretype)));
            paramhash=substr(paramhash,  0, -4);
        end
        
        
        % TIMING
        ftoc(end+1) = cputime - tatic(end);
        fprintf('Got features. took %2.2f minutes\n', ftoc(end) / 60);
    catch err
        
        print_error(err);
       
        % ---
        % save the feature configuration together with the error.
        % if the database did not provide a hash for this run, one 
        % is derived from the parameters themselves
        % ---
        if isempty(paramhash)
            paramhash = hash(xml_format(fparams(i)),'MD5');
        end
        xml_save(sprintf('runlog_%s_param.xml', paramhash), fparams(i));
        xml_save(sprintf('runlog_%s_err.xml', paramhash), print_error(err));
        
        continue;
    end

       
    % skip empty feature set;
    if isempty(X)
        continue;
    end
    
    % iterate over trainparams
    for j = 1:numel(trainparams)
        
        % TIMING
        mlrtic(end+1) = cputime;
        paramhash_mlr = hash(xml_format(trainparams(j)),'MD5');
        
        % ---
        % Note: here we load the similarity data.
        %       this data is truncated if inctrain == 0: only the
        %       last column of the training partitions is kept
        % ---
        simdata = load(trainparams(j).dataset);
        if isfield(trainparams(j),'inctrain') && (trainparams(j).inctrain == 0)
            
            simdata.partBinTrn = simdata.partBinTrn(:,end);
            
            if isfield(simdata, 'partBinNoTrn')
                simdata.partBinNoTrn = simdata.partBinNoTrn(:,end);
            end
        end
        
        % drop the partBinNoTrn partitions if notintrain == 0
        if isfield(trainparams(j),'notintrain') && (trainparams(j).notintrain == 0)
            
            if isfield(simdata, 'partBinNoTrn')
                simdata = rmfield(simdata, 'partBinNoTrn');
            end    
        end
        
        % ---
        % NOTE: THIS IS TRAINING
        % call training function
        % ---
        tmp = do_test_rounds(trainfun, X, simdata, trainparams(j), fparams(i),...
            paramhash, paramhash_mlr, clips);
        
        % TIMING 
        mlrtoc(end+1) = cputime - mlrtic(end);
        
        % ---
        % the result has to be checked BEFORE any field is added:
        % assigning a field to an empty result would turn it into a 
        % non-empty struct and hide the failure
        % ---
        if isempty(tmp)
            warning('Learned nothing');
        else
            tmp.finfo = features(1).data.info;
            tmp.fparams = features(1).my_params;
            tmp.fparamhash = paramhash;
            tmp.script = testscript;
            tmp.timeused = mlrtoc(end);
            
            % save result to result struct;
            if isempty(res)
                res = tmp;
            else
                res(end+1) = tmp;
            end
            
            % only the first row is reported if mean_ok_test has two rows
            if size(tmp.mean_ok_test,1) == 2
                maxperf = max(max(tmp.mean_ok_test(1,:)));
            else
                maxperf = max(max(tmp.mean_ok_test));
            end
            fprintf('Learned something: %2.2f perc. Took %2.2f minutes \n',...
                 maxperf * 100, mlrtoc(end) / 60 );
            pause(0.5);
        end
        
        % save output
        out = res;
        
        % ---
        % save the results so far. The files are rewritten after every
        % training run, so an interrupted experiment keeps its results
        % ---
        if isempty(runtag)
            runtag = sprintf('%s_%s',...
                hash(xml_format(fparams),'MD5'),...
                hash(xml_format(trainparams),'MD5'));
        end
        save(sprintf('runlog_%s_finalresults.mat', runtag), 'out');

        % NOTE(review): this file is written by xml_save but carries a
        % .mat extension. The name is kept as is, since other scripts 
        % may look for it - confirm before renaming
        xml_save(sprintf('runlog_%s_params.mat', runtag),...
            struct('fparams', fparams_all, 'trainparams', trainparams_all) );

    end
    
    % TIMING
    clc;
    tatoc(end+1) = cputime - tatic(end);
    cprint(0,'%3.2f percent done, %2.2fh spent, %2.2fh to go. \n mlr / feature: %3.3f \n',...
        (runs / nruns) *100 , sum(tatoc) / 3600,...
        ((sum(tatoc) / runs) * (nruns - runs)) / 3600, mean(mlrtoc) / mean(ftoc) );
     cprint(0,'\nGo get a coffee, the next round will take %3.0f minutes \n', ...
                (mean(mlrtoc) * numel(trainparams) + mean(ftoc)) / 60);
end
end