% core/magnatagatune/tests_evals/rbm_subspace/write_mat_results_ISMIR13RBM_singletraining.m
function [out, stats] = write_mat_results_ISMIR13RBM_singletraining(dirin,fileout)
% combine the test results from the directories supplied and group them
% according to their dataset parameter values
% (the second input, fileout, is currently unused)
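%
% example call (hypothetical result directories):
%   [out, stats] = write_mat_results_ISMIR13RBM_singletraining({'./run1/', './run2/'});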

features = []; % (not used in this function)
show = 1;      % enable plots and printed output

if nargin == 0
    % default to searching the current directory
    dirin{1} = './';
end

global comparison;
global comparison_ids;
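% (assumption: these globals are set and used elsewhere in the evaluation
% pipeline; they are not referenced in this function)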
     
newout = [];
thisdir = pwd;
% loop through all the result directories and collect their results
for diri = 1:numel(dirin)

    % ---
    % go to directory and locate file
    % ---
    cd(dirin{diri});

    u = dir();
    u = {u.name};
    [idx, strpos] = substrcellfind(u, '_finalresults.mat', 1);
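    % substrcellfind (project helper; assumed semantics): returns indices
    % and match positions of cell entries containing '_finalresults.mat'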
    
    if numel(idx) < 1
        error('This directory contains no valid test data');
    end
    
    % load just one test result (if a numeric index 'file' is set) or all
    % results found in this folder?
    if exist('file','var') && isnumeric(file)
        cprint(1, 'loading one result file');
        file = u{idx(file)};
        data = load(file);
        % sappend (project helper; assumed): append a struct to a struct array
        newout = sappend(newout, data.out);
    else
        for filei = 1:numel(idx)
            cprint(1, 'loading result file %i of %i',filei, numel(idx));
            file = u{idx(filei)};
            data = load(file);
            newout = sappend(newout,data.out);
        end
    end
    % change back to the original working directory
    cd(thisdir);
end

% ---
% filter according to training parameter C
%
% NOTE: if we don't filter by C, we get strong overfitting with training
% success > 96% and test set performance around 65%
% ---
cs = zeros(numel(newout),1);
for i=1:numel(newout)
    cs(i) = newout(i).trainparams.C;
end
cvals = unique(cs);
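% build one summary entry in 'out' per unique C value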

for ci=1:numel(cvals)
    valididx = find(cs == cvals(ci));
    filteredout = newout(valididx);
    
    % ---
    % get parameter statistics
    % ---
    stats = test_generic_display_param_influence(filteredout, show);
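    % test_generic_display_param_influence (project helper; assumed): per
    % varied parameter value, it collects statistics including the index
    % of the run with the best training success (max_idx, used below)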

    % ---
    % get maximal values for each test-set bin
    %
    % trainparams.dataset contains sets which each cover only one bin of
    % the ISMIR test sets
    % ---
    max_idx = [stats.trainparams.dataset.mean_ok_train.max_idx];   
    ok_test = zeros(2, numel(max_idx));
    ok_train =  zeros(2, numel(max_idx));
    ok_config = [];
    % cycle over all test sets and save best result
    for i=1:numel(max_idx)
        ok_test(:,i) = filteredout(max_idx(i)).mean_ok_test;
        ok_train(:,i) = filteredout(max_idx(i)).mean_ok_train;
        ok_config = sappend(ok_config,struct('trainparams',filteredout(max_idx(i)).trainparams, ...
                                                'fparams',filteredout(max_idx(i)).fparams));
    end
    % store the aggregated statistics for this C value
    out(ci).mean_ok_test = mean(ok_test,2);
    out(ci).var_ok_test = var(ok_test,0,2);
    out(ci).mean_ok_train = mean(ok_train,2);
    out(ci).var_ok_train = var(ok_train,0,2);
    out(ci).trainparams.C = cvals(ci);
    out(ci).ok_config = ok_config;
    out(ci).ok_test = ok_test;
    out(ci).ok_train = ok_train;
end
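% each out(ci) now holds the mean and variance of the best per-bin test and
% training scores for one C value, plus the winning configurations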

% ---
% show results for different C
% ---
if numel(out) > 1 && show
    
    % plot means as bars, with std = sqrt(var) as error bars, alongside
    % the training results
    figure;
    boxplot([out.mean_ok_test], sqrt([out.var_ok_test]), [out.mean_ok_train]);
    title('Performance for all configs');
end

% --- 
% write max. test success
% ---
mean_ok_test = [out.mean_ok_test];
[val, idx] = max(mean_ok_test(1,:));
if show
    fprintf(' --- Maximal test set success: nr. %d, %3.2f percent. --- \n', idx, val * 100)
end

% save a summary named by the md5 hash of the concatenated input directories
save([hash(strcat(dirin{:}),'md5') '_summary'], 'out');

end



% ---
% local helper: pseudo-boxplot of the results as grouped bars with error
% bars (note: this shadows MATLAB's built-in boxplot, and its arguments
% shadow the built-ins mean and std)
% ---
function boxplot(mean, std, train)

    bar([train; mean]', 1.5);
    hold on;
    errorbar(1:size(mean,2), mean(1,:), std(1,:),'.');
%     plot(train,'rO');
    colormap(spring);
    axis([0 size(mean,2)+1 max(0, min(min([train mean] - 0.1))) max(max([train mean] + 0.1))]);
end