annotate core/tools/machine_learning/sim_get_traintest_clip_overlap.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
function [relative, absolute, test_cn, train_cn] = sim_get_traintest_clip_overlap(datafile)
% [relative, absolute, test_cn, train_cn] = sim_get_traintest_clip_overlap(datafile)
%
% Measures, for every test set, how many of its clips reappear in each of
% the corresponding training sets.
%
% Input:
%   datafile - name of a MAT-file containing the cell arrays
%              partBinTst{k}   (constraints of test set k) and
%              partBinTrn{k,m} (constraints of training set m for test set k).
%              The first three columns of each constraint matrix are read
%              as clip ids. If omitted, the file
%              'comp_partBinData_unclustered_cupaper_01' is loaded.
%
% Output (k = test set index, m = training size index):
%   relative(k,m) - fraction (0..1, not percent) of the clips of test set k
%                   that also occur in training set (k,m); NaN if the test
%                   set contains no clips
%   absolute(k,m) - number of clips shared by test set k and training set (k,m)
%   test_cn(k)    - number of distinct clips in test set k
%   train_cn(k)   - number of distinct clips in the last training set
%                   (m = number of training sizes) of test set k;
%                   0 if there are no training sets

if nargin < 1
    simdata = load('comp_partBinData_unclustered_cupaper_01');
else
    simdata = load(datafile);
end

nTestSets = size(simdata.partBinTst, 2);   % number of cross-validation bins
ntrainsizes = size(simdata.partBinTrn, 2); % number of training set size steps

% preallocate all outputs so that they are defined even for empty data
absolute = zeros(nTestSets, ntrainsizes);
relative = zeros(nTestSets, ntrainsizes);
test_cn = zeros(1, nTestSets);
train_cn = zeros(1, nTestSets);

for k = 1:nTestSets % all test/training combinations

    % distinct clips referenced by the constraints of this test set
    tst = simdata.partBinTst{k};
    test_clips = unique([tst(:,1); tst(:,2); tst(:,3)]);
    test_cn(k) = numel(test_clips);

    % clip count of the most recently processed training set; stays 0 when
    % there are no training sets instead of reading a stale/undefined variable
    n_train_clips = 0;

    for m = 1:ntrainsizes

        % distinct clips referenced by the constraints of this training set
        trn = simdata.partBinTrn{k,m};
        train_clips = unique([trn(:,1); trn(:,2); trn(:,3)]);
        n_train_clips = numel(train_clips);

        % clips occurring in both sets
        same = intersect(train_clips, test_clips);

        % overlap statistics
        absolute(k,m) = numel(same);
        relative(k,m) = absolute(k,m) / test_cn(k);
    end

    % report the size of the last (m = ntrainsizes) training set
    train_cn(k) = n_train_clips;
end