function [relative, absolute, test_cn, train_cn] = sim_get_traintest_clip_overlap(datafile)
% SIM_GET_TRAINTEST_CLIP_OVERLAP  Clip overlap between test and training sets.
%
%   [relative, absolute, test_cn, train_cn] = ...
%       sim_get_traintest_clip_overlap(datafile)
%
%   For every test set (cross-validation bin) and every training-set size,
%   counts how many clips of the test set reappear in the corresponding
%   training set.
%
%   Input:
%     datafile - (optional) name of a MAT-file containing the cell arrays
%                partBinTst (indexed {k}) and partBinTrn (indexed {k,m}).
%                Columns 1-3 of each cell entry are read as clip
%                identifiers. When omitted, the file
%                'comp_partBinData_unclustered_cupaper_01' is loaded.
%
%   Outputs:
%     relative - nTestSets x ntrainsizes matrix; absolute(k,m) divided by
%                the number of unique test clips, i.e. a fraction (not
%                multiplied by 100). A test set without clips yields NaN.
%     absolute - nTestSets x ntrainsizes matrix; number of unique clips
%                shared by test set k and training set (k,m).
%     test_cn  - 1 x nTestSets; number of unique clips in each test set.
%     train_cn - 1 x nTestSets; number of unique clips in the LAST training
%                size (m = ntrainsizes) of each test set, or 0 when there
%                are no training sizes at all.

if nargin < 1
    simdata = load('comp_partBinData_unclustered_cupaper_01');
else
    simdata = load(datafile);
end

nTestSets   = size(simdata.partBinTst, 2); % number of cv bins
ntrainsizes = size(simdata.partBinTrn, 2); % number of training-size steps

% Preallocate every output so that all of them are defined even when there
% are no test sets or no training sizes.
absolute = zeros(nTestSets, ntrainsizes);
relative = zeros(nTestSets, ntrainsizes);
test_cn  = zeros(1, nTestSets);
train_cn = zeros(1, nTestSets);

for k = 1:nTestSets % all test/training combinations

    % clips occurring in this test set
    test_clips = unique_clips(simdata.partBinTst{k});
    test_cn(k) = numel(test_clips);

    for m = 1:ntrainsizes

        % clips occurring in this training set
        train_clips = unique_clips(simdata.partBinTrn{k, m});

        % clips present in both sets
        same = intersect(train_clips, test_clips);

        % statistics
        absolute(k, m) = numel(same);
        relative(k, m) = absolute(k, m) / test_cn(k);
    end

    % train_clips still holds the last (m = ntrainsizes) training set here;
    % it only exists if the inner loop ran at least once.
    if ntrainsizes > 0
        train_cn(k) = numel(train_clips);
    end
end


function clips = unique_clips(constraints)
% Unique clip identifiers found in columns 1-3 of a constraint matrix,
% returned as a column vector.
cols  = constraints(:, 1:3);
clips = unique(cols(:));