Mercurial > hg > camir-aes2014
diff core/tools/machine_learning/sim_get_traintest_clip_overlap.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
function [relative, absolute, test_cn, train_cn] = sim_get_traintest_clip_overlap(datafile)
% SIM_GET_TRAINTEST_CLIP_OVERLAP  Clip overlap between test and training sets.
%
%   [relative, absolute, test_cn, train_cn] = ...
%       sim_get_traintest_clip_overlap(datafile)
%
%   For every test set k and every training set (k,m) stored in DATAFILE,
%   counts how many of the clips referenced by the test set also appear in
%   the training set.
%
%   Input:
%     datafile - name of a file readable by LOAD that provides the cell
%                arrays partBinTst and partBinTrn. When omitted,
%                'comp_partBinData_unclustered_cupaper_01' is loaded.
%                Columns 1 to 3 of every cell entry are read as clip ids
%                (NOTE(review): any further columns are ignored - confirm
%                this matches the data layout).
%
%   Output:
%     relative - nTestSets x ntrainsizes matrix; share of the test clips
%                that reappear in the training set, as a fraction
%                (absolute / number of test clips, NOT multiplied by 100).
%                NaN if a test set contains no clips (0/0).
%     absolute - nTestSets x ntrainsizes matrix; number of shared clips.
%     test_cn  - 1 x nTestSets; number of unique clips per test set.
%     train_cn - 1 x nTestSets; number of unique clips in the last
%                training set (m = ntrainsizes) of each test set
%                (presumably the largest one - TODO confirm); 0 if there
%                are no training sets.

if nargin < 1
    simdata = load('comp_partBinData_unclustered_cupaper_01');
else
    simdata = load(datafile);
end

nTestSets = size(simdata.partBinTst, 2);   % number of cross-validation bins
ntrainsizes = size(simdata.partBinTrn, 2); % number of training set sizes

% Preallocate all outputs so they are defined (and correctly shaped) even
% when the data contains no test sets or no training sets.
absolute = zeros(nTestSets, ntrainsizes);
relative = zeros(nTestSets, ntrainsizes);
test_cn = zeros(1, nTestSets);
train_cn = zeros(1, nTestSets);

for k = 1:nTestSets % all test/training combinations

    % clips referenced by this test set
    test_clips = unique_clips(simdata.partBinTst{k});
    test_cn(k) = numel(test_clips);

    % reset so that train_cn(k) is well defined without training sets
    train_clips = [];
    for m = 1:ntrainsizes

        % clips referenced by this training set
        train_clips = unique_clips(simdata.partBinTrn{k,m});

        % clips present in both sets
        same = intersect(train_clips, test_clips);

        % overlap statistics
        absolute(k,m) = numel(same);
        relative(k,m) = absolute(k,m) / numel(test_clips);
    end

    % clip count of the last training set processed for this test set
    train_cn(k) = numel(train_clips);
end


function clips = unique_clips(triplets)
% Returns the sorted unique values found in columns 1 to 3 of TRIPLETS as
% a column vector; an empty input yields an empty column vector.
if isempty(triplets)
    clips = zeros(0, 1);
    return;
end
ids = triplets(:, 1:3);
clips = unique(ids(:));