diff core/tools/machine_learning/sim_get_traintest_clip_overlap.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/tools/machine_learning/sim_get_traintest_clip_overlap.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,41 @@
+function [relative, absolute, test_cn, train_cn] =  sim_get_traintest_clip_overlap(datafile)
+% [relative, absolute, test_cn, train_cn] = sim_get_traintest_clip_overlap(datafile)
+%
+% Measures, for each test set, how many of its clips reappear in the
+% corresponding training sets.
+%
+% datafile: MAT-file providing the cell arrays partBinTst and partBinTrn;
+%           defaults to 'comp_partBinData_unclustered_cupaper_01'
+% absolute(k,m): number of clips shared by test set k and training set (k,m)
+% relative(k,m): absolute(k,m) / test_cn(k), a fraction (NaN if test set k is empty)
+% test_cn(k):  number of unique clips in test set k
+% train_cn(k): number of unique clips in the last training set of test set k
+
+if nargin < 1
+    datafile = 'comp_partBinData_unclustered_cupaper_01';
+end
+simdata = load(datafile);
+nTestSets = size(simdata.partBinTst, 2); % num cv bins
+ntrainsizes = size(simdata.partBinTrn, 2); % num increases of training
+
+% preallocate every output so all are defined even without test/training sets
+absolute = zeros(nTestSets, ntrainsizes);
+relative = zeros(nTestSets, ntrainsizes);
+test_cn = zeros(1, nTestSets);
+train_cn = zeros(1, nTestSets);
+for k = 1:nTestSets % all test/training combinations
+
+    % get clips of this test set (gathered from columns 1 to 3)
+    test_clips = unique([simdata.partBinTst{k}(:,1); simdata.partBinTst{k}(:,2); simdata.partBinTst{k}(:,3)]);
+    test_cn(k) = numel(test_clips);
+    for m = 1:ntrainsizes
+
+        % get clips of this training set (gathered from columns 1 to 3)
+        train_clips = unique([simdata.partBinTrn{k,m}(:,1); simdata.partBinTrn{k,m}(:,2); simdata.partBinTrn{k,m}(:,3)]);
+
+        % overlap statistics: shared clips, absolute and relative to the test set
+        absolute(k,m) = numel(intersect(train_clips, test_clips));
+        relative(k,m) = absolute(k,m) / test_cn(k);
+        train_cn(k) = numel(train_clips); % overwritten each pass: last training set wins
+    end
+end
\ No newline at end of file