diff core/tools/machine_learning/cvpartition_trunctrain_incsubsets.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/tools/machine_learning/cvpartition_trunctrain_incsubsets.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,128 @@
+% ---
+% class cvpartition_trunctrain_incsubsets
+% NOTE: this is a fake cvpartition double for
+% using cvpartitions in truncated-training-size experiments
+%
+% Unlike cvpartition_trunctrain, the training partitions for all training
+% sizes are drawn from the same stored per-test-set permutations, so the
+% smaller ones are subsets of the bigger ones
+% ---
+classdef cvpartition_trunctrain_incsubsets
+% Stand-in for a cvpartition object whose training sets are truncated to
+% a fraction of their original size; the test sets are copied unchanged.
+properties (Hidden)
+    mtest;      % cell array, entry i holds the result of Pin.test(i)
+    mtraining;  % cell array, entry i holds an N x 1 logical training mask
+end
+properties
+    N;
+    NumTestSets;
+    TrainSize;
+    TestSize;
+end
+
+methods
+% ---
+% constructor: directly calculates the truncated training sets
+%   Pin       - partition to truncate (presumably a cvpartition object)
+%   perctrain - scalar fraction in [0, 1] of every training set to keep
+% ---
+function P = cvpartition_trunctrain_incsubsets(Pin, perctrain)
+    % a fraction outside [0, 1] would index past the permutation below
+    if ~isscalar(perctrain) || ~(perctrain >= 0 && perctrain <= 1)
+        error('cvpartition_trunctrain_incsubsets:perctrain', 'perctrain must be a scalar in [0, 1]');
+    end
+
+    % ---
+    % NOTE: we use a different permutation for each test set, as
+    % otherwise the very small training sets will have about the same
+    % data. Permutations are only drawn when no stored set matches Pin.
+    % ---
+    if ~cvpartition_trunctrain_incsubsets.exists_permutation(Pin)
+        cvpartition_trunctrain_incsubsets.renew_permutation(Pin);
+    end
+
+    P.N = Pin.N;
+    P.NumTestSets = Pin.NumTestSets;
+
+    for i = 1:Pin.NumTestSets
+        % copy testing data
+        P.TestSize(i) = Pin.TestSize(i);
+        P.mtest{i} = Pin.test(i);
+
+        % calculate new training size
+        P.TrainSize(i) = ceil(perctrain * Pin.TrainSize(i));
+
+        % get actual training indices
+        idx = find(Pin.training(i));
+        % NOTE: the test-set-specific permutation is applied; it only
+        % holds values up to Pin.TrainSize(i)
+        permu = cvpartition_trunctrain_incsubsets.get_permutation(i,Pin.TrainSize(i));
+        % truncate the indices
+        idx = idx(permu(1:P.TrainSize(i)));
+
+        % build truncated training set
+        P.mtraining{i} = false(P.N, 1);
+        P.mtraining{i}(idx) = true;
+    end
+end
+
+% returns the stored test set i
+function out = test(P, i)
+    out = P.mtest{i};
+end
+
+% returns the truncated training mask of test set i
+function out = training(P, i)
+    out = P.mtraining{i};
+end
+end
+
+methods (Static)
+    % ---
+    % draws one random permutation of 1..P.N per test set and saves them
+    % in a global variable, so that later experiments reuse them and the
+    % smaller training sets are subsets of the bigger ones
+    % ---
+    function renew_permutation(P)
+        global globalvars;
+        if isfield(globalvars, 'camir') && isfield(globalvars.camir, ...
+                'cvpartition_trunctrain_incsubsets')
+            warning('renewing permutations for train sets');
+        end
+
+        % rebuild the whole array: entries left over from a partition with
+        % more test sets would make exists_permutation fail on every call
+        permutation = struct('data', cell(1, P.NumTestSets));
+        for i = 1:P.NumTestSets
+            permutation(i).data = randperm(P.N);
+        end
+        globalvars.camir.cvpartition_trunctrain_incsubsets.permutation = permutation;
+    end
+
+    function idx = get_permutation(testId, trainSize)
+        % returns the permutation for a specific test set, reduced to
+        % the values 1..trainSize (their relative order is kept)
+        global globalvars;
+        idx = globalvars.camir.cvpartition_trunctrain_incsubsets.permutation(testId).data;
+        % cut the permutation to contain no excess numbers
+        idx = idx(idx <= trainSize);
+    end
+
+    function out = exists_permutation(P)
+        % true if the global store holds one permutation of length P.N
+        % for each of the P.NumTestSets test sets
+        global globalvars;
+        out = false;
+        % isfield returns false for non-structs, so an unset global is safe
+        if ~isfield(globalvars, 'camir') || ~isfield(globalvars.camir, ...
+                'cvpartition_trunctrain_incsubsets') || ~isfield( ...
+                globalvars.camir.cvpartition_trunctrain_incsubsets, 'permutation')
+            return;
+        end
+        stored = globalvars.camir.cvpartition_trunctrain_incsubsets.permutation;
+        lens = arrayfun(@(p) numel(p.data), stored);
+        out = (numel(stored) == P.NumTestSets) && all(lens(:) == P.N);
+    end
+end
+end
\ No newline at end of file