annotate core/tools/machine_learning/cvpartition_trunctrain_incsubsets.m @ 0:cc4b1211e677 tip

initial commit to HG from Changeset: 646 (e263d8a21543) added further path and more save "camirversion.m"
author Daniel Wolff
date Fri, 19 Aug 2016 13:07:06 +0200
parents
children
rev   line source
% ---
% class cvpartition_trunctrain_incsubsets
% NOTE: this is a fake cvpartition double for
%       using cvpartitions in truncated-training size experiments
%
% In contrast to cvpartition_trunctrain, the truncated training sets are
% nested: for a given test set, the training set built with a smaller
% percentage is always a subset of the one built with a bigger percentage.
% This works because one random permutation per test set is stored in the
% global variable "globalvars" and reused by every object constructed for a
% partition with the same N and NumTestSets.
%
% Usage:
%   P = cvpartition_trunctrain_incsubsets(Pin, perctrain)
%
%   Pin       - partition object offering N, NumTestSets, TrainSize(i),
%               TestSize(i), training(i) and test(i)
%   perctrain - fraction of each original training set to keep
% ---
classdef cvpartition_trunctrain_incsubsets

    properties (Hidden)

        mtest;      % cell array: logical test mask for each test set
        mtraining;  % cell array: truncated logical training mask for each test set
    end
    properties
        N;            % total number of observations
        NumTestSets;  % number of test sets (folds)
        TrainSize;    % truncated training set size for each test set
        TestSize;     % test set size for each test set (copied from Pin)
    end


    methods

        % ---
        % constructor: directly calculates the truncated training sets
        % ---
        function P = cvpartition_trunctrain_incsubsets(Pin, perctrain)

            % MATLAB may call the constructor without arguments (object
            % arrays, default construction): return an empty object then
            if nargin == 0
                return;
            end

            % ---
            % NOTE: we use a different permutation for each cv-run (test set),
            % as otherwise the very small training sets will have about the
            % same data
            % ---
            if ~cvpartition_trunctrain_incsubsets.exists_permutation(Pin)
                cvpartition_trunctrain_incsubsets.renew_permutation(Pin);
            end

            P.N = Pin.N;
            P.NumTestSets = Pin.NumTestSets;

            P.TrainSize = zeros(1, Pin.NumTestSets);
            P.TestSize = zeros(1, Pin.NumTestSets);
            P.mtest = cell(1, Pin.NumTestSets);
            P.mtraining = cell(1, Pin.NumTestSets);

            for i = 1:Pin.NumTestSets

                % copy testing data
                P.TestSize(i) = Pin.TestSize(i);
                P.mtest{i} = Pin.test(i);

                % calculate new training size; clamp it to the available
                % range so that perctrain slightly outside [0, 1] cannot
                % index past the end of the original training set
                fullSize = Pin.TrainSize(i);
                P.TrainSize(i) = min(max(ceil(perctrain * fullSize), 0), fullSize);

                % get actual training indices
                idx = find(Pin.training(i));

                % ---
                % NOTE: the test-set-specific permutation is applied;
                % only permutation entries <= Pin.TrainSize(i) are returned,
                % i.e. a permutation of the positions within idx
                % ---
                permu = cvpartition_trunctrain_incsubsets.get_permutation(i, fullSize);

                % truncate the indices: always the leading entries of the
                % same permutation, which makes smaller sets subsets of
                % bigger ones
                idx = idx(permu(1:P.TrainSize(i)));

                % build truncated training set
                P.mtraining{i} = false(P.N, 1);
                P.mtraining{i}(idx) = true;
            end
        end

        % returns the logical test mask of test set i
        function out = test(P, i)

            out = P.mtest{i};
        end

        % returns the truncated logical training mask of test set i
        function out = training(P, i)

            out = P.mtraining{i};
        end
    end

    methods (Static)

        % ---
        % draws a fresh random permutation of 1:P.N for every test set of P
        % and stores them in
        % globalvars.camir.cvpartition_trunctrain_incsubsets.permutation
        %
        % The stored struct array is replaced as a whole. Overwriting only
        % the first P.NumTestSets entries would keep stale entries from a
        % previous partition with more test sets, exists_permutation would
        % then never succeed and every constructor call would draw new
        % permutations, breaking the subset property.
        % ---
        function renew_permutation(P)
            global globalvars;

            if isfield(globalvars, 'camir') && ...
                    isfield(globalvars.camir, 'cvpartition_trunctrain_incsubsets')
                warning 'renwewing permutations for train sets';
            end

            perms = struct('data', cell(1, P.NumTestSets));
            for i = 1:P.NumTestSets
                perms(i).data = randperm(P.N);
            end
            globalvars.camir.cvpartition_trunctrain_incsubsets.permutation = perms;
        end

        % ---
        % returns the stored permutation for the test set testId, reduced
        % to the entries <= trainSize (their relative order is kept)
        % ---
        function idx = get_permutation(testId, trainSize)
            global globalvars;

            idx = globalvars.camir.cvpartition_trunctrain_incsubsets.permutation(testId).data;

            % cut the permutation to contain no excess numbers
            idx = idx(idx <= trainSize);
        end

        % ---
        % true if permutations matching P are stored, i.e. there is one
        % permutation per test set and each has P.N entries;
        % false as well if the global storage has not been set up at all
        % ---
        function out = exists_permutation(P)
            global globalvars;

            out = false;

            % isfield returns false for non-struct input, which covers an
            % uninitialised (empty) global variable
            if ~isfield(globalvars, 'camir') || ...
                    ~isfield(globalvars.camir, 'cvpartition_trunctrain_incsubsets')
                return;
            end

            store = globalvars.camir.cvpartition_trunctrain_incsubsets;
            if ~isfield(store, 'permutation')
                return;
            end

            perms = store.permutation;
            out = (numel(perms) == P.NumTestSets) ...
                && all(arrayfun(@(p) numel(p.data) == P.N, perms));
        end

    end
end