annotate core/magnatagatune/sim_from_comparison_fair_components.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
function [partBinTrn, partBinTst, partBinNoTrn] = sim_from_comparison_fair_components(comparison, comparison_ids, k, trainpart, filename)
% SIM_FROM_COMPARISON_FAIR_COMPONENTS  component-preserving cross-validation split
%
% [partBinTrn, partBinTst, partBinNoTrn] =
%    sim_from_comparison_fair_components(comparison, comparison_ids, k, trainpart, [filename])
%
% Creates a k-fold cross-validation partitioning of the similarity
% constraints stored in the multigraph "G" (loaded from
% 'comp_SimGraphMulti.mat'), PRESERVING the connected components of the
% graph: a component is always assigned to a fold as a whole.
%
% Inputs:
%   comparison, comparison_ids - currently unused; the graph is loaded from
%                 'comp_SimGraphMulti.mat' instead of being rebuilt
%                 (see the commented-out ClipSimGraphMulti call below).
%   k           - number of folds, passed on to cvpartition.
%   trainpart   - vector; each entry is passed to
%                 cvpartition_trunctrain_incsubsets to derive a truncated
%                 training partition (presumably the training-set
%                 fraction/size - TODO confirm against that function).
%   filename    - optional; if given, the three outputs are saved there.
%
% Outputs (every bin is an N x 4 matrix with rows [a b c weight] as
% returned by the component's similarities() method):
%   partBinTst{i}     - constraints of the components in test fold i.
%   partBinTrn{i,m}   - constraints of the components in the training set
%                       of fold i for trainpart(m).
%   partBinNoTrn{i,m} - constraints of the components that are neither in
%                       the training nor in the test set of fold i for
%                       trainpart(m).
%
% NOTE: the partitioning is random (randperm / cvpartition draw from the
% global random stream); seed the generator beforehand for reproducibility.

% ---
% get the similarity multigraph and remove cycles
% ---
cprint(2, 'creating graph')
% Gm = ClipSimGraphMulti(comparison, comparison_ids);
% Gm.remove_cycles_length2;
cprint(2, 'loading Multigraph for Similarity Constraints')
load('comp_SimGraphMulti.mat', 'G');

% ---
% Note: we get the connected components in the graph
% and filter out those which have only one node
% ---
cprint(2, 'extracting connected components')
[Gs, s, ~] = connected_components(G);
Gsv = Gs(s > 1);

% ---
% Randomise the order of the component graphs, and draw one random
% ordering per component for the constraints inside it.
% ---
Gsv = Gsv(randperm(numel(Gsv)));

numComp = numel(Gsv);
conPermu = zeros(numComp, 3);
for i = 1:numComp
    conPermu(i,:) = randperm(3);
end

% ---
% NOTE: we try the easy route: partition the graphs
% and look at which constraints balance we end up with
% ---
P = cvpartition(numComp, 'k', k);
numFolds = P.NumTestSets;
numParts = numel(trainpart);

% ---
% here we export the graphs similarity test sets
% (all constraints of each test component, in their stored order)
% ---
cprint(2, 'export test similarity')
partBinTst = cell(1, numFolds);
for i = 1:numFolds % test runs
    partBinTst{i} = collect_component_constraints(Gsv, find(P.test(i)), []);
end

% ---
% Note: This uses a "truly" increasing training set
% to do the partial training partition
% ---
cprint(2, 'export train similarity')
for m = 1:numParts
    Ptrain(m) = cvpartition_trunctrain_incsubsets(P, trainpart(m)); %#ok<AGROW>
end

% ---
% here we export the graph's similarity training sets and the
% "left over" sets (neither training nor test), applying the
% per-component constraint permutation to both
% ---
partBinTrn   = cell(numFolds, numParts);
partBinNoTrn = cell(numFolds, numParts);
for i = 1:numFolds % train runs
    for m = 1:numParts % increasing training sets
        inTrain = Ptrain(m).training(i);
        inTest  = Ptrain(m).test(i);

        partBinTrn{i,m} = ...
            collect_component_constraints(Gsv, find(inTrain), conPermu);
        partBinNoTrn{i,m} = ...
            collect_component_constraints(Gsv, find(~inTrain & ~inTest), conPermu);
    end
end

if nargin == 5
    save(filename, 'partBinTrn', 'partBinTst', 'partBinNoTrn')
end

end

function bin = collect_component_constraints(Gsv, compIdx, conPermu)
% Stacks the similarity constraints of the components Gsv(compIdx) into one
% N x 4 matrix with rows [a b c weight].
%
% If conPermu is non-empty, row conPermu(idx,:) (a permutation of 1:3) is
% used to reorder the constraints of component idx; entries exceeding the
% number of available constraints are dropped.
% NOTE(review): since the permutation only has three entries, a component
% with more than three constraints contributes only three of them here -
% confirm that components never carry more than three constraints.

blocks = cell(numel(compIdx), 1);
for j = 1:numel(compIdx) % components
    idx = compIdx(j);
    [weights, a, b, c] = Gsv(idx).similarities();

    if ~isempty(conPermu)
        order = conPermu(idx,:);
        if numel(a) < 3
            order = order(order <= numel(a));
        end
        a = a(order);
        b = b(order);
        c = c(order);
        weights = weights(order);
    end

    % (:) forces column vectors regardless of the orientation returned
    blocks{j} = [a(:) b(:) c(:) weights(:)];
end

% the leading 0 x 4 operand keeps the column layout for empty selections
bin = vertcat(zeros(0, 4), blocks{:});
end