diff core/magnatagatune/sim_from_comparison_fair_components.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/magnatagatune/sim_from_comparison_fair_components.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,156 @@
+function [partBinTrn, partBinTst, partBinNoTrn] = sim_from_comparison_fair_components(comparison, comparison_ids, k, trainpart, filename)
+% [partBinTrn, partBinTst, partBinNoTrn] =
+% sim_from_comparison_fair_components(comparison, comparison_ids, k, trainpart, [filename])
+%
+% Creates a k-fold cross-validation partitioning of the similarity
+% constraints stored in the multigraph "G" of 'comp_SimGraphMulti.mat'.
+% The partitioning is done over the connected components of that graph,
+% so every component ends up as a whole in exactly one bin per fold.
+%
+% Inputs:
+%   comparison, comparison_ids - not used by the code below; the graph is
+%                   loaded from 'comp_SimGraphMulti.mat' instead of being
+%                   rebuilt from these (see the commented-out lines).
+%   k             - number of folds, handed to cvpartition.
+%   trainpart     - vector; each entry is handed, together with the base
+%                   partition, to cvpartition_trunctrain_incsubsets
+%                   (presumably the share of training data to keep --
+%                   TODO confirm against that helper).
+%   filename      - optional; if given (and non-empty) the three outputs
+%                   are saved to this file.
+%
+% Outputs (every bin is a matrix with one row [a b c weight] per constraint):
+%   partBinTst    - cell, partBinTst{i} is the test bin of fold i.
+%   partBinTrn    - cell, partBinTrn{i,m} is the training bin of fold i
+%                   for trainpart(m).
+%   partBinNoTrn  - cell, partBinNoTrn{i,m} holds the constraints of the
+%                   components that are neither in the training nor in
+%                   the test set of fold i for trainpart(m).
+
+% ---
+% get the similarity multigraph (cycles of length 2 are expected to be
+% removed already in the stored graph, see the disabled code below)
+% ---
+cprint(2, 'creating graph')
+% Gm = ClipSimGraphMulti(comparison, comparison_ids);
+% Gm.remove_cycles_length2; 
+cprint(2, 'loading Multigraph for Similarity Constraints')
+stored = load('comp_SimGraphMulti.mat', 'G');
+G = stored.G;
+
+% ---
+% Note: we get the connected components in the graph
+% and filter out those which have only one node
+% ---
+cprint(2, 'extracting connected components')
+[Gs, s, ~] = connected_components(G);
+
+valid = find(s > 1);
+Gsv = Gs(valid);
+
+% ---
+% We randomise the order of the components here, and draw for every
+% component a random order of (up to three) of its constraints.
+% ---
+datPermu = randperm(numel(Gsv));
+Gsv = Gsv(datPermu);
+
+conPermu = zeros(numel(Gsv),3);
+for i = 1:numel(Gsv)
+    conPermu(i,:) = randperm(3);
+end
+
+% ---
+% NOTE: we try the easy route: partition the components
+% and look at which constraint balance we end up with
+% ---
+P = cvpartition(numel(Gsv), 'k', k);
+
+% ---
+% here we export the graphs' similarity test sets
+% (no inner permutation is applied to the test constraints)
+% ---
+cprint(2, 'export test similarity')
+partBinTst = {};
+for i = 1:P.NumTestSets  % test runs
+    
+    % four columns: a, b, c and the constraint weight
+    partBinTst{i} = zeros(0, 4);
+    
+    tmp_idx = find(P.test(i));
+    for j = 1:numel(tmp_idx) % components
+        
+        % ---
+        % get the graphs which are associated 
+        % to this set and save them into a new bin.
+        % ---
+        [weights, a, b, c] = Gsv(tmp_idx(j)).similarities();
+        partBinTst{i} = [partBinTst{i}; [a' b' c' weights]];
+    end
+end
+
+% ---
+% Note: This uses a "truly" increasing training set
+% to do the partial training partition
+% ---
+cprint(2, 'export train similarity')
+for m = 1:numel(trainpart)
+
+    Ptrain(m) = cvpartition_trunctrain_incsubsets(P, trainpart(m));
+end
+
+% ---
+% here we export the graphs' similarity training sets
+% ---
+partBinTrn = {};
+for i = 1:P.NumTestSets % train runs
+
+    for m = 1:numel(trainpart) % increasing training sets
+        
+        tmp_idx = find(Ptrain(m).training(i));
+        partBinTrn{i,m} = collect_permuted_constraints(Gsv, tmp_idx, conPermu);
+    end
+end
+
+% ---
+% here we export the constraints of the components which are
+% left out of both the (truncated) training set and the test set
+% ---
+partBinNoTrn = {};
+for i = 1:P.NumTestSets % train runs
+
+    for m = 1:numel(trainpart) % increasing training sets
+        
+        tmp_idx = find(~Ptrain(m).training(i) & ~Ptrain(m).test(i));
+        partBinNoTrn{i,m} = collect_permuted_constraints(Gsv, tmp_idx, conPermu);
+    end
+end
+
+% save only if a usable file name was handed over
+if nargin == 5 && ~isempty(filename)
+    save(filename, 'partBinTrn', 'partBinTst', 'partBinNoTrn')
+end
+
+end
+
+function bin = collect_permuted_constraints(Gsv, idx, conPermu)
+% collects the [a b c weight] rows of the components Gsv(idx),
+% reordering each component's constraints by its row in conPermu
+
+bin = zeros(0, 4);
+for j = 1:numel(idx) % components
+    
+    % ---
+    % get the constraints of the component
+    % ---
+    [weights, a, b, c] = Gsv(idx(j)).similarities();
+    
+    % ---
+    % NOTE: WE apply the inner-triplet permutation,
+    % and truncate it where the component has less than 3 constraints.
+    % NOTE(review): a component with more than 3 constraints keeps
+    % only 3 of them here -- confirm this cannot happen / is intended.
+    % ---
+    tmp_permu = conPermu(idx(j),:);
+    if numel(a) < 3
+        tmp_permu = tmp_permu(tmp_permu <= numel(a));
+    end
+    
+    a = a(tmp_permu);
+    b = b(tmp_permu);
+    c = c(tmp_permu);
+    weights = weights(tmp_permu);
+    
+    % save the clips
+    bin = [bin; [a' b' c' weights]];
+end
+
+end
\ No newline at end of file