function [partBinTrn, partBinTst, partBinNoTrn] = sim_from_comparison_fair_components(comparison, comparison_ids, k, trainpart, filename)
% [partBinTrn, partBinTst, partBinNoTrn] =
% sim_from_comparison_fair_components(comparison, comparison_ids, k, trainpart, [filename])
%
% Creates a k-fold cross-validation partitioning of the similarity
% constraints held in the multigraph G (loaded from
% 'comp_SimGraphMulti.mat'), PRESERVING its connected components:
% a component is always assigned to a fold as a whole.
%
% Inputs:
%   comparison,
%   comparison_ids  currently UNUSED - the graph is loaded from the
%                   precomputed .mat file instead of being rebuilt
%                   (see the commented-out construction below).
%   k               number of folds, passed to cvpartition.
%   trainpart       vector; each entry is handed to
%                   cvpartition_trunctrain_incsubsets to derive one
%                   truncated training partition (presumably the
%                   fraction of the training set to keep - TODO confirm).
%   filename        optional; if given and non-empty, the three outputs
%                   are saved to this file.
%
% Outputs (every cell holds an N x 4 matrix with rows [a b c weight]):
%   partBinTst      1 x k cell, test constraints per fold.
%   partBinTrn      k x numel(trainpart) cell, training constraints per
%                   fold and training-set size.
%   partBinNoTrn    k x numel(trainpart) cell, constraints of components
%                   that are neither in the truncated training set nor
%                   in the test set.

% ---
% get the similarity multigraph
% NOTE: the graph is not built from the input arguments; a precomputed
% version is loaded. The original construction is kept for reference:
%   Gm = ClipSimGraphMulti(comparison, comparison_ids);
%   Gm.remove_cycles_length2;
% ---
cprint(2, 'creating graph')
cprint(2, 'loading Multigraph for Similarity Constraints')
stored = load('comp_SimGraphMulti.mat', 'G');
G = stored.G;

% ---
% Note: we get the connected components in the graph
% and filter out those which have only one node
% ---
cprint(2, 'extracting connected components')
[Gs, s, ~] = connected_components(G);
Gsv = Gs(s > 1);

% ---
% We randomise the order of the components, and draw one random
% in-component constraint order per component. The latter is drawn
% once so that the same order is used for every training-set size.
% NOTE(review): the permutation has length 3, so at most three
% constraints per component reach the train / no-train bins, while
% the test bins keep all constraints. Presumably a component never
% holds more than three constraints - confirm.
% ---
Gsv = Gsv(randperm(numel(Gsv)));

conPermu = zeros(numel(Gsv), 3);
for i = 1:numel(Gsv)
    conPermu(i,:) = randperm(3);
end

% ---
% NOTE: we try the easy route: partition the components
% and look at which constraint balance we end up with
% ---
P = cvpartition(numel(Gsv), 'k', k);
nFolds = P.NumTestSets;
nParts = numel(trainpart);

% ---
% here we export the test sets (no in-component permutation)
% ---
cprint(2, 'export test similarity')
partBinTst = cell(1, nFolds);
for i = 1:nFolds
    partBinTst{i} = collect_constraints(Gsv, find(P.test(i)), []);
end

% ---
% Note: This uses a "truly" increasing training set
% to do the partial training partition
% ---
cprint(2, 'export train similarity')
for m = 1:nParts
    Ptrain(m) = cvpartition_trunctrain_incsubsets(P, trainpart(m));
end

% ---
% here we export the training sets and the left-over ("no-train")
% sets for every fold and every training-set size
% ---
partBinTrn = cell(nFolds, nParts);
partBinNoTrn = cell(nFolds, nParts);
for i = 1:nFolds
    for m = 1:nParts
        inTrain = Ptrain(m).training(i);
        inTest = Ptrain(m).test(i);

        partBinTrn{i,m} = collect_constraints(Gsv, find(inTrain), conPermu);
        partBinNoTrn{i,m} = collect_constraints(Gsv, find(~inTrain & ~inTest), conPermu);
    end
end

% optionally save the partitioning
if nargin >= 5 && ~isempty(filename)
    save(filename, 'partBinTrn', 'partBinTst', 'partBinNoTrn')
end

end

function bin = collect_constraints(Gsv, idx, conPermu)
% Stacks the constraints of the components Gsv(idx) into one N x 4
% matrix with rows [a b c weight].
%
% If conPermu is non-empty, row idx(j) of conPermu is applied to the
% constraints of component idx(j): entries exceeding the number of
% constraints in that component are dropped, the rest define the order.

% four columns, so that an empty bin has the same width as a filled one
bin = zeros(0, 4);

for j = 1:numel(idx)

    [weights, a, b, c] = Gsv(idx(j)).similarities();

    if ~isempty(conPermu)
        % truncate the permutation to the constraints actually present
        permu = conPermu(idx(j), :);
        permu = permu(permu <= numel(a));

        a = a(permu);
        b = b(permu);
        c = c(permu);
        weights = weights(permu);
    end

    % a, b, c are transposed into columns; weights is appended as is
    bin = [bin; [a' b' c' weights]]; %#ok<AGROW>
end

end