function [partBinTrn, partBinTst, partBinNoTrn] = sim_from_comparison_fair_components(comparison, comparison_ids, k, trainpart, filename)
%
% [partBinTrn, partBinTst, partBinNoTrn] =
%   sim_from_comparison_fair_components(comparison, comparison_ids, k, trainpart, [filename])
%
% Creates a k-fold cross-validation partitioning of the similarity
% constraints stored in a similarity multigraph, PRESERVING the graph's
% connected components: a component is always assigned to a bin as a whole.
%
% Inputs:
%   comparison, comparison_ids - currently UNUSED. The code that would build
%                  the graph from them is commented out below; the graph is
%                  loaded from 'comp_SimGraphMulti.mat' instead.
%   k            - number of cross-validation folds (passed to cvpartition).
%   trainpart    - vector of training-set size parameters, one per
%                  "increasing training set"; each entry is handed to
%                  cvpartition_trunctrain_incsubsets.
%   filename     - (optional) if given and non-empty, the three outputs are
%                  saved to this file.
%
% Outputs:
%   partBinTst   - cell array, partBinTst{i} holds the stacked
%                  [a b c weight] rows of all components in test fold i.
%   partBinTrn   - cell array, partBinTrn{i,m} holds the rows of the
%                  components in the training set of fold i for trainpart(m).
%   partBinNoTrn - cell array, partBinNoTrn{i,m} holds the rows of the
%                  components that are neither in the training nor in the
%                  test set of fold i for trainpart(m).
%
% For the train / no-train bins the constraints inside each component are
% reordered by a random permutation of 1:3 drawn once per component, so at
% most three constraints per component are exported there. Test bins are
% exported unpermuted and untruncated.

% ---
% get the similarity multigraph
% NOTE: the graph is not rebuilt from the inputs; a precomputed one is
% loaded from disk. The original construction is kept for reference:
%   Gm = ClipSimGraphMulti(comparison, comparison_ids);
%   Gm.remove_cycles_length2;
% ---
cprint(2, 'creating graph')
cprint(2, 'loading Multigraph for Similarity Constraints')
loaded = load('comp_SimGraphMulti.mat', 'G'); % struct form: no implicit workspace variables
G = loaded.G;

% ---
% get the connected components of the graph and
% drop those which consist of a single node only
% ---
cprint(2, 'extracting connected components')
[Gs, s, id] = connected_components(G); %#ok<NASGU> % id is not needed here

valid = find(s > 1);
Gsv = Gs(valid);

% ---
% randomise the order of the components, and draw one
% permutation of 1:3 per component which later fixes the
% order of the constraints inside that component.
% NOTE: the order of the random draws (component shuffle, per-component
% permutations, cvpartition) determines the result for a given RNG state.
% ---
datPermu = randperm(numel(Gsv));
Gsv = Gsv(datPermu);

conPermu = zeros(numel(Gsv), 3);
for i = 1:numel(Gsv)
    conPermu(i,:) = randperm(3);
end

% ---
% NOTE: we take the easy route: partition the components
% and accept whatever constraint balance results from it
% ---
P = cvpartition(numel(Gsv), 'k', k);

% ---
% export the test sets: all constraints of every
% component in the test fold, in their original order
% ---
cprint(2, 'export test similarity')
partBinTst = {};
for i = 1:P.NumTestSets % test runs
    partBinTst{i} = stack_component_triplets(Gsv, find(P.test(i)), []); %#ok<AGROW>
end

% ---
% build one truncated partition per entry of trainpart
% (a "truly" increasing training set, according to the original note;
%  the exact semantics live in cvpartition_trunctrain_incsubsets)
% ---
cprint(2, 'export train similarity')
for m = 1:numel(trainpart)
    Ptrain(m) = cvpartition_trunctrain_incsubsets(P, trainpart(m)); %#ok<AGROW>
end

% ---
% export the training sets
% ---
partBinTrn = {};
for i = 1:P.NumTestSets % train runs
    for m = 1:numel(trainpart) % increasing training sets
        trnIdx = find(Ptrain(m).training(i));
        partBinTrn{i,m} = stack_component_triplets(Gsv, trnIdx, conPermu); %#ok<AGROW>
    end
end

% ---
% export the components left over by the truncation:
% those that are neither training nor test data
% ---
partBinNoTrn = {};
for i = 1:P.NumTestSets % train runs
    for m = 1:numel(trainpart) % increasing training sets
        restIdx = find(~Ptrain(m).training(i) & ~Ptrain(m).test(i));
        partBinNoTrn{i,m} = stack_component_triplets(Gsv, restIdx, conPermu); %#ok<AGROW>
    end
end

% save only if a usable file name was supplied
if nargin == 5 && ~isempty(filename)
    save(filename, 'partBinTrn', 'partBinTst', 'partBinNoTrn')
end

end

function rows = stack_component_triplets(Gsv, compIdx, conPermu)
% Stacks the similarity constraints of the components Gsv(compIdx)
% into one matrix with rows [a b c weight].
%
%   Gsv      - array of component graphs providing a similarities() method
%   compIdx  - indices into Gsv of the components to export
%   conPermu - numel(Gsv)-by-3 matrix of per-component permutations of 1:3,
%              or [] to export all constraints in their original order
%
% The seed is 4 columns wide so that an empty bin has the same width as a
% filled one (assumes weights is a column vector and a, b, c are row
% vectors, as implied by the [a' b' c' weights] concatenation - TODO confirm
% against similarities()).

rows = zeros(0, 4);
for j = 1:numel(compIdx)
    compNo = compIdx(j);

    [weights, a, b, c] = Gsv(compNo).similarities();

    if ~isempty(conPermu)
        % apply the inner-component permutation; entries pointing beyond
        % the available constraints are dropped, so components with fewer
        % than 3 constraints keep all of them and larger ones keep 3
        order = conPermu(compNo, :);
        order = order(order <= numel(a));

        a = a(order);
        b = b(order);
        c = c(order);
        weights = weights(order);
    end

    rows = [rows; [a' b' c' weights]]; %#ok<AGROW>
end

end