view core/magnatagatune/sim_from_comparison_fair_components.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
function [partBinTrn, partBinTst, partBinNoTrn] = sim_from_comparison_fair_components(comparison, comparison_ids, k, trainpart, filename)
% SIM_FROM_COMPARISON_FAIR_COMPONENTS  component-preserving cross-validation split
%
% [partBinTrn, partBinTst, partBinNoTrn] =
% sim_from_comparison_fair_components(comparison, comparison_ids, k, trainpart, [filename])
%
% Creates a cross-validation partitioning of the similarity data in the
% similarity multigraph, PRESERVING its connected components: a component
% is always assigned to a partition as a whole.
%
% Inputs:
%   comparison, comparison_ids - currently NOT used. The multigraph is
%                 loaded from the variable 'G' in 'comp_SimGraphMulti.mat'
%                 instead of being rebuilt from these arguments.
%   k           - number of folds, handed to cvpartition.
%   trainpart   - vector; each entry is handed to
%                 cvpartition_trunctrain_incsubsets to derive a truncated
%                 training partition (presumably the share of training
%                 data to keep - TODO confirm against that function).
%   filename    - optional. If given, the three outputs are saved there.
%
% Outputs:
%   partBinTst{i}      - constraints of the test components of fold i
%   partBinTrn{i,m}    - constraints of the training components of fold i
%                        for training size trainpart(m)
%   partBinNoTrn{i,m}  - constraints of the components which are in neither
%                        the training nor the test set of fold i for
%                        training size trainpart(m)
%
%   Each bin stacks the rows [a' b' c' weights] as returned by the
%   components' similarities() method.

% ---
% get the similarity multigraph with cycles removed.
% NOTE: the graph is not rebuilt here, a precomputed version is loaded.
% It was presumably created as follows:
%   Gm = ClipSimGraphMulti(comparison, comparison_ids);
%   Gm.remove_cycles_length2;
% ---
cprint(2, 'creating graph')
cprint(2, 'loading Multigraph for Similarity Constraints')
load('comp_SimGraphMulti.mat', 'G');

% ---
% Note: we get the connected components in the graph
% and filter out those which have only one node
% ---
cprint(2, 'extracting connected components')
[Gs, s, ~] = connected_components(G);
Gsv = Gs(s > 1);

% ---
% We randomise the order of the components, and draw one
% random order per component for its (up to three) constraints.
% NOTE: the sequence of random calls (randperm, then cvpartition)
% is kept fixed so results stay reproducible for a given seed.
% ---
datPermu = randperm(numel(Gsv));
Gsv = Gsv(datPermu);

conPermu = zeros(numel(Gsv), 3);
for i = 1:numel(Gsv)
    conPermu(i,:) = randperm(3);
end

% ---
% NOTE: we try the easy route: partition the components
% and look at which constraint balance we end up with
% ---
P = cvpartition(numel(Gsv), 'k', k);

% ---
% here we export the graph's similarity test sets.
% The test constraints are exported in their original order.
% ---
cprint(2, 'export test similarity')
partBinTst = {};
for i = 1:P.NumTestSets  % test runs
    partBinTst{i} = stack_component_constraints(Gsv, find(P.test(i)));
end

% ---
% Note: This uses a "truly" increasing training set
% to do the partial training partition
% ---
cprint(2, 'export train similarity')
for m = 1:numel(trainpart)
    Ptrain(m) = cvpartition_trunctrain_incsubsets(P, trainpart(m));
end

% ---
% here we export the graph's similarity training sets
% ---
partBinTrn = {};
for i = 1:P.NumTestSets % train runs
    for m = 1:numel(trainpart) % increasing training sets
        trnIdx = find(Ptrain(m).training(i));
        partBinTrn{i,m} = stack_component_constraints(Gsv, trnIdx, conPermu);
    end
end

% ---
% here we export the remaining components, i.e. those which
% are used neither for training nor for testing in this run
% ---
partBinNoTrn = {};
for i = 1:P.NumTestSets % train runs
    for m = 1:numel(trainpart) % increasing training sets
        restIdx = find(~Ptrain(m).training(i) & ~Ptrain(m).test(i));
        partBinNoTrn{i,m} = stack_component_constraints(Gsv, restIdx, conPermu);
    end
end

% save the partitioning if a file name was given
if nargin == 5
    save(filename, 'partBinTrn', 'partBinTst', 'partBinNoTrn')
end

end

function bin = stack_component_constraints(Gsv, compIdx, conPermu)
% bin = stack_component_constraints(Gsv, compIdx, [conPermu])
%
% Stacks the constraints of the components Gsv(compIdx) into one matrix
% with the rows [a' b' c' weights].
%
% If conPermu is given, row conPermu(idx,:) is used to reorder the
% constraints of component idx before they are appended.

% ---
% NOTE: the rows appended below have four columns (assuming
% similarities() returns row vectors a, b, c and a column vector
% weights), so the empty bin is created with four columns as well.
% ---
bin = zeros(0, 4);
for j = 1:numel(compIdx) % components

    % get the constraints which are associated to this component
    [weights, a, b, c] = Gsv(compIdx(j)).similarities();

    if nargin > 2
        % ---
        % NOTE: we apply the inner-component permutation, and
        % truncate it where the component has less than 3 constraints.
        % As the permutation only has 3 entries, components with more
        % than 3 constraints contribute only their first 3 constraints
        % (in permuted order).
        % ---
        order = conPermu(compIdx(j),:);
        order = order(order <= numel(a));

        a = a(order);
        b = b(order);
        c = c(order);
        weights = weights(order);
    end

    % save the clips
    bin = [bin; [a' b' c' weights]]; %#ok<AGROW>
end

end