view core/tools/machine_learning/get_svmlight_inequalities_from_ranking.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
function [lhs, rhs, dim, factors, invalid] = get_svmlight_inequalities_from_ranking(r, X, delta_fun,delta_params)
% [lhs, rhs, dim, factors, invalid] = ...
%     get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
%
% Prepares ranking data to be used with the svmlight implementation
% of Schultz & Joachims 2003.
%
% Input:
%   r            cell array with one ranking per row. For row i the column
%                X(:,i) is the reference item, r{i,1} and r{i,2} hold
%                column indices into X, and (optionally) r{i,3} holds the
%                weight of that ranking.
%   X            feature matrix, one feature vector per column.
%   delta_fun    (optional) function handle returning the vector delta
%                delta_fun(a, b, delta_params{:}) for feature vectors a, b.
%                Default: local get_delta, i.e. (a - b).^2.
%   delta_params (optional) cell array of extra arguments for delta_fun.
%                Default: {[]} together with the default delta_fun,
%                {} for a user-supplied delta_fun.
%
% Output:
%   lhs      N x 2 cell; lhs{n,1} are the indices of the non-zero entries
%            of delta(a,c) - delta(a,b), lhs{n,2} the values at those
%            indices, for b in r{i,1} and c in r{i,2}.
%   rhs      N x 1 vector, fixed to one.
%   dim      number of elements of a delta difference vector, or [] if
%            no inequality was evaluated at all.
%   factors  one weight per evaluated (b,c) pair: r{i,3} if r has a third
%            column, otherwise 1.
%   invalid  ranking index i for every (b,c) pair whose delta difference
%            was all zero and was therefore not added to lhs.

% ---
% CAVE: this file still allows arbitrary rankings
%  this could be good but also introducing 
%  confusion in the future.  Especially the 
%  weightings are only defined per query 
% ---

% ---
% variable function to get vector deltas.
% The two defaults are resolved independently, so that a call with
% exactly three arguments no longer leaves delta_params undefined.
% ---
if nargin < 4
    if nargin < 3
        % historical default: hand an empty transform to get_delta
        delta_params = {[]};
    else
        % custom delta function without extra arguments
        delta_params = {};
    end
end
if nargin < 3 || isempty(delta_fun)
    delta_fun = @get_delta;
end
if ~iscell(delta_params)
    % allow a single extra argument to be passed without wrapping
    delta_params = {delta_params};
end

% ---
% fix right hand side to one
% (after Schultz & Joachims 2003)
% ---
fix_rhs = 1;
fix_weight = 1;

% factors is the 4th output, only collect weights if it is requested
want_factors = (nargout > 3);
has_weights = (size(r,2) > 2);

% ---
% NOTE: the number of inequalities is not known in advance,
%  the outputs are grown on demand
% ---
lhs = cell(0,2);
dim = [];
factors = [];
invalid = [];
for i = 1:size(r,1)
    
    % feature index
    a = i;
    
    % check if ranking is valid: both sides non-empty and disjoint
    if ~isempty(r{i,1}) && ~isempty(r{i,2}) && ...
            isempty(intersect(r{i,1}, r{i,2}))
    
        % ---
        % NOTE / TODO: the following is intended for compatibility
        %  both sides of the ranking may have more than one entry.
        %  for the MTT database, the ranking may be correct, but the 
        %  inequalities build from non-singular rankings are not
        %  based on the actual data
        % ---
        for j = 1:numel(r{i,1})
            b = r{i,1}(j);
            
            for k = 1:numel(r{i,2})
                c = r{i,2}(k);

                % ---
                % get vector deltas
                % ---
                dab = delta_fun(X(:,a), X(:,b), delta_params{:});
                dac = delta_fun(X(:,a), X(:,c), delta_params{:});
                
                % get the delta difference vector
                ddiff = dac - dab;
                
                % take the dimension from the first evaluated difference;
                % tying this to i == 1 left dim unassigned whenever the
                % first ranking was skipped as invalid
                if isempty(dim)
                    dim = numel(ddiff);
                end
                
                % ---
                % save the non-empty differences row by row 
                % NOTE: it is not clear whether the indexing for 
                % \omega starts at 0 or 1.
                % ----
                xgzero = find(ddiff ~= 0);
                
                if ~isempty(xgzero)
                    lhs(end+1,:) = {xgzero, ddiff(xgzero)};
                else
                    invalid = [invalid i];
                end
                
                % save factors
                % NOTE(review): a factor is stored for every (b,c) pair,
                %  including pairs that did not produce a row in lhs, so
                %  factors can be longer than lhs. Kept as is because
                %  callers may rely on it - confirm against callers.
                if want_factors
                    if has_weights
                        factors = [factors, r{i,3}]; % / max_weight
                    else
                        factors = [factors, fix_weight];
                    end
                end
                
            end
        end
    end
end

% ---
% determine right hand side
% ---
rhs = ones(size(lhs,1), 1) .* fix_rhs;
cprint(2, 'SVMLight data: %d invalid rankings excluded from training set', numel(invalid));
end


function out = get_delta(a, b, A)
% GET_DELTA pointwise squared difference of two feature vectors
%
%   out = get_delta(a, b)     returns (a - b).^2, i.e. the per-dimension
%                             terms of the squared euclidean distance
%   out = get_delta(a, b, A)  applies the transform A' to both vectors
%                             before taking the squared difference;
%                             an empty A behaves like the two-argument form

no_transform = (nargin == 2) || isempty(A);

if ~no_transform
    % ---
    % project both vectors first, then compare them pointwise
    % ---
    proj_a = A' * a;
    proj_b = A' * b;
    out = (proj_a - proj_b).^2;
    return;
end

% ---
% plain squared differences of the raw features
% ---
out = (a - b).^2;

end