function [lhs, rhs, dim, factors, invalid] = get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
% [lhs, rhs, dim, factors, invalid] = ...
%     get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
%
% Prepares ranking data to be used with the svmlight implementation
% of Schultz & Joachims 2003.
%
% Input:
%   r             cell array with one row per ranking. Row i refers to the
%                 feature vector X(:,i); r{i,1} and r{i,2} hold column
%                 indices into X. An optional third column r{i,3} holds a
%                 weight for the ranking.
%                 NOTE(review): presumably r{i,1} lists the items that are
%                 closer to item i than those in r{i,2} - confirm with caller.
%   X             feature matrix, one feature vector per column
%   delta_fun     (optional) function handle returning the delta vector
%                 delta(a, b, delta_params{:}) for feature vectors a, b.
%                 Default: @get_delta, i.e. delta(a, b) = (a - b).^2
%   delta_params  (optional) cell array of extra arguments handed to
%                 delta_fun. Default: no extra arguments.
%
% Output:
%   lhs      N x 2 cell; each row holds {indices of the non-zero entries,
%            values of the non-zero entries} of delta(a,c) - delta(a,b)
%   rhs      N x 1 vector, right hand side, fixed to one
%   dim      number of elements of the delta difference vectors,
%            [] if no valid ranking was found
%   factors  row vector of weights (r{i,3} if given, else one), only
%            collected if requested.
%            NOTE(review): one entry is appended for EVERY (b,c) pair, also
%            for pairs that did not produce a row in lhs, so
%            numel(factors) may exceed size(lhs,1). Kept as is because
%            callers may compensate using 'invalid' - TODO confirm.
%   invalid  ranking indices i for which a (b,c) pair had an all-zero
%            delta difference (one entry per such pair). Rankings skipped
%            by the validity check below are NOT listed here.

% ---
% CAVE: this file still allows arbitrary rankings
%       this could be good but may also introduce
%       confusion in the future. Especially the
%       weightings are only defined per query
% ---

% ---
% variable function to get vector deltas
% the two optional arguments are defaulted independently, so that
% passing delta_fun without delta_params does not fail
% ---
if nargin < 3 || isempty(delta_fun)
    delta_fun = @get_delta;
end
if nargin < 4
    delta_params = {};
end

% ---
% fix right hand side to one
% (after Schultz & Joachims 2003)
% ---
fix_rhs = 1;
fix_weight = 1;

% ---
% NOTE: the outputs grow inside the loop, their final size
%       is not known beforehand
% ---
lhs = cell(0,2);
factors = [];
invalid = [];

% dim is set from the first delta difference actually computed; it stays
% empty if every ranking is skipped (previously it was left unassigned
% whenever the first ranking was not valid)
dim = [];

has_weights = size(r,2) > 2;

% factors is the fourth output
want_factors = nargout > 3;

for i = 1:size(r,1)

    % feature index
    a = i;

    % skip rankings with an empty side or with overlapping sides
    if isempty(r{i,1}) || isempty(r{i,2}) || ...
            ~isempty(intersect(r{i,1}, r{i,2}))
        continue;
    end

    xa = X(:,a);

    % ---
    % NOTE / TODO: the following is intended for compatibility
    %   both sides of the ranking may have more than one entry.
    %   for the MTT database, the ranking may be correct, but the
    %   inequalities built from non-singular rankings are not
    %   based on the actual data
    % ---
    for j = 1:numel(r{i,1})
        b = r{i,1}(j);

        % delta(a,b) does not depend on c, compute it once per b
        dab = delta_fun(xa, X(:,b), delta_params{:});

        for k = 1:numel(r{i,2})
            c = r{i,2}(k);

            % ---
            % get vector deltas
            % ---
            dac = delta_fun(xa, X(:,c), delta_params{:});

            % get the delta difference vector
            ddiff = dac - dab;
            if isempty(dim)
                dim = numel(ddiff);
            end

            % ---
            % save the non-empty differences row by row
            % NOTE: it is not clear whether the indexing for
            %       \omega starts at 0 or 1.
            % ---
            nonzero_idx = find(ddiff ~= 0);

            if ~isempty(nonzero_idx)
                lhs = cat(1, lhs, {nonzero_idx, ddiff(nonzero_idx)});
            else
                % an all-zero difference carries no constraint
                invalid = [invalid i];
            end

            % save factors (appended for every pair, see header note)
            if want_factors
                if has_weights
                    factors = [factors, r{i,3}]; % / max_weight
                else
                    factors = [factors, fix_weight];
                end
            end

        end
    end
end

% ---
% determine right hand side
% ---
rhs = ones(size(lhs,1), 1) .* fix_rhs;
cprint(2, 'SVMLight data: %d invalid rankings excluded from training set', numel(invalid));
end


function out = get_delta(a, b, A)
% returns the pointwise (transformed) squared feature vector differences
%
%   a, b   feature vectors
%   A      (optional) matrix; if given and non-empty, both vectors are
%          multiplied by A' before the difference is taken

if nargin == 2 || isempty(A)

    % ---
    % return squared factors as in euclidean distance
    % ---
    out = (a - b).^2;

else

    % transform vectors before assessing difference
    out = (A' * a - A' * b).^2;
end

end