Mercurial > hg > camir-aes2014
view core/tools/machine_learning/get_svmlight_inequalities_from_ranking.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line source
function [lhs, rhs, dim, factors, invalid] = get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
% [lhs, rhs, dim, factors, invalid] = ...
%     get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
%
% Prepares ranking data to be used with the svmlight implementation
% of Schultz & Joachims 2003.
%
% Inputs:
%   r             cell array with one row per query. r{i,1} and r{i,2}
%                 hold column indices into X; for every b in r{i,1} and
%                 c in r{i,2} one inequality is built from
%                 delta(X(:,i), X(:,c)) - delta(X(:,i), X(:,b)).
%                 If r has a third column, r{i,3} is used as the factor
%                 (weight) of the inequalities generated by row i.
%   X             feature matrix, one feature vector per column.
%   delta_fun     (optional) function handle returning the element-wise
%                 distance / delta(a, b) of two feature vectors,
%                 default: delta(a, b) = (a - b).^2
%   delta_params  (optional) cell array of extra arguments passed to
%                 delta_fun after the two vectors, default: none
%
% Outputs:
%   lhs      n x 2 cell array; each row is {indices, values} of the
%            non-zero entries of one delta difference vector
%   rhs      n x 1 vector, fixed to one (after Schultz & Joachims 2003)
%   dim      number of elements of the delta difference vectors; empty
%            if no inequality could be evaluated at all
%   factors  row vector of weights, one per evaluated (b, c) pair
%   invalid  row indices i of r for which an all-zero difference was found
%
% ---
% CAVE: this file still allows arbitrary rankings
%       this could be good but also introducing
%       confusion in the future. Especially the
%       weightings are only defined per query
% ---

% ---
% variable function to get vector deltas
% ---
if nargin < 3
    delta_fun = @get_delta;
    delta_params = {[]};
elseif nargin < 4
    % a custom delta function was given without extra arguments:
    % call it with the two feature vectors only
    delta_params = {};
end

% ---
% fix right hand side to one
% (after Schultz & Joachims 2003)
% ---
fix_rhs = 1;
fix_weight = 1;

% ---
% NOTE: this preallocation is not complete
% ---
lhs = cell(0,2);
factors = [];
invalid = [];

% dim is filled in from the first difference vector that is actually
% computed, so it is always assigned even if the first ranking is skipped
dim = [];

for i = 1:size(r,1)

    % feature index
    a = i;

    % check if ranking is valid: both sides non-empty and disjoint
    if ~isempty(r{i,1}) && ~isempty(r{i,2}) && ...
            isempty(intersect(r{i,1}, r{i,2}))

        % ---
        % NOTE / TODO: the following is intended for compatibility
        % both sides of the ranking may have more than one entry.
        % for the MTT database, the ranking may be correct, but the
        % inequalities built from non-singular rankings are not
        % based on the actual data
        % ---
        for j = 1:numel(r{i,1})
            b = r{i,1}(j);

            for k = 1:numel(r{i,2})
                c = r{i,2}(k);

                % ---
                % get vector deltas
                % ---
                dab = delta_fun(X(:,a), X(:,b), delta_params{:});
                dac = delta_fun(X(:,a), X(:,c), delta_params{:});

                % get the delta difference vector
                ddiff = dac - dab;
                if isempty(dim)
                    dim = numel(ddiff);
                end

                % ---
                % save the non-empty differences row by row
                % NOTE: it is not clear whether the indexing for
                % \omega starts at 0 or 1; find() yields 1-based
                % indices - confirm against the svmlight file writer.
                % ---
                xgzero = find(ddiff ~= 0);

                if ~isempty(xgzero)
                    lhs = cat(1, lhs, {xgzero, ddiff(xgzero)});
                else
                    invalid = [invalid i];
                end

                % save factors (only needed if the 4th output is requested)
                % NOTE(review): a factor is also stored for inequalities
                % rejected above, so numel(factors) can exceed size(lhs,1).
                % Kept as is because callers may rely on it - confirm.
                if (nargout > 3)
                    if (size(r,2) > 2)
                        factors = [factors, r{i,3}]; % / max_weight
                    else
                        factors = [factors, fix_weight];
                    end
                end
            end
        end
    end
end

% ---
% determine right hand side
% ---
rhs = ones(size(lhs,1), 1) .* fix_rhs;

cprint(2, 'SVMLight data: %d invalid rankings excluded from training set', numel(invalid));
end

function out = get_delta(a, b, A)
% returns the pointwise (transformed) squared feature vector differences
%
%   a, b  feature vectors
%   A     (optional) transformation matrix; if omitted or empty the
%         vectors are compared directly

if nargin == 2 || isempty(A)

    % ---
    % return squared factors as in euclidean distance
    % ---
    out = (a - b).^2;

else

    % transform vectors before accessing difference
    out = (A' * a - A' * b).^2;
end
end