Mercurial > hg > camir-aes2014
diff core/tools/machine_learning/get_svmlight_inequalities_from_ranking.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
function [lhs, rhs, dim, factors, invalid] = get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
% [lhs, rhs, dim, factors, invalid] = ...
%     get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
%
% prepares ranking data to be used with the svmlight implementation
% of Schultz & Joachims 2003
%
% Inputs:
%   r             cell array with one ranking per row. Row i refers to the
%                 feature vector X(:,i); r{i,1} and r{i,2} hold column
%                 indices into X. If r has a third column, r{i,3} is used
%                 as the weight for the inequalities built from row i.
%                 NOTE(review): presumably r{i,1} lists the items ranked
%                 closer to item i than those in r{i,2} - confirm with caller.
%   X             feature matrix, one feature vector per column
%   delta_fun     (optional) function handle returning the distance / delta
%                 vector, called as delta_fun(a, b, delta_params{:}) for
%                 feature vectors a, b.
%                 default: delta(a, b) = (a - b).^2
%   delta_params  (optional) cell array of extra arguments for delta_fun,
%                 default: no extra arguments
%
% Outputs:
%   lhs       n x 2 cell array; each row holds {indices, values} of the
%             nonzero entries of delta(a,c) - delta(a,b) for one pair
%             b in r{i,1}, c in r{i,2}
%   rhs       n x 1 vector of right hand sides, fixed to one
%   dim       number of elements of the delta difference vector,
%             [] if no pair could be evaluated
%   factors   row vector of weights, one entry per evaluated pair
%   invalid   indices i of rankings that produced an all-zero difference
%             (one entry per such pair)

% ---
% CAVE: this file still allows arbitrary rankings
%       this could be good but may also introduce
%       confusion in the future. Especially the
%       weightings are only defined per query
% ---

% ---
% variable function to get vector deltas.
% The two defaults are set independently, so that a caller may pass
% delta_fun without delta_params.
% ---
if nargin < 3 || isempty(delta_fun)
    delta_fun = @get_delta;
end
if nargin < 4
    delta_params = {};
end

% ---
% fix right hand side to one
% (after Schultz & Joachims 2003)
% ---
fix_rhs = 1;
fix_weight = 1;

% ---
% NOTE: this preallocation is not complete
% ---
lhs = cell(0,2);
factors = [];
invalid = [];

% dim is set by the first evaluated pair; it stays empty if there is none
dim = [];

for i = 1:size(r,1)

    % feature index
    a = i;

    % check if ranking is valid: both sides non-empty and disjoint.
    % Rankings failing this check are skipped without being
    % recorded in "invalid".
    if ~isempty(r{i,1}) && ~isempty(r{i,2}) && ...
            isempty(intersect(r{i,1}, r{i,2}))

        % ---
        % NOTE / TODO: the following is intended for compatibility:
        %   both sides of the ranking may have more than one entry.
        %   for the MTT database, the ranking may be correct, but the
        %   inequalities built from non-singular rankings are not
        %   based on the actual data
        % ---
        for j = 1:numel(r{i,1})
            b = r{i,1}(j);

            for k = 1:numel(r{i,2})
                c = r{i,2}(k);

                % ---
                % get vector deltas
                % ---
                dab = delta_fun(X(:,a), X(:,b), delta_params{:});
                dac = delta_fun(X(:,a), X(:,c), delta_params{:});

                % get the delta difference vector
                ddiff = dac - dab;
                if isempty(dim)
                    dim = numel(ddiff);
                end

                % ---
                % save the non-empty differences row by row
                % NOTE: it is not clear whether the indexing for
                %   \omega starts at 0 or 1.
                % ---
                xgzero = find(ddiff ~= 0);

                if ~isempty(xgzero)
                    lhs = cat(1, lhs, {xgzero, ddiff(xgzero)});
                else
                    invalid = [invalid i];
                end

                % save factors
                % NOTE(review): a factor is appended for every evaluated
                %   pair, including pairs rejected above, so
                %   numel(factors) may exceed size(lhs,1) - confirm that
                %   callers expect this.
                if (nargout > 2)
                    if (size(r,2) > 2)
                        factors = [factors, r{i,3}]; % / max_weight
                    else
                        factors = [factors, fix_weight];
                    end
                end

            end
        end
    end
end

% ---
% determine right hand side
% ---
rhs = ones(size(lhs,1), 1) .* fix_rhs;
cprint(2, 'SVMLight data: %d invalid rankings excluded from training set', numel(invalid));
end


function out = get_delta(a, b, A)
% out = get_delta(a, b, A)
%
% returns the pointwise squared (optionally transformed) feature
% vector differences
%
%   a, b   feature vectors
%   A      (optional) transformation matrix; if omitted or empty,
%          the vectors are used untransformed

if nargin == 2 || isempty(A)

    % ---
    % return squared factors as in euclidean distance
    % ---
    out = (a - b).^2;

else

    % transform vectors before assessing the difference
    out = (A' * a - A' * b).^2;
end

end