annotate core/tools/machine_learning/get_svmlight_inequalities_from_ranking.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
function [lhs, rhs, dim, factors, invalid] = get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
% [lhs, rhs, dim, factors, invalid] = ...
%     get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
%
% prepares ranking data to be used with the svmlight implementation
% of Schultz & Joachims 2003
%
% INPUT
%   r            cell array with one row per query. Row i refers to the
%                feature vector X(:,i). r{i,1} and r{i,2} hold column
%                indices into X. For every b in r{i,1} and c in r{i,2}
%                one inequality is built from delta(a,c) - delta(a,b).
%                (presumably r{i,1} lists the items ranked closer to
%                 a than those in r{i,2} - confirm against the caller)
%                An optional third column r{i,3} holds the weight of
%                row i.
%   X            feature matrix, one feature vector per column
%   delta_fun    (optional) function handle, called as
%                delta_fun(X(:,a), X(:,b), delta_params{:});
%                default: delta(a, b) = (a - b).^2
%   delta_params (optional) cell array of extra arguments for delta_fun;
%                a non-cell value is passed on as one single argument
%
% OUTPUT
%   lhs      n x 2 cell array, one row per kept inequality:
%            {indices of the nonzero entries, values at these indices}
%   rhs      n x 1 vector, every entry fixed to 1
%   dim      number of elements of the delta difference vector,
%            [] if no triplet was evaluated
%   factors  row vector of weights, one entry per EVALUATED triplet
%            (r{i,3} if r has a third column, 1 otherwise)
%   invalid  query indices i, one entry per triplet whose delta
%            difference was all zero (these triplets are not in lhs)
%
% NOTE(review): factors also receives an entry for the triplets listed
% in invalid, so numel(factors) can exceed size(lhs,1). This is the
% historical behaviour and is kept because callers may rely on it.

% ---
% CAVE: this file still allows arbitrary rankings
% this could be good but also introducing
% confusion in the future. Especially the
% weightings are only defined per query
% ---

% ---
% variable function to get vector deltas
% the two defaults are resolved independently, so that a call with
% three arguments (delta_fun only) does not fail on delta_params
% ---
if nargin < 4
    delta_params = {};
end
if nargin < 3
    delta_fun = @get_delta;
    delta_params = {[]};
end
if ~iscell(delta_params)
    delta_params = {delta_params};
end

% ---
% fix right hand side to one
% (after Schultz & Joachims 2003)
% ---
fix_rhs = 1;
fix_weight = 1;

% factors is the fourth output, only collect it when it is requested
want_factors = (nargout > 3);
has_weights = (size(r,2) > 2);

% ---
% preallocate lhs for the maximal number of inequalities,
% unused rows are removed after the loop
% ---
n_max = 0;
for i = 1:size(r,1)
    n_max = n_max + numel(r{i,1}) * numel(r{i,2});
end
lhs = cell(n_max, 2);
n_lhs = 0;

factors = [];
invalid = [];

% dim is taken from the first evaluated triplet, whichever query it
% belongs to; it stays empty if there is none
dim = [];

for i = 1:size(r,1)

    % feature index
    a = i;

    % check if ranking is valid: both sides non-empty and disjoint
    if isempty(r{i,1}) || isempty(r{i,2}) || ...
            ~isempty(intersect(r{i,1}, r{i,2}))
        continue;
    end

    x_a = X(:,a);

    % ---
    % NOTE / TODO: the following is intended for compatibility
    % both sides of the ranking may have more than one entry.
    % for the MTT database, the ranking may be correct, but the
    % inequalities built from non-singular rankings are not
    % based on the actual data
    % ---
    for j = 1:numel(r{i,1})
        b = r{i,1}(j);

        % delta(a, b) does not depend on c, compute it once per b
        dab = delta_fun(x_a, X(:,b), delta_params{:});

        for k = 1:numel(r{i,2})
            c = r{i,2}(k);

            % ---
            % get vector deltas
            % ---
            dac = delta_fun(x_a, X(:,c), delta_params{:});

            % get the delta difference vector
            ddiff = dac - dab;
            if isempty(dim)
                dim = numel(ddiff);
            end

            % ---
            % save the non-empty differences row by row
            % NOTE: it is not clear whether the indexing for
            % \omega starts at 0 or 1.
            % ---
            xgzero = find(ddiff ~= 0);

            if ~isempty(xgzero)
                n_lhs = n_lhs + 1;
                lhs(n_lhs,:) = {xgzero, ddiff(xgzero)};
            else
                invalid = [invalid i];
            end

            % save factors
            if want_factors
                if has_weights
                    factors = [factors, r{i,3}]; % / max_weight
                else
                    factors = [factors, fix_weight];
                end
            end
        end
    end
end

% remove the preallocated rows that were not used
lhs = lhs(1:n_lhs,:);

% ---
% determine right hand side
% ---
rhs = ones(size(lhs,1), 1) .* fix_rhs;
cprint(2, 'SVMLight data: %d invalid rankings excluded from training set', numel(invalid));
end
wolffd@0 106
wolffd@0 107
function out = get_delta(a, b, A)
% out = get_delta(a, b)
% out = get_delta(a, b, A)
%
% element-wise squared difference of the two feature vectors a and b.
% If a non-empty matrix A is given, both vectors are multiplied by A'
% before the difference is taken. An empty A behaves like the
% two-argument call.

use_transform = (nargin ~= 2) && ~isempty(A);

if use_transform

    % ---
    % transform both vectors first, then take the squared difference
    % ---
    ta = A' * a;
    tb = A' * b;
    out = (ta - tb).^2;
    return;
end

% ---
% plain squared differences, the summands of the
% squared euclidean distance
% ---
out = (a - b).^2;

end
wolffd@0 125
wolffd@0 126