comparison core/tools/machine_learning/get_svmlight_inequalities_from_ranking.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
function [lhs, rhs, dim, factors, invalid] = get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
% [lhs, rhs, dim, factors, invalid] = ...
%     get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
%
% Prepares ranking data to be used with the svmlight implementation
% of Schultz & Joachims 2003.
%
% Inputs:
%   r             cell array with one row per query. Row i refers to the
%                 feature vector X(:,i). r{i,1} and r{i,2} hold column
%                 indices into X. Every pair (b from r{i,1}, c from r{i,2})
%                 produces one inequality built from
%                 delta(a,c) - delta(a,b).
%                 If r has a third column, r{i,3} is appended to "factors"
%                 once per pair.
%   X             feature matrix, one column per item.
%   delta_fun     (optional) function handle called as
%                 delta_fun(x_a, x_b, delta_params{:}), returning the
%                 delta vector for two feature vectors.
%                 Default (also used when empty): the local get_delta,
%                 i.e. delta(a, b) = (a - b).^2.
%   delta_params  (optional) cell array of extra arguments for delta_fun.
%                 A non-cell value is passed on as one single extra
%                 argument. Default: no extra arguments.
%
% Outputs:
%   lhs      n-by-2 cell array, one row per inequality: lhs{n,1} holds the
%            indices of the nonzero entries of the delta difference
%            vector, lhs{n,2} the corresponding values.
%   rhs      n-by-1 vector, right hand side fixed to 1.
%   dim      number of elements of the delta difference vectors
%            (0 if no difference vector was computed at all).
%   factors  row vector of weights, only collected when it is requested
%            as an output.
%   invalid  indices i of the queries for which a delta difference
%            vector was all zero (one entry per such pair).

% ---
% CAVE: this file still allows arbitrary rankings.
%       This could be good, but may also introduce
%       confusion in the future. Especially the
%       weightings are only defined per query.
% ---

% ---
% variable function to get vector deltas.
% The two optional arguments are defaulted independently, so that a
% call with a custom delta_fun but without delta_params works as well.
% ---
if nargin < 3 || isempty(delta_fun)
    delta_fun = @get_delta;
end
if nargin < 4
    delta_params = {};
elseif ~iscell(delta_params)
    delta_params = {delta_params};
end

% ---
% fix right hand side to one
% (after Schultz & Joachims 2003)
% ---
fix_rhs = 1;
fix_weight = 1;

% factors is the 4th output, so only collect it if it is requested
want_factors = (nargout > 3);
has_weights = (size(r,2) > 2);

% ---
% NOTE: the number of inequalities is not known in advance,
%       so the outputs are grown inside the loop
% ---
lhs = cell(0,2);
factors = [];
invalid = [];
dim = [];
for i = 1:size(r,1)

    % feature index
    a = i;

    idx_b = r{i,1};
    idx_c = r{i,2};

    % skip rankings with an empty side or with overlapping sides
    if isempty(idx_b) || isempty(idx_c) || ~isempty(intersect(idx_b, idx_c))
        continue;
    end

    % ---
    % NOTE / TODO: the following is intended for compatibility:
    % both sides of the ranking may have more than one entry.
    % For the MTT database, the ranking may be correct, but the
    % inequalities built from non-singular rankings are not
    % based on the actual data.
    % ---
    xa = X(:,a);
    for j = 1:numel(idx_b)
        b = idx_b(j);

        % delta(a,b) does not depend on c, compute it once per b
        dab = delta_fun(xa, X(:,b), delta_params{:});

        for k = 1:numel(idx_c)
            c = idx_c(k);

            dac = delta_fun(xa, X(:,c), delta_params{:});

            % get the delta difference vector
            ddiff = dac - dab;

            % remember the dimension from the first vector computed,
            % regardless of which query it belongs to
            if isempty(dim)
                dim = numel(ddiff);
            end

            % ---
            % save the non-empty differences row by row
            % NOTE: it is not clear whether the indexing for
            %       \omega starts at 0 or 1.
            % ---
            xgzero = find(ddiff ~= 0);

            if ~isempty(xgzero)
                lhs(end+1,:) = {xgzero, ddiff(xgzero)};
            else
                invalid = [invalid i];
            end

            % ---
            % save factors
            % NOTE(review): a factor is appended for all-zero pairs as
            % well, although these add no row to lhs. factors may thus
            % have more entries than lhs has rows - confirm that the
            % callers expect this before changing it.
            % ---
            if want_factors
                if has_weights
                    factors = [factors, r{i,3}]; % / max_weight
                else
                    factors = [factors, fix_weight];
                end
            end
        end
    end
end

% no delta difference vector was computed at all
if isempty(dim)
    dim = 0;
end

% ---
% determine right hand side
% ---
rhs = ones(size(lhs,1), 1) .* fix_rhs;
cprint(2, 'SVMLight data: %d invalid rankings excluded from training set', numel(invalid));
end
106
107
function out = get_delta(a, b, A)
% out = get_delta(a, b, A)
%
% Returns the pointwise squared difference of two feature vectors.
%
%   a, b  feature vectors
%   A     (optional) transformation matrix; if given and non-empty,
%         both vectors are mapped by A' before the difference is taken

% without a usable transformation: squared entries as in the
% euclidean distance
no_transform = (nargin == 2) || isempty(A);
if no_transform
    out = (a - b).^2;
    return;
end

% transform both vectors first, then take the squared difference
ta = A' * a;
tb = A' * b;
out = (ta - tb).^2;

end
125
126