wolffd@0
|
function [lhs, rhs, dim, factors, invalid] = get_svmlight_inequalities_from_ranking(r, X, delta_fun,delta_params)
% [lhs, rhs, dim, factors, invalid] = ...
%     get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
%
% prepares ranking data to be used with the svmlight implementation
% of Schultz & Joachims 2003
%
% INPUT
%   r            cell array with one row per ranking. Row i uses X(:,i)
%                as reference vector a. r{i,1} and r{i,2} hold column
%                indices into X (called b and c below). An optional
%                third column r{i,3} holds the weight of ranking i.
%   X            feature matrix, one feature vector per column
%   delta_fun    function handle returning the delta(a, b) vector for
%                feature vectors a, b,
%                default: delta(a, b) = (a - b).^2
%   delta_params cell array of extra arguments, passed on as
%                delta_fun(a, b, delta_params{:})
%
% OUTPUT
%   lhs          n x 2 cell array. Each row holds the indices and the
%                values of the non-zero entries of
%                delta(a, c) - delta(a, b) for one (b, c) pair
%   rhs          n x 1 vector, fixed to one
%   dim          number of elements of the first difference vector
%                computed, [] if no difference vector was computed
%   factors      weight of every processed (b, c) pair: r{i,3} if r has
%                a third column, 1 otherwise. Only filled if requested
%   invalid      ranking index i of every (b, c) pair whose difference
%                vector was all zero (no row is added to lhs for these)

% ---
% CAVE: this file still allows arbitrary rankings
%       this could be good but also introduce
%       confusion in the future. Especially the
%       weightings are only defined per query
% ---

% ---
% variable function to get vector deltas
% NOTE: the defaults are resolved separately, so that passing
%       delta_fun without delta_params works as well
% ---
use_default_fun = nargin < 3 || isempty(delta_fun);
if use_default_fun
    delta_fun = @get_delta;
end

if nargin < 4
    if use_default_fun
        delta_params = {[]};
    else
        % a custom delta function gets no extra arguments
        delta_params = {};
    end
elseif ~iscell(delta_params)
    % allow a single extra argument to be passed without a cell
    delta_params = {delta_params};
end

% ---
% fix right hand side to one
% (after Schultz & Joachims 2003)
% ---
fix_rhs = 1;
fix_weight = 1;

% ---
% NOTE: this preallocation is not complete
% ---
lhs = cell(0,2);
factors = [];
invalid = [];
dim = [];

% factors is the 4th output, only collect it if it is requested
want_factors = nargout > 3;
has_weights = size(r,2) > 2;

for i = 1:size(r,1)

    % feature index
    a = i;

    % check if ranking is valid: both sides non-empty and disjoint
    if isempty(r{i,1}) || isempty(r{i,2}) || ...
            ~isempty(intersect(r{i,1}, r{i,2}))
        continue;
    end

    xa = X(:,a);

    % ---
    % NOTE / TODO: the following is intended for compatibility
    % both sides of the ranking may have more than one entry.
    % for the MTT database, the ranking may be correct, but the
    % inequalities built from non-singular rankings are not
    % based on the actual data
    % ---
    for j = 1:numel(r{i,1})
        b = r{i,1}(j);

        % delta(a, b) does not depend on c, get it once per b
        % (assumes delta_fun is deterministic)
        dab = delta_fun(xa, X(:,b), delta_params{:});

        for k = 1:numel(r{i,2})
            c = r{i,2}(k);

            % ---
            % get vector deltas
            % ---
            dac = delta_fun(xa, X(:,c), delta_params{:});

            % get the delta difference vector
            ddiff = dac - dab;

            % take the dimension from the first difference vector
            % available, the first ranking may have been skipped
            if isempty(dim)
                dim = numel(ddiff);
            end

            % ---
            % save the non-empty differences row by row
            % NOTE: it is not clear whether the indexing for
            %       \omega starts at 0 or 1.
            % ---
            xgzero = find(ddiff ~= 0);

            if ~isempty(xgzero)
                lhs = cat(1, lhs, {xgzero, ddiff(xgzero)});
            else
                invalid = [invalid i];
            end

            % ---
            % save factors
            % NOTE(review): a factor is also saved for pairs recorded
            %       in invalid, so factors can have more entries than
            %       lhs has rows. Kept as is, confirm against callers.
            % ---
            if want_factors
                if has_weights
                    factors = [factors, r{i,3}]; % / max_weight
                else
                    factors = [factors, fix_weight];
                end
            end

        end
    end
end

% ---
% determine right hand side
% ---
rhs = ones(size(lhs,1), 1) .* fix_rhs;
cprint(2, 'SVMLight data: %d invalid rankings excluded from training set', numel(invalid));
end
|
wolffd@0
|
106
|
wolffd@0
|
107
|
wolffd@0
|
function out = get_delta(a, b, A)
% out = get_delta(a, b, A)
%
% returns the pointwise squared differences of the feature vectors
% a and b. If a non-empty A is given, both vectors are multiplied
% by A' before the difference is taken.

% ---
% transform vectors before accessing difference
% ---
if nargin ~= 2 && ~isempty(A)
    ta = A' * a;
    tb = A' * b;
    out = (ta - tb).^2;
    return;
end

% ---
% no transformation given:
% return squared factors as in euclidean distance
% ---
out = (a - b).^2;

end
|
wolffd@0
|
125
|
wolffd@0
|
126
|