Mercurial > hg > camir-aes2014
comparison core/tools/machine_learning/get_svmlight_inequalities_from_ranking.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
function [lhs, rhs, dim, factors, invalid] = get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
% [lhs, rhs, dim, factors, invalid] = ...
%     get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
%
% Prepares ranking data to be used with the svmlight implementation
% of Schultz & Joachims 2003.
%
% Input:
%   r            cell array with one row per query. Row i refers to the
%                feature vector X(:,i). r{i,1} and r{i,2} hold column
%                indices into X; for every b in r{i,1} and c in r{i,2}
%                one inequality is built from delta(a,c) - delta(a,b).
%                If r has a third column, r{i,3} is used as the weight
%                of every inequality generated from row i.
%   X            feature matrix, one feature vector per column.
%   delta_fun    (optional) function handle returning the delta(a, b)
%                vector for feature vectors a, b.
%                default: delta(a, b) = (a - b).^2
%   delta_params (optional) cell array of extra arguments appended to
%                every delta_fun call. default: {}
%
% Output:
%   lhs      n x 2 cell; each row is {indices of the nonzero entries,
%            values of the nonzero entries} of one delta difference.
%   rhs      n x 1 vector, fixed to one (after Schultz & Joachims 2003).
%   dim      number of elements of the delta difference vector,
%            [] if no inequality could be computed at all.
%   factors  1 x n weights, aligned with the rows of lhs.
%   invalid  row indices i of r for which a (b, c) pair produced an
%            all-zero difference (one entry per such pair).
%
% ---
% CAVE: this file still allows arbitrary rankings.
%       This could be useful, but may also introduce confusion in the
%       future. In particular, the weightings are only defined per query.
% ---

% ---
% variable function to get vector deltas.
% Both optional arguments are defaulted independently, so that a call
% with a custom delta_fun but no delta_params works as well.
% ---
if nargin < 4
    delta_params = {};
end
if nargin < 3 || isempty(delta_fun)
    delta_fun = @get_delta;
end

% ---
% fix right hand side to one
% (after Schultz & Joachims 2003)
% ---
fix_rhs = 1;
fix_weight = 1;

has_weights = size(r, 2) > 2;
want_factors = nargout > 3; % factors is the fourth output

% ---
% NOTE: the number of inequalities is not known in advance,
%       the outputs are grown row by row
% ---
lhs = cell(0, 2);
dim = [];
factors = [];
invalid = [];

for i = 1:size(r, 1)

    % feature index of the query
    a = i;

    % check if ranking is valid: both sides non-empty and disjoint
    if isempty(r{i,1}) || isempty(r{i,2}) || ...
            ~isempty(intersect(r{i,1}, r{i,2}))
        continue;
    end

    % ---
    % NOTE / TODO: the following is intended for compatibility:
    % both sides of the ranking may have more than one entry.
    % For the MTT database the ranking may be correct, but the
    % inequalities built from non-singular rankings are not
    % based on the actual data.
    % ---
    for j = 1:numel(r{i,1})
        b = r{i,1}(j);

        for k = 1:numel(r{i,2})
            c = r{i,2}(k);

            % ---
            % get vector deltas
            % ---
            dab = delta_fun(X(:,a), X(:,b), delta_params{:});
            dac = delta_fun(X(:,a), X(:,c), delta_params{:});

            % get the delta difference vector
            ddiff = dac - dab;

            % remember the dimension from the first computed difference,
            % regardless of which row of r it came from
            if isempty(dim)
                dim = numel(ddiff);
            end

            % ---
            % save the non-empty differences row by row
            % NOTE: it is not clear whether the indexing for
            %       \omega starts at 0 or 1.
            % ---
            nz = find(ddiff ~= 0);

            if isempty(nz)
                % all-zero difference carries no constraint
                invalid = [invalid i]; %#ok<AGROW>
                continue;
            end

            lhs = cat(1, lhs, {nz, ddiff(nz)});

            % save factors only for inequalities that were actually
            % stored, keeping factors aligned with lhs and rhs
            if want_factors
                if has_weights
                    factors = [factors, r{i,3}]; %#ok<AGROW>
                else
                    factors = [factors, fix_weight]; %#ok<AGROW>
                end
            end
        end
    end
end

% ---
% determine right hand side
% ---
rhs = ones(size(lhs,1), 1) .* fix_rhs;
cprint(2, 'SVMLight data: %d invalid rankings excluded from training set', numel(invalid));
end
106 | |
107 | |
function out = get_delta(a, b, A)
% out = get_delta(a, b)
% out = get_delta(a, b, A)
%
% Returns the pointwise squared difference of the feature vectors
% a and b. If a non-empty matrix A is given, both vectors are first
% transformed by A' and the squared difference of the transformed
% vectors is returned instead.

% ---
% transform vectors before taking the difference
% ---
if nargin ~= 2 && ~isempty(A)
    out = (A' * a - A' * b).^2;
    return;
end

% ---
% plain squared differences, as in the euclidean distance
% ---
out = (a - b).^2;

end
125 | |
126 |