diff core/tools/machine_learning/get_svmlight_inequalities_from_ranking.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/tools/machine_learning/get_svmlight_inequalities_from_ranking.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,126 @@
+function [lhs, rhs, dim, factors, invalid] = get_svmlight_inequalities_from_ranking(r, X, delta_fun,delta_params)
+% [lhs, rhs, dim, factors, invalid] = ...
+%     get_svmlight_inequalities_from_ranking(r, X, delta_fun, delta_params)
+%
+% prepares ranking data to be used with the svmlight implementation
+% of Schultz & Joachims 2003
+%
+% r            cell array with one ranking per row: for every b in r{i,1}
+%              and every c in r{i,2} the difference
+%              delta(X(:,i), X(:,c)) - delta(X(:,i), X(:,b)) is built.
+%              An optional third column r{i,3} holds the ranking's weight.
+% X            feature matrix with one column per item
+% delta_fun    function handle, delta_fun(a, b, delta_params{:}) returns
+%              the delta vector for the feature vectors a and b,
+%              default: delta(a, b) = (a - b).^2
+% delta_params cell array of extra arguments for delta_fun, default: {}
+%
+% lhs          n x 2 cell, each row holds the indices of the non-zero
+%              entries of one difference vector and their values
+% rhs          n x 1 vector of right hand sides, fixed to one
+% dim          length of the difference vectors, [] if none was built
+% factors      weights, one per row of lhs (only filled if requested)
+% invalid      ranking index i for every all-zero difference left out
+
+% CAVE: this file still allows arbitrary rankings. This could be good,
+%  but may also introduce confusion in the future. In particular, the
+%  weightings are only defined per query.
+
+% default delta function and parameters; the two defaults are set
+% independently, so delta_fun may be given without delta_params
+if nargin < 3 || isempty(delta_fun)
+    delta_fun = @get_delta;
+end
+if nargin < 4
+    delta_params = {};
+end
+
+% fix right hand side to one (after Schultz & Joachims 2003)
+fix_rhs = 1;
+fix_weight = 1;
+
+% NOTE: the outputs grow inside the loop, they are not preallocated
+lhs = cell(0,2);
+dim = [];
+factors = [];
+invalid = [];
+for i = 1:size(r,1)
+    % feature index: the query item is column i of X
+    a = i;
+
+    % skip rankings with an empty side or an item on both sides
+    if isempty(r{i,1}) || isempty(r{i,2}) || ...
+            ~isempty(intersect(r{i,1}, r{i,2}))
+        continue;
+    end
+
+    % NOTE / TODO: the following is intended for compatibility.
+    %  Both sides of the ranking may have more than one entry.
+    %  For the MTT database the ranking may be correct, but the
+    %  inequalities built from non-singular rankings are not
+    %  based on the actual data.
+    for j = 1:numel(r{i,1})
+        b = r{i,1}(j);
+
+        % delta(a, b) does not depend on c: compute it once per b
+        dab = delta_fun(X(:,a), X(:,b), delta_params{:});
+
+        for k = 1:numel(r{i,2})
+            c = r{i,2}(k);
+            dac = delta_fun(X(:,a), X(:,c), delta_params{:});
+
+            ddiff = dac - dab;
+
+            % the first ranking may have been skipped, so take the
+            % dimension from the first difference actually computed
+            if isempty(dim)
+                dim = numel(ddiff);
+            end
+
+            % save the non-zero differences row by row
+            % NOTE: it is not clear whether the indexing for
+            % \omega starts at 0 or 1.
+            xgzero = find(ddiff ~= 0);
+            if isempty(xgzero)
+                invalid = [invalid i];
+                continue;
+            end
+            lhs = cat(1,lhs,{xgzero, ddiff(xgzero)});
+
+            % save factors only for stored rows: keeps them aligned to lhs
+            if nargout > 3
+                if size(r,2) > 2
+                    factors = [factors, r{i,3}]; % / max_weight
+                else
+                    factors = [factors, fix_weight];
+                end
+            end
+        end
+    end
+end
+
+% determine right hand side
+rhs = ones(size(lhs,1), 1) .* fix_rhs;
+cprint(2, 'SVMLight data: %d invalid rankings excluded from training set', numel(invalid));
+end
+
+
+function out = get_delta(a, b, A)
+% returns the pointwise squared differences of the feature vectors
+% a and b; if a non-empty matrix A is passed as third argument, both
+% vectors are multiplied by A' before the difference is taken
+
+use_transform = (nargin ~= 2) && ~isempty(A);
+if use_transform
+
+    % transform vectors before taking the difference
+    ta = A' * a;
+    tb = A' * b;
+    out = (ta - tb).^2;
+else
+    % plain squared differences as in the euclidean distance
+    out = (a - b).^2;
+end
+end
+
+