function Perf = mlr_test(W, test_k, Xtrain, Ytrain, Xtest, Ytest, Testnorm)
% Perf = mlr_test(W, test_k, Xtrain, Ytrain, Xtest, Ytest, Testnorm)
%
%   W        = d-by-d positive semi-definite matrix
%   test_k   = vector of k-values to use for KNN/Prec@k/NDCG
%   Xtrain   = d-by-n matrix of training data
%   Ytrain   = n-by-1 vector of training labels
%                OR
%              n-by-2 cell array where
%                Ytrain{q,1} contains relevant indices   (in 1..n) for point q
%                Ytrain{q,2} contains irrelevant indices (in 1..n) for point q
%   Xtest    = d-by-m matrix of testing data
%   Ytest    = m-by-1 vector of testing labels, or m-by-2 cell array
%   Testnorm = (optional) m-by-nKernel matrix of test-point self-kernel values,
%              only needed when operating in kernel mode
%
%   The output structure Perf contains the mean score for:
%       AUC, KNN, Prec@k, MAP, MRR, NDCG,
%   as well as the effective dimensionality of W, and
%   the best-performing k-value for KNN, Prec@k, and NDCG.
%
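% Example (sketch): evaluating a learned metric on held-out data.  The call to
% mlr_train below is only illustrative of how W might be obtained; any d-by-d
% positive semi-definite matrix can be passed in its place.
%
%       W    = mlr_train(Xtrain, Ytrain, 1);
%       Perf = mlr_test(W, [1 3 5 10], Xtrain, Ytrain, Xtest, Ytest);
%       disp(Perf.MAP);     % mean average precision over the test queries
%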

% addpath('cuttingPlane', 'distance', 'feasible', 'initialize', 'loss', ...
%         'metricPsi', 'regularize', 'separationOracle', 'util');

    Perf = struct( ...
            'AUC',            [], ...
            'KNN',            [], ...
            'PrecAtK',        [], ...
            'MAP',            [], ...
            'MRR',            [], ...
            'NDCG',           [], ...
            'dimensionality', [], ...
            'KNNk',           [], ...
            'PrecAtKk',       [], ...
            'NDCGk',          [] ...
    );

    [d, nTrain, nKernel] = size(Xtrain);
    nTest                = length(Ytest);
    test_k               = min(test_k, nTrain);

    if nargin < 7
        Testnorm = [];
    end

    % Compute dimensionality of the learned metric
    Perf.dimensionality = mlr_test_dimension(W, nTrain, nKernel);

    % Build the distance matrix
    [D, I] = mlr_test_distance(W, Xtrain, Xtest, Testnorm);
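    % D(i,j) is the learned-metric distance from training point i to test
    % point j, and I(:,j) lists the training indices sorted by increasing
    % distance to test point j; this ranking drives all of the scores below.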

    % Compute label agreement
    if ~iscell(Ytest)
        Labels = Ytrain(I);
        Agree  = bsxfun(@eq, Ytest', Labels);

        % KNN accuracy is only computed when Ytest contains class labels
        [Perf.KNN, Perf.KNNk] = mlr_test_knn(Labels, Ytest, test_k);
    else
        Agree = zeros(nTrain, nTest);
        for i = 1:nTest
            Agree(:,i) = ismember(I(:,i), Ytest{i,1});
        end
        Agree = reduceAgreement(Agree);
    end
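    % Each column of Agree marks, in ranked order, which training points are
    % relevant to that test query (same label, or listed in Ytest{q,1});
    % every ranking metric below is computed from this matrix.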

    % Compute AUC score
    Perf.AUC = mlr_test_auc(Agree);

    % Compute MAP score
    Perf.MAP = mlr_test_map(Agree);

    % Compute MRR score
    Perf.MRR = mlr_test_mrr(Agree);

    % Compute Prec@k
    [Perf.PrecAtK, Perf.PrecAtKk] = mlr_test_preck(Agree, test_k);

    % Compute NDCG score
    [Perf.NDCG, Perf.NDCGk] = mlr_test_ndcg(Agree, test_k);
end


function [D, I] = mlr_test_distance(W, Xtrain, Xtest, Testnorm)

    % CASES:
    %   Raw:                        W = []
    %
    %   Linear, full:               W = d-by-d
    %   Single Kernel, full:        W = n-by-n
    %   MKL, full:                  W = n-by-n-by-m
    %
    %   Linear, diagonal:           W = d-by-1
    %   Single Kernel, diagonal:    W = n-by-1
    %   MKL, diagonal:              W = n-by-m
    %   MKL, diag-off-diag:         W = m-by-m-by-n

    [d, nTrain, nKernel] = size(Xtrain);
    nTest                = size(Xtest, 2);
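    % The metric mode is inferred from the shape of W relative to Xtrain:
    % a d-by-d W (n-by-n or n-by-n-by-m in kernel mode) selects the full
    % projection path, a d-by-nKernel W the diagonal path, and an
    % nKernel-by-nKernel-by-nTrain W the diag-off-diag path.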

    if isempty(W)
        % W = [] => native euclidean distances
        D = mlr_test_distance_raw(Xtrain, Xtest, Testnorm);

    elseif size(W,1) == d && size(W,2) == d
        % We're in a full-projection case
        D = setDistanceFullMKL([Xtrain Xtest], W, nTrain + (1:nTest), 1:nTrain);

    elseif size(W,1) == d && size(W,2) == nKernel
        % We're in a simple diagonal case
        D = setDistanceDiagMKL([Xtrain Xtest], W, nTrain + (1:nTest), 1:nTrain);

    elseif size(W,1) == nKernel && size(W,2) == nKernel && size(W,3) == nTrain
        % We're in diag-off-diag (DOD) mode
        D = setDistanceDODMKL([Xtrain Xtest], W, nTrain + (1:nTest), 1:nTrain);

    else
        error('Cannot determine metric mode.');
    end

    % Keep only the train-to-test block and rank training points by distance
    D      = full(D(1:nTrain, nTrain + (1:nTest)));
    [~, I] = sort(D, 1);
end


function dimension = mlr_test_dimension(W, nTrain, nKernel)

    % CASES:
    %   Raw:                        W = []
    %
    %   Linear, full:               W = d-by-d
    %   Single Kernel, full:        W = n-by-n
    %   MKL, full:                  W = n-by-n-by-m
    %
    %   Linear, diagonal:           W = d-by-1
    %   Single Kernel, diagonal:    W = n-by-1
    %   MKL, diagonal:              W = n-by-m
    %   MKL, diag-off-diag:         W = m-by-m-by-n

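    % The effective dimensionality is the number of leading spectral
    % components of W needed to capture 95% of its total spectral mass.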
    if size(W,2) == nTrain
        % Square kernel-space metric: collect the eigenvalue spectrum of the
        % symmetrized W, one block per kernel
        dim = [];
        for i = 1:nKernel
            [~, lambda] = eig(0.5 * (W(:,:,i) + W(:,:,i)'));
            dim         = [dim ; abs(real(diag(lambda)))];
        end
    else
        % Otherwise, use the entries of W directly as the spectrum
        dim = W(:);
    end

    cumSpec   = cumsum(dim) / sum(dim);
    dimension = find(cumSpec >= 0.95, 1);
    if isempty(dimension)
        dimension = 0;
    end
end

function [NDCG, NDCGk] = mlr_test_ndcg(Agree, test_k)

    nTrain = size(Agree, 1);

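    % DCG discount: rank 1 counts fully, rank i >= 2 is weighted by 1/log2(i).
    % The discount is normalized over the top k positions, so a query whose
    % top k results are all relevant scores exactly 1.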
    NDCG  = -Inf;
    NDCGk = 0;
    for k = test_k
        % Rebuild the discount from scratch for each k, so the normalization
        % below does not carry over between iterations of the loop
        Discount             = zeros(1, nTrain);
        Discount(1:min(2,k)) = 1;
        Discount(3:k)        = 1 ./ log2(3:k);
        Discount             = Discount / sum(Discount);

        b = mean(Discount * Agree);
        if b > NDCG
            NDCG  = b;
            NDCGk = k;
        end
    end
end

function [PrecAtK, PrecAtKk] = mlr_test_preck(Agree, test_k)

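    % Precision@k: the fraction of the top k ranked training points that are
    % relevant, averaged over test queries; the best k in test_k is reported.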
    PrecAtK  = -Inf;
    PrecAtKk = 0;
    for k = test_k
        b = mean( mean( Agree(1:k, :), 1 ) );
        if b > PrecAtK
            PrecAtK  = b;
            PrecAtKk = k;
        end
    end
end

function [KNN, KNNk] = mlr_test_knn(Labels, Ytest, test_k)

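    % KNN accuracy: predict each test label as the mode of its k nearest
    % training labels, and report the best accuracy over the k in test_k.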
    KNN  = -Inf;
    KNNk = 0;
    for k = test_k
        b = mean( mode( Labels(1:k,:), 1 ) == Ytest' );
        if b > KNN
            KNN  = b;
            KNNk = k;
        end
    end
end

function MAP = mlr_test_map(Agree)

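    % Average precision per query: mean of precision-at-rank over the ranks
    % of the relevant training points; MAP is the mean over test queries.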
    nTrain = size(Agree, 1);
    MAP    = bsxfun(@ldivide, (1:nTrain)', cumsum(Agree, 1));
    MAP    = mean(sum(MAP .* Agree, 1) ./ sum(Agree, 1));
end

function MRR = mlr_test_mrr(Agree)

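    % Mean reciprocal rank: 1/(rank of the first relevant training point),
    % averaged over test queries.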
    nTest = size(Agree, 2);
    MRR   = 0;
    for i = 1:nTest
        r = find(Agree(:,i), 1);
        if ~isempty(r)
            % Queries with no relevant training point contribute 0
            MRR = MRR + 1 / r;
        end
    end
    MRR = MRR / nTest;
end

function AUC = mlr_test_auc(Agree)

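    % ROC AUC: accumulate true/false positive counts down each ranked list,
    % average the resulting ROC curves over queries, and integrate TPR with
    % respect to FPR.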
    TPR = cumsum( Agree, 1);
    FPR = cumsum(~Agree, 1);

    numPos = TPR(end,:);
    numNeg = FPR(end,:);

    TPR = mean(bsxfun(@rdivide, TPR, numPos), 2);
    FPR = mean(bsxfun(@rdivide, FPR, numNeg), 2);
    AUC = diff([0 FPR']) * TPR;
end


function D = mlr_test_distance_raw(Xtrain, Xtest, Testnorm)

    [d, nTrain, nKernel] = size(Xtrain);
    nTest                = size(Xtest, 2);

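    % With no Testnorm, Xtrain/Xtest are explicit features and distances are
    % computed directly (identity metric).  With Testnorm, the inputs are
    % kernel matrices, and squared distances in feature space follow from
    %     ||phi(x) - phi(y)||^2 = K(x,x) + K(y,y) - 2 K(x,y).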
    if isempty(Testnorm)
        % Not in kernel mode: compute distances directly
        D = 0;
        for i = 1:nKernel
            D = D + setDistanceDiag([Xtrain(:,:,i) Xtest(:,:,i)], ones(d,1), ...
                                    nTrain + (1:nTest), 1:nTrain);
        end
    else
        % We are in kernel mode
        D = sparse(nTrain + nTest, nTrain + nTest);
        for i = 1:nKernel
            Trainnorm = diag(Xtrain(:,:,i));
            D(1:nTrain, nTrain + (1:nTest)) = D(1:nTrain, nTrain + (1:nTest)) ...
                + bsxfun(@plus, Trainnorm, bsxfun(@plus, Testnorm(:,i)', -2 * Xtest(:,:,i)));
        end
    end
end

function A = reduceAgreement(Agree)
    % Drop test queries that have no relevant or no irrelevant training
    % points, since the ranking scores are not well defined for them
    nPos = sum( Agree, 1);
    nNeg = sum(~Agree, 1);

    goodI = find(nPos > 0 & nNeg > 0);
    A     = Agree(:, goodI);
end