comparison toolboxes/distance_learning/mlr/mlr_test.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
function Perf = mlr_test(W, test_k, Xtrain, Ytrain, Xtest, Ytest)
% Perf = mlr_test(W, test_k, Xtrain, Ytrain, Xtest, Ytest)
%
%   W       = d-by-d positive semi-definite matrix
%   test_k  = vector of k-values to use for KNN/Prec@k/NDCG
%   Xtrain  = d-by-n matrix of training data
%   Ytrain  = n-by-1 vector of training labels
%               OR
%             n-by-2 cell array where
%               Y{q,1} contains relevant indices (in 1..n) for point q
%               Y{q,2} contains irrelevant indices (in 1..n) for point q
%   Xtest   = d-by-m matrix of testing data
%   Ytest   = m-by-1 vector of testing labels, or m-by-2 cell array
%
%   Leave-one-out usage:
%     mlr_test(W, test_k, Xtrain, Ytrain)
%         every training point is used as a query against the others.
%     mlr_test(W, test_k, Xtrain, Ytrain, testRange)
%         the fifth argument is a set of training indices to use as queries.
%
%   The output structure Perf contains the mean score for:
%       AUC, KNN, Prec@k, MAP, MRR, NDCG,
%   as well as the effective dimensionality of W, and
%   the best-performing k-value for KNN, Prec@k, and NDCG.
%   KNN/KNNk are only filled in when the test labels are a plain vector.
%

    Perf = struct( ...
        'AUC',              [], ...
        'KNN',              [], ...
        'PrecAtK',          [], ...
        'MAP',              [], ...
        'MRR',              [], ...
        'NDCG',             [], ...
        'dimensionality',   [], ...
        'KNNk',             [], ...
        'PrecAtKk',         [], ...
        'NDCGk',            [] ...
    );

    nTrain  = size(Xtrain, 2);
    nKernel = size(Xtrain, 3);

    % Compute dimensionality of the learned metric
    Perf.dimensionality = mlr_test_dimension(W, nTrain, nKernel);

    if nargin > 5
        % Held-out evaluation.
        % Knock out the training points with no labels
        if ~iscell(Ytest)
            Ibad                = find(isnan(Ytrain));
            Xtrain(:,Ibad,:)    = inf;
        end

        % Build the distance matrix; only the ranking I is needed below
        [D, I] = mlr_test_distance(W, Xtrain, Xtest);
    else
        % Leave-one-out validation

        if nargin > 4
            % In this case, Xtest is a subset of training indices to test on
            testRange = Xtest;
        else
            testRange = 1:nTrain;
        end
        Xtest = Xtrain(:,testRange,:);
        if iscell(Ytrain)
            % Row-index the cell array so both columns are kept; a linear
            % index would flatten it and break Ytest{i,1} below.
            Ytest = Ytrain(testRange,:);
        else
            Ytest = Ytrain(testRange);
        end

        % compute self-distance
        [D, I] = mlr_test_distance(W, Xtrain, Xtest);

        % clear out the self-link (distance = 0), which is ranked first
        I = I(2:end,:);
    end

    % Number of ranked neighbours per query: nTrain for held-out data,
    % nTrain-1 for leave-one-out.  k can never exceed it.
    nRanked = size(I, 1);
    test_k  = reshape(min(test_k, nRanked), 1, []);

    % Compute label agreement
    if ~iscell(Ytest)
        % Labels(r,q) is the label of the r-th nearest neighbour of query q
        Labels  = reshape(Ytrain(I), size(I));
        Agree   = bsxfun(@eq, reshape(Ytest, 1, []), Labels);

        % We only compute KNN error if Y are labels
        [Perf.KNN, Perf.KNNk] = mlr_test_knn(Labels, Ytest, test_k);
    else
        % One query per row of the cell array
        nTest   = size(Ytest, 1);
        Agree   = zeros(nRanked, nTest);
        for i = 1:nTest
            Agree(:,i) = ismember(I(:,i), Ytest{i,1});
        end

        % Drop queries that have no relevant or no irrelevant results
        Agree = reduceAgreement(Agree);
    end

    % Compute AUC score
    Perf.AUC = mlr_test_auc(Agree);

    % Compute MAP score
    Perf.MAP = mlr_test_map(Agree);

    % Compute MRR score
    Perf.MRR = mlr_test_mrr(Agree);

    % Compute prec@k
    [Perf.PrecAtK, Perf.PrecAtKk] = mlr_test_preck(Agree, test_k);

    % Compute NDCG score
    [Perf.NDCG, Perf.NDCGk] = mlr_test_ndcg(Agree, test_k);

end
107
108
function [D,I] = mlr_test_distance(W, Xtrain, Xtest)
% [D, I] = mlr_test_distance(W, Xtrain, Xtest)
%
%   Distances from every training point (rows) to every test point
%   (columns) under the metric W, plus the per-column ascending sort
%   order I of those distances.  D itself is returned unsorted.
%
%   Supported shapes of W, with [d, nTrain, nKernel] = size(Xtrain):
%       []                          raw distances via mlr_test_distance_raw
%       d-by-d (first two dims)     full projection, setDistanceFullMKL
%       d-by-nKernel                diagonal weights, setDistanceDiagMKL
%   Any other shape raises an error.  The full test is tried before the
%   diagonal one, so it wins when both match.

    [nFeat, nTrain, nKernel] = size(Xtrain);
    nTest       = size(Xtest, 2);

    % Column positions of each set inside the stacked matrix [Xtrain Xtest]
    trainCols   = 1:nTrain;
    testCols    = nTrain + (1:nTest);

    if isempty(W)
        % W = [] => native euclidean distances
        D = mlr_test_distance_raw(Xtrain, Xtest);
    else
        rowsMatch   = (size(W,1) == nFeat);
        isFull      = rowsMatch && (size(W,2) == nFeat);
        isDiag      = rowsMatch && (size(W,2) == nKernel);

        if isFull
            % Full-projection case
            D = setDistanceFullMKL([Xtrain Xtest], W, testCols, trainCols);
        elseif isDiag
            % Simple diagonal case
            D = setDistanceDiagMKL([Xtrain Xtest], W, testCols, trainCols);
        else
            error('Cannot determine metric mode.');
        end
    end

    % Keep only the train-vs-test sub-block, as a dense matrix
    D = full(D(trainCols, testCols));

    % Rank training points for each test point; sorted values are not needed
    [sortedD, I] = sort(D, 1);
end
147
148
149
function dimension = mlr_test_dimension(W, nTrain, nKernel)
% dimension = mlr_test_dimension(W, nTrain, nKernel)
%
%   Effective dimensionality of the metric W: the smallest number of
%   spectrum entries, taken from largest to smallest, whose sum reaches
%   95% of the total.
%
%   W        = metric; [] yields 0
%              square in its first two dims: the spectrum is the absolute
%                  eigenvalues of the symmetrized slices W(:,:,1..nKernel)
%              otherwise: the entries of W are used directly as the spectrum
%   nTrain   = unused, kept for interface compatibility
%   nKernel  = number of slices of W to include in the square case
%
%   Returns 0 when no prefix reaches the 95% threshold (e.g. all-zero W).

    dimension = 0;

    % Raw (no metric): nothing to measure
    if isempty(W)
        return;
    end

    if size(W,1) == size(W,2)
        spectrum = [];
        for i = 1:nKernel
            Wi          = W(:,:,i);
            % Symmetrize before eig so the eigenvalues are real up to
            % round-off; abs/real discard any residue.
            lambda      = eig(0.5 * (Wi + Wi'));
            spectrum    = [spectrum ; abs(real(lambda(:)))];
        end
    else
        spectrum = W(:);
    end

    % The cumulative share must be taken over the LARGEST entries first;
    % in ascending order a low-rank metric would look full-rank.
    spectrum    = sort(spectrum, 'descend');

    share       = cumsum(spectrum) / sum(spectrum);
    firstHit    = find(share >= 0.95, 1);
    if ~isempty(firstHit)
        dimension = firstHit;
    end
end
181
function [NDCG, NDCGk] = mlr_test_ndcg(Agree, test_k)
% [NDCG, NDCGk] = mlr_test_ndcg(Agree, test_k)
%
%   Agree   = (#ranked)-by-(#queries) relevance matrix; row r of column q
%             is nonzero when the r-th ranked item is relevant to query q
%   test_k  = vector of cutoffs to try
%
%   For each cutoff k the score is the mean over queries of the
%   discount-weighted relevance, where ranks 1 and 2 have weight 1, rank
%   r >= 3 has weight 1/log2(r), ranks beyond k have weight 0, and the
%   weights are normalized to sum to 1.
%
%   Returns the best score and the cutoff that achieved it (the first such
%   cutoff on ties).  Cutoffs larger than the number of ranked items are
%   clamped to it.  If no cutoff yields a comparable score, returns
%   NDCG = -Inf and NDCGk = 0.

    nRanked = size(Agree, 1);

    NDCG    = -Inf;
    NDCGk   = 0;
    for k = reshape(test_k, 1, [])
        kEff = min(k, nRanked);

        % Rebuild the weights from scratch for every cutoff: reusing the
        % normalized vector from a previous k would skew the first ranks.
        Discount                = zeros(1, nRanked);
        Discount(1:min(kEff,2)) = 1;
        Discount(3:kEff)        = 1 ./ log2(3:kEff);
        Discount                = Discount / sum(Discount);

        b = mean(Discount * Agree);
        if b > NDCG
            NDCG    = b;
            NDCGk   = kEff;
        end
    end
end
203
function [PrecAtK, PrecAtKk] = mlr_test_preck(Agree, test_k)
% [PrecAtK, PrecAtKk] = mlr_test_preck(Agree, test_k)
%
%   Agree   = (#ranked)-by-(#queries) relevance matrix
%   test_k  = vector of cutoffs to try
%
%   Precision-at-k is the fraction of relevant items among the first k
%   ranked items, averaged over queries.  Returns the best value and the
%   cutoff that achieved it (the first such cutoff on ties).  Cutoffs
%   larger than the number of ranked items are clamped to it.  If no
%   cutoff yields a comparable score, returns PrecAtK = -Inf, PrecAtKk = 0.

    nRanked     = size(Agree, 1);

    PrecAtK     = -Inf;
    PrecAtKk    = 0;
    for k = reshape(test_k, 1, [])
        % Never index past the last ranked item
        kEff = min(k, nRanked);

        b = mean( mean( Agree(1:kEff, :), 1 ) );
        if b > PrecAtK
            PrecAtK     = b;
            PrecAtKk    = kEff;
        end
    end
end
216
function [KNN, KNNk] = mlr_test_knn(Labels, Ytest, test_k)
% [KNN, KNNk] = mlr_test_knn(Labels, Ytest, test_k)
%
%   Labels  = (#ranked)-by-(#queries) matrix; Labels(r,q) is the label of
%             the r-th nearest neighbour of query q
%   Ytest   = vector of true labels, one per query (NaN = unlabeled)
%   test_k  = vector of neighbourhood sizes to try
%
%   For each k, a query is predicted as the most frequent label among its
%   first k neighbours, and the score is the fraction of labeled queries
%   predicted correctly.  Queries whose true label is NaN are left out of
%   that fraction, since they can never compare equal to a prediction.
%
%   Returns the best accuracy and the k that achieved it (the first such k
%   on ties).  Values of k larger than the number of ranked neighbours are
%   clamped to it.  If no k yields a comparable score, returns KNN = -Inf
%   and KNNk = 0.

    truth       = reshape(Ytest, 1, []);
    labeled     = ~isnan(truth);
    nRanked     = size(Labels, 1);

    KNN         = -Inf;
    KNNk        = 0;
    for k = reshape(test_k, 1, [])
        kEff    = min(k, nRanked);

        % Majority vote over the k nearest neighbours, labeled queries only
        votes   = mode( Labels(1:kEff, labeled), 1 );
        b       = mean( votes == truth(labeled) );
        if b > KNN
            KNN     = b;
            KNNk    = kEff;
        end
    end
end
232
function MAP = mlr_test_map(Agree)
% MAP = mlr_test_map(Agree)
%
%   Agree = (#ranked)-by-(#queries) relevance matrix
%
%   Mean average precision: for each query, the precision at every rank
%   holding a relevant item is averaged over that query's relevant items;
%   the result is the mean over queries.  Queries with no relevant item
%   have an undefined (0/0) average precision and are left out of the
%   mean.  Returns NaN when no query has a relevant item.

    nRanked     = size(Agree, 1);

    % precision(r,q) = (# relevant among the top r) / r
    precision   = bsxfun(@rdivide, cumsum(Agree, 1), (1:nRanked)');

    nRelevant   = sum(Agree, 1);
    avgPrec     = sum(precision .* Agree, 1) ./ nRelevant;

    % Skip the 0/0 entries so one empty query cannot turn the mean into NaN
    MAP         = mean(avgPrec(nRelevant > 0));
end
239
function MRR = mlr_test_mrr(Agree)
% MRR = mlr_test_mrr(Agree)
%
%   Agree = (#ranked)-by-(#queries) relevance matrix
%
%   Mean reciprocal rank: the mean over queries of 1/r, where r is the
%   rank of the first relevant item.  Queries with no relevant item at all
%   have no such rank and are left out of the mean.  Returns NaN when no
%   query has a relevant item.

    total   = 0;
    nValid  = 0;
    for i = 1:size(Agree, 2)
        firstHit = find(Agree(:,i), 1);

        % find returns [] when nothing is relevant; adding 1/[] would turn
        % the running total into [], so such queries are skipped.
        if ~isempty(firstHit)
            total   = total + (1 / firstHit);
            nValid  = nValid + 1;
        end
    end
    MRR = total / nValid;
end
249
function AUC = mlr_test_auc(Agree)
% AUC = mlr_test_auc(Agree)
%
%   Agree = (#ranked)-by-(#queries) relevance matrix
%
%   Area under the ROC curve of the query-averaged ranking: the true- and
%   false-positive rates after each rank are averaged across queries, and
%   the area is accumulated as (step in mean FPR) * (mean TPR).
%
%   Queries with no relevant or no irrelevant item have an undefined rate
%   (division by zero) and are left out of the average.  Returns NaN when
%   Agree is empty or no query qualifies.

    if isempty(Agree)
        AUC = NaN;
        return;
    end

    TPR     = cumsum(Agree, 1);
    FPR     = cumsum(~Agree, 1);

    % Totals per query are the last row of the running counts
    numPos  = TPR(end,:);
    numNeg  = FPR(end,:);

    % Only queries with both classes present have well-defined rates
    valid   = (numPos > 0) & (numNeg > 0);

    TPR     = mean(bsxfun(@rdivide, TPR(:,valid), numPos(valid)), 2);
    FPR     = mean(bsxfun(@rdivide, FPR(:,valid), numNeg(valid)), 2);
    AUC     = diff([0 FPR']) * TPR;
end
262
263
function D = mlr_test_distance_raw(Xtrain, Xtest)
% D = mlr_test_distance_raw(Xtrain, Xtest)
%
%   Distances with no learned metric: for every slice along the third
%   dimension of Xtrain, setDistanceDiag is called on the stacked data
%   [Xtrain Xtest] with all-ones weights, and the per-slice results are
%   summed.

    [nFeat, nTrain, nKernel] = size(Xtrain);
    nTest       = size(Xtest, 2);

    % Not in kernel mode: every feature gets unit weight
    unitWeights = ones(nFeat, 1);

    % Column positions of each set inside the stacked matrix
    testCols    = nTrain + (1:nTest);
    trainCols   = 1:nTrain;

    D       = 0;
    slice   = 1;
    while slice <= nKernel
        stacked = [Xtrain(:,:,slice) Xtest(:,:,slice)];
        D       = D + setDistanceDiag(stacked, unitWeights, testCols, trainCols);
        slice   = slice + 1;
    end
end
276
function A = reduceAgreement(Agree)
% A = reduceAgreement(Agree)
%
%   Keeps only the columns of Agree that contain at least one nonzero
%   and at least one zero entry; column order is preserved.

    hasPositive = sum(Agree, 1) > 0;
    hasNegative = sum(~Agree, 1) > 0;

    A = Agree(:, hasPositive & hasNegative);
end