% toolboxes/distance_learning/mlr/mlr_test.m

function Perf = mlr_test(W, test_k, Xtrain, Ytrain, Xtest, Ytest, Testnorm)
% Perf = mlr_test(W, test_k, Xtrain, Ytrain, Xtest, Ytest, Testnorm)
%
%   W        = d-by-d positive semi-definite matrix
%   test_k   = vector of k-values to use for KNN/Prec@k/NDCG
%   Xtrain   = d-by-n matrix of training data
%   Ytrain   = n-by-1 vector of training labels
%                OR
%              n-by-2 cell array where
%                Y{q,1} contains relevant indices (in 1..n) for point q
%                Y{q,2} contains irrelevant indices (in 1..n) for point q
%   Xtest    = d-by-m matrix of test data
%   Ytest    = m-by-1 vector of test labels, or m-by-2 cell array
%   Testnorm = (optional) m-by-nKernel matrix of test-point self-similarities
%              k(x,x); only used for raw (W = []) distances in kernel mode
%
%   The output structure Perf contains the mean score for:
%       AUC, KNN, Prec@k, MAP, MRR, NDCG,
%   as well as the effective dimensionality of W, and
%   the best-performing k-value for KNN, Prec@k, and NDCG.
%
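% Example (illustrative sketch: the data below are synthetic and W is a
% placeholder for a metric learned elsewhere, e.g. by mlr_train from this
% toolbox):
%
%   Xtrain = randn(10, 100);                % 10-dimensional training points
%   Ytrain = double(rand(100, 1) > 0.5);    % class labels
%   Xtest  = randn(10, 40);
%   Ytest  = double(rand(40, 1) > 0.5);
%   W      = eye(10);                       % placeholder metric
%   Perf   = mlr_test(W, [3 5 10], Xtrain, Ytrain, Xtest, Ytest);
%   disp([Perf.KNN Perf.MAP Perf.AUC]);
%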

% addpath('cuttingPlane', 'distance', 'feasible', 'initialize', 'loss', ...
%         'metricPsi', 'regularize', 'separationOracle', 'util');

Perf = struct( ...
        'AUC',              [], ...
        'KNN',              [], ...
        'PrecAtK',          [], ...
        'MAP',              [], ...
        'MRR',              [], ...
        'NDCG',             [], ...
        'dimensionality',   [], ...
        'KNNk',             [], ...
        'PrecAtKk',         [], ...
        'NDCGk',            [] ...
);

[d, nTrain, nKernel] = size(Xtrain);
nTest   = length(Ytest);
test_k  = min(test_k, nTrain);

if nargin < 7
    Testnorm = [];
end

% Compute dimensionality of the learned metric
Perf.dimensionality = mlr_test_dimension(W, nTrain, nKernel);

% Build the distance matrix
[D, I] = mlr_test_distance(W, Xtrain, Xtest, Testnorm);

% Compute label agreement
if ~iscell(Ytest)
    % Class labels: a retrieved training point is relevant if its label
    % matches the query's
    Labels  = Ytrain(I);
    Agree   = bsxfun(@eq, Ytest', Labels);

    % We only compute KNN error if Y are labels
    [Perf.KNN, Perf.KNNk] = mlr_test_knn(Labels, Ytest, test_k);
else
    % Relevance sets: a training point is relevant if it appears in the
    % query's relevant-index list
    Agree = zeros(nTrain, nTest);
    for i = 1:nTest
        Agree(:,i) = ismember(I(:,i), Ytest{i,1});
    end
    Agree = reduceAgreement(Agree);
end

% Compute AUC score
Perf.AUC = mlr_test_auc(Agree);

% Compute MAP score
Perf.MAP = mlr_test_map(Agree);

% Compute MRR score
Perf.MRR = mlr_test_mrr(Agree);

% Compute prec@k
[Perf.PrecAtK, Perf.PrecAtKk] = mlr_test_preck(Agree, test_k);

% Compute NDCG score
[Perf.NDCG, Perf.NDCGk] = mlr_test_ndcg(Agree, test_k);

end


function [D, I] = mlr_test_distance(W, Xtrain, Xtest, Testnorm)

% CASES:
%   Raw:                        W = []

%   Linear, full:               W = d-by-d
%   Single Kernel, full:        W = n-by-n
%   MKL, full:                  W = n-by-n-by-m

%   Linear, diagonal:           W = d-by-1
%   Single Kernel, diagonal:    W = n-by-1
%   MKL, diag:                  W = n-by-m
%   MKL, diag-off-diag:         W = m-by-m-by-n

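% Note on the dispatch below: when the data are given as kernel matrices,
% d == nTrain, so the d-by-d branch also covers the full kernelized cases
% (single kernel and MKL), and the d-by-nKernel branch covers the diagonal ones.
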
[d, nTrain, nKernel] = size(Xtrain);
nTest = size(Xtest, 2);

if isempty(W)
    % W = [] => native euclidean distances
    D = mlr_test_distance_raw(Xtrain, Xtest, Testnorm);

elseif size(W,1) == d && size(W,2) == d
    % Full-projection case
    D = setDistanceFullMKL([Xtrain Xtest], W, nTrain + (1:nTest), 1:nTrain);

elseif size(W,1) == d && size(W,2) == nKernel
    % Simple diagonal case
    D = setDistanceDiagMKL([Xtrain Xtest], W, nTrain + (1:nTest), 1:nTrain);

elseif size(W,1) == nKernel && size(W,2) == nKernel && size(W,3) == nTrain
    % Diagonal-off-diagonal (DOD) case
    D = setDistanceDODMKL([Xtrain Xtest], W, nTrain + (1:nTest), 1:nTrain);

else
    error('Cannot determine metric mode.');
end

% Keep only the train-vs-test block and rank training points by distance
D      = full(D(1:nTrain, nTrain + (1:nTest)));
[~, I] = sort(D, 1);
end


function dimension = mlr_test_dimension(W, nTrain, nKernel)

% CASES:
%   Raw:                        W = []

%   Linear, full:               W = d-by-d
%   Single Kernel, full:        W = n-by-n
%   MKL, full:                  W = n-by-n-by-m

%   Linear, diagonal:           W = d-by-1
%   Single Kernel, diagonal:    W = n-by-1
%   MKL, diag:                  W = n-by-m
%   MKL, diag-off-diag:         W = m-by-m-by-n

if size(W,2) == nTrain
    % W is indexed by training points: use the eigenvalue spectrum of each
    % (symmetrized) slice
    dim = [];
    for i = 1:nKernel
        [~, d]  = eig(0.5 * (W(:,:,i) + W(:,:,i)'));
        dim     = [dim; abs(real(diag(d)))];
    end
else
    % Otherwise use the entries of W directly (e.g. diagonal weights)
    dim = W(:);
end

% Effective dimensionality: index at which the cumulative sum of these
% values reaches 95% of their total
cd          = cumsum(dim) / sum(dim);
dimension   = find(cd >= 0.95, 1);
if isempty(dimension)
    dimension = 0;
end
end

function [NDCG, NDCGk] = mlr_test_ndcg(Agree, test_k)

nTrain = size(Agree, 1);

NDCG    = -Inf;
NDCGk   = 0;
for k = test_k
    % Rebuild the DCG discount for this k: full weight at ranks 1 and 2,
    % logarithmic discount afterwards, normalized so that a ranking with
    % relevant points at all of the first k positions scores 1
    Discount        = zeros(1, nTrain);
    Discount(1:2)   = 1;
    Discount(3:k)   = 1 ./ log2(3:k);
    Discount        = Discount / sum(Discount);

    b = mean(Discount * Agree);
    if b > NDCG
        NDCG    = b;
        NDCGk   = k;
    end
end
end

function [PrecAtK, PrecAtKk] = mlr_test_preck(Agree, test_k)

PrecAtK     = -Inf;
PrecAtKk    = 0;
for k = test_k
    b = mean(mean(Agree(1:k,:), 1));
    if b > PrecAtK
        PrecAtK     = b;
        PrecAtKk    = k;
    end
end
end

function [KNN, KNNk] = mlr_test_knn(Labels, Ytest, test_k)

KNN     = -Inf;
KNNk    = 0;
for k = test_k
    b = mean(mode(Labels(1:k,:), 1) == Ytest');
    if b > KNN
        KNN     = b;
        KNNk    = k;
    end
end
end

function MAP = mlr_test_map(Agree)

% Mean average precision: precision at the rank of each relevant point,
% averaged over relevant points, then averaged over queries
nTrain  = size(Agree, 1);
MAP     = bsxfun(@ldivide, (1:nTrain)', cumsum(Agree, 1));
MAP     = mean(sum(MAP .* Agree, 1) ./ sum(Agree, 1));
end

function MRR = mlr_test_mrr(Agree)

% Mean reciprocal rank: average over queries of 1 / (rank of the first
% relevant point)
nTest   = size(Agree, 2);
MRR     = 0;
for i = 1:nTest
    MRR = MRR + (1 / find(Agree(:,i), 1));
end
MRR = MRR / nTest;
end

function AUC = mlr_test_auc(Agree)

% True/false positive rates as a function of list depth, averaged over queries
TPR = cumsum(Agree, 1);
FPR = cumsum(~Agree, 1);

numPos = TPR(end,:);
numNeg = FPR(end,:);

TPR = mean(bsxfun(@rdivide, TPR, numPos), 2);
FPR = mean(bsxfun(@rdivide, FPR, numNeg), 2);

% Area under the mean ROC curve (rectangular approximation)
AUC = diff([0 FPR']) * TPR;
end


function D = mlr_test_distance_raw(Xtrain, Xtest, Testnorm)

[d, nTrain, nKernel] = size(Xtrain);
nTest = size(Xtest, 2);

if isempty(Testnorm)
    % Not in kernel mode: compute euclidean distances directly
    D = 0;
    for i = 1:nKernel
        D = D + setDistanceDiag([Xtrain(:,:,i) Xtest(:,:,i)], ones(d,1), ...
                                nTrain + (1:nTest), 1:nTrain);
    end
else
    % Kernel mode: squared distance in feature space,
    % d(x,z)^2 = k(x,x) + k(z,z) - 2*k(x,z), accumulated over kernels
    D = sparse(nTrain + nTest, nTrain + nTest);
    for i = 1:nKernel
        Trainnorm = diag(Xtrain(:,:,i));
        D(1:nTrain, nTrain + (1:nTest)) = D(1:nTrain, nTrain + (1:nTest)) ...
            + bsxfun(@plus, Trainnorm, bsxfun(@plus, Testnorm(:,i)', -2 * Xtest(:,:,i)));
    end
end
end

function A = reduceAgreement(Agree)

% Drop queries that have no relevant or no irrelevant training points,
% since the ranking scores are undefined for them
nPos = sum(Agree, 1);
nNeg = sum(~Agree, 1);

goodI   = find(nPos > 0 & nNeg > 0);
A       = Agree(:, goodI);
end