Mercurial > hg > camir-aes2014
diff toolboxes/distance_learning/mlr/mlr_test.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolboxes/distance_learning/mlr/mlr_test.m Tue Feb 10 15:05:51 2015 +0000 @@ -0,0 +1,283 @@ +function Perf = mlr_test(W, test_k, Xtrain, Ytrain, Xtest, Ytest) +% Perf = mlr_test(W, test_k, Xtrain, Ytrain, Xtest, Ytest) +% +% W = d-by-d positive semi-definite matrix +% test_k = vector of k-values to use for KNN/Prec@k/NDCG +% Xtrain = d-by-n matrix of training data +% Ytrain = n-by-1 vector of training labels +% OR +% n-by-2 cell array where +% Y{q,1} contains relevant indices (in 1..n) for point q +% Y{q,2} contains irrelevant indices (in 1..n) for point q +% Xtest = d-by-m matrix of testing data +% Ytest = m-by-1 vector of training labels, or m-by-2 cell array +% +% +% The output structure Perf contains the mean score for: +% AUC, KNN, Prec@k, MAP, MRR, NDCG, +% as well as the effective dimensionality of W, and +% the best-performing k-value for KNN, Prec@k, and NDCG. +% + + Perf = struct( ... + 'AUC', [], ... + 'KNN', [], ... + 'PrecAtK', [], ... + 'MAP', [], ... + 'MRR', [], ... + 'NDCG', [], ... + 'dimensionality', [], ... + 'KNNk', [], ... + 'PrecAtKk', [], ... + 'NDCGk', [] ... + ); + + [d, nTrain, nKernel] = size(Xtrain); + % Compute dimensionality of the learned metric + Perf.dimensionality = mlr_test_dimension(W, nTrain, nKernel); + test_k = min(test_k, nTrain); + + if nargin > 5 + % Knock out the points with no labels + if ~iscell(Ytest) + Ibad = find(isnan(Ytrain)); + Xtrain(:,Ibad,:) = inf; + end + + % Build the distance matrix + [D, I] = mlr_test_distance(W, Xtrain, Xtest); + else + % Leave-one-out validation + + if nargin > 4 + % In this case, Xtest is a subset of training indices to test on + testRange = Xtest; + else + testRange = 1:nTrain; + end + Xtest = Xtrain(:,testRange,:); + Ytest = Ytrain(testRange); + + % compute self-distance + [D, I] = mlr_test_distance(W, Xtrain, Xtest); + % clear out the self-link (distance = 0) + I = I(2:end,:); + D = D(2:end,:); + end + + nTest = length(Ytest); + + % Compute label agreement + if ~iscell(Ytest) + % First, knock out the points with no label + Labels = Ytrain(I); + Agree = bsxfun(@eq, Ytest', Labels); + + % We only compute KNN error if Y are labels + [Perf.KNN, Perf.KNNk] = mlr_test_knn(Labels, Ytest, test_k); + else + if nargin > 5 + Agree = zeros(nTrain, nTest); + else + Agree = zeros(nTrain-1, nTest); + end + for i = 1:nTest + Agree(:,i) = ismember(I(:,i), Ytest{i,1}); + end + + Agree = reduceAgreement(Agree); + end + + % Compute AUC score + Perf.AUC = mlr_test_auc(Agree); + + % Compute MAP score + Perf.MAP = mlr_test_map(Agree); + + % Compute MRR score + Perf.MRR = mlr_test_mrr(Agree); + + % Compute prec@k + [Perf.PrecAtK, Perf.PrecAtKk] = mlr_test_preck(Agree, test_k); + + % Compute NDCG score + [Perf.NDCG, Perf.NDCGk] = mlr_test_ndcg(Agree, test_k); + +end + + +function [D,I] = mlr_test_distance(W, Xtrain, Xtest) + + % CASES: + % Raw: W = [] + + % Linear, full: W = d-by-d + % Single Kernel, full: W = n-by-n + % MKL, full: W = n-by-n-by-m + + % Linear, diagonal: W = d-by-1 + % Single Kernel, diagonal: W = n-by-1 + % MKL, diag: W = n-by-m + % MKL, diag-off-diag: W = m-by-m-by-n + + [d, nTrain, nKernel] = size(Xtrain); + nTest = size(Xtest, 2); + + if isempty(W) + % W = [] => native euclidean distances + D = mlr_test_distance_raw(Xtrain, Xtest); + + elseif size(W,1) == d && size(W,2) == d + % We're in a full-projection case + D = setDistanceFullMKL([Xtrain Xtest], W, nTrain + (1:nTest), 1:nTrain); + + elseif size(W,1) == d && size(W,2) == nKernel + % We're in a simple diagonal case + D = setDistanceDiagMKL([Xtrain Xtest], W, nTrain + (1:nTest), 1:nTrain); + + else + % Error? + error('Cannot determine metric mode.'); + + end + + D = full(D(1:nTrain, nTrain + (1:nTest))); + [v,I] = sort(D, 1); +end + + + +function dimension = mlr_test_dimension(W, nTrain, nKernel) + + % CASES: + % Raw: W = [] + + % Linear, full: W = d-by-d + % Single Kernel, full: W = n-by-n + % MKL, full: W = n-by-n-by-m + + % Linear, diagonal: W = d-by-1 + % Single Kernel, diagonal: W = n-by-1 + % MKL, diag: W = n-by-m + % MKL, diag-off-diag: W = m-by-m-by-n + + + if size(W,1) == size(W,2) + dim = []; + for i = 1:nKernel + [v,d] = eig(0.5 * (W(:,:,i) + W(:,:,i)')); + dim = [dim ; abs(real(diag(d)))]; + end + else + dim = W(:); + end + + cd = cumsum(dim) / sum(dim); + dimension = find(cd >= 0.95, 1); + if isempty(dimension) + dimension = 0; + end +end + +function [NDCG, NDCGk] = mlr_test_ndcg(Agree, test_k) + + nTrain = size(Agree, 1); + + Discount = zeros(1, nTrain); + Discount(1:2) = 1; + + NDCG = -Inf; + NDCGk = 0; + for k = test_k + + Discount(3:k) = 1 ./ log2(3:k); + Discount = Discount / sum(Discount); + + b = mean(Discount * Agree); + if b > NDCG + NDCG = b; + NDCGk = k; + end + end +end + +function [PrecAtK, PrecAtKk] = mlr_test_preck(Agree, test_k) + + PrecAtK = -Inf; + PrecAtKk = 0; + for k = test_k + b = mean( mean( Agree(1:k, :), 1 ) ); + if b > PrecAtK + PrecAtK = b; + PrecAtKk = k; + end + end +end + +function [KNN, KNNk] = mlr_test_knn(Labels, Ytest, test_k) + + KNN = -Inf; + KNNk = 0; + for k = test_k + % FIXME: 2012-02-07 16:51:59 by Brian McFee <bmcfee@cs.ucsd.edu> + % fix these to discount nans + + b = mean( mode( Labels(1:k,:), 1 ) == Ytest'); + if b > KNN + KNN = b; + KNNk = k; + end + end +end + +function MAP = mlr_test_map(Agree); + + nTrain = size(Agree, 1); + MAP = bsxfun(@ldivide, (1:nTrain)', cumsum(Agree, 1)); + MAP = mean(sum(MAP .* Agree, 1)./ sum(Agree, 1)); +end + +function MRR = mlr_test_mrr(Agree); + + nTest = size(Agree, 2); + MRR = 0; + for i = 1:nTest + MRR = MRR + (1 / find(Agree(:,i), 1)); + end + MRR = MRR / nTest; +end + +function AUC = mlr_test_auc(Agree) + + TPR = cumsum(Agree, 1); + FPR = cumsum(~Agree, 1); + + numPos = TPR(end,:); + numNeg = FPR(end,:); + + TPR = mean(bsxfun(@rdivide, TPR, numPos),2); + FPR = mean(bsxfun(@rdivide, FPR, numNeg),2); + AUC = diff([0 FPR']) * TPR; +end + + +function D = mlr_test_distance_raw(Xtrain, Xtest) + + [d, nTrain, nKernel] = size(Xtrain); + nTest = size(Xtest, 2); + + % Not in kernel mode, compute distances directly + D = 0; + for i = 1:nKernel + D = D + setDistanceDiag([Xtrain(:,:,i) Xtest(:,:,i)], ones(d,1), ... + nTrain + (1:nTest), 1:nTrain); + end +end + +function A = reduceAgreement(Agree) + nPos = sum(Agree,1); + nNeg = sum(~Agree,1); + + goodI = find(nPos > 0 & nNeg > 0); + A = Agree(:,goodI); +end