function [Y, Loss] = separationOracleNDCG(q, D, pos, neg, k)
%
% [Y,Loss] = separationOracleNDCG(q, D, pos, neg, k)
%
%   q   = index of the query point
%   D   = the current distance matrix
%   pos = indices of relevant results for q
%   neg = indices of irrelevant results for q
%   k   = length of the list to consider
%
%   Y is an n-by-1 ordering of the entries of pos and neg
%   (n = length(pos) + length(neg)) corresponding to the maximally
%   violated constraint
%
%   Loss is the loss for Y, in this case, 1-NDCG(Y)
%
%   The function indexes the last column of its tables and the last
%   cumulative negative score, so it fails if pos or neg is empty.

    % First, sort the documents in descending order of W'Phi(q,x)
    % Phi = - (X(q) - X(x)) * (X(q) - X(x))'

    % Sort the positive documents (score = negated distance to q)
    ScorePos        = - D(pos, q);
    [Vpos, Ipos]    = sort(full(ScorePos'), 'descend');
    Ipos            = pos(Ipos);

    % Sort the negative documents
    ScoreNeg        = - D(neg, q);
    [Vneg, Ineg]    = sort(full(ScoreNeg'), 'descend');
    Ineg            = neg(Ineg);

    % Now, solve the DP for the interleaving

    numPos  = length(pos);
    numNeg  = length(neg);
    n       = numPos + numNeg;

    % From Chakrabarti (KDD08)
    k       = min(k, numPos);

    cVneg   = cumsum(Vneg);

    % Rank discounts: 1 for ranks 1 and 2, 1/log2(r) for ranks r >= 3.
    % Start from ones(k,1) and only overwrite 3:k so the vector keeps
    % exactly k entries.  Assigning Discount(1:2) = 1 into a length-1
    % vector would grow it to two entries when k == 1, which doubles
    % DCGstar and credits rank 2.
    Discount        = ones(k, 1);
    Discount(3:k)   = 1 ./ log2(3:k);

    DCGstar         = sum(Discount);

    % Pre-compute the loss table.
    % LossTab(a, b) is the (negated, normalized) gain of putting the b-th
    % best positive behind (a-1) negatives, i.e. at rank a+b-1:
    %   -Discount(a+b-1) / DCGstar   if a+b-1 <= k,   0 otherwise.
    % hankel() yields the k-by-k anti-diagonal layout; it is embedded into
    % a numNeg-by-numPos table of zeros (k <= numPos always holds here).
    H               = hankel(- Discount / DCGstar);
    LossTab         = zeros(numNeg, numPos);
    rowsKept        = min(k, numNeg);
    LossTab(1:rowsKept, 1:k) = H(1:rowsKept, :);

    % 2010-01-17 09:13:41 by Brian McFee <bmcfee@cs.ucsd.edu>
    % initialize the score table

    % pcVneg(a) = sum of the (a-1) best negative scores
    pcVneg = [0 cVneg];

    % Pre-compute cellScore
    cellValue = bsxfun(@times, Vpos / (numPos * numNeg), numNeg - 2 * ((1:numNeg)-1)');
    cellValue = bsxfun(@plus, (2 * pcVneg(1:numNeg) - cVneg(end))' / (numPos * numNeg), cellValue);
    cellValue = cellValue + LossTab;

    S = zeros(numNeg, numPos);      % best accumulated score
    P = zeros(numNeg, numPos);      % back-pointers into the previous column

    % Initialize first column
    P(:,1) = 1;
    S(:,1) = cellValue(:,1);

    % Initialize first row
    P(1,:) = 1;
    S(1,:) = cumsum(cellValue(1,:));

    % For the rest, use the recurrence
    % NOTE(review): this relies on a two-output cummax that is not defined
    % in this file; the transpose below implies it returns the running
    % maximum as a row vector, and pointer is used as the row index of that
    % maximum - confirm against the helper on the path.
    for g = 2:numPos
        [m, pointer]    = cummax(S(:,g-1));
        P(:,g)          = pointer;
        S(:,g)          = m' + cellValue(:,g);
    end

    % Now reconstruct the permutation from the DP table
    Y       = nan(n, 1);
    [~, p]  = max(S(:,numPos));

    Loss    = 1 + LossTab(p,numPos);

    % NegsBefore(a) = number of negatives ranked ahead of the a-th positive
    NegsBefore          = zeros(numPos,1);
    NegsBefore(numPos)  = p-1;

    % Walk the back-pointers from the last positive to the first,
    % accumulating each positive's (negative) gain into the loss.
    for a = numPos:-1:2
        p               = P(p,a);
        NegsBefore(a-1) = p-1;
        Loss            = Loss + LossTab(p,a-1);
    end

    % The a-th positive lands at rank a + NegsBefore(a); negatives fill
    % the remaining slots in sorted order.
    Y((1:numPos)' + NegsBefore) = Ipos;
    Y(isnan(Y))                 = Ineg;

end