function [W, Xi, Diagnostics] = mlr_solver(C, Margins, W, K)
% [W, Xi, D] = mlr_solver(C, Margins, W, K)
%
% C >= 0        Slack trade-off parameter
% Margins       = array of mean margin values
% W             = initial value for W
% K             = data matrix (or kernel)
%
% W (output)    = the learned metric
% Xi            = 1-slack value
% D             = diagnostics

global DEBUG REG FEASIBLE LOSS;

%%%
% Gradient directions for each constraint, and the constraint-age clock
%
global PsiR;
global PsiClock;

numConstraints = length(PsiR);

%%%
% Optimization parameters

% Armijo rule constant
armijo = 1e-5;

% Initial learning rate
lambda0 = 1e-4;

% Learning-rate increase/decrease factors applied after each step
lambdaup   = ((1+sqrt(5))/2)^(1/3);
lambdadown = ((1+sqrt(5))/2)^(-1);

% Maximum number of gradient steps
maxsteps = 1e4;

% Size of the convergence window
frame = 10;

% Convergence threshold
convthresh = 1e-5;

% Maximum number of backtracking steps
maxbackcount = 100;


Diagnostics = struct( 'f',             [], ...
                      'num_steps',     [], ...
                      'stop_criteria', []);

% Repeat until convergence:
%   1) Calculate f
%   2) Take a gradient step
%   3) Project W back onto the PSD cone

%%%
% Initialize
%

f      = inf;
dfdW   = zeros(size(W));
lambda = lambda0;
F      = Inf * ones(1, maxsteps+1);
XiR    = zeros(numConstraints, 1);


stepcount = -1;
backcount = 0;
done      = 0;


while 1
    fold = f;
    Wold = W;

    %%%
    % Count constraint violations and build the gradient
    dbprint(3, 'Computing gradient');

    %%%
    % Calculate constraint violations
    %
    XiR(:) = 0;
    for R = numConstraints:-1:1
        XiR(R) = LOSS(W, PsiR{R}, Margins(R), 0);
    end

    %%%
    % Find the most violated constraint
    %
    [Xi, mgrad] = max(XiR);
    Xi = max(Xi, 0);

    PsiClock(mgrad) = 0;

    %%%
    % Evaluate f: slack term plus regularizer
    %
    f = C * Xi ...
        + REG(W, K, 0);

    %%%
    % Test for convergence
    %
    objDiff = fold - f;

    if objDiff > armijo * lambda * (dfdW(:)' * dfdW(:))

        stepcount = stepcount + 1;

        F(stepcount+1) = f;

        sdiff = inf;
        if stepcount >= frame
            sdiff = log(F(stepcount+1-frame) / f);
        end

        if stepcount >= maxsteps
            done = 1;
            stopcriteria = 'MAXSTEPS';
        elseif sdiff <= convthresh
            done = 1;
            stopcriteria = 'CONVERGENCE';
        else
            %%%
            % Not converged: rebuild the gradient from the most violated
            % constraint
            dfdW = C * LOSS(W, PsiR{mgrad}, Margins(mgrad), 1) ...
                   + REG(W, K, 1);
        end

        dbprint(3, 'Lambda up!');
        Wold = W;
        lambda = lambdaup * lambda;
        backcount = 0;

    else
        % Backtracking time: drop the learning rate
        if backcount >= maxbackcount
            W = Wold;
            f = fold;
            done = 1;

            stopcriteria = 'BACKTRACK';
        else
            dbprint(3, 'Lambda down!');
            lambda = lambdadown * lambda;
            backcount = backcount + 1;
        end
    end

    %%%
    % Take a gradient step
    %
    W = W - lambda * dfdW;

    %%%
    % Project back onto the feasible (PSD) set
    %
    dbprint(3, 'Projecting onto feasible set');
    W = FEASIBLE(W);

    if done
        break;
    end

end

Diagnostics.f             = F(2:(stepcount+1))';
Diagnostics.stop_criteria = stopcriteria;
Diagnostics.num_steps     = stepcount;

dbprint(1, '\t%s after %d steps.\n', stopcriteria, stepcount);
end
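
%%%
% Usage sketch (illustrative assumptions, not part of this file's API):
% in the MLR package, the REG/LOSS/FEASIBLE handles and the PsiR/PsiClock
% constraint cache are set up by the training front-end before this solver
% is called. A hypothetical direct invocation could look like:
%
%   global REG LOSS FEASIBLE PsiR PsiClock;
%   REG      = @my_regularizer;    % REG(W, K, 0) -> value; REG(W, K, 1) -> gradient
%   LOSS     = @my_hinge_loss;     % LOSS(W, Psi, margin, 0) -> slack; (..., 1) -> gradient
%   FEASIBLE = @my_psd_projection; % projection of W onto the PSD cone
%   PsiR     = {Psi1};             % initial constraint direction(s)
%   PsiClock = 0;                  % age counter, one entry per constraint
%
%   [W, Xi, D] = mlr_solver(10, margin_vals, eye(size(K,1)), K);
%
% Here my_regularizer, my_hinge_loss, my_psd_projection, Psi1, and
% margin_vals are placeholder names, not functions shipped with the package.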