function [W, Xi, Diagnostics] = mlr_solver(C, Margins, W, K)
% [W, Xi, D] = mlr_solver(C, Margins, W, X)
%
% Projected (sub)gradient solver for the 1-slack metric learning
% objective:   min_W  REG(W, X)  +  C * max_r max(0, XiR(r))
% where each XiR(r) is the slack of constraint r (direction PsiR{r},
% target margin Margins(r)).
%
% C >= 0 Slack trade-off parameter
% Margins = array of mean margin values, one per active constraint
% W = initial value for W (warm start)
% X = data matrix (or kernel)
%
% W (output) = the learned metric
% Xi = 1-slack (violation of the most-active constraint, clamped at 0)
% D = diagnostics struct: objective trace 'f', 'num_steps',
% and 'stop_criteria' ('MAXSTEPS' | 'CONVERGENCE' | 'BACKTRACK')
%
% Relies on globals set by the caller:
% REG(W,X,grad) / LOSS(W,Psi,m,grad) — objective terms; the final
% flag selects value (0) vs. gradient (1), as used below.
% FEASIBLE(W) — projection onto the feasible (PSD) set.
% PsiR — cell array of constraint gradient directions.
% PsiClock — staleness counters; reset to 0 for the constraint
% that is active this iteration.
% NOTE(review): dbprint is assumed to be a project-level leveled
% debug printer — confirm against the rest of the package.

global DEBUG REG FEASIBLE LOSS;

%%%
% Initialize the gradient directions for each constraint
%
global PsiR;
global PsiClock;

numConstraints = length(PsiR);

%%%
% Some optimization details

% Armijo rule number (sufficient-decrease tolerance)
armijo = 1e-5;

% Initial learning rate
lambda0 = 1e-6;

% Increase/decrease after each iteration
% (golden-ratio-derived factors: cube root of phi up, 1/phi down)
lambdaup = ((1+sqrt(5))/2)^(1/3);
lambdadown = ((1+sqrt(5))/2)^(-1);

% Maximum steps to take
maxsteps = 1e4;

% Size of convergence window (compare f against its value `frame` steps ago)
frame = 10;

% Convergence threshold (on log-ratio of objective over the window)
convthresh = 1e-5;

% Maximum number of backtracks before giving up at the current iterate
maxbackcount = 100;


Diagnostics = struct( 'f', [], ...
                      'num_steps', [], ...
                      'stop_criteria', []);

% Repeat until convergence:
%   1) Calculate f
%   2) Take a gradient step
%   3) Project W back onto PSD

%%%
% Initialze
%

f = inf;
dfdW = zeros(size(W));          % current descent direction (zero until first accepted step)
lambda = lambda0;
F = Inf * ones(1,maxsteps+1);   % objective history, F(k+1) = f at accepted step k
XiR = zeros(numConstraints,1);  % per-constraint slack values


stepcount = -1;                 % accepted-step counter (first acceptance brings it to 0)
backcount = 0;                  % consecutive-backtrack counter
done = 0;


while 1
    % Remember the previous iterate so a failed step can be undone.
    fold = f;
    Wold = W;

    %%%
    % Count constraint violations and build the gradient
    dbprint(3, 'Computing gradient');

    %%%
    % Calculate constraint violations
    %
    XiR(:) = 0;
    for R = numConstraints:-1:1
        XiR(R) = LOSS(W, PsiR{R}, Margins(R), 0);
    end

    %%%
    % Find the most active constraint (largest slack), clamp at 0
    %
    [Xi, mgrad] = max(XiR);
    Xi = max(Xi, 0);

    % Mark the active constraint as freshly used (resets its staleness clock).
    PsiClock(mgrad) = 0;

    %%%
    % Evaluate f
    %

    f = C * max(Xi, 0) ...
        + REG(W, K, 0);

    %%%
    % Test for convergence
    %
    objDiff = fold - f;

    % Armijo-style acceptance: require sufficient decrease relative to
    % the step size and squared gradient norm.
    if objDiff > armijo * lambda * (dfdW(:)' * dfdW(:))

        stepcount = stepcount + 1;

        F(stepcount+1) = f;

        % Windowed convergence measure: log of the objective ratio over
        % the last `frame` accepted steps (inf until the window fills).
        sdiff = inf;
        if stepcount >= frame;
            sdiff = log(F(stepcount+1-frame) / f);
        end

        if stepcount >= maxsteps
            done = 1;
            stopcriteria = 'MAXSTEPS';
        elseif sdiff <= convthresh
            done = 1;
            stopcriteria = 'CONVERGENCE';
        else
            %%%
            % If it's positive, add the corresponding gradient
            % (refresh the descent direction from the currently
            % most-active constraint plus the regularizer gradient)
            dfdW = C * LOSS(W, PsiR{mgrad}, Margins(mgrad), 1) ...
                + REG(W, K, 1);
        end

        % Step accepted: grow the learning rate and reset backtracking.
        dbprint(3, 'Lambda up!');
        Wold = W;
        lambda = lambdaup * lambda;
        backcount = 0;

    else
        % Backtracking time, drop the learning rate
        if backcount >= maxbackcount
            % Too many consecutive backtracks: restore the last good
            % iterate and stop.
            W = Wold;
            f = fold;
            done = 1;

            stopcriteria = 'BACKTRACK';
        else
            dbprint(3, 'Lambda down!');
            lambda = lambdadown * lambda;
            backcount = backcount+1;
        end
    end

    %%%
    % Take a gradient step
    %
    W = W - lambda * dfdW;

    %%%
    % Project back onto the feasible set
    %

    dbprint(3, 'Projecting onto feasible set');
    W = FEASIBLE(W);
    if done
        break;
    end;

end

% F(1) is the Inf sentinel; report only the accepted-step objectives.
Diagnostics.f = F(2:(stepcount+1))';
Diagnostics.stop_criteria = stopcriteria;
Diagnostics.num_steps = stepcount;

dbprint(2, '\t%s after %d steps.\n', stopcriteria, stepcount);
end