wolffd@0: function [W, Xi, Diagnostics] = rmlr_admm(C, K, Delta, H, Q, lam) wolffd@0: % [W, Xi, D] = mlr_admm(C, Delta, W, X) wolffd@0: % wolffd@0: % C >= 0 Slack trade-off parameter wolffd@0: % K = data matrix (or kernel) wolffd@0: % Delta = array of mean margin values wolffd@0: % H = structural kernel matrix wolffd@0: % Q = kernel-structure interaction vector wolffd@0: % wolffd@0: % W (output) = the learned metric wolffd@0: % Xi = 1-slack wolffd@0: % D = diagnostics wolffd@0: wolffd@0: global DEBUG REG FEASIBLE LOSS INIT STRUCTKERNEL DUALW THRESH; wolffd@0: wolffd@0: %%% wolffd@0: % Initialize the gradient directions for each constraint wolffd@0: % wolffd@0: global PsiR; wolffd@0: wolffd@0: global ADMM_Z ADMM_V ADMM_UW ADMM_UV; wolffd@0: wolffd@0: global ADMM_STEPS; wolffd@0: wolffd@0: global RHO; wolffd@0: wolffd@0: numConstraints = length(PsiR); wolffd@0: wolffd@0: Diagnostics = struct( 'f', [], ... wolffd@0: 'num_steps', [], ... wolffd@0: 'stop_criteria', []); wolffd@0: wolffd@0: wolffd@0: % Convergence settings wolffd@0: if ~isempty(ADMM_STEPS) wolffd@0: MAX_ITER = ADMM_STEPS; wolffd@0: else wolffd@0: MAX_ITER = 10; wolffd@0: end wolffd@0: ABSTOL = 1e-4 * sqrt(numel(ADMM_Z)); wolffd@0: RELTOL = 1e-3; wolffd@0: SCALE_THRESH = 10; wolffd@0: RHO_RESCALE = 2; wolffd@0: stopcriteria= 'MAX STEPS'; wolffd@0: wolffd@0: % Objective function wolffd@0: F = zeros(1,MAX_ITER); wolffd@0: wolffd@0: % how many constraints wolffd@0: wolffd@0: alpha = zeros(numConstraints, 1); wolffd@0: Gamma = zeros(numConstraints, 1); wolffd@0: wolffd@0: ln1 = 0; wolffd@0: ln2 = 0; wolffd@0: wolffd@0: % figure(2) wolffd@0: % hold off wolffd@0: % plot(0) wolffd@0: % delete(abc) wolffd@0: % delete(abc2) wolffd@0: for step = 1:MAX_ITER wolffd@0: % do a w-update wolffd@0: % dubstep needs: wolffd@0: % C <-- static wolffd@0: % RHO <-- static wolffd@0: % H <-- static wolffd@0: % Q <-- static wolffd@0: % Delta <-- static wolffd@0: % Gamma <-- this one's dynamic wolffd@0: wolffd@0: for i = 1:numConstraints wolffd@0: Gamma(i) = STRUCTKERNEL(ADMM_Z-ADMM_UW, PsiR{i}); wolffd@0: end wolffd@0: % d = length(K); wolffd@0: alpha = mlr_dual(C, RHO, H, Q, Delta, Gamma, alpha); wolffd@0: wolffd@0: %%% wolffd@0: % 3) convert back to W wolffd@0: % wolffd@0: W = DUALW(alpha, ADMM_Z, ADMM_UW, RHO, K); wolffd@0: wolffd@0: % figure(1), imagesc(W), drawnow; wolffd@0: wolffd@0: % Update V wolffd@0: ADMM_V = THRESH(ADMM_Z - ADMM_UV, lam/RHO); wolffd@0: wolffd@0: % Update Z wolffd@0: Zold = ADMM_Z; wolffd@0: ADMM_Z = FEASIBLE(0.5* (W + ADMM_V + ADMM_UW + ADMM_UV)); wolffd@0: wolffd@0: % Update residuals wolffd@0: ADMM_UW = ADMM_UW + W - ADMM_Z; wolffd@0: ADMM_UV = ADMM_UV + ADMM_V - ADMM_Z; wolffd@0: wolffd@0: % Compute primal objective wolffd@0: % slack term wolffd@0: Xi = 0; wolffd@0: for R = numConstraints:-1:1 wolffd@0: Xi = max(Xi, LOSS(ADMM_Z, PsiR{R}, Delta(R), 0)); wolffd@0: end wolffd@0: F(step) = C * Xi + REG(W, K, 0) + lam * sum(sqrt(sum(W.^2))); wolffd@0: wolffd@0: % figure(2), loglog(1:step, F(1:step)), xlim([0, MAX_ITER]), drawnow; wolffd@0: % Test for convergence wolffd@0: wolffd@0: %WIP wolffd@0: N1 = norm(ADMM_V(:) + W(:) - 2* ADMM_Z(:)); wolffd@0: N2 = RHO * norm(2* (Zold(:) - ADMM_Z(:))); wolffd@0: wolffd@0: eps_primal = ABSTOL + RELTOL * max(norm(W(:)), norm(ADMM_Z(:))); wolffd@0: eps_dual = ABSTOL + RELTOL * RHO * norm(ADMM_UW(:)); wolffd@0: %end WIP wolffd@0: wolffd@0: wolffd@0: % figure(2), loglog(step + (-1:0), [ln1, N1/eps_primal], 'b'), xlim([0, MAX_ITER]), hold('on'); wolffd@0: % figure(2), loglog(step + (-1:0), [ln2, N2/eps_dual], 'r-'), xlim([0, MAX_ITER]), hold('on'), drawnow; wolffd@0: % ln1 = N1/eps_primal; wolffd@0: % ln2 = N2/eps_dual; wolffd@0: wolffd@0: if N1 < eps_primal && N2 < eps_dual wolffd@0: stopcriteria = 'CONVERGENCE'; wolffd@0: break; wolffd@0: end wolffd@0: wolffd@0: if N1 > SCALE_THRESH * N2 wolffd@0: dbprint(3, sprintf('RHO: %.2e UP %.2e', RHO, RHO * RHO_RESCALE)); wolffd@0: RHO = RHO * RHO_RESCALE; wolffd@0: ADMM_UW = ADMM_UW / RHO_RESCALE; wolffd@0: elseif N2 > SCALE_THRESH * N1 wolffd@0: dbprint(3, sprintf('RHO: %.2e DN %.2e', RHO, RHO / RHO_RESCALE)); wolffd@0: RHO = RHO / RHO_RESCALE; wolffd@0: ADMM_UW = ADMM_UW * RHO_RESCALE; wolffd@0: end wolffd@0: end wolffd@0: % figure(2), hold('off'); wolffd@0: wolffd@0: %%% wolffd@0: % Ensure feasibility wolffd@0: % wolffd@0: W = FEASIBLE(W); wolffd@0: wolffd@0: wolffd@0: %%% wolffd@0: % Compute the slack wolffd@0: % wolffd@0: Xi = 0; wolffd@0: for R = numConstraints:-1:1 wolffd@0: Xi = max(Xi, LOSS(W, PsiR{R}, Delta(R), 0)); wolffd@0: end wolffd@0: wolffd@0: %%% wolffd@0: % Update diagnostics wolffd@0: % wolffd@0: wolffd@0: Diagnostics.f = F(1:step)'; wolffd@0: Diagnostics.stop_criteria = stopcriteria; wolffd@0: Diagnostics.num_steps = step; wolffd@0: wolffd@0: dbprint(1, '\t%s after %d steps.\n', stopcriteria, step); wolffd@0: end wolffd@0: wolffd@0: function alpha = mlr_dual(C, RHO, H, Q, Delta, Gamma, alpha) wolffd@0: wolffd@0: global PsiClock; wolffd@0: wolffd@0: m = length(Delta); wolffd@0: wolffd@0: if nargin < 7 wolffd@0: alpha = zeros(m,1); wolffd@0: end wolffd@0: wolffd@0: %%% wolffd@0: % 1) construct the QP parameters wolffd@0: % wolffd@0: b = RHO * (Gamma - Delta) - Q; wolffd@0: wolffd@0: %%% wolffd@0: % 2) solve the QP wolffd@0: % wolffd@0: alpha = qplcprog(H, b, ones(1, m), C, [], [], 0, []); wolffd@0: wolffd@0: %%% wolffd@0: % 3) update the Psi clock wolffd@0: % wolffd@0: PsiClock(alpha > 0) = 0; wolffd@0: wolffd@0: end