toolboxes/distance_learning/mlr/util/mlr_solver.m @ 0:e9a9cd732c1e (tip)
author: wolffd, Tue, 10 Feb 2015 15:05:51 +0000 ("first hg version after svn")
function [W, Xi, Diagnostics] = mlr_solver(C, Margins, W, K)
% [W, Xi, D] = mlr_solver(C, Margins, W, K)
%
%   C       >= 0, slack trade-off parameter
%   Margins =  array of mean margin values, one per constraint
%   W       =  initial value for the metric W
%   K       =  data matrix (or kernel matrix)
%
%   W (output) = the learned metric
%   Xi         = 1-slack value (largest constraint violation)
%   D          = diagnostics struct with fields f, num_steps, stop_criteria
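%
% This solver is normally driven by the toolbox's training routine
% (presumably mlr_train), which sets up the globals REG, LOSS, FEASIBLE,
% PsiR and PsiClock before calling it. A minimal usage sketch under that
% assumption, with C, Margins, W0 and K supplied by the caller:
%
%   C = 1;                                        % slack trade-off
%   [W, Xi, D] = mlr_solver(C, Margins, W0, K);
%   fprintf('%s after %d steps\n', D.stop_criteria, D.num_steps);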

global DEBUG REG FEASIBLE LOSS;

%%%
% Initialize the gradient directions for each constraint
%
global PsiR;
global PsiClock;

numConstraints = length(PsiR);

%%%
% Some optimization details

% Armijo sufficient-decrease constant
armijo = 1e-5;

% Initial learning rate
lambda0 = 1e-4;

% Learning-rate multipliers: increase after an accepted step,
% decrease when backtracking (both derived from the golden ratio)
lambdaup = ((1+sqrt(5))/2)^(1/3);
lambdadown = ((1+sqrt(5))/2)^(-1);

% Maximum number of gradient steps to take
maxsteps = 1e4;

% Size of the convergence window (in accepted steps)
frame = 10;

% Convergence threshold on the objective decrease over the window
convthresh = 1e-5;

% Maximum number of consecutive backtracks before giving up
maxbackcount = 100;


Diagnostics = struct( 'f',             [], ...
                      'num_steps',     [], ...
                      'stop_criteria', []);

% Repeat until convergence:
%   1) Calculate f
%   2) Take a gradient step
%   3) Project W back onto PSD
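%
% The objective assembled below is the 1-slack form
%
%   f(W) = REG(W, K) + C * max(0, max_r XiR(r)),
%
% where XiR(r) = LOSS(W, PsiR{r}, Margins(r), 0) is the violation of the
% r-th cutting-plane constraint. The trailing 0/1 argument to LOSS and REG
% selects the function value (0) or its gradient with respect to W (1).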

%%%
% Initialize
%

f = inf;
dfdW = zeros(size(W));
lambda = lambda0;
F = Inf * ones(1,maxsteps+1);
XiR = zeros(numConstraints,1);


stepcount = -1;
backcount = 0;
done = 0;


while 1
    fold = f;
    Wold = W;

    %%%
    % Count constraint violations and build the gradient
    dbprint(3, 'Computing gradient');

    %%%
    % Calculate constraint violations
    %
    XiR(:) = 0;
    for R = numConstraints:-1:1
        XiR(R) = LOSS(W, PsiR{R}, Margins(R), 0);
    end

    %%%
    % Find the most active constraint
    %
    [Xi, mgrad] = max(XiR);
    Xi = max(Xi, 0);

    PsiClock(mgrad) = 0;
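    % Reset the age counter for the active constraint; PsiClock appears to
    % track how long each cutting-plane constraint has gone unused, so that
    % stale constraints can be pruned elsewhere in the toolbox.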

    %%%
    % Evaluate f
    %

    f = C * max(Xi, 0) ...
        + REG(W, K, 0);

    %%%
    % Test for convergence
    %
    objDiff = fold - f;
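    % Armijo-style acceptance test: keep the step only if the objective
    % decreased by at least armijo * lambda * ||dfdW||^2; otherwise shrink
    % the learning rate and backtrack.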

    if objDiff > armijo * lambda * (dfdW(:)' * dfdW(:))

        stepcount = stepcount + 1;

        F(stepcount+1) = f;

        sdiff = inf;
        if stepcount >= frame
            sdiff = log(F(stepcount+1-frame) / f);
        end

        if stepcount >= maxsteps
            done = 1;
            stopcriteria = 'MAXSTEPS';
        elseif sdiff <= convthresh
            done = 1;
            stopcriteria = 'CONVERGENCE';
        else
            %%%
            % Not converged: rebuild the gradient; if the largest violation
            % is positive, the corresponding loss gradient is added to the
            % regularizer gradient
            dfdW = C * LOSS(W, PsiR{mgrad}, Margins(mgrad), 1) ...
                   + REG(W, K, 1);
        end

        dbprint(3, 'Lambda up!');
        Wold = W;
        lambda = lambdaup * lambda;
        backcount = 0;

    else
        % Backtracking time, drop the learning rate
        if backcount >= maxbackcount
            W = Wold;
            f = fold;
            done = 1;

            stopcriteria = 'BACKTRACK';
        else
            dbprint(3, 'Lambda down!');
            lambda = lambdadown * lambda;
            backcount = backcount+1;
        end
    end

    %%%
    % Take a gradient step
    %
    W = W - lambda * dfdW;

    %%%
    % Project back onto the feasible set
    %

    dbprint(3, 'Projecting onto feasible set');
    W = FEASIBLE(W);
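    % FEASIBLE is supplied by the caller; per the outline above it projects
    % W back onto the PSD cone (typically by clipping negative eigenvalues,
    % though the exact projection depends on the chosen regularizer).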
    if done
        break;
    end

end

Diagnostics.f = F(2:(stepcount+1))';
Diagnostics.stop_criteria = stopcriteria;
Diagnostics.num_steps = stepcount;

dbprint(1, '\t%s after %d steps.\n', stopcriteria, stepcount);
end
wolffd@0 177