annotate toolboxes/distance_learning/mlr/util/mlr_solver.m @ 0:cc4b1211e677 tip

initial commit to HG from changeset 646 (e263d8a21543); added a further path and a safer "camirversion.m"
author Daniel Wolff
date Fri, 19 Aug 2016 13:07:06 +0200
function [W, Xi, Diagnostics] = mlr_solver(C, Margins, W, K)
% [W, Xi, D] = mlr_solver(C, Margins, W, K)
%
%   C       >= 0    slack trade-off parameter
%   Margins =       array of mean margin values
%   W       =       initial value for W
%   K       =       data matrix (or kernel)
%
%   W (output) =    the learned metric
%   Xi         =    1-slack
%   D          =    diagnostics
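%
% Example (a minimal sketch with hypothetical values; this solver is
% normally invoked by the toolbox trainer, which first initializes the
% globals REG, FEASIBLE, LOSS, PsiR, and PsiClock):
%
%   n = size(K, 1);                                  % K is n-by-n
%   [W, Xi, D] = mlr_solver(10, Margins, eye(n), K);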

global DEBUG REG FEASIBLE LOSS;

%%%
% Initialize the gradient directions for each constraint
%
global PsiR;
global PsiClock;

numConstraints = length(PsiR);

%%%
% Some optimization details

% Armijo rule constant (sufficient-decrease factor)
armijo = 1e-5;

% Initial learning rate
lambda0 = 1e-6;

% Step-size increase/decrease factors (up on accepted steps, down on backtracks)
lambdaup = ((1+sqrt(5))/2)^(1/3);
lambdadown = ((1+sqrt(5))/2)^(-1);
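% Both factors derive from the golden ratio phi = (1+sqrt(5))/2:
% lambdaup = phi^(1/3) ~= 1.17 and lambdadown = 1/phi ~= 0.618; since
% lambdaup^3 * lambdadown = 1, three accepted steps exactly cancel one
% backtrack.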

% Maximum steps to take
maxsteps = 1e4;

% Size of convergence window
frame = 10;

% Convergence threshold
convthresh = 1e-5;

% Maximum number of backtracks
maxbackcount = 100;


Diagnostics = struct( 'f',             [], ...
                      'num_steps',     [], ...
                      'stop_criteria', []);

% Repeat until convergence:
%   1) Calculate f
%   2) Take a gradient step
%   3) Project W back onto the PSD cone
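%
% In other words: projected subgradient descent on the 1-slack objective
%   f(W) = C * max(0, max_R loss_R(W)) + REG(W),
% with a multiplicative step-size schedule in place of a line search.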

%%%
% Initialize
%

f = inf;
dfdW = zeros(size(W));
lambda = lambda0;
F = Inf * ones(1, maxsteps+1);
XiR = zeros(numConstraints, 1);


stepcount = -1;
backcount = 0;
done = 0;


while 1
    fold = f;
    Wold = W;

    %%%
    % Count constraint violations and build the gradient
    dbprint(3, 'Computing gradient');

    %%%
    % Calculate constraint violations
    %
    XiR(:) = 0;
    for R = numConstraints:-1:1
        XiR(R) = LOSS(W, PsiR{R}, Margins(R), 0);
    end

    %%%
    % Find the most active constraint
    %
    [Xi, mgrad] = max(XiR);
    Xi = max(Xi, 0);

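    % Mark the winning constraint as fresh: PsiClock presumably tracks
    % staleness so the caller's cutting-plane cache can prune constraints
    % that stay inactive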
    PsiClock(mgrad) = 0;

    %%%
    % Evaluate f
    %

    f = C * max(Xi, 0) ...
        + REG(W, K, 0);

    %%%
    % Test for convergence
    %
    objDiff = fold - f;

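    % Armijo sufficient-decrease test: accept the step only if the
    % objective fell by more than armijo * lambda * ||dfdW||^2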
    if objDiff > armijo * lambda * (dfdW(:)' * dfdW(:))

        stepcount = stepcount + 1;

        F(stepcount+1) = f;

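        % sdiff is the log-scale progress over the last `frame` accepted
        % steps; a small value means f has effectively plateaued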
        sdiff = inf;
        if stepcount >= frame
            sdiff = log(F(stepcount+1-frame) / f);
        end

        if stepcount >= maxsteps
            done = 1;
            stopcriteria = 'MAXSTEPS';
        elseif sdiff <= convthresh
            done = 1;
            stopcriteria = 'CONVERGENCE';
        else
            %%%
            % Not converged: rebuild the subgradient from the most
            % violated constraint (the trailing 1 requests gradients
            % rather than values from LOSS and REG)
            dfdW = C * LOSS(W, PsiR{mgrad}, Margins(mgrad), 1) ...
                   + REG(W, K, 1);
        end

        dbprint(3, 'Lambda up!');
        Wold = W;
        lambda = lambdaup * lambda;
        backcount = 0;

    else
        % Backtracking: insufficient decrease, so drop the learning rate
        if backcount >= maxbackcount
            W = Wold;
            f = fold;
            done = 1;

            stopcriteria = 'BACKTRACK';
        else
            dbprint(3, 'Lambda down!');
            lambda = lambdadown * lambda;
            backcount = backcount + 1;
        end
    end

    %%%
    % Take a gradient step
    %
    W = W - lambda * dfdW;

    %%%
    % Project back onto the feasible set
    %

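    % FEASIBLE is supplied by the caller; for MLR it is typically a
    % projection onto the PSD cone (e.g. clipping negative eigenvalues)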
    dbprint(3, 'Projecting onto feasible set');
    W = FEASIBLE(W);
    if done
        break;
    end

end

Diagnostics.f = F(2:(stepcount+1))';
Diagnostics.stop_criteria = stopcriteria;
Diagnostics.num_steps = stepcount;

dbprint(2, '\t%s after %d steps.\n', stopcriteria, stepcount);
end
Daniel@0 177