function [x, options, flog, pointlog, scalelog] = scg(f, x, options, gradf, varargin)
%SCG	Scaled conjugate gradient optimization.
%
%	Description
%	[X, OPTIONS] = SCG(F, X, OPTIONS, GRADF) uses a scaled conjugate
%	gradients algorithm to find a local minimum of the function F(X)
%	whose gradient is given by GRADF(X).  Here X is a row vector and F
%	returns a scalar value. The point at which F has a local minimum is
%	returned as X.  The function value at that point is returned in
%	OPTIONS(8).
%
%	[X, OPTIONS, FLOG, POINTLOG, SCALELOG] = SCG(F, X, OPTIONS, GRADF)
%	also returns (optionally) a log of the function values after each
%	cycle in FLOG, a log of the points visited in POINTLOG, and a log of
%	the scale values in the algorithm in SCALELOG.
%
%	SCG(F, X, OPTIONS, GRADF, P1, P2, ...) allows additional arguments to
%	be passed to F() and GRADF().   The optional parameters have the
%	following interpretations.
%
%	OPTIONS(1) is set to 1 to display error values; also logs error
%	values in the return argument ERRLOG, and the points visited in the
%	return argument POINTSLOG.  If OPTIONS(1) is set to 0, then only
%	warning messages are displayed.  If OPTIONS(1) is -1, then nothing is
%	displayed.
%
%	OPTIONS(2) is a measure of the absolute precision required for the
%	value of X at the solution.  If the absolute difference between the
%	values of X between two successive steps is less than OPTIONS(2),
%	then this condition is satisfied.
%
%	OPTIONS(3) is a measure of the precision required of the objective
%	function at the solution.  If the absolute difference between the
%	objective function values between two successive steps is less than
%	OPTIONS(3), then this condition is satisfied. Both this and the
%	previous condition must be satisfied for termination.
%
%	OPTIONS(9) is set to 1 to check the user defined gradient function.
%
%	OPTIONS(10) returns the total number of function evaluations
%	(including those in any line searches).
%
%	OPTIONS(11) returns the total number of gradient evaluations.
%
%	OPTIONS(14) is the maximum number of iterations; default 100.
%
%	See also
%	CONJGRAD, QUASINEW
%

%	Copyright (c) Ian T Nabney (1996-2001)

% Set up the options.
if length(options) < 18
  error('Options vector too short')
end

if(options(14))
  niters = options(14);
else
  niters = 100;   % Default maximum number of iterations.
end

display = options(1);
gradcheck = options(9);

% Set up strings for evaluating function and gradient
f = fcnchk(f, length(varargin));
gradf = fcnchk(gradf, length(varargin));

nparams = length(x);

% Check gradients against finite differences if requested.
if (gradcheck)
  feval('gradchek', x, f, gradf, varargin{:});
end

sigma0 = 1.0e-4;	    % Perturbation size for the finite-difference
                            % estimate of the second directional derivative.
fold = feval(f, x, varargin{:});	% Initial function value.
fnow = fold;
options(10) = options(10) + 1;		% Increment function evaluation counter.
gradnew = feval(gradf, x, varargin{:});	% Initial gradient.
gradold = gradnew;
options(11) = options(11) + 1;		% Increment gradient evaluation counter.
d = -gradnew;				% Initial search direction.
success = 1;				% Force calculation of directional derivs.
nsuccess = 0;				% nsuccess counts number of successes.
beta = 1.0;				% Initial scale parameter.
betamin = 1.0e-15; 			% Lower bound on scale.
betamax = 1.0e100;			% Upper bound on scale.
j = 1;					% j counts number of iterations.
if nargout >= 3
  flog(j, :) = fold;
  % NOTE(fix): the original tested "nargout == 4" here, so a call with five
  % output arguments left POINTLOG (and SCALELOG) unassigned; an early
  % return at the kappa < eps test below then raised an "output argument
  % not assigned" error.  Initialize both logs for all requesting callers.
  if nargout >= 4
    pointlog(j, :) = x;
    if nargout >= 5
      scalelog(j, :) = beta;
    end
  end
end

% Main optimization loop.
while (j <= niters)

  % Calculate first and second directional derivatives.
  if (success == 1)
    mu = d*gradnew';
    if (mu >= 0)
      % Search direction is not a descent direction: restart along the
      % negative gradient.
      d = - gradnew;
      mu = d*gradnew';
    end
    kappa = d*d';
    if kappa < eps
      % Search direction has (numerically) zero length: we are done.
      options(8) = fnow;
      return
    end
    sigma = sigma0/sqrt(kappa);
    xplus = x + sigma*d;
    gplus = feval(gradf, xplus, varargin{:});
    options(11) = options(11) + 1;
    % Finite-difference estimate of d'*H*d (second directional derivative).
    theta = (d*(gplus' - gradnew'))/sigma;
  end

  % Increase effective curvature and evaluate step size alpha.
  delta = theta + beta*kappa;
  if (delta <= 0)
    % Hessian model is not positive definite along d: raise the scale
    % parameter so that it is.
    delta = beta*kappa;
    beta = beta - theta/kappa;
  end
  alpha = - mu/delta;

  % Calculate the comparison ratio between actual and predicted reduction.
  xnew = x + alpha*d;
  fnew = feval(f, xnew, varargin{:});
  options(10) = options(10) + 1;
  Delta = 2*(fnew - fold)/(alpha*mu);
  if (Delta  >= 0)
    success = 1;
    nsuccess = nsuccess + 1;
    x = xnew;
    fnow = fnew;
  else
    success = 0;
    fnow = fold;
  end

  if nargout >= 3
    % Store relevant variables
    flog(j) = fnow;		% Current function value
    if nargout >= 4
      pointlog(j,:) = x;	% Current position
      if nargout >= 5
	scalelog(j) = beta;	% Current scale parameter
      end
    end
  end
  if display > 0
    fprintf(1, 'Cycle %4d  Error %11.6f  Scale %e\n', j, fnow, beta);
  end

  if (success == 1)
    % Test for termination: both the step size and the function decrease
    % must be below their respective tolerances.

    if (max(abs(alpha*d)) < options(2) & max(abs(fnew-fold)) < options(3))
      options(8) = fnew;
      return;

    else
      % Update variables for new position
      fold = fnew;
      gradold = gradnew;
      gradnew = feval(gradf, x, varargin{:});
      options(11) = options(11) + 1;
      % If the gradient is zero then we are done.
      if (gradnew*gradnew' == 0)
	options(8) = fnew;
	return;
      end
    end
  end

  % Adjust beta according to comparison ratio.
  if (Delta < 0.25)
    beta = min(4.0*beta, betamax);
  end
  if (Delta > 0.75)
    beta = max(0.5*beta, betamin);
  end

  % Update search direction using Polak-Ribiere formula, or re-start
  % in direction of negative gradient after nparams steps.
  if (nsuccess == nparams)
    d = -gradnew;
    nsuccess = 0;
  else
    if (success == 1)
      gamma = (gradold - gradnew)*gradnew'/(mu);
      d = gamma*d - gradnew;
    end
  end
  j = j + 1;
end

% If we get here, then we haven't terminated in the given number of
% iterations.

options(8) = fold;
if (options(1) >= 0)
  disp(maxitmess);
end