function [x, options, flog, pointlog] = quasinew(f, x, options, gradf, ...
                                    varargin)
%QUASINEW Quasi-Newton optimization.
%
%	Description
%	[X, OPTIONS, FLOG, POINTLOG] = QUASINEW(F, X, OPTIONS, GRADF)  uses a
%	quasi-Newton (BFGS) algorithm to find a local minimum of the function
%	F(X) whose gradient is given by GRADF(X).  Here X is a row vector and
%	F returns a scalar value.   The point at which F has a local minimum
%	is returned as X.  The function value at that point is returned in
%	OPTIONS(8). A log of the function values after each cycle is
%	(optionally) returned in FLOG, and a log of the points visited is
%	(optionally) returned in POINTLOG.
%
%	QUASINEW(F, X, OPTIONS, GRADF, P1, P2, ...) allows  additional
%	arguments to be passed to F() and GRADF().
%
%	OPTIONS must have at least 18 elements; an error is raised otherwise.
%	The optional parameters have the following interpretations.
%
%	OPTIONS(1) is set to 1 to display the function value after each
%	cycle.  If OPTIONS(1) is set to 0, then only warning messages are
%	displayed.  If OPTIONS(1) is -1, then nothing is displayed.  (The
%	logs FLOG and POINTLOG are filled in whenever the corresponding
%	output arguments are requested, regardless of OPTIONS(1).)
%
%	OPTIONS(2) is a measure of the absolute precision required for the
%	value of X at the solution.  If the absolute difference between the
%	values of X between two successive steps is less than OPTIONS(2),
%	then this condition is satisfied.
%
%	OPTIONS(3) is a measure of the precision required of the objective
%	function at the solution.  If the absolute difference between the
%	objective function values between two successive steps is less than
%	OPTIONS(3), then this condition is satisfied. Both this and the
%	previous condition must be satisfied for termination.
%
%	OPTIONS(8) returns the value of F at the returned point X.
%
%	OPTIONS(9) should be set to 1 to check the user defined gradient
%	function.
%
%	OPTIONS(10) returns the total number of function evaluations
%	(including those in any line searches).
%
%	OPTIONS(11) returns the total number of gradient evaluations.
%
%	OPTIONS(14) is the maximum number of iterations; default 100.
%
%	OPTIONS(15) is the precision in parameter space of the line search;
%	default 1E-2.
%
%	See also
%	CONJGRAD, GRADDESC, LINEMIN, MINBRACK, SCG
%

%	Copyright (c) Ian T Nabney (1996-2001)

%  Set up the options.
if length(options) < 18
  error('Options vector too short')
end

% A zero entry means "use the default number of iterations".
if (options(14))
  niters = options(14);
else
  niters = 100;
end

% Set up options for line search
line_options = foptions;
% Don't need a very precise line search
if options(15) > 0
  line_options(2) = options(15);
else
  line_options(2) = 1e-2;  % Default
end
% Minimal fractional change in f from Newton step: otherwise do a line search
min_frac_change = 1e-4;

% Verbosity level: 1 = per-cycle output, 0 = warnings only, -1 = silent.
% (Deliberately not called "display", which would shadow the builtin.)
verbosity = options(1);

% Next two lines allow quasinew to work with expression strings
f = fcnchk(f, length(varargin));
gradf = fcnchk(gradf, length(varargin));

% Check gradients
if (options(9))
  feval('gradchek', x, f, gradf, varargin{:});
end

nparams = length(x);
fnew = feval(f, x, varargin{:});
options(10) = options(10) + 1;
gradnew = feval(gradf, x, varargin{:});
options(11) = options(11) + 1;
p = -gradnew;		% Initial search direction: steepest descent
hessinv = eye(nparams); % Initialise inverse Hessian to be identity matrix
j = 1;
if nargout >= 3
  flog(j, :) = fnew;
  if nargout == 4
    pointlog(j, :) = x;
  end
end

while (j <= niters)

  xold = x;
  fold = fnew;
  gradold = gradnew;

  % The rest of the loop depends on the search direction being downhill.
  % This must be checked BEFORE the trial step is taken: otherwise the
  % trial point lies along the unflipped direction while p (used below as
  % the actual step x - xold in the BFGS update) has the opposite sign.
  if (gradnew*p' >= 0)
    p = -p;
    if verbosity >= 0
      warning('search direction uphill in quasinew');
    end
  end

  % Try the full quasi-Newton step first.
  x = xold + p;
  fnew = feval(f, x, varargin{:});
  options(10) = options(10) + 1;

  % Does the Newton step reduce the function value sufficiently?
  if (fnew >= fold + min_frac_change * (gradnew*p'))
    % No it doesn't
    % Minimize along current search direction: must be less than Newton step
    [lmin, line_options] = feval('linemin', f, xold, p, fold, ...
      line_options, varargin{:});
    options(10) = options(10) + line_options(10);
    options(11) = options(11) + line_options(11);
    % Correct x and fnew to be the actual search point we have found
    x = xold + lmin * p;
    p = x - xold;	% From here on p is the step actually taken
    fnew = line_options(8);
  end

  % Check for termination: both the step in x and the change in f must be
  % below their respective tolerances.
  if (max(abs(x - xold)) < options(2) & max(abs(fnew - fold)) < options(3))
    options(8) = fnew;
    return;
  end
  gradnew = feval(gradf, x, varargin{:});
  options(11) = options(11) + 1;
  v = gradnew - gradold;	% Change in gradient over the step
  vdotp = v*p';			% Curvature along the step

  % Skip update to inverse Hessian if the curvature v*p' is not
  % sufficiently positive.  The test is on the signed value (not its
  % square): a negative curvature would destroy positive definiteness of
  % hessinv and lead to uphill search directions.
  if (vdotp > sqrt(eps*sum(v.^2)*sum(p.^2)))
    Gv = (hessinv*v')';
    vGv = sum(v.*Gv);
    u = p./vdotp - Gv./vGv;
    % Use BFGS update rule
    hessinv = hessinv + (p'*p)/vdotp - (Gv'*Gv)/vGv + vGv*(u'*u);
  end

  % New search direction from the updated inverse Hessian
  p = -(hessinv * gradnew')';

  if (verbosity > 0)
    fprintf(1, 'Cycle %4d  Function %11.6f\n', j, fnew);
  end

  j = j + 1;
  if nargout >= 3
    flog(j, :) = fnew;
    if nargout == 4
      pointlog(j, :) = x;
    end
  end
end

% If we get here, then we haven't terminated in the given number of
% iterations.  Report the function value at the point actually returned
% in x (fnew), not the value at the previous point (fold).

options(8) = fnew;
if (verbosity >= 0)
  disp(maxitmess);
end