function [x, options, flog, pointlog, scalelog] = scg(f, x, options, gradf, varargin)
%SCG Scaled conjugate gradient optimization.
%
% Description
% [X, OPTIONS] = SCG(F, X, OPTIONS, GRADF) uses a scaled conjugate
% gradients algorithm to find a local minimum of the function F(X)
% whose gradient is given by GRADF(X). Here X is a row vector and F
% returns a scalar value. The point at which F has a local minimum is
% returned as X. The function value at that point is returned in
% OPTIONS(8).
%
% [X, OPTIONS, FLOG, POINTLOG, SCALELOG] = SCG(F, X, OPTIONS, GRADF)
% also returns (optionally) a log of the function values after each
% cycle in FLOG, a log of the points visited in POINTLOG, and a log of
% the scale values in the algorithm in SCALELOG.
%
% SCG(F, X, OPTIONS, GRADF, P1, P2, ...) allows additional arguments to
% be passed to F() and GRADF(). The optional parameters have the
% following interpretations.
%
% OPTIONS(1) is set to 1 to display error values; this also logs the
% error values in the return argument FLOG, and the points visited in
% the return argument POINTLOG. If OPTIONS(1) is set to 0, then only
% warning messages are displayed. If OPTIONS(1) is -1, then nothing is
% displayed.
%
% OPTIONS(2) is a measure of the absolute precision required for the
% value of X at the solution. If the absolute difference between the
% values of X between two successive steps is less than OPTIONS(2),
% then this condition is satisfied.
%
% OPTIONS(3) is a measure of the precision required of the objective
% function at the solution. If the absolute difference between the
% objective function values between two successive steps is less than
% OPTIONS(3), then this condition is satisfied. Both this and the
% previous condition must be satisfied for termination.
%
% OPTIONS(9) is set to 1 to check the user defined gradient function.
%
% OPTIONS(10) returns the total number of function evaluations
% (including those in any line searches).
%
% OPTIONS(11) returns the total number of gradient evaluations.
%
% OPTIONS(14) is the maximum number of iterations; default 100.
%
% See also
% CONJGRAD, QUASINEW
%

% Copyright (c) Ian T Nabney (1996-2001)

% Set up the options.
if length(options) < 18
  error('Options vector too short')
end

if (options(14))
  niters = options(14);
else
  niters = 100;
end

display = options(1);
gradcheck = options(9);

% Set up strings for evaluating function and gradient
f = fcnchk(f, length(varargin));
gradf = fcnchk(gradf, length(varargin));

nparams = length(x);

% Check gradients
if (gradcheck)
  feval('gradchek', x, f, gradf, varargin{:});
end

sigma0 = 1.0e-4;
fold = feval(f, x, varargin{:});   % Initial function value.
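% Initialise the remaining algorithm state: the current gradient, the
% initial (steepest descent) search direction d, and the scale parameter
% beta, which SCG adapts instead of performing an explicit line search.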
fnow = fold;
options(10) = options(10) + 1;             % Increment function evaluation counter.
gradnew = feval(gradf, x, varargin{:});    % Initial gradient.
gradold = gradnew;
options(11) = options(11) + 1;             % Increment gradient evaluation counter.
d = -gradnew;                              % Initial search direction.
success = 1;                               % Force calculation of directional derivs.
nsuccess = 0;                              % nsuccess counts number of successes.
beta = 1.0;                                % Initial scale parameter.
betamin = 1.0e-15;                         % Lower bound on scale.
betamax = 1.0e100;                         % Upper bound on scale.
j = 1;                                     % j counts number of iterations.
if nargout >= 3
  flog(j, :) = fold;
  if nargout >= 4
    pointlog(j, :) = x;
  end
end

% Main optimization loop.
while (j <= niters)

  % Calculate first and second directional derivatives.
  if (success == 1)
    mu = d*gradnew';
    if (mu >= 0)
      d = - gradnew;
      mu = d*gradnew';
    end
    kappa = d*d';
    if kappa < eps
      options(8) = fnow;
      return
    end
    sigma = sigma0/sqrt(kappa);
    xplus = x + sigma*d;
    gplus = feval(gradf, xplus, varargin{:});
    options(11) = options(11) + 1;
    theta = (d*(gplus' - gradnew'))/sigma;
  end

  % Increase effective curvature and evaluate step size alpha.
  delta = theta + beta*kappa;
  if (delta <= 0)
    delta = beta*kappa;
    beta = beta - theta/kappa;
  end
  alpha = - mu/delta;

  % Calculate the comparison ratio.
  xnew = x + alpha*d;
  fnew = feval(f, xnew, varargin{:});
  options(10) = options(10) + 1;
  Delta = 2*(fnew - fold)/(alpha*mu);
  if (Delta >= 0)
    success = 1;
    nsuccess = nsuccess + 1;
    x = xnew;
    fnow = fnew;
  else
    success = 0;
    fnow = fold;
  end

  if nargout >= 3
    % Store relevant variables
    flog(j) = fnow;           % Current function value
    if nargout >= 4
      pointlog(j,:) = x;      % Current position
      if nargout >= 5
        scalelog(j) = beta;   % Current scale parameter
      end
    end
  end
  if display > 0
    fprintf(1, 'Cycle %4d Error %11.6f Scale %e\n', j, fnow, beta);
  end

  if (success == 1)
    % Test for termination

    if (max(abs(alpha*d)) < options(2) & max(abs(fnew-fold)) < options(3))
      options(8) = fnew;
      return;

    else
      % Update variables for new position
      fold = fnew;
      gradold = gradnew;
      gradnew = feval(gradf, x, varargin{:});
      options(11) = options(11) + 1;
      % If the gradient is zero then we are done.
      if (gradnew*gradnew' == 0)
        options(8) = fnew;
        return;
      end
    end
  end

  % Adjust beta according to comparison ratio.
  if (Delta < 0.25)
    beta = min(4.0*beta, betamax);
  end
  if (Delta > 0.75)
    beta = max(0.5*beta, betamin);
  end

  % Update search direction using Polak-Ribiere formula, or re-start
  % in direction of negative gradient after nparams steps.
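  % The update below computes
  %   gamma = (gradold - gradnew)*gradnew' / mu,   d <- gamma*d - gradnew,
  % where mu = d*gradnew' is the directional derivative found earlier in
  % this cycle.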
  if (nsuccess == nparams)
    d = -gradnew;
    nsuccess = 0;
  else
    if (success == 1)
      gamma = (gradold - gradnew)*gradnew'/(mu);
      d = gamma*d - gradnew;
    end
  end
  j = j + 1;
end

% If we get here, then we haven't terminated in the given number of
% iterations.

options(8) = fold;
if (options(1) >= 0)
  disp(maxitmess);
end
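% Example usage (an illustrative sketch, not part of the Netlab sources):
% minimise the quadratic f(x) = x*x' starting from the row vector [2 3].
% The handles quadf and quadgrad are example names chosen here, and the
% options settings are one reasonable choice, not required values.
%
%   quadf = @(x) x*x';          % objective: returns a scalar
%   quadgrad = @(x) 2*x;        % gradient: row vector, same size as x
%   options = zeros(1, 18);     % options vector of the length scg expects
%   options(1) = 1;             % display error values each cycle
%   options(2) = 1e-6;          % precision required in x
%   options(3) = 1e-6;          % precision required in f(x)
%   options(14) = 50;           % maximum number of iterations
%   [xmin, options] = scg(quadf, [2 3], options, quadgrad);
%   fprintf('Minimum at [%g %g], f = %g\n', xmin, options(8));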