%DEMOLGD1 Demonstrate simple MLP optimisation with on-line gradient descent
%
% Description
% The problem consists of one input variable X and one target variable
% T, with data generated by sampling X at equal intervals and then
% computing the targets as SIN(2*PI*X) plus Gaussian noise. A 2-layer
% network with linear outputs is trained by minimizing a sum-of-squares
% error function using on-line gradient descent.
%
% See also
% DEMMLP1, OLGD
%

% Copyright (c) Ian T Nabney (1996-2001)


% Generate the matrix of inputs x and targets t.

ndata = 20;   % Number of data points.
noise = 0.2;  % Standard deviation of the noise distribution.
x = [0:1/(ndata - 1):1]';
randn('state', 42);
rand('state', 42);
t = sin(2*pi*x) + noise*randn(ndata, 1);

clc
disp('This demonstration illustrates the use of the on-line gradient')
disp('descent algorithm to train a Multi-Layer Perceptron network for')
disp('regression problems. It is intended to illustrate the drawbacks')
disp('of this algorithm compared to more powerful non-linear optimisation')
disp('algorithms, such as conjugate gradients.')
disp(' ')
disp('First we generate the data from a noisy sine function and construct')
disp('the network.')
disp(' ')
disp('Press any key to continue.')
pause

% Set up network parameters.
nin = 1;      % Number of inputs.
nhidden = 3;  % Number of hidden units.
nout = 1;     % Number of outputs.
alpha = 0.01; % Coefficient of weight-decay prior (not used in this demo).

% Create and initialise the network weight vector.
net = mlp(nin, nhidden, nout, 'linear');
% Initialise weights reasonably close to 0 (drawn from a zero-mean
% Gaussian with inverse variance 10).
net = mlpinit(net, 10);

% Set up the vector of options for the optimiser.
options = foptions;
options(1) = 1;     % Display error values.
options(14) = 20;   % Number of training cycles.
options(18) = 0.1;  % Learning rate.
options(17) = 0.4;  % Momentum.
options(5) = 1;     % Randomise pattern order.

clc
disp('Then we set the options for the training algorithm.')
disp(['In the first phase of training, which lasts for ',...
    num2str(options(14)), ' cycles,'])
disp(['the learning rate is ', num2str(options(18)), ...
    ' and the momentum is ', num2str(options(17)), '.'])
disp('The error values are displayed at the end of each pass through the')
disp('entire pattern set.')
disp(' ')
disp('Press any key to continue.')
pause
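% For reference, a minimal sketch of the update that one cycle of on-line
% gradient descent performs (illustration only, not the olgd source: olgd
% adds learning-rate decay, convergence tests and its own pattern
% ordering). It runs on a throwaway copy of the network, so the training
% runs below are unaffected.
sketchnet = net;                 % Work on a copy of the network.
w = mlppak(sketchnet);           % Current weights as a row vector.
dw = zeros(size(w));             % Previous update, for the momentum term.
for n = randperm(ndata)          % One pass in random pattern order.
  g = mlpgrad(sketchnet, x(n,:), t(n,:)); % Gradient for a single pattern.
  dw = options(17)*dw - options(18)*g;    % Momentum plus scaled gradient.
  w = w + dw;
  sketchnet = mlpunpak(sketchnet, w);     % Write the weights back.
end
clear sketchnet w dw g n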
% Train using on-line gradient descent.
[net, options] = olgd(net, options, x, t);

% Now allow the learning rate to decay; the momentum stays at 0.4.
options(2) = 0;     % Termination tolerance on weight change.
options(3) = 0;     % Termination tolerance on error change.
options(17) = 0.4;  % Momentum.
options(5) = 1;     % Randomise pattern order.
options(6) = 1;     % Learning rate decays as 1/t.
options(14) = 200;  % Maximum number of training cycles.
options(18) = 0.1;  % Initial learning rate.

disp(['In the second phase of training, which lasts for up to ',...
    num2str(options(14)), ' cycles,'])
disp(['the learning rate starts at ', num2str(options(18)), ...
    ', decaying as 1/t, and the momentum is ', num2str(options(17)), '.'])
disp(' ')
disp('Press any key to continue.')
pause
[net, options] = olgd(net, options, x, t);

clc
disp('Now we plot the data, the underlying function, and the network')
disp('outputs on a single graph to compare the results.')
disp(' ')
disp('Press any key to continue.')
pause

% Plot the data, the original function, and the trained network function.
plotvals = [0:0.01:1]';
y = mlpfwd(net, plotvals);
fh1 = figure;
plot(x, t, 'ob')
hold on
axis([0 1 -1.5 1.5])
fplot('sin(2*pi*x)', [0 1], '--g')
plot(plotvals, y, '-r')
legend('data', 'function', 'network');
hold off

disp('Note the very poor fit to the data: this should be compared with')
disp('the results obtained in demmlp1.')
disp(' ')
disp('Press any key to exit.')
pause
close(fh1);
clear all;
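% For comparison (see demmlp1): a batch optimiser such as scaled conjugate
% gradients typically fits this problem far better. A minimal sketch using
% Netlab's netopt with the 'scg' algorithm; the option settings here are
% illustrative (left commented out, since the workspace is cleared above):
%
%   options = foptions;
%   options(1) = 1;     % Display error values.
%   options(14) = 100;  % Number of training cycles.
%   [net, options] = netopt(net, options, x, t, 'scg');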