function demopt1(xinit)
%DEMOPT1 Demonstrate different optimisers on Rosenbrock's function.
%
% Description
% The four general optimisers (quasi-Newton, conjugate gradients,
% scaled conjugate gradients, and gradient descent) are applied to the
% minimisation of Rosenbrock's well known `banana' function. Each
% optimiser is run for at most 100 cycles, with stopping criteria of
% 1.0e-4 on the change in position and 1.0e-8 on the change in function
% value. At the end, the trajectory of each algorithm is shown on a
% contour plot of the function.
%
% DEMOPT1(XINIT) allows the user to specify a 1 x 2 row vector as the
% starting point. The default is the point [-1 1]. Note that the
% contour plot has an x range of [-1.5, 1.5] and a y range of
% [-0.5, 2.1], so it is best to choose a starting point in the same
% region.
%
% See also
% CONJGRAD, GRADDESC, QUASINEW, SCG, ROSEN, ROSEGRAD
%

% Copyright (c) Ian T Nabney (1996-2001)

% Initialise start point for search; fall back to the default if no
% valid 1 x 2 starting point is supplied
if nargin < 1
  xinit = [-1 1];	% Traditional start point
elseif ~isequal(size(xinit), [1 2])
  xinit = [-1 1];
end

% Find out if flops is available (i.e. pre-version 6 Matlab)
v = version;
if (str2num(strtok(v, '.')) >= 6)
  flops_works = logical(0);
else
  flops_works = logical(1);
end

% Set up options
options = foptions;	% Standard options
options(1) = -1;	% Turn off printing completely
options(3) = 1e-8;	% Tolerance in value of function
options(14) = 100;	% Max. 100 iterations of algorithm
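% Note: options(2), the tolerance on the change in x, is left at its
% foptions default of 1e-4; together with options(3) and options(14)
% these give the stopping criteria described above.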

clc
disp('This demonstration compares the performance of four generic')
disp('optimisation routines when finding the minimum of Rosenbrock''s')
disp('function y = 100*(x2-x1^2)^2 + (1-x1)^2.')
disp(' ')
disp('The global minimum of this function is at [1 1].')
disp(['Each algorithm starts at the point [' num2str(xinit(1))...
    ' ' num2str(xinit(2)) '].'])
disp(' ')
disp('Press any key to continue.')
pause
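% For reference, differentiating y = 100*(x2-x1^2)^2 + (1-x1)^2 gives
% the gradient evaluated by ROSEGRAD:
%   dy/dx1 = -400*x1*(x2 - x1^2) - 2*(1 - x1)
%   dy/dx2 =  200*(x2 - x1^2)
% which vanishes at the global minimum [1 1].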

% Generate a contour plot of the function
a = -1.5:.02:1.5;
b = -0.5:.02:2.1;
[A, B] = meshgrid(a, b);
Z = rosen([A(:), B(:)]);
Z = reshape(Z, length(b), length(a));
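% ROSEN accepts an N x 2 matrix of points and returns a column vector of
% function values, so the whole grid is evaluated in one call. A minimal
% sketch of such a vectorised evaluation (illustrative only; see ROSEN
% for the actual Netlab implementation):
%   y = 100*(x(:,2) - x(:,1).^2).^2 + (1 - x(:,1)).^2;
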
% Use contour levels at powers of 2 from 0.5 to 64
l = -1:6;
v = 2.^l;
fh1 = figure;
contour(a, b, Z, v)
title('Contour plot of Rosenbrock''s function')
hold on

clc
disp('We now use quasi-Newton, conjugate gradient, scaled conjugate')
disp('gradient, and gradient descent with line search algorithms')
disp('to find a local minimum of this function. Each algorithm is stopped')
disp('when 100 cycles have elapsed, or if the change in function value')
disp('is less than 1.0e-8 or the change in the input vector is less than')
disp('1.0e-4 in magnitude.')
disp(' ')
disp('Press any key to continue.')
pause
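% All four optimisers below share the same Netlab calling convention:
%   [x, options, errlog, pointlog] = optimiser('rosen', x, options, 'rosegrad');
% where errlog records the function value at each cycle and pointlog the
% points visited, which are used for the trajectory plot at the end.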

clc
x = xinit;
if flops_works
  flops(0)	% Reset flop counter (pre-version 6 Matlab only)
end
[x, options, errlog, pointlog] = quasinew('rosen', x, options, 'rosegrad');
fprintf(1, 'For quasi-Newton method:\n')
fprintf(1, 'Final point is (%f, %f), value is %f\n', x(1), x(2), options(8))
fprintf(1, 'Number of function evaluations is %d\n', options(10))
fprintf(1, 'Number of gradient evaluations is %d\n', options(11))
if flops_works
  opt_flops = flops;
  fprintf(1, 'Number of floating point operations is %d\n', opt_flops)
end
fprintf(1, 'Number of cycles is %d\n', size(pointlog, 1) - 1);
disp(' ')

x = xinit;
if flops_works
  flops(0)
end
[x, options, errlog2, pointlog2] = conjgrad('rosen', x, options, 'rosegrad');
fprintf(1, 'For conjugate gradient method:\n')
fprintf(1, 'Final point is (%f, %f), value is %f\n', x(1), x(2), options(8))
fprintf(1, 'Number of function evaluations is %d\n', options(10))
fprintf(1, 'Number of gradient evaluations is %d\n', options(11))
if flops_works
  opt_flops = flops;
  fprintf(1, 'Number of floating point operations is %d\n', opt_flops)
end
fprintf(1, 'Number of cycles is %d\n', size(pointlog2, 1) - 1);
disp(' ')

x = xinit;
if flops_works
  flops(0)
end
[x, options, errlog3, pointlog3] = scg('rosen', x, options, 'rosegrad');
fprintf(1, 'For scaled conjugate gradient method:\n')
fprintf(1, 'Final point is (%f, %f), value is %f\n', x(1), x(2), options(8))
fprintf(1, 'Number of function evaluations is %d\n', options(10))
fprintf(1, 'Number of gradient evaluations is %d\n', options(11))
if flops_works
  opt_flops = flops;
  fprintf(1, 'Number of floating point operations is %d\n', opt_flops)
end
fprintf(1, 'Number of cycles is %d\n', size(pointlog3, 1) - 1);
disp(' ')

x = xinit;
options(7) = 1;		% Use line minimisation rather than a fixed learning rate
if flops_works
  flops(0)
end
[x, options, errlog4, pointlog4] = graddesc('rosen', x, options, 'rosegrad');
fprintf(1, 'For gradient descent method:\n')
fprintf(1, 'Final point is (%f, %f), value is %f\n', x(1), x(2), options(8))
fprintf(1, 'Number of function evaluations is %d\n', options(10))
fprintf(1, 'Number of gradient evaluations is %d\n', options(11))
if flops_works
  opt_flops = flops;
  fprintf(1, 'Number of floating point operations is %d\n', opt_flops)
end
fprintf(1, 'Number of cycles is %d\n', size(pointlog4, 1) - 1);
disp(' ')
disp('Note that gradient descent does not reach a local minimum in')
disp('100 cycles.')
disp(' ')
disp('On this problem, where the function is cheap to evaluate, the')
disp('computational effort is dominated by the algorithm overhead.')
disp('However, on more complex optimisation problems (such as those')
disp('involving neural networks), computational effort is dominated by')
disp('the number of function and gradient evaluations. Counting these,')
disp('we can rank the algorithms: quasi-Newton (the best), conjugate')
disp('gradient, scaled conjugate gradient, gradient descent (the worst).')
disp(' ')
disp('Press any key to continue.')
pause
clc
disp('We now plot the trajectory of search points for each algorithm')
disp('superimposed on the contour plot.')
disp(' ')
disp('Press any key to continue.')
pause
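% One trajectory per optimiser; the legend entries below match this
% plotting order (gradient descent, SCG, quasi-Newton, conjugate
% gradients)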
plot(pointlog4(:,1), pointlog4(:,2), 'bd', 'MarkerSize', 6)
plot(pointlog3(:,1), pointlog3(:,2), 'mx', 'MarkerSize', 6, 'LineWidth', 2)
plot(pointlog(:,1), pointlog(:,2), 'k.', 'MarkerSize', 18)
plot(pointlog2(:,1), pointlog2(:,2), 'g+', 'MarkerSize', 6, 'LineWidth', 2)
lh = legend('Gradient Descent', 'Scaled Conjugate Gradients', ...
  'Quasi Newton', 'Conjugate Gradients');

hold off

clc
disp('Press any key to end.')
pause
close(fh1);
clear all;