%DEMGPARD Demonstrate ARD using a Gaussian Process.
%
% Description
% The data consists of three input variables X1, X2 and X3, and one
% target variable T. The target data is generated by computing
% SIN(2*PI*X1) and adding Gaussian noise, x2 is a copy of x1 with a
% higher level of added noise, and x3 is sampled randomly from a
% Gaussian distribution. A Gaussian Process is trained by optimising
% the hyperparameters using the scaled conjugate gradient algorithm.
% The final values of the hyperparameters show that the model
% successfully identifies the importance of each input.
%
% See also
% DEMGP, GP, GPERR, GPFWD, GPGRAD, GPINIT, SCG
%

% Copyright (c) Ian T Nabney (1996-2001)

% Start from a clean command window and fix both legacy generator states
% so that every run of the demo produces the same data set.
clc;
randn('state', 1729);
rand('state', 1729);

% Introductory text, printed one line at a time.
intro = { ...
    'This demonstration illustrates the technique of automatic relevance'; ...
    'determination (ARD) using a Gaussian Process.'; ...
    ' '; ...
    'First, we set up a synthetic data set involving three input variables:'; ...
    'x1 is sampled uniformly from the range (0,1) and has a low level of'; ...
    'added Gaussian noise, x2 is a copy of x1 with a higher level of added'; ...
    'noise, and x3 is sampled randomly from a Gaussian distribution. The'; ...
    'single target variable is given by t = sin(2*pi*x1) with additive'; ...
    'Gaussian noise. Thus x1 is very relevant for determining the target'; ...
    'value, x2 is of some relevance, while x3 should in principle be'; ...
    'irrelevant.'; ...
    ' '; ...
    'Press any key to see a plot of t against x1.'; ...
    };
for k = 1:numel(intro)
    disp(intro{k});
end
pause;

% Synthetic data set: ndata samples of three inputs and one target.
% The generators are called in the same order as before (one rand draw,
% then three randn draws) so the seeded random stream is consumed
% identically.
ndata = 100;

% x1: uniform on (0,1).
x1 = rand(ndata, 1);

% x2: x1 corrupted by Gaussian noise with standard deviation 0.05.
noise_x2 = randn(ndata, 1);
x2 = x1 + 0.05*noise_x2;

% x3: Gaussian with mean 0.5 and standard deviation 0.5, drawn
% independently of x1.
noise_x3 = randn(ndata, 1);
x3 = 0.5 + 0.5*noise_x3;

% Input matrix, one column per input variable.
x = horzcat(x1, x2, x3);

% Target: sin(2*pi*x1) plus Gaussian noise with standard deviation 0.1.
noise_t = randn(ndata, 1);
t = sin(2*pi*x1) + 0.1*noise_t;

% Plot the data and the original function.
% h is kept because the script later reselects this figure to overlay the
% model predictions and finally closes it.
h = figure;
plotvals = linspace(0, 1, 200)';   % dense grid on [0, 1] for the true curve
plot(x1, t, 'ob')
hold on
xlabel('Input x1')
ylabel('Target')
axis([0 1 -1.5 1.5])
% Evaluate the noise-free function directly on the grid. The previous
% [fx, fy] = fplot('sin(2*pi*x)', ...) call relied on the character-
% expression and output-argument forms of fplot, which are deprecated.
fx = plotvals;
fy = sin(2*pi*fx);
plot(fx, fy, '-g', 'LineWidth', 2);
legend('data', 'function');

disp(' ');
disp('Press any key to continue')
pause; clc;

% Explain the model before training it. The second sentence runs over two
% disp calls, so the first of the two must not end in a full stop.
disp('The Gaussian Process has a separate hyperparameter for each input.')
disp('The hyperparameters are trained by error minimisation using the scaled')
disp('conjugate gradient optimiser.')
disp(' ');
disp('Press any key to create and train the model.')
disp(' ');
pause;

% Create a Gaussian Process with three inputs (one per column of x) and
% the 'sqexp' covariance function.
net = gp(3, 'sqexp');

% Initialise the parameters from a prior with mean 0 and variance 0.1,
% and attach the training data.
prior = struct('pr_mean', 0, 'pr_var', 0.1);
net = gpinit(net, x, t, prior);

% Now train to find the hyperparameters.
% NOTE(review): under the foptions convention, options(1) presumably
% switches on progress display and options(14) caps the number of
% iterations -- confirm against the Netlab documentation.
options = foptions;
options([1 14]) = [1 30];

% Optimise with the scaled conjugate gradient ('scg') algorithm.
[net, options] = netopt(net, options, x, t, 'scg');

% The network stores its hyperparameters as logarithms; exponentiate them
% before reporting.
rel = exp(net.inweights);        % one value per input
bias_val = exp(net.bias);
noise_val = exp(net.noise);
vscale_val = exp(net.fpar(1));

fprintf(1, ...
    '\nFinal hyperparameters:\n\n bias:\t\t%10.6f\n noise:\t%10.6f\n', ...
    bias_val, noise_val);
fprintf(1, ' Vertical scale: %8.6f\n', vscale_val);
% fprintf consumes the two elements of rel(1:2) in order.
fprintf(1, ' Input 1:\t%10.6f\n Input 2:\t%10.6f\n', rel(1:2));
fprintf(1, ' Input 3:\t%10.6f\n\n', rel(3));
% Interpret the printed hyperparameters. The wording refers to per-input
% hyperparameters throughout: a Gaussian Process has no weights, so the
% earlier "variance of weights" phrasing did not match the model.
disp(' ');
disp('We see that the inverse lengthscale associated with')
disp('input x1 is large, that of x2 has an intermediate value and that of')
disp('x3 is small.')
disp(' ');
disp('This implies that the Gaussian Process is giving greatest emphasis')
disp('to x1 and least emphasis to x3, with intermediate emphasis on')
disp('x2 in the covariance function.')
disp(' ')
disp('Since the target t is statistically independent of x3 we might')
disp('expect the hyperparameter associated with this input would go to')
disp('zero. However, for any finite data set there may be some chance')
disp('correlation between x3 and t, and so the corresponding hyperparameter remains')
disp('finite.')
disp(' ');
disp('Press any key to continue.')
pause

disp('Finally, we plot the output of the Gaussian Process along the line')
disp('x1 = x2 = x3, together with the true underlying function.')

% Test inputs: 50 points on [0, 1], with all three inputs set equal.
xt = linspace(0, 1, 50);
xtest = [xt', xt', xt'];

% Covariance matrix of the training inputs. gpfwd is called with its
% explicit inverse as the third argument, so inv is used rather than a
% linear solve.
cn = gpcovar(net, x);
cninv = inv(cn);
[ytest, sigsq] = gpfwd(net, xtest, cninv);
sig = sqrt(sigsq);

% Overlay the predictive mean and a +/- 2 standard deviation band on the
% data figure.
figure(h); hold on;
plot(xt, ytest, '-k');
plot(xt, ytest+(2*sig), '-b', xt, ytest-(2*sig), '-b');
axis([0 1 -1.5 1.5]);
% Draw the true function by direct evaluation on a dense grid. This
% replaces fplot('sin(2*pi*x)', ...), whose character-expression form is
% deprecated, and leaves the axis limits set above untouched.
xtrue = linspace(0, 1, 200);
plot(xtrue, sin(2*pi*xtrue), '--m');

disp(' ');
disp('Press any key to end.')
% NOTE: clear all removes every variable from the workspace the script
% runs in, not only those this demo created.
pause; clc; close(h); clear all
