% Fit a piece-wise linear regression model.
% Here is the model
%
%  X \
%  | |
%  Q |
%  | /
%  Y
%
% where all arcs point down.
% We condition everything on X, so X is a root node. Q is a softmax, and Y is a linear Gaussian.
% Q is hidden, X and Y are observed.

X = 1;
Q = 2;
Y = 3;
dag = zeros(3,3);
dag(X,[Q Y]) = 1;
dag(Q,Y) = 1;
ns = [1 2 1]; % make X and Y scalars, and have 2 experts
dnodes = [2]; % Q is the only discrete node
onodes = [1 3]; % X and Y are observed
bnet = mk_bnet(dag, ns, 'discrete', dnodes, 'observed', onodes);


w = [-5 5]; % w(:,i) is the normal vector to the i'th decision boundary
b = [0 0]; % b(i) is the offset (bias) to the i'th decision boundary

mu = [0 0]; % mu(i) = intercept of the i'th expert's regression line
sigma = 1;  % shared observation noise std dev for both experts
% Sigma(:,:,i) = observation covariance for expert i (here 1x1, so scalar variance)
Sigma = repmat(sigma*eye(ns(Y)), [ns(Y) ns(Y) ns(Q)]);
W = [-1 1]; % W(i) = slope of the i'th expert's regression line
W2 = reshape(W, [ns(Y) ns(X) ns(Q)]);

bnet.CPD{1} = root_CPD(bnet, 1);
bnet.CPD{2} = softmax_CPD(bnet, 2, w, b);
bnet.CPD{3} = gaussian_CPD(bnet, 3, 'mean', mu, 'cov', Sigma, 'weights', W2);



% Check inference: compare jtree inference against a hand-computed
% mixture-of-experts posterior for a single (x, y) observation pair.

x = 0.1;
ystar = 1;

engine = jtree_inf_engine(bnet);
[engine, loglik] = enter_evidence(engine, {x, [], ystar});
Qpost = marginal_nodes(engine, 2);

% eta(i,:) = softmax (gating) params for expert i
eta = [b' w'];

% theta(i,:) = regression vector for expert i
theta = [mu' W'];

% yhat(i) = E[y | Q=i, x] = prediction of i'th expert
x1 = [1 x]';
yhat = theta * x1;

% gate_prior(i,:) = Pr(Q=i | x)
gate_prior = normalise(exp(eta * x1));

% cond_lik(i) = Pr(y | Q=i, x), a univariate Gaussian density at ystar
cond_lik = (1/(sqrt(2*pi)*sigma)) * exp(-(0.5/sigma^2) * ((ystar - yhat) .* (ystar - yhat)));

% gate_posterior(i,:) = Pr(Q=i | x, y), by Bayes' rule; lik = Pr(y | x)
[gate_posterior, lik] = normalise(gate_prior .* cond_lik);

assert(approxeq(gate_posterior(:), Qpost.T(:)));
assert(approxeq(log(lik), loglik));
