% Fit a piece-wise linear regression model.
% Here is the model:
%
%  X \
%  | |
%  Q |
%  | /
%  Y
%
% where all arcs point down.
% We condition everything on X, so X is a root node. Q is a softmax, and Y is a linear Gaussian.
% Q is hidden; X and Y are observed.

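% In other words, this is a "mixture of experts" architecture: the softmax
% gate Q selects one of two linear experts as a function of the input X, and
% each expert predicts Y as a linear-Gaussian function of X, so the overall
% regression function is piece-wise linear.
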
X = 1;
Q = 2;
Y = 3;
dag = zeros(3,3);
dag(X,[Q Y]) = 1;
dag(Q,Y) = 1;
ns = [1 2 1]; % make X and Y scalars, and have 2 experts
dnodes = [2];
onodes = [1 3];
bnet = mk_bnet(dag, ns, 'discrete', dnodes, 'observed', onodes);

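% ns(Q) = 2 is the number of experts (mixture components); raising it gives
% the fit more linear pieces. Q is the only discrete node, and X and Y are
% declared observed up front so the inference engine can exploit this.
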
IRLS_iter = 10; % max number of IRLS (Newton) iterations per M step for the softmax CPD
clamped = 0;    % 0 => the softmax parameters get updated during learning

bnet.CPD{1} = root_CPD(bnet, 1); % X is a root we always condition on, so it has no parameters

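% Because the root X carries no distribution of its own, the likelihood that
% EM maximizes below is effectively the conditional likelihood P(Y|X): the
% gate and the experts are trained discriminatively.
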
if 0
  % start with good initial params
  w = [-5 5]; % w(:,i) is the normal vector to the i'th decision boundary
  b = [0 0];  % b(i) is the offset (bias) to the i'th decision boundary

  mu = [0 0];
  sigma = 1;
  Sigma = repmat(sigma*eye(ns(Y)), [ns(Y) ns(Y) ns(Q)]);
  W = [-1 1];
  W2 = reshape(W, [ns(Y) ns(X) ns(Q)]);

  bnet.CPD{2} = softmax_CPD(bnet, 2, 'weights', w, 'offset', b, 'clamped', clamped, 'max_iter', IRLS_iter);
  bnet.CPD{3} = gaussian_CPD(bnet, 3, 'mean', mu, 'cov', Sigma, 'weights', W2);
else
  % start with random initial params
  rand('state', 0);
  randn('state', 0);
  bnet.CPD{2} = softmax_CPD(bnet, 2, 'clamped', clamped, 'max_iter', IRLS_iter);
  bnet.CPD{3} = gaussian_CPD(bnet, 3);
end

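% For reference, the two CPDs define (roughly, in the notation above):
%   P(Q=i | X=x)  = exp(w(:,i)'*x + b(i)) / sum_j exp(w(:,j)'*x + b(j))
%   p(y | x, Q=i) = N(y; mu(:,i) + W2(:,:,i)*x, Sigma(:,:,i))
% i.e. a softmax gate over linear decision boundaries, and one linear
% regression per expert.
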
load('/examples/static/Misc/mixexp_data.txt', '-ascii'); % creates the matrix variable mixexp_data
% Just use 1/10th of the data, to speed things up
data = mixexp_data(1:10:end, :);
%data = mixexp_data;

%plot(data(:,1), data(:,2), '.')

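% Each row of mixexp_data is one training case: column 1 holds the input x
% and column 2 the target y (see how the columns are mapped onto nodes X and
% Y when the evidence is assembled below).
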
s = struct(bnet.CPD{2}); % violate object privacy
%eta0 = [s.glim.b1; s.glim.w1]';
eta0 = [s.glim{1}.b1; s.glim{1}.w1]';
s = struct(bnet.CPD{3}); % violate object privacy
W = reshape(s.weights, [1 2]);
theta0 = [s.mean; W]';

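% Row i of eta0 holds the gate's [offset, weight] for expert i, and row i of
% theta0 holds expert i's [mean, regression weight]; this is the layout taken
% by the (commented-out) mixexp_plot calls.
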
%figure(1)
%mixexp_plot(theta0, eta0, data);
%suptitle('before learning')

ncases = size(data, 1);
cases = cell(3, ncases);
cases([1 3], :) = num2cell(data'); % fill in the observed nodes X and Y; leave Q empty (hidden)

engine = jtree_inf_engine(bnet);

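% BNT evidence format: cases is a cell array with one row per node and one
% column per training case; an empty cell means that node is hidden in that
% case. Here row 2 (the gate Q) is always empty.
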
% log lik before learning
ll = 0;
for l=1:ncases
  ev = cases(:,l);
  [engine, loglik] = enter_evidence(engine, ev);
  ll = ll + loglik;
end

% do learning
max_iter = 5;
[bnet2, LL2] = learn_params_em(engine, cases, max_iter);

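% learn_params_em alternates an E step (using the junction tree engine to
% infer the hidden gate Q for every case) with an M step (re-fitting the
% softmax and Gaussian CPDs; the softmax update is where IRLS_iter matters).
% LL2 records the log likelihood after each EM iteration.
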
s = struct(bnet2.CPD{2});
%eta2 = [s.glim.b1; s.glim.w1]';
eta2 = [s.glim{1}.b1; s.glim{1}.w1]';
s = struct(bnet2.CPD{3});
W = reshape(s.weights, [1 2]);
theta2 = [s.mean; W]';

%figure(2)
%mixexp_plot(theta2, eta2, data);
%suptitle('after learning')

fprintf('mixexp2: loglik before learning %f, after %d iters %f\n', ll, length(LL2), LL2(end));