% Check that softmax works with a simple classification demo.
% Based on netlab's demglm2
% X -> Q where X is an input node, and Q is a softmax

rand('state', 0);
randn('state', 0);

% Check inference

input_dim = 2;
num_classes = 3;
IRLS_iter = 3;

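% Build a netlab GLM with a softmax output layer; its initial weights and
% bias will seed the BNT softmax CPD below.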
net = glm(input_dim, num_classes, 'softmax');

dag = zeros(2);
dag(1,2) = 1;
discrete_nodes = [2];
bnet = mk_bnet(dag, [input_dim num_classes], 'discrete', discrete_nodes, 'observed', 1);
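% Node 1 is a root CPD holding the observed input vector; node 2 is a
% softmax CPD initialised from the netlab glm's weights and bias.
% clamped = 0 leaves the parameters free to be updated by learning, and
% IRLS_iter bounds the number of IRLS iterations used in the M step.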
bnet.CPD{1} = root_CPD(bnet, 1);
clamped = 0;
bnet.CPD{2} = softmax_CPD(bnet, 2, net.w1, net.b1, clamped, IRLS_iter);

engine = jtree_inf_engine(bnet);

x = rand(1, input_dim);
q = glmfwd(net, x);

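% The exact marginal on the class node should match the class posteriors
% q predicted by netlab's glmfwd.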
[engine, ll] = enter_evidence(engine, {x, []});
m = marginal_nodes(engine, 2);
assert(approxeq(m.T(:), q(:)));


% Check learning
% We use EM, but in fact there is no hidden data.
% The M step will call IRLS on the softmax node.

% Generate data from three classes in 2d
input_dim = 2;
num_classes = 3;

% Fix seeds for reproducible results
randn('state', 42);
rand('state', 42);

ndata = 10;
% Generate mixture of three Gaussians in two-dimensional space
data = randn(ndata, input_dim);
targets = zeros(ndata, 3);

% Priors for the clusters
prior(1) = 0.4;
prior(2) = 0.3;
prior(3) = 0.3;

% Cluster centres
c = [2.0, 2.0; 0.0, 0.0; 1, -1];

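% ndata1 and ndata2 are the cumulative class boundaries implied by the
% priors; with ndata = 10 they come out as 4 and 7, so the index ranges
% below are integral.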
ndata1 = prior(1)*ndata;
ndata2 = (prior(1) + prior(2))*ndata;
% Put first cluster at (2, 2)
data(1:ndata1, 1) = data(1:ndata1, 1) * 0.5 + c(1,1);
data(1:ndata1, 2) = data(1:ndata1, 2) * 0.5 + c(1,2);
targets(1:ndata1, 1) = 1;

% Leave second cluster at (0,0)
data((ndata1 + 1):ndata2, :) = ...
    data((ndata1 + 1):ndata2, :);
targets((ndata1+1):ndata2, 2) = 1;

% Put third cluster at (1, -1)
data((ndata2+1):ndata, 1) = data((ndata2+1):ndata,1) *0.6 + c(3, 1);
data((ndata2+1):ndata, 2) = data((ndata2+1):ndata,2) *0.6 + c(3, 2);
targets((ndata2+1):ndata, 3) = 1;


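% Debugging stub (disabled): train on just the single example used in the
% inference check above.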
if 0
  ndata = 1;
  data = x;
  targets = [1 0 0];
end

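% Fit the equivalent netlab glm directly; this provides the reference
% solution for the EM/IRLS results below.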
options = foptions;
options(1) = -1; % verbose
options(14) = IRLS_iter;
[net2, options2] = glmtrain(net, options, data, targets);
net2.ll = options2(8); % type 'help foptions' for details

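% Convert the data to BNT's evidence format: one column per training case,
% with the input vector in row 1 and the class index in row 2.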
cases = cell(2, ndata);
for l=1:ndata
  q = find(targets(l,:)==1);
  x = data(l,:);
  cases{1,l} = x(:);
  cases{2,l} = q;
end

max_iter = 2; % we have complete observability, so 1 iter is enough
[bnet2, ll2] = learn_params_em(engine, cases, max_iter);

w = get_field(bnet2.CPD{2},'weights');
b = get_field(bnet2.CPD{2},'offset')';

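% Print the learned BNT parameters next to the netlab fit for comparison;
% they should roughly agree.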
w
net2.w1

b
net2.b1

% assert(approxeq(net2.ll, ll2)); % glmtrain returns ll after final M step, learn_params before
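
% Optional extra check (a sketch, not in the original demo): the learned
% network's posterior on a training input should be close to the netlab
% model's prediction. engine2 and m2 below are new names used only here.
%
%   engine2 = jtree_inf_engine(bnet2);
%   engine2 = enter_evidence(engine2, {data(1,:)', []});
%   m2 = marginal_nodes(engine2, 2);
%   approxeq(m2.T(:), glmfwd(net2, data(1,:))')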