% Try to learn a 3 level HHMM similar to mk_square_hhmm
% from synthetic discrete sequences.
%
% The training data is hand-built: a "square" drawn with the 8 observation
% symbols L l U u R r D d (clockwise) and its reverse (counter-clockwise).
% We compare the likelihood of the learned model against the "true"
% generating model to see whether unsupervised EM recovers the truth.

% Experiment switches
discrete_obs = 1;   % use a discrete observation node
supervised = 0;     % if 1, Q1/Q2 are observed during training
obs_finalF2 = 0;    % if 1, clamp F2=finished at the end of each sequence

% Fix the RNG so the EM run (and hence the discussion below) is reproducible.
seed = 1;
rand('state', seed);
randn('state', seed);

% Random initial parameters (2nd arg 0 = do not use the "true" params).
bnet_init = mk_square_hhmm(discrete_obs, 0);

% Node numbering within one slice of the DBN.
ss = 6;
Q1 = 1; Q2 = 2; Q3 = 3; F3 = 4; F2 = 5; Onode = 6;
Qnodes = [Q1 Q2 Q3]; Fnodes = [F2 F3];

if supervised
  bnet_init.observed = [Q1 Q2 Onode];
else
  bnet_init.observed = [Onode];
end

if obs_finalF2
  engine_init = jtree_dbn_inf_engine(bnet_init);
  % can't use ndx version because sometimes F2 is hidden, sometimes observed
  error('can''t observe F when learning')
  % It is not possible to observe F2 if we learn
  % because the update_ess method for hhmmF_CPD and hhmmQ_CPD assume
  % the F nodes are always hidden (for speed).
  % However, for generating, we might want to set the final F2=true
  % to force all subroutines to finish.
else
  if supervised
    engine_init = jtree_ndx_dbn_inf_engine(bnet_init);
  else
    engine_init = hmm_inf_engine(bnet_init);
  end
end

% Generate some synthetic data (easier to debug).
% Map each observation symbol to its integer code.
chars = ['L', 'l', 'U', 'u', 'R', 'r', 'D', 'd'];
L=find(chars=='L'); l=find(chars=='l');
U=find(chars=='U'); u=find(chars=='u');
R=find(chars=='R'); r=find(chars=='r');
D=find(chars=='D'); d=find(chars=='d');

cases = {};

% Sequence 1: clockwise square (used twice, as cases 1 and 3).
T = 8;
ev = cell(ss, T);
ev(Onode,:) = num2cell([L l U u R r D d]);
if supervised
  ev(Q1,:) = num2cell(1*ones(1,T));
  ev(Q2,:) = num2cell( [1 1 2 2 3 3 4 4]);
end
cases{1} = ev;
cases{3} = ev;

% Sequence 2: counter-clockwise square (used twice, as cases 2 and 4).
T = 8;
ev = cell(ss, T);
% we start with R then r, even though we are running the model 'backwards'!
ev(Onode,:) = num2cell([R r U u L l D d]);

if supervised
  ev(Q1,:) = num2cell(2*ones(1,T));
  ev(Q2,:) = num2cell( [3 3 2 2 1 1 4 4]);
end

cases{2} = ev;
cases{4} = ev;

if obs_finalF2
  for i=1:length(cases)
    T = size(cases{i},2);
    cases{i}(F2,T)={2}; % force F2 to be finished at end of seq
  end
end


% startprob should be shared for t=1:T,
% but in the DBN it is shared for t=2:T,
% so we train using a single long sequence.
long_seq = cat(2, cases{:});
[bnet_learned, LL, engine_learned] = ...
    learn_params_dbn_em(engine_init, {long_seq}, 'max_iter', 200);

% Figure out which subsequence each model is responsible for
% by computing the most probable explanation (Viterbi parse).
mpe = calc_mpe_dbn(engine_learned, long_seq);
pretty_print_hhmm_parse(mpe, Qnodes, Fnodes, Onode, chars);


% The "true" segmentation of the training sequence is
% Q1: 1 2
% O: L l U u R r D d | R r U u L l D d | etc.
%
% When we learn in a supervised fashion, we recover the "truth".

% When we learn in an unsupervised fashion with seed=1, we get
% Q1: 2 1
% O: L l U u R r D d R r | U u L l D d | etc.
%
% This means for model 1:
% starts in state 2
% transitions 2->1, 1->4, 4->e, 3->2
%
% For model 2,
% starts in state 1
% transitions 1->2, 2->3, 3->4 or e, 4->3

% Examine the learned params.
% Column 2 of eclass gives the equivalence classes for slice-2 CPDs,
% which hold the (tied) transition parameters.
eclass = bnet_learned.equiv_class;
CPDQ1=struct(bnet_learned.CPD{eclass(Q1,2)});
CPDQ2=struct(bnet_learned.CPD{eclass(Q2,2)});
CPDQ3=struct(bnet_learned.CPD{eclass(Q3,2)});
CPDF2=struct(bnet_learned.CPD{eclass(F2,1)});
CPDF3=struct(bnet_learned.CPD{eclass(F3,1)});
CPDO=struct(bnet_learned.CPD{eclass(Onode,1)});

% Fold the termination probabilities into an explicit end state
% and display the Q2 transition matrix under each Q1 context.
A_learned =add_hhmm_end_state(CPDQ2.transprob, CPDF2.termprob(:,:,2));
squeeze(A_learned(:,1,:))
squeeze(A_learned(:,2,:))


% Does the "true" model have higher likelihood than the learned one?
% i.e., Does the unsupervised method learn the wrong model because
% we have the wrong cost fn, or because of local minima?

bnet_true = mk_square_hhmm(discrete_obs,1);

% examine the params
eclass = bnet_learned.equiv_class;
CPDQ1_true=struct(bnet_true.CPD{eclass(Q1,2)});
CPDQ2_true=struct(bnet_true.CPD{eclass(Q2,2)});
CPDQ3_true=struct(bnet_true.CPD{eclass(Q3,2)});
CPDF2_true=struct(bnet_true.CPD{eclass(F2,1)});
CPDF3_true=struct(bnet_true.CPD{eclass(F3,1)});

A_true =add_hhmm_end_state(CPDQ2_true.transprob, CPDF2_true.termprob(:,:,2));
squeeze(A_true(:,1,:))


if supervised
  engine_true = jtree_ndx_dbn_inf_engine(bnet_true);
else
  engine_true = hmm_inf_engine(bnet_true);
end

% Compare log-likelihoods on a single held-out-style sequence first.
%[engine_learned, ll_learned] = enter_evidence(engine_learned, long_seq);
%[engine_true, ll_true] = enter_evidence(engine_true, long_seq);
[engine_learned, ll_learned] = enter_evidence(engine_learned, cases{2});
[engine_true, ll_true] = enter_evidence(engine_true, cases{2});
ll_learned
ll_true


% Remove concatenation artefacts: score each sequence separately
% and sum, instead of scoring the one long concatenated sequence.
ll_learned = 0;
ll_true = 0;
for m=1:length(cases)
  [engine_learned, ll_learned_tmp] = enter_evidence(engine_learned, cases{m});
  [engine_true, ll_true_tmp] = enter_evidence(engine_true, cases{m});
  ll_learned = ll_learned + ll_learned_tmp;
  ll_true = ll_true + ll_true_tmp;
end
ll_learned
ll_true

% In both cases, ll_learned >> ll_true
% which shows we are using the wrong cost function!
|