% learn_square_hhmm_discrete.m
% FullBNT-1.0.7/bnt/examples/dynamic/HHMM/Square
% Try to learn a 3-level HHMM similar to mk_square_hhmm
% from synthetic discrete sequences

discrete_obs = 1;
supervised = 0;
obs_finalF2 = 0;

seed = 1;
rand('state', seed);
randn('state', seed);

bnet_init = mk_square_hhmm(discrete_obs, 0);

ss = 6;
Q1 = 1; Q2 = 2; Q3 = 3; F3 = 4; F2 = 5; Onode = 6;
Qnodes = [Q1 Q2 Q3]; Fnodes = [F2 F3];

if supervised
  bnet_init.observed = [Q1 Q2 Onode];
else
  bnet_init.observed = [Onode];
end

if obs_finalF2
  engine_init = jtree_dbn_inf_engine(bnet_init);
  % can't use the ndx version because sometimes F2 is hidden, sometimes observed
  error('can''t observe F when learning')
  % It is not possible to observe F2 if we learn,
  % because the update_ess methods for hhmmF_CPD and hhmmQ_CPD assume
  % the F nodes are always hidden (for speed).
  % However, for generating, we might want to set the final F2=true
  % to force all subroutines to finish.
else
  if supervised
    engine_init = jtree_ndx_dbn_inf_engine(bnet_init);
  else
    engine_init = hmm_inf_engine(bnet_init);
  end
end
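% Engine choice, briefly: jtree_ndx_dbn_inf_engine is the index-optimized
% junction-tree engine, usable in the supervised case because the set of
% observed nodes is then fixed; hmm_inf_engine does exact inference by
% flattening the DBN into an HMM over the joint state of all hidden discrete
% nodes, which is cheap for a model this small.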

% generate some synthetic data (easier to debug)
chars = ['L', 'l', 'U', 'u', 'R', 'r', 'D', 'd'];
L=find(chars=='L'); l=find(chars=='l');
U=find(chars=='U'); u=find(chars=='u');
R=find(chars=='R'); r=find(chars=='r');
D=find(chars=='D'); d=find(chars=='d');

cases = {};

T = 8;
ev = cell(ss, T);
ev(Onode,:) = num2cell([L l U u R r D d]);
if supervised
  ev(Q1,:) = num2cell(1*ones(1,T));
  ev(Q2,:) = num2cell([1 1 2 2 3 3 4 4]);
end
cases{1} = ev;
cases{3} = ev;

T = 8;
ev = cell(ss, T);
% we start with R then r, even though we are running the model 'backwards'!
ev(Onode,:) = num2cell([R r U u L l D d]);

if supervised
  ev(Q1,:) = num2cell(2*ones(1,T));
  ev(Q2,:) = num2cell([3 3 2 2 1 1 4 4]);
end

cases{2} = ev;
cases{4} = ev;

if obs_finalF2
  for i=1:length(cases)
    T = size(cases{i},2);
    cases{i}(F2,T) = {2}; % force F2 to be finished at end of seq
  end
end


% startprob should be shared for t=1:T,
% but in the DBN it is shared for t=2:T,
% so we train using a single long sequence.
long_seq = cat(2, cases{:});
[bnet_learned, LL, engine_learned] = ...
    learn_params_dbn_em(engine_init, {long_seq}, 'max_iter', 200);
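
% Alternative (sketch): learn_params_dbn_em also accepts the sequences as
% separate training cases, as in the commented-out call below; the single
% concatenated sequence is used above only because the DBN ties startprob
% across t=2:T rather than t=1:T. The output names bnet_learned2 etc. are
% placeholders.
%
% [bnet_learned2, LL2, engine_learned2] = ...
%     learn_params_dbn_em(engine_init, cases, 'max_iter', 200);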

% figure out which subsequence each model is responsible for
mpe = calc_mpe_dbn(engine_learned, long_seq);
pretty_print_hhmm_parse(mpe, Qnodes, Fnodes, Onode, chars);


% The "true" segmentation of the training sequence is
% Q1: 1 2
% O:  L l U u R r D d | R r U u L l D d | etc.
%
% When we learn in a supervised fashion, we recover the "truth".

% When we learn in an unsupervised fashion with seed=1, we get
% Q1: 2 1
% O:  L l U u R r D d R r | U u L l D d | etc.
%
% This means for model 1:
%   starts in state 2
%   transitions 2->1, 1->4, 4->e, 3->2
%
% For model 2:
%   starts in state 1
%   transitions 1->2, 2->3, 3->4 or e, 4->3

% examine the params
eclass = bnet_learned.equiv_class;
CPDQ1 = struct(bnet_learned.CPD{eclass(Q1,2)});
CPDQ2 = struct(bnet_learned.CPD{eclass(Q2,2)});
CPDQ3 = struct(bnet_learned.CPD{eclass(Q3,2)});
CPDF2 = struct(bnet_learned.CPD{eclass(F2,1)});
CPDF3 = struct(bnet_learned.CPD{eclass(F3,1)});
CPDO  = struct(bnet_learned.CPD{eclass(Onode,1)});

A_learned = add_hhmm_end_state(CPDQ2.transprob, CPDF2.termprob(:,:,2));
squeeze(A_learned(:,1,:))
squeeze(A_learned(:,2,:))
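
% Reading the matrices above: add_hhmm_end_state appends a virtual end state,
% so squeeze(A_learned(:,k,:)) is the Q2 transition matrix of sub-model k
% (i.e. Q1=k); entry (i,j) is the probability of moving from Q2=i to Q2=j, and
% the last column is the probability of terminating (the 'e' state in the
% comments above) and returning control to Q1.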


% Does the "true" model have higher likelihood than the learned one?
% i.e., does the unsupervised method learn the wrong model because
% we have the wrong cost fn, or because of local minima?

bnet_true = mk_square_hhmm(discrete_obs, 1);

% examine the params of the true model
eclass = bnet_true.equiv_class;
CPDQ1_true = struct(bnet_true.CPD{eclass(Q1,2)});
CPDQ2_true = struct(bnet_true.CPD{eclass(Q2,2)});
CPDQ3_true = struct(bnet_true.CPD{eclass(Q3,2)});
CPDF2_true = struct(bnet_true.CPD{eclass(F2,1)});
CPDF3_true = struct(bnet_true.CPD{eclass(F3,1)});

A_true = add_hhmm_end_state(CPDQ2_true.transprob, CPDF2_true.termprob(:,:,2));
squeeze(A_true(:,1,:))


if supervised
  engine_true = jtree_ndx_dbn_inf_engine(bnet_true);
else
  engine_true = hmm_inf_engine(bnet_true);
end

%[engine_learned, ll_learned] = enter_evidence(engine_learned, long_seq);
%[engine_true, ll_true] = enter_evidence(engine_true, long_seq);
[engine_learned, ll_learned] = enter_evidence(engine_learned, cases{2});
[engine_true, ll_true] = enter_evidence(engine_true, cases{2});
ll_learned
ll_true

% remove concatenation artefacts
ll_learned = 0;
ll_true = 0;
for m=1:length(cases)
  [engine_learned, ll_learned_tmp] = enter_evidence(engine_learned, cases{m});
  [engine_true, ll_true_tmp] = enter_evidence(engine_true, cases{m});
  ll_learned = ll_learned + ll_learned_tmp;
  ll_true = ll_true + ll_true_tmp;
end
ll_learned
ll_true

% In both cases, ll_learned >> ll_true,
% which shows we are using the wrong cost function!
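
% A further check (sketch, reusing the helpers from above): compare the
% Viterbi parse under the "true" model with the learned model's parse
% printed earlier.
mpe_true = calc_mpe_dbn(engine_true, long_seq);
pretty_print_hhmm_parse(mpe_true, Qnodes, Fnodes, Onode, chars);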