toolboxes/FullBNT-1.0.7/bnt/examples/dynamic/HHMM/Mgram/mgram2.m @ 0:e9a9cd732c1e (tip)

first hg version after svn
author: wolffd
date:   Tue, 10 Feb 2015 15:05:51 +0000
% Like a durational HMM, except we use soft evidence on the observed nodes.
% Should give the same results as HSMM/test_mgram2.

past = 1;
% If past=1, P(Yt|Qt=j,Dt=d) = P(y_{t-d+1:t}|j), i.e. past evidence
% If past=0, P(Yt|Qt=j,Dt=d) = P(y_{t:t+d-1}|j), i.e. future evidence
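% Worked example (data = 'the', word j=1 = 'the', d=3): with past=1 the
% segment y_{1:3} is scored at t=3 as P(y_{1:3}|'the'); with past=0 the
% same segment is scored at t=1, looking forward.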

words = {'the', 't', 'h', 'e'};
data = 'the';
nwords = length(words);
word_len = zeros(1, nwords);
word_prob = normalise(ones(1,nwords));
word_logprob = log(word_prob);
for wi=1:nwords
  word_len(wi) = length(words{wi});
end
D = max(word_len); % max duration = length of the longest word


alphasize = 26*2; % lower- and upper-case letters
data = letter2num(data);
T = length(data);

% node numbers
W = 1; % top level state = word id
L = 2; % bottom level state = letter position within word (down-counter)
F = 3; % finish node: has the current word just ended?
O = 4; % observation = letter

ss = 4; % slice size
intra = zeros(ss,ss);
intra(W,[F L O])=1;
intra(L,[O F])=1;

inter = zeros(ss,ss);
inter(W,W)=1;
inter(L,L)=1;
inter(F,[W L O])=1;
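% Resulting 2-slice topology:
%   within a slice:   W -> {L, F, O},  L -> {F, O}
%   between slices:   W -> W',  L -> L',  F -> {W', L', O'}
% so the word W and the counter L can only change state when F signals
% that the current word has finished.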

% node sizes
ns = zeros(1,ss);
ns(W) = nwords;
ns(L) = D;
ns(F) = 2;
ns(O) = alphasize;
ns2 = [ns ns];

% Make the DBN
bnet = mk_dbn(intra, inter, ns, 'observed', O);
eclass = bnet.equiv_class;

% uniform start distribution over words, uniform transition matrix
Wstart = normalise(ones(1,nwords));
Wtrans = mk_stochastic(ones(nwords,nwords));
%Wtrans = ones(nwords,nwords);

% always start in state d = length(word) for each bottom level HMM
Lstart = zeros(nwords, D);
for i=1:nwords
  l = length(words{i});
  Lstart(i,l) = 1;
end

% make downcounters
RLtrans = mk_rightleft_transmat(D, 0); % 0 self-loop prob
Ltrans = repmat(RLtrans, [1 1 nwords]);
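% With self-loop probability 0, RLtrans is a deterministic countdown:
% duration state d moves to d-1 at every step. (Assumption: state 1 is
% absorbing in the raw transmat; in this model F fires at d=1 and the
% hhmmQ_CPD below restarts L from Lstart, so that self-loop is never
% actually used.)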

% Finish when downcounter = 1
Fprob = zeros(nwords, D, 2);
Fprob(:,1,2) = 1;
Fprob(:,2:end,1) = 1;
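% i.e. P(F=2 (finished) | W=j, L=1) = 1 and P(F=1 | W=j, L>1) = 1 for
% every word j: a word ends exactly when its counter reaches 1.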


% Define CPDs for slice 1
bnet.CPD{eclass(W,1)} = tabular_CPD(bnet, W, 'CPT', Wstart);
bnet.CPD{eclass(L,1)} = tabular_CPD(bnet, L, 'CPT', Lstart);
bnet.CPD{eclass(F,1)} = tabular_CPD(bnet, F, 'CPT', Fprob);


% Define CPDs for slice 2
bnet.CPD{eclass(W,2)} = hhmmQ_CPD(bnet, W+ss, 'Fbelow', F, 'startprob', Wstart, 'transprob', Wtrans);
bnet.CPD{eclass(L,2)} = hhmmQ_CPD(bnet, L+ss, 'Fself', F, 'Qps', W+ss, 'startprob', Lstart, 'transprob', Ltrans);
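% hhmmQ_CPD encodes the usual HHMM gating: W transitions according to
% Wtrans only when the level below has finished (Fbelow=2), otherwise it
% stays put; L restarts from Lstart(w,:) when its own F fired (Fself=2),
% and otherwise counts down via Ltrans(:,:,w), where w is the parent
% word (Qps = W).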


if 0
  % To test that the model generates correctly, we create an artificial
  % observation process that capitalizes the letter at the start of a new segment.
  % Oprob(Ft-1,Qt,Dt,Yt)
  Oprob = zeros(2,nwords,D,alphasize);
  Oprob(1,1,3,letter2num('t')) = 1;
  Oprob(1,1,2,letter2num('h')) = 1;
  Oprob(1,1,1,letter2num('e')) = 1;
  Oprob(2,1,3,letter2num('T')) = 1;
  Oprob(2,1,2,letter2num('H')) = 1;
  Oprob(2,1,1,letter2num('E')) = 1;
  Oprob(1,2,1,letter2num('a')) = 1;
  Oprob(2,2,1,letter2num('A')) = 1;
  Oprob(1,3,1,letter2num('b')) = 1;
  Oprob(2,3,1,letter2num('B')) = 1;
  Oprob(1,4,1,letter2num('c')) = 1;
  Oprob(2,4,1,letter2num('C')) = 1;

  % Oprob1(Qt,Dt,Yt) - slice-1 version, which has no F parent
  Oprob1 = zeros(nwords,D,alphasize);
  Oprob1(1,3,letter2num('t')) = 1;
  Oprob1(1,2,letter2num('h')) = 1;
  Oprob1(1,1,letter2num('e')) = 1;
  Oprob1(2,1,letter2num('a')) = 1;
  Oprob1(3,1,letter2num('b')) = 1;
  Oprob1(4,1,letter2num('c')) = 1;

  bnet.CPD{eclass(O,2)} = tabular_CPD(bnet, O+ss, 'CPT', Oprob);
  bnet.CPD{eclass(O,1)} = tabular_CPD(bnet, O, 'CPT', Oprob1);

  evidence = cell(ss,T);
  %evidence{W,1}=1;
  sample = cell2num(sample_dbn(bnet, 'length', T, 'evidence', evidence));
  str = num2letter(sample(O,:))
end


if 1

  [log_obslik, obslik, match] = mk_mgram_obslik(lower(data), words, word_len, word_prob);
  % obslik(j,t,d)
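  % (Assumed semantics, matching the past=1 convention in the header:
  % obslik(j,t,d) = P(y_{t-d+1:t} | word j), the likelihood that word j
  % generated the length-d segment ending at time t.)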
  softCPDpot = cell(ss,T);
  ens = ns;
  ens(O) = 1; % force O's effective size to 1 (see dummy evidence below)
  ens2 = [ens ens];
  for t=2:T
    dom = [F W+ss L+ss O+ss]; % family of O(t): F(t-1), W(t), L(t), O(t)
    % tab(F(t-1), Q(t), D(t))
    tab = ones(2, nwords, D);
    if past
      tab(1,:,:) = 1; % if we haven't finished the previous word, the likelihood is 1
      %tab(2,:,:) = squeeze(obslik(:,t,:)); % otherwise likelihood of this segment
      for d=1:min(t,D)
        tab(2,:,d) = squeeze(obslik(:,t,d));
      end
    else
      for d=1:max(1,min(D,T+1-t))
        tab(2,:,d) = squeeze(obslik(:,t+d-1,d));
      end
    end
    softCPDpot{O,t} = dpot(dom, ens2(dom), tab);
  end
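  % Because O's effective size is 1, each dpot acts as soft (virtual)
  % evidence on O(t): it multiplies the segment likelihood into the
  % clique over (F(t-1), W(t), L(t)) instead of instantiating O(t) to a
  % particular letter.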
  t = 1;
  dom = [W L O]; % family of O(1): W(1), L(1), O(1); no F parent in slice 1
  % tab(Q(1), D(1))
  tab = ones(nwords, D);
  if past
    %tab = squeeze(obslik(:,t,:));
    tab(:,1) = squeeze(obslik(:,t,1));
  else
    for d=1:min(D,T-t)
      tab(:,d) = squeeze(obslik(:,t+d-1,d));
    end
  end
  softCPDpot{O,t} = dpot(dom, ens(dom), tab);


  %bnet.observed = [];
  % uninformative observations
  %bnet.CPD{eclass(O,2)} = tabular_CPD(bnet, O+ss, 'CPT', mk_stochastic(ones(2,nwords,D,alphasize)));
  %bnet.CPD{eclass(O,1)} = tabular_CPD(bnet, O, 'CPT', mk_stochastic(ones(nwords,D,alphasize)));

  engine = jtree_dbn_inf_engine(bnet);
  evidence = cell(ss,T);
  % We add dummy data to O to force its effective size to be 1.
  % The actual values have already been incorporated into softCPDpot.
  evidence(O,:) = num2cell(ones(1,T));
  [engine, ll_dbn] = enter_evidence(engine, evidence, 'softCPDpot', softCPDpot);
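  % ll_dbn is the log-likelihood of the (soft) evidence; per the header
  % comment it should match the value computed by HSMM/test_mgram2.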


  %evidence(F,:) = num2cell(2*ones(1,T));
  %[engine, ll_dbn] = enter_evidence(engine, evidence);


  gamma = zeros(nwords, T);
  for t=1:T
    m = marginal_nodes(engine, [W F], t);
    gamma(:,t) = m.T(:,2);
  end
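  % gamma(j,t) = P(W(t)=j, F(t)=2 | evidence): the posterior probability
  % that word j is active and finishes at time t (m.T(:,2) keeps the
  % F=2, i.e. finished, column).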

  gamma

  xidbn = zeros(nwords, nwords);
  for t=1:T-1
    m = marginal_nodes(engine, [W F W+ss], t);
    xidbn = xidbn + squeeze(m.T(:,2,:));
  end
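  % xidbn(i,j) = sum_t P(W(t)=i, F(t)=2, W(t+1)=j | evidence): the
  % expected number of word boundaries at which word i is followed by
  % word j.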

  % Expected results for data = 'thee':
  % xidbn(1,4) = 0.9412   the -> e
  % xidbn(2,3) = 0.0588   t -> h
  % xidbn(3,4) = 0.0588   h -> e
  % xidbn(4,4) = 0.0588   e -> e


end