camir-aes2014: toolboxes/FullBNT-1.0.7/bnt/examples/dynamic/HHMM/Mgram/mgram3.m @ 0:e9a9cd732c1e (tip)
"first hg version after svn"
author: wolffd
date:   Tue, 10 Feb 2015 15:05:51 +0000
% like mgram2, except we unroll the DBN so we can use smaller
% state spaces for the early duration nodes:
% the state spaces are D1 in {1}, D2 in {1,2}, ..., Dt in {1,...,min(t,D)}

past = 1;

words = {'the', 't', 'h', 'e'};
data = 'the';
nwords = length(words);
word_len = zeros(1, nwords);
word_prob = normalise(ones(1,nwords));
word_logprob = log(word_prob);
for wi=1:nwords
  word_len(wi)=length(words{wi});
end
D = max(word_len);
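% Sanity check (values implied by the word list above): with
% words = {'the','t','h','e'}, word_len is [3 1 1 1], so D = 3.
assert(isequal(word_len, [3 1 1 1]) && D==3);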


alphasize = 26*2; % lower- and upper-case letters
data = letter2num(data);
T = length(data);
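% letter2num is a helper shipped in this Mgram directory; presumably it
% maps characters to indices in 1..alphasize. A hypothetical stand-in
% consistent with alphasize = 26*2 above would be:
%   letter2num = @(s) lower(s) - 'a' + 1 + 26*(s < 'a');  % 'a'..'z' -> 1..26, 'A'..'Z' -> 27..52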

% node numbers
W = 1; % top level state = word id
L = 2; % bottom level state = letter position within word
F = 3; % finish indicator (turns on when the current word ends)
O = 4; % observed letter

ss = 4; % slice size (number of nodes per slice)
intra = zeros(ss,ss);
intra(W,[F L O])=1;
intra(L,[O F])=1;

inter = zeros(ss,ss);
inter(W,W)=1;
inter(L,L)=1;
inter(F,[W L O])=1;

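% Within a slice: W -> {L,F,O} and L -> {F,O}, so both the finish flag F
% and the letter O depend on the current word and position. Across slices:
% W and L persist, and F gates the transitions of the next slice's W, L, O.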
T = 3; % number of slices to unroll (= length(data) here)
dag = unroll_dbn_topology(intra, inter, T);
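% unroll_dbn_topology lays the T slices out consecutively, so the unrolled
% dag has ss*T = 12 nodes, numbered slice by slice: W1=1, L1=2, F1=3, O1=4,
% W2=5, ..., O3=12 (the same numbering unroll_set relies on below, where
% node W+ss is W2, etc.).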

% node sizes
ns = zeros(1,ss);
ns(W) = nwords;
ns(L) = D;
ns(F) = 2;
ns(O) = alphasize;
ns = repmat(ns(:), [1 T]); % rows = nodes, columns = slices
for d=1:D
  ns(L,d) = d; % max duration: the L node in slice d has d states
end
ns = ns(:);
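% Resulting per-slice sizes (columns of ns before it was flattened):
%   slice 1: [nwords 1 2 alphasize]
%   slice 2: [nwords 2 2 alphasize]
%   slice 3: [nwords 3 2 alphasize]
% i.e. the duration node L grows from 1 to D states over the first D slices.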

% Equiv class in brackets for D=3
% The Lt's are not tied until t>=D, since they have different sizes.
% W1 and W2 are not tied since they have different parent sets.

% W1 (1)  W2 (5)  W3 (5)  W4 (5)
% L1 (2)  L2 (6)  L3 (7)  L4 (7)
% F1 (3)  F2 (3)  F3 (4)  F4 (4)
% O1 (4)  O2 (4)  O3 (4)  O4 (4)

% Since we are not learning, we can dispense with tying

% Make the bnet
Wnodes = unroll_set(W, ss, T);
Lnodes = unroll_set(L, ss, T);
Fnodes = unroll_set(F, ss, T);
Onodes = unroll_set(O, ss, T);

bnet = mk_bnet(dag, ns);
eclass = bnet.equiv_class;

% uniform start distrib over words, uniform trans mat
Wstart = normalise(ones(1,nwords));
Wtrans = mk_stochastic(ones(nwords,nwords));
bnet.CPD{eclass(Wnodes(1))} = tabular_CPD(bnet, Wnodes(1), 'CPT', Wstart);
for t=2:T
  bnet.CPD{eclass(Wnodes(t))} = hhmmQ_CPD(bnet, Wnodes(t), 'Fbelow', Fnodes(t-1), ...
                                          'startprob', Wstart, 'transprob', Wtrans);
end

% always start in state d = length(word) for each bottom level HMM
% and then count down
% make downcounters
RLtrans = mk_rightleft_transmat(D, 0); % 0 self loop prob
Ltrans = repmat(RLtrans, [1 1 nwords]);
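% With self-loop probability 0, each duration state d moves deterministically
% to d-1, so L counts down by one letter per slice; repmat replicates the
% same downcounter for every word.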

for t=1:T
  Lstart = zeros(nwords, min(t,D));
  for i=1:nwords
    l = length(words{i});
    Lstart(i,l)=1;
  end
  if t==1
    bnet.CPD{eclass(Lnodes(1))} = tabular_CPD(bnet, Lnodes(1), 'CPT', Lstart);
  else
    bnet.CPD{eclass(Lnodes(t))} = hhmmQ_CPD(bnet, Lnodes(t), 'Fself', Fnodes(t-1), 'Qps', Wnodes(t), ...
                                            'startprob', Lstart, 'transprob', Ltrans);
  end
end


% Finish when downcounter = 1
Fprob = zeros(nwords, D, 2);
Fprob(:,1,2)=1;
Fprob(:,2:end,1)=1;
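% So F = 2 ("finish") fires exactly when the downcounter reaches 1, i.e.
% when the current word emits its final letter; otherwise F = 1 ("continue").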


% Define CPDs for slice 1
bnet.CPD{eclass(W)} = tabular_CPD(bnet, W, 'CPT', Wstart);
bnet.CPD{eclass(L)} = tabular_CPD(bnet, L, 'CPT', Lstart);
bnet.CPD{eclass(F)} = tabular_CPD(bnet, F, 'CPT', Fprob);


% Define CPDs for slice 2
bnet.CPD{eclass(W+ss)} = hhmmQ_CPD(bnet, W+ss, 'Fbelow', F, 'startprob', Wstart, 'transprob', Wtrans);
bnet.CPD{eclass(L+ss)} = hhmmQ_CPD(bnet, L+ss, 'Fself', F, 'Qps', W+ss, 'startprob', Lstart, 'transprob', Ltrans);


if 0
  % To test that it generates correctly, we create an artificial
  % observation process that capitalizes at the start of a new segment
  % Oprob(Ft-1,Qt,Dt,Yt)
  Oprob = zeros(2,nwords,D,alphasize);
  Oprob(1,1,3,letter2num('t'))=1;
  Oprob(1,1,2,letter2num('h'))=1;
  Oprob(1,1,1,letter2num('e'))=1;
  Oprob(2,1,3,letter2num('T'))=1;
  Oprob(2,1,2,letter2num('H'))=1;
  Oprob(2,1,1,letter2num('E'))=1;
  Oprob(1,2,1,letter2num('a'))=1;
  Oprob(2,2,1,letter2num('A'))=1;
  Oprob(1,3,1,letter2num('b'))=1;
  Oprob(2,3,1,letter2num('B'))=1;
  Oprob(1,4,1,letter2num('c'))=1;
  Oprob(2,4,1,letter2num('C'))=1;

  % Oprob1(Qt,Dt,Yt)
  Oprob1 = zeros(nwords,D,alphasize);
  Oprob1(1,3,letter2num('t'))=1;
  Oprob1(1,2,letter2num('h'))=1;
  Oprob1(1,1,letter2num('e'))=1;
  Oprob1(2,1,letter2num('a'))=1;
  Oprob1(3,1,letter2num('b'))=1;
  Oprob1(4,1,letter2num('c'))=1;

  bnet.CPD{eclass(O+ss)} = tabular_CPD(bnet, O+ss, 'CPT', Oprob);
  bnet.CPD{eclass(O)} = tabular_CPD(bnet, O, 'CPT', Oprob1);

  evidence = cell(ss,T);
  %evidence{W,1}=1;
  sample = cell2num(sample_dbn(bnet, 'length', T, 'evidence', evidence));
  str = num2letter(sample(O,:))
end


[log_obslik, obslik, match] = mk_mgram_obslik(lower(data), words, word_len, word_prob);
% obslik(j,t,d) = likelihood that the segment of duration d ending at t is word j
softCPDpot = cell(ss,T);
ens = [nwords D 2 alphasize]; % per-slice node sizes (ns itself was flattened above)
ens(O)=1; % O carries only soft evidence below, so it gets a dummy size of 1
ens2 = [ens ens];
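% Instead of a letter-level CPD on O, each softCPDpot{O,t} built below is a
% discrete potential over (F_{t-1}, W_t, L_t, O_t) whose entries are the
% precomputed segment likelihoods obslik(word, position, duration); these
% potentials are handed to the engine via the 'softCPDpot' argument of
% enter_evidence.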
for t=2:T
  dom = [F W+ss L+ss O+ss];
  % tab(Ft-1, Q2, Dt)
  tab = ones(2, nwords, D);
  if past
    tab(1,:,:)=1; % if haven't finished previous word, likelihood is 1
    %tab(2,:,:) = squeeze(obslik(:,t,:)); % otherwise likelihood of this segment
    for d=1:min(t,D)
      tab(2,:,d) = squeeze(obslik(:,t,d));
    end
  else
    for d=1:max(1,min(D,T+1-t))
      tab(2,:,d) = squeeze(obslik(:,t+d-1,d));
    end
  end
  softCPDpot{O,t} = dpot(dom, ens2(dom), tab);
end
t = 1;
dom = [W L O];
% tab(Q1, D1)
tab = ones(nwords, D);
if past
  %tab = squeeze(obslik(:,t,:));
  tab(:,1) = squeeze(obslik(:,t,1));
else
  for d=1:min(D,T-t)
    tab(:,d) = squeeze(obslik(:,t+d-1,d));
  end
end
softCPDpot{O,t} = dpot(dom, ens(dom), tab);


%bnet.observed = [];
% uninformative observations
%bnet.CPD{eclass(O+ss)} = tabular_CPD(bnet, O+ss, 'CPT', mk_stochastic(ones(2,nwords,D,alphasize)));
%bnet.CPD{eclass(O)} = tabular_CPD(bnet, O, 'CPT', mk_stochastic(ones(nwords,D,alphasize)));

engine = jtree_dbn_inf_engine(bnet);
evidence = cell(ss,T);
% we add dummy data to O to force its effective size to be 1.
% The actual values have already been incorporated into softCPDpot.
evidence(O,:) = num2cell(ones(1,T));
[engine, ll_dbn] = enter_evidence(engine, evidence, 'softCPDpot', softCPDpot);


%evidence(F,:) = num2cell(2*ones(1,T));
%[engine, ll_dbn] = enter_evidence(engine, evidence);

gamma = zeros(nwords, T);
for t=1:T
  m = marginal_nodes(engine, [W F], t);
  gamma(:,t) = m.T(:,2);
end
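% gamma(j,t) = P(W_t = j, F_t = finish | data): the posterior probability
% that word j ends at position t.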

gamma

xidbn = zeros(nwords, nwords);
for t=1:T-1
  m = marginal_nodes(engine, [W F W+ss], t);
  xidbn = xidbn + squeeze(m.T(:,2,:));
end
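% xidbn(i,j) accumulates P(W_t = i, F_t = finish, W_{t+1} = j | data) over t:
% the expected number of word-i -> word-j transitions at segment boundaries.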

% Expected result for data = 'thee' (segmentations "the|e" vs "t|h|e|e"):
% xidbn(1,4) = 0.9412  the->e
%      (2,3) = 0.0588  t->h
%      (3,4) = 0.0588  h->e
%      (4,4) = 0.0588  e->e