% A multigram is a degenerate 2-level HHMM in which the bottom-level HMMs emit
% deterministic strings and the top-level abstract states are independent of
% each other.
% cf. HSMM/test_mgram2
%
% This script (1) builds the model as a DBN with four nodes per slice,
% (2) draws one sample from it with the first word clamped, and
% (3) enters the letters of `data` as evidence and collects posterior
% marginals over word endings (gamma) and word-to-word transitions (xidbn).

% ---- Vocabulary and observed string ----
words = {'the', 't', 'h', 'e'};
data = 'the';
nwords = length(words);
word_prob = normalise(ones(1,nwords));   % uniform prior over words
word_logprob = log(word_prob);
word_len = cellfun('length', words);     % number of letters in each word
D = max(word_len);                       % longest word = number of bottom-level states

alphasize = 26;
% Letters are indexed as double(ch) - letter_offset, i.e. 'a' -> 1 ... 'z' -> 26.
letter_offset = double('a') - 1;
% NOTE(review): letter2num is assumed to use the same 'a' -> 1 encoding as the
% Oprob table built below - confirm against its definition.
data = letter2num(data);
T = length(data);

% ---- Node numbers within a slice ----
W = 1; % top level state = word id
L = 2; % bottom level state = letter position within word
F = 3; % binary flag; value 2 marks the last letter of the current word (see Fprob)
O = 4; % observed letter

% ---- Topology ----
ss = 4; % slice size (nodes per slice)
intra = zeros(ss,ss);
intra(W,[F L O])=1;
intra(L,[O F])=1;

inter = zeros(ss,ss);
inter(W,W)=1;
inter(L,L)=1;
inter(F,[W L])=1;

% node sizes
ns = zeros(1,ss);
ns(W) = nwords;
ns(L) = D;
ns(F) = 2;
ns(O) = alphasize;


% Make the DBN
bnet = mk_dbn(intra, inter, ns, 'observed', O);
eclass = bnet.equiv_class;


% ---- Parameters ----
% uniform start distrib over words, uniform trans mat
Wstart = normalise(ones(1,nwords));
Wtrans = mk_stochastic(ones(nwords,nwords));

% always start in state 1 for each bottom level HMM
delta1_start = zeros(1, D);
delta1_start(1) = 1;
Lstart = repmat(delta1_start, nwords, 1);
LRtrans = mk_leftright_transmat(D, 0); % 0 self loop prob
Ltrans = repmat(LRtrans, [1 1 nwords]);

% Finish in the last letter of each word.
% Fprob(w,l,f): f=1 (not finished) everywhere except at l = word_len(w),
% where f=2 (finished) has probability 1.
Fprob = zeros(nwords, D, 2);
Fprob(:,:,1)=1;
for i=1:nwords
  Fprob(i,word_len(i),2)=1;
  Fprob(i,word_len(i),1)=0;
end

% Each state uniquely emits a letter.
% Oprob(w,l,a) = 1 iff the l-th letter of word w has index a.
% Rows for positions beyond word_len(w) are left all-zero.
Oprob = zeros(nwords, D, alphasize);
for i=1:nwords
  for l=1:word_len(i)
    a = double(words{i}(l)) - letter_offset;
    Oprob(i,l,a)=1;
  end
end


% Define CPDs for slice 1
bnet.CPD{eclass(W,1)} = tabular_CPD(bnet, W, 'CPT', Wstart);
bnet.CPD{eclass(L,1)} = tabular_CPD(bnet, L, 'CPT', Lstart);
bnet.CPD{eclass(F,1)} = tabular_CPD(bnet, F, 'CPT', Fprob);
bnet.CPD{eclass(O,1)} = tabular_CPD(bnet, O, 'CPT', Oprob);

% Define CPDs for slice 2 (node k in slice 2 is numbered k+ss)
bnet.CPD{eclass(W,2)} = hhmmQ_CPD(bnet, W+ss, 'Fbelow', F, 'startprob', Wstart, 'transprob', Wtrans);
bnet.CPD{eclass(L,2)} = hhmmQ_CPD(bnet, L+ss, 'Fself', F, 'Qps', W+ss, 'startprob', Lstart, 'transprob', Ltrans);

% ---- Sampling ----
% Clamp the word in the first slice to words{1} and sample T slices.
evidence = cell(ss,T);
evidence{W,1}=1;
sample = cell2num(sample_dbn(bnet, 'length', T, 'evidence', evidence));
% Decode the sampled letter indices back to characters (inverse of the Oprob
% encoding); left without a semicolon so the sampled string is displayed.
str = char(sample(O,:) + letter_offset)

% ---- Inference ----
engine = jtree_dbn_inf_engine(bnet);
evidence = cell(ss,T);
evidence(O,:) = num2cell(data);
[engine, ll_dbn] = enter_evidence(engine, evidence);

% gamma(w,t): entry (w, F=2) of the joint marginal over [W F] in slice t,
% i.e. the mass on "word w ends at time t".
% (The name shadows MATLAB's built-in gamma function within this workspace.)
gamma = zeros(nwords, T);
for t=1:T
  m = marginal_nodes(engine, [W F], t);
  gamma(:,t) = m.T(:,2);
end
gamma

% xidbn(i,j): sum over t=1..T-1 of the marginal mass on
% (W at t = i, F at t = 2, W at t+1 = j), i.e. word i ends and word j follows.
xidbn = zeros(nwords, nwords);
for t=1:T-1
  m = marginal_nodes(engine, [W F W+ss], t);
  xidbn = xidbn + squeeze(m.T(:,2,:));
end

% Reference values below appear to be for the input string 'thee'
% (note that data above is 'the'):
%   xidbn(1,4) = 0.9412   the->e
%   xidbn(2,3) = 0.0588   t->h
%   xidbn(3,4) = 0.0588   h->e
%   xidbn(4,4) = 0.0588   e->e