% Like mgram2, except we unroll the DBN so we can use smaller
% state spaces for the early duration nodes:
% the state spaces are D1 in {1}, D2 in {1,2}
%
% Script outline:
%   1. define a toy lexicon and the observed string
%   2. build the per-slice (intra) and between-slice (inter) topology and
%      unroll it for T slices
%   3. attach CPDs for the word (W) and duration/position (L) nodes
%   4. build soft-evidence potentials for the observation node (O) from the
%      multigram observation likelihoods
%   5. run inference and accumulate gamma / xidbn statistics
%
% NOTE(review): this script looks like a partial conversion of the DBN
% version (mgram2) to an unrolled static network; several later sections
% still use DBN-style indexing and are flagged below. Confirm before relying
% on the results.

past = 1;

words = {'the', 't', 'h', 'e'};
data = 'the';
nwords = length(words);
word_len = zeros(1, nwords);
word_prob = normalise(ones(1,nwords));
word_logprob = log(word_prob);
for wi=1:nwords
  word_len(wi)=length(words{wi});
end
D = max(word_len);


alphasize = 26*2;
data = letter2num(data);
T = length(data);

% node numbers
W = 1; % top level state = word id
L = 2; % bottom level state = letter position within word
F = 3;
O = 4;

ss = 4;
intra = zeros(ss,ss);
intra(W,[F L O])=1;
intra(L,[O F])=1;

inter = zeros(ss,ss);
inter(W,W)=1;
inter(L,L)=1;
inter(F,[W L O])=1;

T = 3;
dag = unroll_dbn_topology(intra, inter, T);

% node sizes
ns = zeros(1,ss);
ns(W) = nwords;
ns(L) = D;
ns(F) = 2;
ns(O) = alphasize;
ns = repmat(ns(:), [1 T]); % ss x T: row = node within slice, column = slice
% Shrink the duration node in the early slices. The subscripts are
% (node, slice); the original ns(d,L) had them transposed and overwrote
% nodes W/L/F of slice 2 instead. Bounded by T so ns never grows past the
% unrolled length.
for d=1:min(D,T)
  ns(L,d)=d; % max duration
end
ns = ns(:);

% Equiv class in brackets for D=3
% The Lt's are not tied until t>=D, since they have different sizes.
% W1 and W2 are not tied since they have different parent sets.

% W1 (1) W2 (5) W3 (5) W4 (5)
% L1 (2) L2 (6) L3 (7) L4 (7)
% F1 (3) F2 (3) F3 (4) F4 (4)
% O1 (4) O2 (4) O3 (4) O4 (4)

% Since we are not learning, we can dispense with tying

% Make the bnet
Wnodes = unroll_set(W, ss, T);
Lnodes = unroll_set(L, ss, T);
Fnodes = unroll_set(F, ss, T);
Onodes = unroll_set(O, ss, T);

bnet = mk_bnet(dag, ns);
eclass = bnet.equiv_class;

% uniform start distrib over words, uniform trans mat
Wstart = normalise(ones(1,nwords));
Wtrans = mk_stochastic(ones(nwords,nwords));
bnet.CPD{eclass(Wnodes(1))} = tabular_CPD(bnet, Wnodes(1), 'CPT', Wstart);
for t=2:T
  bnet.CPD{eclass(Wnodes(t))} = hhmmQ_CPD(bnet, Wnodes(t), 'Fbelow', Fnodes(t-1), ...
                                          'startprob', Wstart, 'transprob', Wtrans);
end

% always start in state d = length(word) for each bottom level HMM
% and then count down
% make downcounters
RLtrans = mk_rightleft_transmat(D, 0); % 0 self loop prob
Ltrans = repmat(RLtrans, [1 1 nwords]);

for t=1:T
  Lstart = zeros(nwords, min(t,D));
  for i=1:nwords
    l = length(words{i});
    % NOTE(review): when l > min(t,D) this assignment grows Lstart beyond
    % the width it was allocated with, so it no longer matches the size of
    % the L node in slice t. Confirm how over-long words should be handled.
    Lstart(i,l)=1;
  end
  % Build the CPD once per slice, after Lstart is complete. The branch
  % must test the slice index t: the original tested the stale loop
  % variable d (== D here), so the first-slice branch was never taken and
  % Fnodes(t-1) was evaluated as Fnodes(0) at t=1.
  if t==1
    bnet.CPD{eclass(Lnodes(1))} = tabular_CPD(bnet, Lnodes(1), 'CPT', Lstart);
  else
    bnet.CPD{eclass(Lnodes(t))} = hhmmQ_CPD(bnet, Lnodes(t), 'Fself', Fnodes(t-1), 'Qps', Wnodes(t), ...
                                            'startprob', Lstart, 'transprob', Ltrans);
  end
end


% Finish when downcounter = 1
Fprob = zeros(nwords, D, 2);
Fprob(:,1,2)=1;
Fprob(:,2:end,1)=1;


% Define CPDs for slice 1
% NOTE(review): eclass is taken from bnet.equiv_class above but is indexed
% here with two subscripts (node, slice), which looks like a leftover from
% the DBN version of this script; these assignments also revisit CPDs set in
% the loops above. Confirm whether this section is still wanted.
bnet.CPD{eclass(W,1)} = tabular_CPD(bnet, W, 'CPT', Wstart);
bnet.CPD{eclass(L,1)} = tabular_CPD(bnet, L, 'CPT', Lstart);
bnet.CPD{eclass(F,1)} = tabular_CPD(bnet, F, 'CPT', Fprob);


% Define CPDs for slice 2
bnet.CPD{eclass(W,2)} = hhmmQ_CPD(bnet, W+ss, 'Fbelow', F, 'startprob', Wstart, 'transprob', Wtrans);
bnet.CPD{eclass(L,2)} = hhmmQ_CPD(bnet, L+ss, 'Fself', F, 'Qps', W+ss, 'startprob', Lstart, 'transprob', Ltrans);


if 0
  % To test it is generating correctly, we create an artificial
  % observation process that capitalizes at the start of a new segment
  % Oprob(Ft-1,Qt,Dt,Yt)
  Oprob = zeros(2,nwords,D,alphasize);
  Oprob(1,1,3,letter2num('t'),1)=1;
  Oprob(1,1,2,letter2num('h'),1)=1;
  Oprob(1,1,1,letter2num('e'),1)=1;
  Oprob(2,1,3,letter2num('T'),1)=1;
  Oprob(2,1,2,letter2num('H'),1)=1;
  Oprob(2,1,1,letter2num('E'),1)=1;
  Oprob(1,2,1,letter2num('a'),1)=1;
  Oprob(2,2,1,letter2num('A'),1)=1;
  Oprob(1,3,1,letter2num('b'),1)=1;
  Oprob(2,3,1,letter2num('B'),1)=1;
  Oprob(1,4,1,letter2num('c'),1)=1;
  Oprob(2,4,1,letter2num('C'),1)=1;

  % Oprob1(Qt,Dt,Yt)
  Oprob1 = zeros(nwords,D,alphasize);
  Oprob1(1,3,letter2num('t'),1)=1;
  Oprob1(1,2,letter2num('h'),1)=1;
  Oprob1(1,1,letter2num('e'),1)=1;
  Oprob1(2,1,letter2num('a'),1)=1;
  Oprob1(3,1,letter2num('b'),1)=1;
  Oprob1(4,1,letter2num('c'),1)=1;

  bnet.CPD{eclass(O,2)} = tabular_CPD(bnet, O+ss, 'CPT', Oprob);
  bnet.CPD{eclass(O,1)} = tabular_CPD(bnet, O, 'CPT', Oprob1);

  evidence = cell(ss,T);
  %evidence{W,1}=1;
  sample = cell2num(sample_dbn(bnet, 'length', T, 'evidence', evidence));
  str = num2letter(sample(4,:))
end




[log_obslik, obslik, match] = mk_mgram_obslik(lower(data), words, word_len, word_prob);
% obslik(j,t,d)
softCPDpot = cell(ss,T);
% NOTE(review): ns is the unrolled column vector at this point, so ens(O)=1
% only changes the first slice's O entry and ens2 is a two-column matrix
% that is then indexed linearly by dom. Confirm this is the intended
% effective-size vector.
ens = ns;
ens(O)=1;
ens2 = [ens ens];
for t=2:T
  dom = [F W+ss L+ss O+ss];
  % tab(Ft-1, Q2, Dt)
  tab = ones(2, nwords, D);
  if past
    tab(1,:,:)=1; % if haven't finished previous word, likelihood is 1
    %tab(2,:,:) = squeeze(obslik(:,t,:)); % otherwise likelihood of this segment
    for d=1:min(t,D)
      tab(2,:,d) = squeeze(obslik(:,t,d));
    end
  else
    for d=1:max(1,min(D,T+1-t))
      tab(2,:,d) = squeeze(obslik(:,t+d-1,d));
    end
  end
  softCPDpot{O,t} = dpot(dom, ens2(dom), tab);
end
t = 1;
dom = [W L O];
% tab(Q2, Dt)
tab = ones(nwords, D);
if past
  %tab = squeeze(obslik(:,t,:));
  tab(:,1) = squeeze(obslik(:,t,1));
else
  for d=1:min(D,T-t)
    tab(:,d) = squeeze(obslik(:,t+d-1,d));
  end
end
softCPDpot{O,t} = dpot(dom, ens(dom), tab);


%bnet.observed = [];
% uninformative observations
%bnet.CPD{eclass(O,2)} = tabular_CPD(bnet, O+ss, 'CPT', mk_stochastic(ones(2,nwords,D,alphasize)));
%bnet.CPD{eclass(O,1)} = tabular_CPD(bnet, O, 'CPT', mk_stochastic(ones(nwords,D,alphasize)));

% NOTE(review): bnet was created with mk_bnet on the unrolled dag, yet a DBN
% inference engine is constructed here and queried per slice below. Confirm
% which engine is intended for the unrolled network.
engine = jtree_dbn_inf_engine(bnet);
evidence = cell(ss,T);
% we add dummy data to O to force its effective size to be 1.
% The actual values have already been incorporated into softCPDpot
evidence(O,:) = num2cell(ones(1,T));
[engine, ll_dbn] = enter_evidence(engine, evidence, 'softCPDpot', softCPDpot);


%evidence(F,:) = num2cell(2*ones(1,T));
%[engine, ll_dbn] = enter_evidence(engine, evidence);


% gamma(:,t) is the [W F] marginal at slice t restricted to F=2
gamma = zeros(nwords, T);
for t=1:T
  m = marginal_nodes(engine, [W F], t);
  gamma(:,t) = m.T(:,2);
end

gamma

% xidbn accumulates, over slices, the [W F W+ss] marginal restricted to F=2
xidbn = zeros(nwords, nwords);
for t=1:T-1
  m = marginal_nodes(engine, [W F W+ss], t);
  xidbn = xidbn + squeeze(m.T(:,2,:));
end

% thee
% xidbn(1,4) = 0.9412 the->e
% (2,3)=0.0588 t->h
% (3,4)=0.0588 h-e
% (4,4)=0.0588 e-e