diff toolboxes/FullBNT-1.0.7/bnt/examples/dynamic/HHMM/Square/Old/learn_square_hhmm.m @ 0:e9a9cd732c1e tip
first hg version after svn
author    wolffd
date      Tue, 10 Feb 2015 15:05:51 +0000
parents   (none)
children  (none)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/FullBNT-1.0.7/bnt/examples/dynamic/HHMM/Square/Old/learn_square_hhmm.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,294 @@

% Learn a 3-level HHMM similar to mk_square_hhmm.

% Because startprob should be shared for t=1:T,
% but in the DBN it is shared only for t=2:T, we train using a single long sequence.

discrete_obs = 0;
supervised = 1;
obs_finalF2 = 0;
% It is not possible to observe F2 if we learn,
% because the update_ess methods for hhmmF_CPD and hhmmQ_CPD assume
% the F nodes are always hidden (for speed).
% However, for generating, we might want to set the final F2=true
% to force all subroutines to finish.

ss = 6;
Q1 = 1; Q2 = 2; Q3 = 3; F3 = 4; F2 = 5; Onode = 6;
Qnodes = [Q1 Q2 Q3]; Fnodes = [F2 F3];

seed = 1;
rand('state', seed);
randn('state', seed);

if discrete_obs
  Qsizes = [2 4 2];
else
  Qsizes = [2 4 1];
end

D = 3;
Qnodes = 1:D;
startprob = cell(1,D);
transprob = cell(1,D);
termprob = cell(1,D);

startprob{1} = 'unif';
transprob{1} = 'unif';

% In the unsupervised case, it is essential that we break symmetry
% in the initial param estimates.
%startprob{2} = 'unif';
%transprob{2} = 'unif';
%termprob{2} = 'unif';
startprob{2} = 'rnd';
transprob{2} = 'rnd';
termprob{2} = 'rnd';

leftright = 0;
if leftright
  % Initialise base-level models as left-right.
  % If we initialise with delta functions,
  % they will remain delta functions after learning.
  startprob{3} = 'leftstart';
  transprob{3} = 'leftright';
  termprob{3} = 'rightstop';
else
  % If we want to be able to run a base-level model backwards...
  startprob{3} = 'rnd';
  transprob{3} = 'rnd';
  termprob{3} = 'rnd';
end

if discrete_obs
  % Initialise observations of lowest-level primitives in a way which we can interpret.
  chars = ['L', 'l', 'U', 'u', 'R', 'r', 'D', 'd'];
  L=find(chars=='L'); l=find(chars=='l');
  U=find(chars=='U'); u=find(chars=='u');
  R=find(chars=='R'); r=find(chars=='r');
  D=find(chars=='D'); d=find(chars=='d');
  Osize = length(chars);

  p = 0.9;
  obsprob = (1-p)*ones([4 2 Osize]);
  %       Q2 Q3 O
  obsprob(1, 1, L) = p;
  obsprob(1, 2, l) = p;
  obsprob(2, 1, U) = p;
  obsprob(2, 2, u) = p;
  obsprob(3, 1, R) = p;
  obsprob(3, 2, r) = p;
  obsprob(4, 1, D) = p;
  obsprob(4, 2, d) = p;
  obsprob = mk_stochastic(obsprob);
  Oargs = {'CPT', obsprob};
else
  % Initialise means of lowest-level primitives in a way which we can interpret.
  % These means are little vectors in the east, south, west, north directions.
  % (left-right=east, up-down=south, right-left=west, down-up=north)
  Osize = 2;
  mu = zeros(2, Qsizes(2), Qsizes(3));
  noise = 0;
  scale = 3;
  for q3=1:Qsizes(3)
    mu(:, 1, q3) = scale*[1;0] + noise*rand(2,1);
  end
  for q3=1:Qsizes(3)
    mu(:, 2, q3) = scale*[0;-1] + noise*rand(2,1);
  end
  for q3=1:Qsizes(3)
    mu(:, 3, q3) = scale*[-1;0] + noise*rand(2,1);
  end
  for q3=1:Qsizes(3)
    mu(:, 4, q3) = scale*[0;1] + noise*rand(2,1);
  end
  Sigma = repmat(reshape(scale*eye(2), [2 2 1 1]), [1 1 Qsizes(2) Qsizes(3)]);
  Oargs = {'mean', mu, 'cov', Sigma, 'cov_type', 'diag'};
end
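% ---------------------------------------------------------------------
% (Added sketch, not part of the original file.) An optional way to
% eyeball the initial observation means as compass arrows before
% building the DBN; it assumes the continuous branch above has run
% (discrete_obs = 0), so that mu exists.
if 0
  figure; hold on;
  for q2=1:Qsizes(2)
    % each arrow is the mean step vector of one base-level primitive
    quiver(0, 0, mu(1,q2,1), mu(2,q2,1), 0);
  end
  axis equal; title('initial primitive means')
end
% ---------------------------------------------------------------------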
bnet = mk_hhmm('Qsizes', Qsizes, 'Osize', Osize, 'discrete_obs', discrete_obs, ...
               'Oargs', Oargs, 'Ops', Qnodes(2:3), ...
               'startprob', startprob, 'transprob', transprob, 'termprob', termprob);

if supervised
  bnet.observed = [Q1 Q2 Onode];
else
  bnet.observed = [Onode];
end

if obs_finalF2
  engine = jtree_dbn_inf_engine(bnet);
  % can't use ndx version because sometimes F2 is hidden, sometimes observed
  error('can''t observe F when learning')
else
  if supervised
    engine = jtree_ndx_dbn_inf_engine(bnet);
  else
    engine = jtree_hmm_inf_engine(bnet);
  end
end

if discrete_obs
  % generate some synthetic data (easier to debug)
  cases = {};

  T = 8;
  ev = cell(ss, T);
  ev(Onode,:) = num2cell([L l U u R r D d]);
  if supervised
    ev(Q1,:) = num2cell(1*ones(1,T));
    ev(Q2,:) = num2cell([1 1 2 2 3 3 4 4]);
  end
  cases{1} = ev;
  cases{3} = ev;

  T = 8;
  ev = cell(ss, T);
  if leftright % base model is left-right
    ev(Onode,:) = num2cell([R r U u L l D d]);
  else
    ev(Onode,:) = num2cell([r R u U l L d D]);
  end
  if supervised
    ev(Q1,:) = num2cell(2*ones(1,T));
    ev(Q2,:) = num2cell([3 3 2 2 1 1 4 4]);
  end
  cases{2} = ev;
  cases{4} = ev;

  if obs_finalF2
    for i=1:length(cases)
      T = size(cases{i},2);
      cases{i}(F2,T) = {2}; % force F2 to be finished at end of seq
    end
  end

  if 0
    ev = cases{4};
    engine2 = enter_evidence(engine2, ev);
    T = size(ev,2);
    for t=1:T
      m = marginal_family(engine2, F2, t);
      fprintf('t=%d\n', t);
      reshape(m.T, [2 2])
    end
  end

  %[bnet2, LL] = learn_params_dbn_em(engine, cases, 'max_iter', 10);
  long_seq = cat(2, cases{:});
  [bnet2, LL, engine2] = learn_params_dbn_em(engine, {long_seq}, 'max_iter', 200);

  % figure out which subsequence each model is responsible for
  mpe = calc_mpe_dbn(engine2, long_seq);
  pretty_print_hhmm_parse(mpe, Qnodes, Fnodes, Onode, chars);
else
  load 'square4_cases' % cases{seq}{i,t} for i=1:ss
  %plot_square_hhmm(cases{1})
  %long_seq = cat(2, cases{:});
  train_cases = cases(1:2);
  long_seq = cat(2, train_cases{:});
  if ~supervised
    T = size(long_seq,2);
    for t=1:T
      long_seq{Q1,t} = [];
      long_seq{Q2,t} = [];
    end
  end
  [bnet2, LL, engine2] = learn_params_dbn_em(engine, {long_seq}, 'max_iter', 100);

  eclass = bnet2.equiv_class; % needed here, not just at the end of the script
  CPDO = struct(bnet2.CPD{eclass(Onode,1)});
  mu = CPDO.mean;
  Sigma = CPDO.cov;
  CPDO_full = CPDO;

  % force diagonal covs after training
  for k=1:size(Sigma,3)
    Sigma(:,:,k) = diag(diag(Sigma(:,:,k)));
  end
  % use the learned CPD (bnet2, not bnet) as the base, so the learned means are kept
  bnet2.CPD{6} = set_fields(bnet2.CPD{6}, 'cov', Sigma);

  if 0
    % visualize each model by concatenating means for each model for nsteps in a row
    nsteps = 5;
    ev = cell(ss, nsteps*prod(Qsizes(2:3)));
    t = 1;
    for q2=1:Qsizes(2)
      for q3=1:Qsizes(3)
        for i=1:nsteps
          ev{Onode,t} = mu(:,q2,q3);
          ev{Q2,t} = q2;
          t = t + 1;
        end
      end
    end
    plot_square_hhmm(ev)
  end

  % bnet3 is the same as the learned model, except we will use it in testing mode
  if supervised
    bnet3 = bnet2;
    bnet3.observed = [Onode];
    engine3 = hmm_inf_engine(bnet3);
    %engine3 = jtree_ndx_dbn_inf_engine(bnet3);
  else
    bnet3 = bnet2;
    engine3 = engine2;
  end

  if 0
    % segment whole sequence
    mpe = calc_mpe_dbn(engine3, long_seq);
    pretty_print_hhmm_parse(mpe, Qnodes, Fnodes, Onode, []);
  end
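  % -------------------------------------------------------------------
  % (Added, hedged sketch -- not in the original file.) In supervised
  % mode the Q1/Q2 labels are still present in long_seq, so one rough
  % check is the frame-level agreement between the Viterbi parse and
  % those labels; the labels must be hidden before parsing, or they
  % would be treated as evidence.
  if 0
    labeled = long_seq;                 % assumes supervised==1
    hidden = long_seq;
    for t=1:size(hidden,2)
      hidden{Q1,t} = []; hidden{Q2,t} = [];   % hide labels before parsing
    end
    mpe = calc_mpe_dbn(engine3, hidden);
    acc_q1 = mean(cell2num(mpe(Q1,:)) == cell2num(labeled(Q1,:)));
    fprintf('frame-level Q1 agreement with labels: %.2f\n', acc_q1);
  end
  % -------------------------------------------------------------------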
  % segment each sequence
  test_cases = cases(3:4);
  for i=1:2
    ev = test_cases{i};
    T = size(ev, 2);
    for t=1:T
      ev{Q1,t} = [];
      ev{Q2,t} = [];
    end
    mpe = calc_mpe_dbn(engine3, ev);
    subplot(1,2,i)
    plot_square_hhmm(mpe)
    %pretty_print_hhmm_parse(mpe, Qnodes, Fnodes, Onode, []);
    q1s = cell2num(mpe(Q1,:));
    h = hist(q1s, 1:Qsizes(1));
    map_q1 = argmax(h);
    str = sprintf('test seq %d is of type %d\n', i, map_q1);
    title(str)
  end

end

if 0
  % Estimate obtained by counting transitions in the labelled data.
  % Note that a self-transition shouldn't count if F2=off.
  Q2ev = cell2num(ev(Q2,:));
  Q2a = Q2ev(1:end-1);
  Q2b = Q2ev(2:end);
  counts = compute_counts([Q2a; Q2b], [4 4]);
end

eclass = bnet2.equiv_class;
CPDQ1 = struct(bnet2.CPD{eclass(Q1,2)});
CPDQ2 = struct(bnet2.CPD{eclass(Q2,2)});
CPDQ3 = struct(bnet2.CPD{eclass(Q3,2)});
CPDF2 = struct(bnet2.CPD{eclass(F2,1)});
CPDF3 = struct(bnet2.CPD{eclass(F3,1)});

A = add_hhmm_end_state(CPDQ2.transprob, CPDF2.termprob(:,:,2));
squeeze(A(:,1,:))
squeeze(A(:,2,:))
CPDQ2.startprob

if 0
  S = struct(CPDF2.sub_CPD_term);
  S.nsamples
  reshape(S.counts, [2 4 2])
end
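% ---------------------------------------------------------------------
% (Added sanity check, not in the original file.) After folding the
% termination probabilities into an explicit end state, every row of the
% augmented Q2 transition matrix should still sum to 1; approxeq is the
% usual BNT/KPMtools helper for this kind of tolerance check.
if 0
  for k=1:Qsizes(1)
    Ak = squeeze(A(:,k,:));             % Qsizes(2) x (Qsizes(2)+1)
    assert(approxeq(sum(Ak,2), ones(Qsizes(2),1)));
  end
end
% ---------------------------------------------------------------------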