% Learn a 3-level HHMM similar to mk_square_hhmm.
%
% Because startprob should be shared for t=1:T,
% but in the DBN is shared only for t=2:T, we train using a single long sequence.

discrete_obs = 0;   % 0 = Gaussian observations, 1 = discrete symbol observations
supervised = 1;     % 1 = observe Q1,Q2 labels during training
obs_finalF2 = 0;
% It is not possible to observe F2 if we learn
% because the update_ess method for hhmmF_CPD and hhmmQ_CPD assume
% the F nodes are always hidden (for speed).
% However, for generating, we might want to set the final F2=true
% to force all subroutines to finish.

% Node numbering within one DBN slice.
ss = 6;
Q1 = 1; Q2 = 2; Q3 = 3; F3 = 4; F2 = 5; Onode = 6;
Qnodes = [Q1 Q2 Q3]; Fnodes = [F2 F3];

seed = 1;
rand('state', seed);
randn('state', seed);

if discrete_obs
  Qsizes = [2 4 2];
else
  Qsizes = [2 4 1];
end

D = 3;                 % number of levels in the hierarchy
Qnodes = 1:D;
startprob = cell(1,D);
transprob = cell(1,D);
termprob = cell(1,D);

startprob{1} = 'unif';
transprob{1} = 'unif';

% In the unsupervised case, it is essential that we break symmetry
% in the initial param estimates.
%startprob{2} = 'unif';
%transprob{2} = 'unif';
%termprob{2} = 'unif';
startprob{2} = 'rnd';
transprob{2} = 'rnd';
termprob{2} = 'rnd';

leftright = 0;
if leftright
  % Initialise base-level models as left-right.
  % If we initialise with delta functions,
  % they will remain delta functions after learning.
  startprob{3} = 'leftstart';
  transprob{3} = 'leftright';
  termprob{3} = 'rightstop';
else
  % If we want to be able to run a base-level model backwards...
  startprob{3} = 'rnd';
  transprob{3} = 'rnd';
  termprob{3} = 'rnd';
end

if discrete_obs
  % Initialise observations of lowest level primitives in a way which we can interpret.
  % NOTE: this branch shadows the depth variable D (D=3 above) with the symbol
  % index find(chars=='D'); D is not used as the depth after this point.
  chars = ['L', 'l', 'U', 'u', 'R', 'r', 'D', 'd'];
  L=find(chars=='L'); l=find(chars=='l');
  U=find(chars=='U'); u=find(chars=='u');
  R=find(chars=='R'); r=find(chars=='r');
  D=find(chars=='D'); d=find(chars=='d');
  Osize = length(chars);

  p = 0.9;
  obsprob = (1-p)*ones([4 2 Osize]);
  %       Q2 Q3 O
  obsprob(1, 1, L) = p;
  obsprob(1, 2, l) = p;
  obsprob(2, 1, U) = p;
  obsprob(2, 2, u) = p;
  obsprob(3, 1, R) = p;
  obsprob(3, 2, r) = p;
  obsprob(4, 1, D) = p;
  obsprob(4, 2, d) = p;
  obsprob = mk_stochastic(obsprob);
  Oargs = {'CPT', obsprob};
else
  % Initialise means of lowest level primitives in a way which we can interpret.
  % These means are little vectors in the east, south, west, north directions.
  % (left-right=east, up-down=south, right-left=west, down-up=north)
  Osize = 2;
  mu = zeros(2, Qsizes(2), Qsizes(3));
  noise = 0;
  scale = 3;
  for q3=1:Qsizes(3)
    mu(:, 1, q3) = scale*[1;0] + noise*rand(2,1);
  end
  for q3=1:Qsizes(3)
    mu(:, 2, q3) = scale*[0;-1] + noise*rand(2,1);
  end
  for q3=1:Qsizes(3)
    mu(:, 3, q3) = scale*[-1;0] + noise*rand(2,1);
  end
  for q3=1:Qsizes(3)
    mu(:, 4, q3) = scale*[0;1] + noise*rand(2,1);
  end
  Sigma = repmat(reshape(scale*eye(2), [2 2 1 1]), [1 1 Qsizes(2) Qsizes(3)]);
  Oargs = {'mean', mu, 'cov', Sigma, 'cov_type', 'diag'};
end

% was 'Osize', Osize' (stray transpose on a scalar) - fixed
bnet = mk_hhmm('Qsizes', Qsizes, 'Osize', Osize, 'discrete_obs', discrete_obs,...
	       'Oargs', Oargs, 'Ops', Qnodes(2:3), ...
	       'startprob', startprob, 'transprob', transprob, 'termprob', termprob);

if supervised
  bnet.observed = [Q1 Q2 Onode];
else
  bnet.observed = [Onode];
end

if obs_finalF2
  engine = jtree_dbn_inf_engine(bnet);
  % can't use ndx version because sometimes F2 is hidden, sometimes observed
  error('can''t observe F when learning')
else
  if supervised
    engine = jtree_ndx_dbn_inf_engine(bnet);
  else
    engine = jtree_hmm_inf_engine(bnet);
  end
end

if discrete_obs
  % Generate some synthetic data (easier to debug).
  cases = {};

  T = 8;
  ev = cell(ss, T);
  ev(Onode,:) = num2cell([L l U u R r D d]);
  if supervised
    ev(Q1,:) = num2cell(1*ones(1,T));
    ev(Q2,:) = num2cell([1 1 2 2 3 3 4 4]);
  end
  cases{1} = ev;
  cases{3} = ev;

  T = 8;
  ev = cell(ss, T);
  if leftright % base model is left-right
    ev(Onode,:) = num2cell([R r U u L l D d]);
  else
    ev(Onode,:) = num2cell([r R u U l L d D]);
  end
  if supervised
    ev(Q1,:) = num2cell(2*ones(1,T));
    ev(Q2,:) = num2cell([3 3 2 2 1 1 4 4]);
  end
  cases{2} = ev;
  cases{4} = ev;

  if obs_finalF2
    for i=1:length(cases)
      T = size(cases{i},2);
      cases{i}(F2,T) = {2}; % force F2 to be finished at end of seq
    end
  end

  if 0
    ev = cases{4};
    engine2 = enter_evidence(engine2, ev);
    T = size(ev,2);
    for t=1:T
      m = marginal_family(engine2, F2, t);
      fprintf('t=%d\n', t);
      reshape(m.T, [2 2])
    end
  end

  %[bnet2, LL] = learn_params_dbn_em(engine, cases, 'max_iter', 10);
  long_seq = cat(2, cases{:});
  [bnet2, LL, engine2] = learn_params_dbn_em(engine, {long_seq}, 'max_iter', 200);

  % Figure out which subsequence each model is responsible for.
  mpe = calc_mpe_dbn(engine2, long_seq);
  pretty_print_hhmm_parse(mpe, Qnodes, Fnodes, Onode, chars);

else
  load 'square4_cases' % cases{seq}{i,t} for i=1:ss
  %plot_square_hhmm(cases{1})
  %long_seq = cat(2, cases{:});
  train_cases = cases(1:2);
  long_seq = cat(2, train_cases{:});
  if ~supervised
    T = size(long_seq,2);
    for t=1:T
      long_seq{Q1,t} = [];
      long_seq{Q2,t} = [];
    end
  end
  [bnet2, LL, engine2] = learn_params_dbn_em(engine, {long_seq}, 'max_iter', 100);

  % BUGFIX: eclass must be assigned before it is used to index into bnet2.CPD;
  % previously it was only set near the end of the script, so this branch
  % failed (or used a stale workspace value) on a fresh run.
  eclass = bnet2.equiv_class;
  CPDO = struct(bnet2.CPD{eclass(Onode,1)});
  mu = CPDO.mean;
  Sigma = CPDO.cov;
  CPDO_full = CPDO;

  % Force diagonal covs after training.
  for k=1:size(Sigma,3)
    Sigma(:,:,k) = diag(diag(Sigma(:,:,k)));
  end
  % BUGFIX: read the CPD from the *trained* bnet2, not the untrained bnet
  % (the old code discarded the learned means), and use eclass(Onode,1)
  % instead of the hard-coded index 6.
  bnet2.CPD{eclass(Onode,1)} = set_fields(bnet2.CPD{eclass(Onode,1)}, 'cov', Sigma);

  if 0
    % Visualize each model by concatenating means for each model for nsteps in a row.
    nsteps = 5;
    ev = cell(ss, nsteps*prod(Qsizes(2:3)));
    t = 1;
    for q2=1:Qsizes(2)
      for q3=1:Qsizes(3)
        for i=1:nsteps
          ev{Onode,t} = mu(:,q2,q3);
          ev{Q2,t} = q2;
          t = t + 1;
        end
      end
    end
    plot_square_hhmm(ev)
  end

  % bnet3 is the same as the learned model, except we will use it in testing mode.
  if supervised
    bnet3 = bnet2;
    bnet3.observed = [Onode];
    engine3 = hmm_inf_engine(bnet3);
    %engine3 = jtree_ndx_dbn_inf_engine(bnet3);
  else
    bnet3 = bnet2;
    engine3 = engine2;
  end

  if 0
    % Segment whole sequence.
    mpe = calc_mpe_dbn(engine3, long_seq);
    pretty_print_hhmm_parse(mpe, Qnodes, Fnodes, Onode, []);
  end

  % Segment each test sequence and classify it by the most common Q1 value.
  test_cases = cases(3:4);
  for i=1:2
    ev = test_cases{i};
    T = size(ev, 2);
    for t=1:T
      ev{Q1,t} = [];
      ev{Q2,t} = [];
    end
    mpe = calc_mpe_dbn(engine3, ev);
    subplot(1,2,i)
    plot_square_hhmm(mpe)
    %pretty_print_hhmm_parse(mpe, Qnodes, Fnodes, Onode, []);
    q1s = cell2num(mpe(Q1,:));
    h = hist(q1s, 1:Qsizes(1));
    map_q1 = argmax(h);
    str = sprintf('test seq %d is of type %d\n', i, map_q1);
    title(str)
  end

end

if 0
  % Estimate gotten by counting transitions in the labelled data.
  % Note that a self transition shouldn't count if F2=off.
  Q2ev = cell2num(ev(Q2,:));
  Q2a = Q2ev(1:end-1);
  Q2b = Q2ev(2:end);
  counts = compute_counts([Q2a; Q2b], [4 4]);
end

% Inspect the learned parameters.
eclass = bnet2.equiv_class;
CPDQ1 = struct(bnet2.CPD{eclass(Q1,2)});
CPDQ2 = struct(bnet2.CPD{eclass(Q2,2)});
CPDQ3 = struct(bnet2.CPD{eclass(Q3,2)});
CPDF2 = struct(bnet2.CPD{eclass(F2,1)});
CPDF3 = struct(bnet2.CPD{eclass(F3,1)});

A = add_hhmm_end_state(CPDQ2.transprob, CPDF2.termprob(:,:,2));
squeeze(A(:,1,:))
squeeze(A(:,2,:))
CPDQ2.startprob

if 0
  S = struct(CPDF2.sub_CPD_term);
  S.nsamples
  reshape(S.counts, [2 4 2])
end