Mercurial > hg > camir-aes2014
diff toolboxes/FullBNT-1.0.7/HMM/dhmm_em_online_demo.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
% Source: b/toolboxes/FullBNT-1.0.7/HMM/dhmm_em_online_demo.m
% (new file in this changeset: --- /dev/null, hunk @@ -0,0 +1,93 @@)
%
% Example of online EM applied to a simple POMDP with fixed action seq
%
% The script
%   1. builds a small generating model (S hidden states, O observation
%      symbols, A actions, one transition matrix per action),
%   2. samples an observation sequence from it under a fixed action sequence,
%   3. starts from a random parameter guess and updates it one observation at
%      a time with dhmm_em_online over a sliding window of w slices,
%   4. plots the per-slice log-likelihood reported by the online learner.
% A disabled section at the end runs batch EM (dhmm_em) for comparison.

clear all

% Create a really easy model to learn
rand('state', 1);
O = 2;   % number of observation symbols
S = 2;   % number of hidden states
A = 2;   % number of actions
prior0 = [1 0]';
transmat0 = cell(1, A);
transmat0{1} = [0.9 0.1; 0.1 0.9]; % long runs of 1s and 2s
transmat0{2} = [0.1 0.9; 0.9 0.1]; % short runs
obsmat0 = eye(2);

%prior0 = normalise(rand(S,1));
%transmat0 = mk_stochastic(rand(S,S));
%obsmat0 = mk_stochastic(rand(S,O));

% Only the first T observations are consumed by the online loop below, even
% though the action sequence (and hence the sampled data) has 100 entries.
T = 10;
act = [1*ones(1,25) 2*ones(1,25) 1*ones(1,25) 2*ones(1,25)];
data = pomdp_sample(prior0, transmat0, obsmat0, act);
%data = sample_dhmm(prior0, transmat0, obsmat0, T, 1);

% Initial guess of params
rand('state', 2); % different seed!
transmat1 = cell(1, A);
for a = 1:A
    transmat1{a} = mk_stochastic(rand(S, S));
end
obsmat1 = mk_stochastic(rand(S, O));
prior1 = prior0; % so it labels states the same way

% Uninformative Dirichlet prior (expected sufficient statistics / pseudo counts)
e = 0.001;
ess_trans = cell(1, A);
for a = 1:A
    ess_trans{a} = repmat(e, S, S);
end
ess_emit = repmat(e, S, O);

% Params
w = 2;                      % sliding-window length, in slices
decay_sched = 0.1:0.1:0.9;  % decay value passed to dhmm_em_online, by step

% Initialize
LL1 = zeros(1, T);
t = 1;
y = data(t);
data_win = y;
act_win = 1; % arbitrary initial value
% NOTE(review): if normalise returns the normalising constant (rather than its
% log) as second output, LL1(1) is a likelihood here, not a log-likelihood.
% Either way the value is overwritten after the loop.
[prior1, LL1(1)] = normalise(prior1 .* obsmat1(:, y));

% Iterate
for t = 2:T
    y = data(t);
    a = act(t);
    if t <= w
        % Window not yet full: grow it.
        data_win = [data_win y];
        act_win = [act_win a];
    else
        % Window full: drop the oldest slice and append the newest.
        data_win = [data_win(2:end) y];
        act_win = [act_win(2:end) a];
        % The new first slice was the second slice of the previous window, so
        % reuse the previous call's second gamma column as the prior
        % (presumably gamma columns align with window slices -- confirm
        % against dhmm_em_online).
        prior1 = gamma(:, 2);
    end
    % Walk along the schedule, then hold its last value once t runs past it.
    d = decay_sched(min(t, length(decay_sched)));
    [transmat1, obsmat1, ess_trans, ess_emit, gamma, ll] = dhmm_em_online(...
        prior1, transmat1, obsmat1, ess_trans, ess_emit, d, data_win, act_win);
    bel = gamma(:, end); % last gamma column; not used further in this demo
    LL1(t) = ll / length(data_win); % per-slice value for the current window
    %fprintf('t=%d, ll=%f\n', t, ll);
end

LL1(1) = LL1(2); % since initial likelihood is for 1 slice
plot(1:T, LL1, 'rx-');


% compare with offline learning

if 0 % disabled; change to 1 to run the batch-EM comparison
    rand('state', 2); % same seed as online learner
    transmat2 = cell(1, A);
    for a = 1:A
        transmat2{a} = mk_stochastic(rand(S, S));
    end
    obsmat2 = mk_stochastic(rand(S, O));
    prior2 = prior0;
    [LL2, prior2, transmat2, obsmat2] = dhmm_em(data, prior2, transmat2, obsmat2, ...
        'max_iter', 10, 'thresh', 1e-3, 'verbose', 1, 'act', act);

    % dhmm_em is given the whole of data, so normalise by its length (not by
    % T, which only bounds the online loop) to get a per-slice value that is
    % comparable with LL1. No trailing semicolon: the result is echoed.
    LL2 = LL2 / length(data)

end