wolffd@0: function [loglik, gamma] = fhmm_infer(inter, CPTs_slice1, CPTs, obsmat, node_sizes) wolffd@0: % FHMM_INFER Exact inference for a factorial HMM. wolffd@0: % [loglik, gamma] = fhmm_infer(inter, CPTs_slice1, CPTs, obsmat, node_sizes) wolffd@0: % wolffd@0: % Inputs: wolffd@0: % inter - the inter-slice adjacency matrix wolffd@0: % CPTs_slice1{s}(j) = Pr(Q(s,1) = j) where Q(s,t) = hidden node s in slice t wolffd@0: % CPT{s}(i1, i2, ..., j) = Pr(Q(s,t) = j | Pa(s,t-1) = i1, i2, ...), wolffd@0: % obsmat(i,t) = Pr(y(t) | Q(t)=i) wolffd@0: % node_sizes is a vector with the cardinality of the hidden nodes wolffd@0: % wolffd@0: % Outputs: wolffd@0: % gamma(i,t) = Pr(X(t)=i | O(1:T)) as in an HMM, wolffd@0: % except that i is interpreted as an M digit, base-K number (if there are M chains each of cardinality K). wolffd@0: % wolffd@0: % wolffd@0: % For M chains each of cardinality K, the frontiers (i.e., cliques) wolffd@0: % contain M+1 nodes, and it takes M steps to advance the frontier by one time step, wolffd@0: % so the run time is O(T M K^(M+1)). wolffd@0: % An HMM takes O(T S^2) where S is the size of the state space. wolffd@0: % Collapsing the FHMM to an HMM results in S = K^M. wolffd@0: % For details, see wolffd@0: % "The Factored Frontier Algorithm for Approximate Inference in DBNs", wolffd@0: % Kevin Murphy and Yair Weiss, submitted to NIPS 2000. wolffd@0: % wolffd@0: % The frontier algorithm makes the following topological assumptions: wolffd@0: % wolffd@0: % - All nodes are persistent (connect to the next slice) wolffd@0: % - No connections within a timeslice wolffd@0: % - There is a single observation variable, which depends on all the hidden nodes wolffd@0: % - Each node can have several parents in the previous time slice (generalizes a FHMM slightly) wolffd@0: % wolffd@0: wolffd@0: % The forwards pass of the frontier algorithm can be explained with the following example. wolffd@0: % Suppose we have 3 hidden nodes per slice, A, B, C. wolffd@0: % The goal is to compute alpha(j, t) = Pr( (A_t,B_t,C_t)=j | Y(1:t)) wolffd@0: % We move alpha from t to t+1 one node at a time, as follows. wolffd@0: % We define the following quantities: wolffd@0: % s([a1 b1 c1], 1) = Prob(A(t)=a1, B(t)=b1, C(t)=c1 | Y(1:t)) = alpha(j, t) wolffd@0: % s([a2 b1 c1], 2) = Prob(A(t+1)=a2, B(t)=b1, C(t)=c1 | Y(1:t)) wolffd@0: % s([a2 b2 c1], 3) = Prob(A(t+1)=a2, B(t+1)=b2, C(t)=c1 | Y(1:t)) wolffd@0: % s([a2 b2 c2], 4) = Prob(A(t+1)=a2, B(t+1)=b2, C(t+1)=c2 | Y(1:t)) wolffd@0: % s([a2 b2 c2], 5) = Prob(A(t+1)=a2, B(t+1)=b2, C(t+1)=c2 | Y(1:t+1)) = alpha(j, t+1) wolffd@0: % wolffd@0: % These can be computed recursively as follows: wolffd@0: % wolffd@0: % s([a2 b1 c1], 2) = sum_{a1} P(a2|a1) s([a1 b1 c1], 1) wolffd@0: % s([a2 b2 c1], 3) = sum_{b1} P(b2|b1) s([a2 b1 c1], 2) wolffd@0: % s([a2 b2 c2], 4) = sum_{c1} P(c2|c1) s([a2 b2 c1], 1) wolffd@0: % s([a2 b2 c2], 5) = normalise( s([a2 b2 c2], 4) .* P(Y(t+1)|a2,b2,c2) wolffd@0: wolffd@0: wolffd@0: [kk,ll,mm] = make_frontier_indices(inter, node_sizes); % can pass in as args wolffd@0: wolffd@0: scaled = 1; wolffd@0: wolffd@0: M = length(node_sizes); wolffd@0: S = prod(node_sizes); wolffd@0: T = size(obsmat, 2); wolffd@0: wolffd@0: alpha = zeros(S, T); wolffd@0: beta = zeros(S, T); wolffd@0: gamma = zeros(S, T); wolffd@0: scale = zeros(1,T); wolffd@0: tiny = exp(-700); wolffd@0: wolffd@0: wolffd@0: alpha(:,1) = make_prior_from_CPTs(CPTs_slice1, node_sizes); wolffd@0: alpha(:,1) = alpha(:,1) .* obsmat(:, 1); wolffd@0: wolffd@0: if scaled wolffd@0: s = sum(alpha(:,1)); wolffd@0: if s==0, s = s + tiny; end wolffd@0: scale(1) = 1/s; wolffd@0: else wolffd@0: scale(1) = 1; wolffd@0: end wolffd@0: alpha(:,1) = alpha(:,1) * scale(1); wolffd@0: wolffd@0: %a = zeros(S, M+1); wolffd@0: %b = zeros(S, M+1); wolffd@0: anew = zeros(S,1); wolffd@0: aold = zeros(S,1); wolffd@0: bnew = zeros(S,1); wolffd@0: bold = zeros(S,1); wolffd@0: wolffd@0: for t=2:T wolffd@0: %a(:,1) = alpha(:,t-1); wolffd@0: aold = alpha(:,t-1); wolffd@0: wolffd@0: c = 1; wolffd@0: for i=1:M wolffd@0: ns = node_sizes(i); wolffd@0: cpt = CPTs{i}; wolffd@0: for j=1:S wolffd@0: s = 0; wolffd@0: for xx=1:ns wolffd@0: %k = kk(xx,j,i); wolffd@0: %l = ll(xx,j,i); wolffd@0: k = kk(c); wolffd@0: l = ll(c); wolffd@0: c = c + 1; wolffd@0: % s = s + a(k,i) * CPTs{i}(l); wolffd@0: s = s + aold(k) * cpt(l); wolffd@0: end wolffd@0: %a(j,i+1) = s; wolffd@0: anew(j) = s; wolffd@0: end wolffd@0: aold = anew; wolffd@0: end wolffd@0: wolffd@0: %alpha(:,t) = a(:,M+1) .* obsmat(:, obs(t)); wolffd@0: alpha(:,t) = anew .* obsmat(:, t); wolffd@0: wolffd@0: if scaled wolffd@0: s = sum(alpha(:,t)); wolffd@0: if s==0, s = s + tiny; end wolffd@0: scale(t) = 1/s; wolffd@0: else wolffd@0: scale(t) = 1; wolffd@0: end wolffd@0: alpha(:,t) = alpha(:,t) * scale(t); wolffd@0: wolffd@0: end wolffd@0: wolffd@0: wolffd@0: beta(:,T) = ones(S,1) * scale(T); wolffd@0: for t=T-1:-1:1 wolffd@0: %b(:,1) = beta(:,t+1) .* obsmat(:, obs(t+1)); wolffd@0: bold = beta(:,t+1) .* obsmat(:, t+1); wolffd@0: wolffd@0: c = 1; wolffd@0: for i=1:M wolffd@0: ns = node_sizes(i); wolffd@0: cpt = CPTs{i}; wolffd@0: for j=1:S wolffd@0: s = 0; wolffd@0: for xx=1:ns wolffd@0: %k = kk(xx,j,i); wolffd@0: %m = mm(xx,j,i); wolffd@0: k = kk(c); wolffd@0: m = mm(c); wolffd@0: c = c + 1; wolffd@0: % s = s + b(k,i) * CPTs{i}(m); wolffd@0: s = s + bold(k) * cpt(m); wolffd@0: end wolffd@0: %b(j,i+1) = s; wolffd@0: bnew(j) = s; wolffd@0: end wolffd@0: bold = bnew; wolffd@0: end wolffd@0: % beta(:,t) = b(:,M+1) * scale(t); wolffd@0: beta(:,t) = bnew * scale(t); wolffd@0: end wolffd@0: wolffd@0: wolffd@0: if scaled wolffd@0: loglik = -sum(log(scale)); % scale(i) is finite wolffd@0: else wolffd@0: lik = alpha(:,1)' * beta(:,1); wolffd@0: loglik = log(lik+tiny); wolffd@0: end wolffd@0: wolffd@0: for t=1:T wolffd@0: gamma(:,t) = normalise(alpha(:,t) .* beta(:,t)); wolffd@0: end wolffd@0: wolffd@0: %%%%%%%%%%% wolffd@0: wolffd@0: function [kk,ll,mm] = make_frontier_indices(inter, node_sizes) wolffd@0: % wolffd@0: % Precompute indices for use in the frontier algorithm. wolffd@0: % These only depend on the topology, not the parameters or data. wolffd@0: % Hence we can compute them outside of fhmm_infer. wolffd@0: % This saves a lot of run-time computation. wolffd@0: wolffd@0: M = length(node_sizes); wolffd@0: S = prod(node_sizes); wolffd@0: wolffd@0: mns = max(node_sizes); wolffd@0: kk = zeros(mns, S, M); wolffd@0: ll = zeros(mns, S, M); wolffd@0: mm = zeros(mns, S, M); wolffd@0: wolffd@0: for i=1:M wolffd@0: for j=1:S wolffd@0: u = ind2subv(node_sizes, j); wolffd@0: x = u(i); wolffd@0: for xx=1:node_sizes(i) wolffd@0: uu = u; wolffd@0: uu(i) = xx; wolffd@0: k = subv2ind(node_sizes, uu); wolffd@0: kk(xx,j,i) = k; wolffd@0: ps = find(inter(:,i)==1); wolffd@0: ps = ps(:)'; wolffd@0: l = subv2ind(node_sizes([ps i]), [uu(ps) x]); % sum over parent wolffd@0: ll(xx,j,i) = l; wolffd@0: m = subv2ind(node_sizes([ps i]), [u(ps) xx]); % sum over child wolffd@0: mm(xx,j,i) = m; wolffd@0: end wolffd@0: end wolffd@0: end wolffd@0: wolffd@0: %%%%%%%%% wolffd@0: wolffd@0: function prior=make_prior_from_CPTs(indiv_priors, node_sizes) wolffd@0: % wolffd@0: % composite_prior=make_prior(individual_priors, node_sizes) wolffd@0: % Make the prior for the first node in a Markov chain wolffd@0: % from the priors on each node in the equivalent DBN. wolffd@0: % prior{i}(j) = Pr(X_i=j), where X_i is the i'th node in slice 1. wolffd@0: % composite_prior(i) = Pr(slice1 = i). wolffd@0: wolffd@0: n = length(indiv_priors); wolffd@0: S = prod(node_sizes); wolffd@0: prior = zeros(S,1); wolffd@0: for i=1:S wolffd@0: vi = ind2subv(node_sizes, i); wolffd@0: p = 1; wolffd@0: for k=1:n wolffd@0: p = p * indiv_priors{k}(vi(k)); wolffd@0: end wolffd@0: prior(i) = p; wolffd@0: end wolffd@0: wolffd@0: wolffd@0: wolffd@0: %%%%%%%%%%% wolffd@0: wolffd@0: function [loglik, alpha, beta] = FHMM_slow(inter, CPTs_slice1, CPTs, obsmat, node_sizes, data) wolffd@0: % wolffd@0: % Same as the above, except we don't use the optimization of computing the indices outside the loop. wolffd@0: wolffd@0: wolffd@0: scaled = 1; wolffd@0: wolffd@0: M = length(node_sizes); wolffd@0: S = prod(node_sizes); wolffd@0: [numex T] = size(data); wolffd@0: wolffd@0: obs = data; wolffd@0: wolffd@0: alpha = zeros(S, T); wolffd@0: beta = zeros(S, T); wolffd@0: a = zeros(S, M+1); wolffd@0: b = zeros(S, M+1); wolffd@0: scale = zeros(1,T); wolffd@0: wolffd@0: alpha(:,1) = make_prior_from_CPTs(CPTs_slice1, node_sizes); wolffd@0: alpha(:,1) = alpha(:,1) .* obsmat(:, obs(1)); wolffd@0: if scaled wolffd@0: s = sum(alpha(:,1)); wolffd@0: if s==0, s = s + tiny; end wolffd@0: scale(1) = 1/s; wolffd@0: else wolffd@0: scale(1) = 1; wolffd@0: end wolffd@0: alpha(:,1) = alpha(:,1) * scale(1); wolffd@0: wolffd@0: for t=2:T wolffd@0: fprintf(1, 't %d\n', t); wolffd@0: a(:,1) = alpha(:,t-1); wolffd@0: for i=1:M wolffd@0: for j=1:S wolffd@0: u = ind2subv(node_sizes, j); wolffd@0: xnew = u(i); wolffd@0: s = 0; wolffd@0: for xold=1:node_sizes(i) wolffd@0: uold = u; wolffd@0: uold(i) = xold; wolffd@0: k = subv2ind(node_sizes, uold); wolffd@0: ps = find(inter(:,i)==1); wolffd@0: ps = ps(:)'; wolffd@0: l = subv2ind(node_sizes([ps i]), [uold(ps) xnew]); wolffd@0: s = s + a(k,i) * CPTs{i}(l); wolffd@0: end wolffd@0: a(j,i+1) = s; wolffd@0: end wolffd@0: end wolffd@0: alpha(:,t) = a(:,M+1) .* obsmat(:, obs(t)); wolffd@0: wolffd@0: if scaled wolffd@0: s = sum(alpha(:,t)); wolffd@0: if s==0, s = s + tiny; end wolffd@0: scale(t) = 1/s; wolffd@0: else wolffd@0: scale(t) = 1; wolffd@0: end wolffd@0: alpha(:,t) = alpha(:,t) * scale(t); wolffd@0: wolffd@0: end wolffd@0: wolffd@0: wolffd@0: beta(:,T) = ones(S,1) * scale(T); wolffd@0: for t=T-1:-1:1 wolffd@0: fprintf(1, 't %d\n', t); wolffd@0: b(:,1) = beta(:,t+1) .* obsmat(:, obs(t+1)); wolffd@0: for i=1:M wolffd@0: for j=1:S wolffd@0: u = ind2subv(node_sizes, j); wolffd@0: xold = u(i); wolffd@0: s = 0; wolffd@0: for xnew=1:node_sizes(i) wolffd@0: unew = u; wolffd@0: unew(i) = xnew; wolffd@0: k = subv2ind(node_sizes, unew); wolffd@0: ps = find(inter(:,i)==1); wolffd@0: ps = ps(:)'; wolffd@0: l = subv2ind(node_sizes([ps i]), [u(ps) xnew]); wolffd@0: s = s + b(k,i) * CPTs{i}(l); wolffd@0: end wolffd@0: b(j,i+1) = s; wolffd@0: end wolffd@0: end wolffd@0: beta(:,t) = b(:,M+1) * scale(t); wolffd@0: end wolffd@0: wolffd@0: wolffd@0: if scaled wolffd@0: loglik = -sum(log(scale)); % scale(i) is finite wolffd@0: else wolffd@0: lik = alpha(:,1)' * beta(:,1); wolffd@0: loglik = log(lik+tiny); wolffd@0: end