function LL = dirichlet_score_family(counts, prior)
% DIRICHLET_SCORE_FAMILY Compute the log marginal likelihood of a single family
% LL = dirichlet_score_family(counts, prior)
%
% counts(a, b, ..., z) is the number of times parent 1 = a, parent 2 = b, ..., child = z
% prior is an optional multidimensional array of the same shape as counts.
% If it is omitted or passed as [], it defaults to normalise(myones(size of counts)),
% i.e. a uniform prior.
%
% We marginalize out the parameters:
% LL = log \int \prod_m P(x(i,m) | x(Pa_i,m), theta_i) P(theta_i) d(theta_i)
%
% LL = log[ prod_j gamma(alpha_ij)/gamma(alpha_ij + N_ij) *
%           prod_k gamma(alpha_ijk + N_ijk)/gamma(alpha_ijk) ]
% Call the prod_k term U and the prod_j term V.
% We reshape all quantities into (j,k) matrices, where j indexes the joint
% parent configuration and k indexes the child value.
% This formula was first derived by Cooper and Herskovits, 1992.
% See also "Learning Bayesian Networks", Heckerman, Geiger and Chickering, MLJ 95.

ns = mysize(counts);
ns_ps = ns(1:end-1);   % sizes of the parent dimensions (empty if there are no parents)
ns_self = ns(end);     % size of the child dimension

if nargin < 2 || isempty(prior)
  prior = normalise(myones(ns));
end

% Flatten all parent dimensions into a single row index j; columns are child values k.
% prod([]) == 1, so a family without parents becomes a single row.
num_parent_configs = prod(ns_ps);
prior = reshape(prior(:), [num_parent_configs ns_self]);
counts = reshape(counts, [num_parent_configs ns_self]);

% Everything is done in the log domain with gammaln to avoid overflow in gamma().
% log U_j = sum_k [ log gamma(alpha_ijk + N_ijk) - log gamma(alpha_ijk) ]
LU = sum(gammaln(prior + counts) - gammaln(prior), 2);

alpha_ij = sum(prior, 2);   % sum over k
N_ij = sum(counts, 2);      % sum over k

% log V_j = log gamma(alpha_ij) - log gamma(alpha_ij + N_ij)
LV = gammaln(alpha_ij) - gammaln(alpha_ij + N_ij);

% log prod_j (U_j * V_j)
LL = sum(LU + LV);