wolffd@0: function LL = dirichlet_score_family(counts, prior)
wolffd@0: % DIRICHLET_SCORE_FAMILY Compute the log marginal likelihood of a single family
wolffd@0: % LL = dirichlet_score_family(counts, prior)
wolffd@0: %
wolffd@0: % counts(a, b, ..., z) is the number of times parent 1 = a, parent 2 = b, ..., child = z
wolffd@0: % prior is an optional multidimensional array of the same shape as counts,
wolffd@0: % holding the Dirichlet hyperparameters alpha_ijk.
wolffd@0: % It defaults to a uniform prior, built as normalise(myones(mysize(counts))).
wolffd@0: %
wolffd@0: % LL is a scalar: the log of the marginal likelihood of the counts.
wolffd@0: %
wolffd@0: % We marginalize out the parameters:
wolffd@0: % LL = log \int \prod_m P(x(i,m) | x(Pa_i,m), theta_i) P(theta_i) d(theta_i)
wolffd@0: 
wolffd@0: % Closed form (j indexes parent configurations, k indexes child values):
wolffd@0: % LL = log prod_j [ gamma(alpha_ij)/gamma(alpha_ij + N_ij)  *
wolffd@0: %                   prod_k gamma(alpha_ijk + N_ijk)/gamma(alpha_ijk) ]
wolffd@0: % Call the prod_k term U and the gamma(alpha_ij)/gamma(alpha_ij + N_ij) term V.
wolffd@0: % We reshape all quantities into (j,k) matrices and work in the log domain
wolffd@0: % (gammaln) so that large counts do not overflow gamma().
wolffd@0: % This formula was first derived by Cooper and Herskovits, 1992.
wolffd@0: % See also "Learning Bayesian Networks", Heckerman, Geiger and Chickering, MLJ 95.
wolffd@0: 
wolffd@0: ns = mysize(counts);
wolffd@0: ns_ps = ns(1:end-1);   % sizes of the parent dimensions (empty if there are none)
wolffd@0: ns_self = ns(end);     % size of the child dimension (last dimension of counts)
wolffd@0: 
wolffd@0: if nargin < 2, prior = normalise(myones(ns)); end
wolffd@0: 
wolffd@0: % Flatten to one row per parent configuration j and one column per child value k.
wolffd@0: % prod(ns_ps) is 1 when ns_ps is empty, giving a single row.
wolffd@0: jk_size = [prod(ns_ps) ns_self];
wolffd@0: prior = reshape(prior(:), jk_size);
wolffd@0: counts = reshape(counts, jk_size);
wolffd@0: 
wolffd@0: % log U_j = sum_k [ gammaln(alpha_ijk + N_ijk) - gammaln(alpha_ijk) ]
wolffd@0: LU = sum(gammaln(prior + counts) - gammaln(prior), 2);
wolffd@0: 
wolffd@0: % Row totals: alpha_ij = sum_k alpha_ijk, N_ij = sum_k N_ijk
wolffd@0: alpha_ij = sum(prior, 2);
wolffd@0: N_ij = sum(counts, 2);
wolffd@0: 
wolffd@0: % log V_j = gammaln(alpha_ij) - gammaln(alpha_ij + N_ij)
wolffd@0: LV = gammaln(alpha_ij) - gammaln(alpha_ij + N_ij);
wolffd@0: 
wolffd@0: % The product over j becomes a sum of logs.
wolffd@0: LL = sum(LU + LV);
wolffd@0: