wolffd@0: % Factor analysis wolffd@0: % Z -> X, Z in R^k, X in R^D, k << D (high dimensional observations explained by small source) wolffd@0: % Z ~ N(0,I), X|Z ~ N(L z, Psi), where Psi is diagonal. wolffd@0: % wolffd@0: % Mixtures of FA wolffd@0: % Now X|Z,W=i ~ N(mu(i) + L(i) Z, Psi(i)) wolffd@0: % wolffd@0: % We compare to Zoubin Ghahramani's code. wolffd@0: wolffd@0: randn('state', 0); wolffd@0: max_iter = 3; wolffd@0: M = 2; wolffd@0: k = 3; wolffd@0: D = 5; wolffd@0: wolffd@0: n = 5; wolffd@0: X1 = randn(n, D); wolffd@0: X2 = randn(n, D) + 2; % move the mean to (2,2,2...) wolffd@0: X = [X1; X2]; wolffd@0: N = size(X, 1); wolffd@0: wolffd@0: % initialise as in mfa wolffd@0: tiny=exp(-700); wolffd@0: mX = mean(X); wolffd@0: cX=cov(X); wolffd@0: scale=det(cX)^(1/D); wolffd@0: randn('state',0); % must reset seed here so initial params are identical to mfa wolffd@0: L0=randn(D*M,k)*sqrt(scale/k); wolffd@0: W0 = permute(reshape(L0, [D M k]), [1 3 2]); % use D,K,M wolffd@0: Psi0=diag(cX)+tiny; wolffd@0: Pi0=ones(M,1)/M; wolffd@0: Mu0=randn(M,D)*sqrtm(cX)+ones(M,1)*mX; wolffd@0: wolffd@0: [Lh1, Ph1, Mu1, Pi1, LL1] = mfa(X,M,k,max_iter); wolffd@0: Lh1 = permute(reshape(Lh1, [D M k]), [1 3 2]); % use D,K,M wolffd@0: wolffd@0: wolffd@0: ns = [M k D]; wolffd@0: dag = zeros(3); wolffd@0: dag(1,3) = 1; wolffd@0: dag(2,3) = 1; wolffd@0: dnodes = 1; wolffd@0: onodes = 3; wolffd@0: wolffd@0: bnet = mk_bnet(dag, ns, 'discrete', dnodes, 'observed', onodes); wolffd@0: bnet.CPD{1} = tabular_CPD(bnet, 1, Pi0); wolffd@0: wolffd@0: %bnet.CPD{2} = gaussian_CPD(bnet, 2, zeros(k, 1), eye(k), [], 'diag', 'untied', 'clamp_mean', 'clamp_cov'); wolffd@0: wolffd@0: bnet.CPD{2} = gaussian_CPD(bnet, 2, 'mean', zeros(k, 1), 'cov', eye(k), 'cov_type', 'diag', ... wolffd@0: 'cov_prior_weight', 0, 'clamp_mean', 1, 'clamp_cov', 1); wolffd@0: wolffd@0: %bnet.CPD{3} = gaussian_CPD(bnet, 3, Mu0', repmat(diag(Psi0), [1 1 M]), W0, 'diag', 'tied'); wolffd@0: wolffd@0: bnet.CPD{3} = gaussian_CPD(bnet, 3, 'mean', Mu0', 'cov', repmat(diag(Psi0), [1 1 M]), ... wolffd@0: 'weights', W0, 'cov_type', 'diag', 'cov_prior_weight', 0, 'tied_cov', 1); wolffd@0: wolffd@0: engine = jtree_inf_engine(bnet); wolffd@0: evidence = cell(3, N); wolffd@0: evidence(3,:) = num2cell(X', 1); wolffd@0: wolffd@0: [bnet2, LL2, engine2] = learn_params_em(engine, evidence, max_iter); wolffd@0: wolffd@0: s = struct(bnet2.CPD{1}); wolffd@0: Pi2 = s.CPT(:); wolffd@0: s = struct(bnet2.CPD{3}); wolffd@0: Mu2 = s.mean; wolffd@0: W2 = s.weights; wolffd@0: Sigma2 = s.cov; wolffd@0: wolffd@0: wolffd@0: % Compare to Zoubin's code wolffd@0: assert(approxeq(LL1,LL2)); wolffd@0: for i=1:M wolffd@0: assert(approxeq(W2(:,:,i), Lh1(:,:,i))); wolffd@0: assert(approxeq(Sigma2(:,:,i), diag(Ph1))); wolffd@0: assert(approxeq(Mu2(:,i), Mu1(i,:))); wolffd@0: assert(approxeq(Pi2(:), Pi1(:))); wolffd@0: end wolffd@0: