Daniel@0: % Fit a piece-wise linear regression model.
Daniel@0: % Here is the model
Daniel@0: %
Daniel@0: %  X \
Daniel@0: %  | |
Daniel@0: %  Q |
Daniel@0: %  | /
Daniel@0: %  Y
Daniel@0: %
Daniel@0: % where all arcs point down.
Daniel@0: % We condition everything on X, so X is a root node. Q is a softmax, and Y is a linear Gaussian.
Daniel@0: % Q is hidden, X and Y are observed.
Daniel@0: 
Daniel@0: % Node indices; parents are numbered before their children.
Daniel@0: X = 1;
Daniel@0: Q = 2;
Daniel@0: Y = 3;
Daniel@0: 
Daniel@0: % Adjacency matrix of the graph: a 1 at (i,j) is the arc i -> j.
Daniel@0: dag = zeros(3);
Daniel@0: dag(X,Q) = 1;
Daniel@0: dag(X,Y) = 1;
Daniel@0: dag(Q,Y) = 1;
Daniel@0: 
Daniel@0: % Node sizes: X and Y are scalars, Q selects one of 2 experts.
Daniel@0: ns = [1 2 1];
Daniel@0: dnodes = Q;      % Q is the only discrete node
Daniel@0: onodes = [X Y];  % X and Y are observed, Q is hidden
Daniel@0: bnet = mk_bnet(dag, ns, 'discrete', dnodes, 'observed', onodes);
Daniel@0: 
Daniel@0: 
Daniel@0: % Gating (softmax) parameters for Q given X.
Daniel@0: w = [-5 5];  % w(:,i) is the normal vector to the i'th decision boundary
Daniel@0: b = [0 0];  % b(i) is the offset (bias) to the i'th decision boundary
Daniel@0: 
Daniel@0: % Expert (linear Gaussian) parameters for Y given X and Q.
Daniel@0: mu = [0 0];  % mu(:,i) is the offset of the i'th expert
Daniel@0: sigma = 1;  % standard deviation of the output noise, shared by all experts
Daniel@0: % One ns(Y)-by-ns(Y) covariance matrix per expert, stacked along dimension 3.
Daniel@0: % Tile only along the third dimension: tiling by [ns(Y) ns(Y) ns(Q)] would
Daniel@0: % give ns(Y)^2-by-ns(Y)^2 pages as soon as ns(Y) > 1. The covariance is the
Daniel@0: % variance sigma^2, so that sigma is a standard deviation, which is how the
Daniel@0: % Gaussian density in the inference check of this script uses it.
Daniel@0: Sigma = repmat(sigma^2*eye(ns(Y)), [1 1 ns(Q)]);
Daniel@0: W = [-1 1];  % W(:,i) is the regression weight of the i'th expert
Daniel@0: W2 = reshape(W, [ns(Y) ns(X) ns(Q)]);
Daniel@0: 
Daniel@0: bnet.CPD{X} = root_CPD(bnet, X);
Daniel@0: bnet.CPD{Q} = softmax_CPD(bnet, Q, w, b);
Daniel@0: bnet.CPD{Y} = gaussian_CPD(bnet, Y, 'mean', mu, 'cov', Sigma, 'weights', W2);
Daniel@0: 
Daniel@0: 
Daniel@0: 
Daniel@0: % Check inference: the posterior over Q and the log-likelihood returned by
Daniel@0: % the junction tree engine must match a direct evaluation of the
Daniel@0: % mixture-of-experts formulas.
Daniel@0: 
Daniel@0: x = 0.1;
Daniel@0: ystar = 1;
Daniel@0: 
Daniel@0: % Input augmented with a leading 1 so the bias terms fold into the products.
Daniel@0: x1 = [1 x].';
Daniel@0: 
Daniel@0: % theta(i,:) = regression vector for expert i
Daniel@0: theta = [mu.' W.'];
Daniel@0: 
Daniel@0: % yhat(i) = E[y | Q=i, x] = prediction of i'th expert
Daniel@0: yhat = theta * x1;
Daniel@0: 
Daniel@0: % eta(i,:) = softmax (gating) params for expert i
Daniel@0: eta = [b.' w.'];
Daniel@0: 
Daniel@0: % gate_prior(i,:) = Pr(Q=i | x)
Daniel@0: gate_act = eta * x1;
Daniel@0: gate_prior = normalise(exp(gate_act));
Daniel@0: 
Daniel@0: % cond_lik(i) = Pr(y | Q=i, x), a scalar Gaussian density in which sigma
Daniel@0: % enters as the standard deviation
Daniel@0: resid = ystar - yhat;
Daniel@0: gauss_norm = 1/(sqrt(2*pi)*sigma);
Daniel@0: cond_lik = gauss_norm * exp(-(0.5/sigma^2) * (resid .* resid));
Daniel@0: 
Daniel@0: % gate_posterior(i,:) = Pr(Q=i | x, y); lik is the second output of normalise
Daniel@0: [gate_posterior, lik] = normalise(gate_prior .* cond_lik);
Daniel@0: 
Daniel@0: % Same quantities from the junction tree engine, with X and Y observed and
Daniel@0: % Q left empty (hidden).
Daniel@0: evidence = {x, [], ystar};
Daniel@0: engine = jtree_inf_engine(bnet);
Daniel@0: [engine, loglik] = enter_evidence(engine, evidence);
Daniel@0: Qpost = marginal_nodes(engine, 2);
Daniel@0: 
Daniel@0: assert(approxeq(gate_posterior(:), Qpost.T(:)));
Daniel@0: assert(approxeq(log(lik), loglik));
Daniel@0: 
Daniel@0: