% Check that softmax works with a simple classification demo.
% Based on netlab's demglm2
% X -> Q where X is an input node, and Q is a softmax

rand('state', 0);
randn('state', 0);

% Check inference

input_dim = 2;
num_classes = 3;
IRLS_iter = 3;

net = glm(input_dim, num_classes, 'softmax');

dag = zeros(2);
dag(1,2) = 1;
discrete_nodes = [2];
bnet = mk_bnet(dag, [input_dim num_classes], 'discrete', discrete_nodes, 'observed', 1);
bnet.CPD{1} = root_CPD(bnet, 1);
clamped = 0;
bnet.CPD{2} = softmax_CPD(bnet, 2, net.w1, net.b1, clamped, IRLS_iter);

engine = jtree_inf_engine(bnet);

x = rand(1, input_dim);
q = glmfwd(net, x);

[engine, ll] = enter_evidence(engine, {x, []});
m = marginal_nodes(engine, 2);
assert(approxeq(m.T(:), q(:)));
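
% Extra sanity check (not part of the original demo): compute the softmax
% by hand and compare with glmfwd's output.  This assumes netlab's glm
% stores the input-to-output weights in net.w1 (input_dim x num_classes)
% and the biases in net.b1 (1 x num_classes).
a = net.w1' * x(:) + net.b1(:);   % per-class activations a_i = w_i'*x + b_i
q_manual = exp(a) / sum(exp(a));  % softmax: P(Q=i|x) = exp(a_i) / sum_j exp(a_j)
assert(approxeq(q_manual(:), q(:)));
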
% Check learning
% We use EM, but in fact there is no hidden data.
% The M step will call IRLS on the softmax node.

% Generate data from three classes in 2d
input_dim = 2;
num_classes = 3;

% Fix seeds for reproducible results
randn('state', 42);
rand('state', 42);

ndata = 10;
% Generate mixture of three Gaussians in two dimensional space
data = randn(ndata, input_dim);
targets = zeros(ndata, 3);

% Priors for the clusters
prior(1) = 0.4;
prior(2) = 0.3;
prior(3) = 0.3;

% Cluster centres
c = [2.0, 2.0; 0.0, 0.0; 1, -1];

ndata1 = prior(1)*ndata;
ndata2 = (prior(1) + prior(2))*ndata;
% Put first cluster at (2, 2)
data(1:ndata1, 1) = data(1:ndata1, 1) * 0.5 + c(1,1);
data(1:ndata1, 2) = data(1:ndata1, 2) * 0.5 + c(1,2);
targets(1:ndata1, 1) = 1;

% Leave second cluster at (0,0)
data((ndata1 + 1):ndata2, :) = data((ndata1 + 1):ndata2, :);
targets((ndata1+1):ndata2, 2) = 1;

data((ndata2+1):ndata, 1) = data((ndata2+1):ndata, 1) * 0.6 + c(3, 1);
data((ndata2+1):ndata, 2) = data((ndata2+1):ndata, 2) * 0.6 + c(3, 2);
targets((ndata2+1):ndata, 3) = 1;

if 0
  ndata = 1;
  data = x;
  targets = [1 0 0];
end

options = foptions;
options(1) = -1; % verbose
options(14) = IRLS_iter;
[net2, options2] = glmtrain(net, options, data, targets);
net2.ll = options2(8); % type 'help foptions' for details

cases = cell(2, ndata);
for l=1:ndata
  q = find(targets(l,:)==1);
  x = data(l,:);
  cases{1,l} = x(:);
  cases{2,l} = q;
end

max_iter = 2; % we have complete observability, so 1 iter is enough
[bnet2, ll2] = learn_params_em(engine, cases, max_iter);

w = get_field(bnet2.CPD{2},'weights');
b = get_field(bnet2.CPD{2},'offset')';

w
net2.w1

b
net2.b1

% assert(approxeq(net2.ll, ll2)); % glmtrain returns ll after final M step, learn_params before
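
% Optional extra comparison (not in the original demo): with fully observed
% data, EM's single M step runs the same IRLS fit that glmtrain performs,
% so the two parameter sets should roughly agree.  Left commented out
% because the two IRLS code paths may differ in detail across netlab/BNT
% versions, which would make a tight tolerance brittle; eyeball the
% printed values above instead.
% assert(approxeq(w, net2.w1));
% assert(approxeq(b, net2.b1));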