function CPD = mlp_CPD(bnet, self, nhidden, w1, b1, w2, b2, clamped, max_iter, verbose, wthresh, llthresh)
% MLP_CPD Make a CPD from a Multi Layer Perceptron (i.e., feedforward neural network)
%
% We use a different MLP for each discrete parent combination (if there are any discrete parents).
% We currently assume this node (the child) is discrete.
%
% CPD = mlp_CPD(bnet, self, nhidden)
% will create a CPD with random parameters, where self is the number of this node and nhidden the number of the hidden nodes.
% The params are drawn from N(0, s*I), where s = 1/sqrt(n+1), n = length(X).
%
% CPD = mlp_CPD(bnet, self, nhidden, w1, b1, w2, b2) allows you to specify the params, where
% w1 = first-layer weight matrix
% b1 = first-layer bias vector
% w2 = second-layer weight matrix
% b2 = second-layer bias vector
% These are assumed to be the same for each discrete parent combination.
% If any of these are [], random values will be created.
%
% CPD = mlp_CPD(bnet, self, nhidden, w1, b1, w2, b2, clamped) allows you to prevent the params from being
% updated during learning (if clamped = 1). Default: clamped = 0.
%
% CPD = mlp_CPD(bnet, self, nhidden, w1, b1, w2, b2, clamped, max_iter, verbose, wthresh, llthresh)
% allows you to specify params that control the M step:
% max_iter - the maximum number of steps to take (default: 10)
% verbose - controls whether to print (default: 0 means silent).
% wthresh - a measure of the precision required for the value of
% the weights W at the solution. Default: 1e-2.
% llthresh - a measure of the precision required of the objective
% function (log-likelihood) at the solution. Both this and the previous condition must
% be satisfied for termination. Default: 1e-2.
%
% For learning, we use a weighted version of scaled conjugated gradient in the M step.

if nargin==0
  % This occurs if we are trying to load an object from a file.
  CPD = init_fields;
  CPD = class(CPD, 'mlp_CPD', discrete_CPD(0,[]));
  return;
elseif isa(bnet, 'mlp_CPD')
  % This might occur if we are copying an object.
  CPD = bnet;
  return;
end
CPD = init_fields;

% The child of an MLP CPD must be discrete.
assert(myismember(self, bnet.dnodes));
ns = bnet.node_sizes;

% Partition my parents into discrete and continuous ones.
ps = parents(bnet.dag, self);
dnodes = mysetdiff(1:length(bnet.dag), bnet.cnodes);
dps = myintersect(ps, dnodes);
cps = myintersect(ps, bnet.cnodes);
dpsz = prod(ns(dps));   % number of joint configurations of the discrete parents
cpsz = sum(ns(cps));    % total dimensionality of the continuous parents (the MLP input)
self_size = ns(self);

% discrete/cts parent index - which ones of my parents are discrete/cts?
% NOTE(review): dpndx/cpndx are not declared in init_fields below, so the
% field order differs between the load-from-file path and this path -- confirm
% this matches the rest of the toolbox before changing it.
CPD.dpndx = find_equiv_posns(dps, ps);
CPD.cpndx = find_equiv_posns(cps, ps);

% One MLP (from Netlab) per discrete parent configuration, each mapping the
% continuous parents to a softmax over the child's values.
CPD.mlp = cell(1,dpsz);
for i=1:dpsz
  CPD.mlp{i} = mlp(cpsz, nhidden, self_size, 'softmax');
  % Override the random initial weights with any user-supplied, non-empty params.
  if nargin >=4 & ~isempty(w1)
    CPD.mlp{i}.w1 = w1;
  end
  if nargin >=5 & ~isempty(b1)
    CPD.mlp{i}.b1 = b1;
  end
  if nargin >=6 & ~isempty(w2)
    CPD.mlp{i}.w2 = w2;
  end
  if nargin >=7 & ~isempty(b2)
    CPD.mlp{i}.b2 = b2;
  end
  % Stack the per-configuration params into arrays indexed by the discrete
  % parent configuration i. (Grown inside the loop without preallocation.)
  W1app(:,:,i)=CPD.mlp{i}.w1;
  W2app(:,:,i)=CPD.mlp{i}.w2;
  b1app(i,:)=CPD.mlp{i}.b1;
  b2app(i,:)=CPD.mlp{i}.b2;
end

% Defaults for the optional M-step control parameters.
if nargin < 8, clamped = 0; end
if nargin < 9, max_iter = 10; end
if nargin < 10, verbose = 0; end
if nargin < 11, wthresh = 1e-2; end
if nargin < 12, llthresh = 1e-2; end

CPD.self = self;
CPD.max_iter = max_iter;
CPD.verbose = verbose;
CPD.wthresh = wthresh;
CPD.llthresh = llthresh;

% sufficient statistics
% Since MLP is not in the exponential family, we must store all the raw data.
%
CPD.W1=W1app;                       % Extract all the parameters of the node for handling discrete obs parents
CPD.W2=W2app;                       %
nparaW=[size(W1app) size(W2app)];   %
CPD.b1=b1app;                       %
CPD.b2=b2app;                       %
nparab=[size(b1app) size(b2app)];   %

CPD.sizes=bnet.node_sizes(:);       % used in CPD_to_table to pump up the node sizes

CPD.parent_vals = [];               % X(l,:) = value of cts parents in l'th example

CPD.eso_weights=[];                 % weights used by the SCG algorithm

CPD.self_vals = [];                 % Y(l,:) = value of self in l'th example

% For BIC
CPD.nsamples = 0;
CPD.nparams=prod(nparaW)+prod(nparab);
CPD = class(CPD, 'mlp_CPD', discrete_CPD(clamped, ns([ps self])));

%%%%%%%%%%%

function CPD = init_fields()
% This ensures we define the fields in the same order
% no matter whether we load an object from a file,
% or create it from scratch. (Matlab requires this.)

CPD.mlp = {};
CPD.self = [];
CPD.max_iter = [];
CPD.verbose = [];
CPD.wthresh = [];
CPD.llthresh = [];
CPD.approx_hess = [];
CPD.W1 = [];
CPD.W2 = [];
CPD.b1 = [];
CPD.b2 = [];
CPD.sizes = [];
CPD.parent_vals = [];
CPD.eso_weights=[];
CPD.self_vals = [];
CPD.nsamples = [];
CPD.nparams = [];