Daniel@0: function dag = learn_struct_K2(data, ns, order, varargin) Daniel@0: % LEARN_STRUCT_K2 Greedily learn the best structure compatible with a fixed node ordering Daniel@0: % best_dag = learn_struct_K2(data, node_sizes, order, ...) Daniel@0: % Daniel@0: % data(i,m) = value of node i in case m (can be a cell array). Daniel@0: % node_sizes(i) is the size of node i. Daniel@0: % order(i) is the i'th node in the topological ordering. Daniel@0: % Daniel@0: % The following optional arguments can be specified in the form of name/value pairs: Daniel@0: % [default value in brackets] Daniel@0: % Daniel@0: % max_fan_in - this the largest number of parents we allow per node [N] Daniel@0: % scoring_fn - 'bayesian' or 'bic' [ 'bayesian' ] Daniel@0: % Currently, only networks with all tabular nodes support Bayesian scoring. Daniel@0: % type - type{i} is the type of CPD to use for node i, where the type is a string Daniel@0: % of the form 'tabular', 'noisy_or', 'gaussian', etc. [ all cells contain 'tabular' ] Daniel@0: % params - params{i} contains optional arguments passed to the CPD constructor for node i, Daniel@0: % or [] if none. [ all cells contain {'prior', 1}, meaning use uniform Dirichlet priors ] Daniel@0: % discrete - the list of discrete nodes [ 1:N ] Daniel@0: % clamped - clamped(i,m) = 1 if node i is clamped in case m [ zeros(N, ncases) ] Daniel@0: % verbose - 'yes' means display output while running [ 'no' ] Daniel@0: % Daniel@0: % e.g., dag = learn_struct_K2(data, ns, order, 'scoring_fn', 'bic', 'params', []) Daniel@0: % Daniel@0: % To be backwards compatible with BNT2, you can also specify arguments as follows Daniel@0: % dag = learn_struct_K2(data, node_sizes, order, max_fan_in) Daniel@0: % Daniel@0: % This algorithm is described in Daniel@0: % - Cooper and Herskovits, "A Bayesian method for the induction of probabilistic Daniel@0: % networks from data", Machine Learning Journal 9:308--347, 1992 Daniel@0: Daniel@0: [n ncases] = size(data); Daniel@0: Daniel@0: % set default params Daniel@0: type = cell(1,n); Daniel@0: params = cell(1,n); Daniel@0: for i=1:n Daniel@0: type{i} = 'tabular'; Daniel@0: %params{i} = { 'prior', 1 }; Daniel@0: params{i} = { 'prior_type', 'dirichlet', 'dirichlet_weight', 1 }; Daniel@0: end Daniel@0: scoring_fn = 'bayesian'; Daniel@0: discrete = 1:n; Daniel@0: clamped = zeros(n, ncases); Daniel@0: Daniel@0: max_fan_in = n; Daniel@0: verbose = 0; Daniel@0: Daniel@0: args = varargin; Daniel@0: nargs = length(args); Daniel@0: if length(args) > 0 Daniel@0: if isstr(args{1}) Daniel@0: for i=1:2:nargs Daniel@0: switch args{i}, Daniel@0: case 'verbose', verbose = strcmp(args{i+1}, 'yes'); Daniel@0: case 'max_fan_in', max_fan_in = args{i+1}; Daniel@0: case 'scoring_fn', scoring_fn = args{i+1}; Daniel@0: case 'type', type = args{i+1}; Daniel@0: case 'discrete', discrete = args{i+1}; Daniel@0: case 'clamped', clamped = args{i+1}; Daniel@0: case 'params', if isempty(args{i+1}), params = cell(1,n); else params = args{i+1}; end Daniel@0: end Daniel@0: end Daniel@0: else Daniel@0: max_fan_in = args{1}; Daniel@0: end Daniel@0: end Daniel@0: Daniel@0: dag = zeros(n,n); Daniel@0: Daniel@0: for i=1:n Daniel@0: ps = []; Daniel@0: j = order(i); Daniel@0: u = find(clamped(j,:)==0); Daniel@0: score = score_family(j, ps, type{j}, scoring_fn, ns, discrete, data(:,u), params{j}); Daniel@0: if verbose, fprintf('\nnode %d, empty score %6.4f\n', j, score); end Daniel@0: done = 0; Daniel@0: while ~done & (length(ps) <= max_fan_in) Daniel@0: pps = mysetdiff(order(1:i-1), ps); % potential parents Daniel@0: nps = length(pps); Daniel@0: pscore = zeros(1, nps); Daniel@0: for pi=1:nps Daniel@0: p = pps(pi); Daniel@0: pscore(pi) = score_family(j, [ps p], type{j}, scoring_fn, ns, discrete, data(:,u), params{j}); Daniel@0: if verbose, fprintf('considering adding %d to %d, score %6.4f\n', p, j, pscore(pi)); end Daniel@0: end Daniel@0: [best_pscore, best_p] = max(pscore); Daniel@0: best_p = pps(best_p); Daniel@0: if best_pscore > score Daniel@0: score = best_pscore; Daniel@0: ps = [ps best_p]; Daniel@0: if verbose, fprintf('* adding %d to %d, score %6.4f\n', best_p, j, best_pscore); end Daniel@0: else Daniel@0: done = 1; Daniel@0: end Daniel@0: end Daniel@0: if ~isempty(ps) % need this check for matlab 5.2 Daniel@0: dag(ps, j) = 1; Daniel@0: end Daniel@0: end Daniel@0: Daniel@0: Daniel@0: Daniel@0: