diff toolboxes/FullBNT-1.0.7/bnt/learning/learn_struct_K2.m @ 0:e9a9cd732c1e tip
first hg version after svn
| author   | wolffd                          |
|----------|---------------------------------|
| date     | Tue, 10 Feb 2015 15:05:51 +0000 |
| parents  |                                 |
| children |                                 |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/FullBNT-1.0.7/bnt/learning/learn_struct_K2.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,104 @@
+function dag = learn_struct_K2(data, ns, order, varargin)
+% LEARN_STRUCT_K2 Greedily learn the best structure compatible with a fixed node ordering
+% best_dag = learn_struct_K2(data, node_sizes, order, ...)
+%
+% data(i,m) = value of node i in case m (can be a cell array).
+% node_sizes(i) is the size of node i.
+% order(i) is the i'th node in the topological ordering.
+%
+% The following optional arguments can be specified in the form of name/value pairs:
+% [default value in brackets]
+%
+% max_fan_in - this is the largest number of parents we allow per node [N]
+% scoring_fn - 'bayesian' or 'bic' [ 'bayesian' ]
+%              Currently, only networks with all tabular nodes support Bayesian scoring.
+% type       - type{i} is the type of CPD to use for node i, where the type is a string
+%              of the form 'tabular', 'noisy_or', 'gaussian', etc. [ all cells contain 'tabular' ]
+% params     - params{i} contains optional arguments passed to the CPD constructor for node i,
+%              or [] if none. [ all cells contain {'prior_type', 'dirichlet', 'dirichlet_weight', 1},
+%              meaning use uniform Dirichlet priors ]
+% discrete   - the list of discrete nodes [ 1:N ]
+% clamped    - clamped(i,m) = 1 if node i is clamped in case m [ zeros(N, ncases) ]
+% verbose    - 'yes' means display output while running [ 'no' ]
+%
+% e.g., dag = learn_struct_K2(data, ns, order, 'scoring_fn', 'bic', 'params', [])
+%
+% To be backwards compatible with BNT2, you can also specify arguments as follows:
+%   dag = learn_struct_K2(data, node_sizes, order, max_fan_in)
+%
+% This algorithm is described in
+% - Cooper and Herskovits, "A Bayesian method for the induction of probabilistic
+%   networks from data", Machine Learning 9:309--347, 1992.
+
+[n ncases] = size(data);
+
+% set default params
+type = cell(1,n);
+params = cell(1,n);
+for i=1:n
+  type{i} = 'tabular';
+  %params{i} = { 'prior', 1 };
+  params{i} = { 'prior_type', 'dirichlet', 'dirichlet_weight', 1 };
+end
+scoring_fn = 'bayesian';
+discrete = 1:n;
+clamped = zeros(n, ncases);
+
+max_fan_in = n;
+verbose = 0;
+
+% parse optional arguments: either name/value pairs, or a single
+% max_fan_in value for backwards compatibility with BNT2
+args = varargin;
+nargs = length(args);
+if length(args) > 0
+  if isstr(args{1})
+    for i=1:2:nargs
+      switch args{i},
+       case 'verbose',    verbose = strcmp(args{i+1}, 'yes');
+       case 'max_fan_in', max_fan_in = args{i+1};
+       case 'scoring_fn', scoring_fn = args{i+1};
+       case 'type',       type = args{i+1};
+       case 'discrete',   discrete = args{i+1};
+       case 'clamped',    clamped = args{i+1};
+       case 'params',     if isempty(args{i+1}), params = cell(1,n); else params = args{i+1}; end
+      end
+    end
+  else
+    max_fan_in = args{1};
+  end
+end
+
+dag = zeros(n,n);
+
+% K2: visit nodes in the given order; for each node, greedily add the single
+% candidate parent that most improves the family score, stopping when no
+% candidate improves the score or the fan-in limit is reached.
+for i=1:n
+  ps = [];
+  j = order(i);
+  u = find(clamped(j,:)==0);
+  score = score_family(j, ps, type{j}, scoring_fn, ns, discrete, data(:,u), params{j});
+  if verbose, fprintf('\nnode %d, empty score %6.4f\n', j, score); end
+  done = 0;
+  while ~done & (length(ps) <= max_fan_in)
+    pps = mysetdiff(order(1:i-1), ps); % potential parents
+    nps = length(pps);
+    pscore = zeros(1, nps);
+    for pi=1:nps
+      p = pps(pi);
+      pscore(pi) = score_family(j, [ps p], type{j}, scoring_fn, ns, discrete, data(:,u), params{j});
+      if verbose, fprintf('considering adding %d to %d, score %6.4f\n', p, j, pscore(pi)); end
+    end
+    [best_pscore, best_p] = max(pscore);
+    best_p = pps(best_p);
+    if best_pscore > score
+      score = best_pscore;
+      ps = [ps best_p];
+      if verbose, fprintf('* adding %d to %d, score %6.4f\n', best_p, j, best_pscore); end
+    else
+      done = 1;
+    end
+  end
+  if ~isempty(ps) % need this check for matlab 5.2
+    dag(ps, j) = 1;
+  end
+end
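
For context, a minimal usage sketch follows. It assumes FullBNT (including mk_bnet, tabular_CPD, sample_bnet, cell2num and the file above) is on the MATLAB path; the 3-node chain network, sample size and node ordering are illustrative assumptions, not part of the committed file.

% Minimal usage sketch (assumes FullBNT is on the MATLAB path; the network,
% sample size and ordering below are illustrative only).
N = 3;
dag_true = zeros(N); dag_true(1,2) = 1; dag_true(2,3) = 1;   % chain 1 -> 2 -> 3
ns = 2*ones(1,N);                                            % all nodes binary
bnet = mk_bnet(dag_true, ns);
for i=1:N, bnet.CPD{i} = tabular_CPD(bnet, i); end           % random CPTs
ncases = 500;
data = zeros(N, ncases);
for m=1:ncases
  data(:,m) = cell2num(sample_bnet(bnet));                   % one fully observed case per column
end
order = [1 2 3];                                             % assumed topological ordering
dag_hat = learn_struct_K2(data, ns, order, 'max_fan_in', 2, 'verbose', 'yes');

With enough cases and a correct ordering, dag_hat should recover the generating chain structure; with 'scoring_fn' set to 'bic' the same call uses the BIC score instead of the default Bayesian score.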