mauch-mirex-2010: _FullBNT/BNT/learning/learn_struct

annotate _FullBNT/BNT/learning/learn_struct_K2.m @ 9:4ea6619cb3f5 tip

removed log files

author	matthiasm
date	Fri, 11 Apr 2014 15:55:11 +0100
parents	b5b38998ef3b
children

rev	line source
matthiasm@8	1 function dag = learn_struct_K2(data, ns, order, varargin)
matthiasm@8	2 % LEARN_STRUCT_K2 Greedily learn the best structure compatible with a fixed node ordering
matthiasm@8	3 % best_dag = learn_struct_K2(data, node_sizes, order, ...)
matthiasm@8	4 %
matthiasm@8	5 % data(i,m) = value of node i in case m (can be a cell array).
matthiasm@8	6 % node_sizes(i) is the size of node i.
matthiasm@8	7 % order(i) is the i'th node in the topological ordering.
matthiasm@8	8 %
matthiasm@8	9 % The following optional arguments can be specified in the form of name/value pairs:
matthiasm@8	10 % [default value in brackets]
matthiasm@8	11 %
matthiasm@8	12 % max_fan_in - this the largest number of parents we allow per node [N]
matthiasm@8	13 % scoring_fn - 'bayesian' or 'bic' [ 'bayesian' ]
matthiasm@8	14 % Currently, only networks with all tabular nodes support Bayesian scoring.
matthiasm@8	15 % type - type{i} is the type of CPD to use for node i, where the type is a string
matthiasm@8	16 % of the form 'tabular', 'noisy_or', 'gaussian', etc. [ all cells contain 'tabular' ]
matthiasm@8	17 % params - params{i} contains optional arguments passed to the CPD constructor for node i,
matthiasm@8	18 % or [] if none. [ all cells contain {'prior', 1}, meaning use uniform Dirichlet priors ]
matthiasm@8	19 % discrete - the list of discrete nodes [ 1:N ]
matthiasm@8	20 % clamped - clamped(i,m) = 1 if node i is clamped in case m [ zeros(N, ncases) ]
matthiasm@8	21 % verbose - 'yes' means display output while running [ 'no' ]
matthiasm@8	22 %
matthiasm@8	23 % e.g., dag = learn_struct_K2(data, ns, order, 'scoring_fn', 'bic', 'params', [])
matthiasm@8	24 %
matthiasm@8	25 % To be backwards compatible with BNT2, you can also specify arguments as follows
matthiasm@8	26 % dag = learn_struct_K2(data, node_sizes, order, max_fan_in)
matthiasm@8	27 %
matthiasm@8	28 % This algorithm is described in
matthiasm@8	29 % - Cooper and Herskovits, "A Bayesian method for the induction of probabilistic
matthiasm@8	30 % networks from data", Machine Learning Journal 9:308--347, 1992
matthiasm@8	31
matthiasm@8	32 [n ncases] = size(data);
matthiasm@8	33
matthiasm@8	34 % set default params
matthiasm@8	35 type = cell(1,n);
matthiasm@8	36 params = cell(1,n);
matthiasm@8	37 for i=1:n
matthiasm@8	38 type{i} = 'tabular';
matthiasm@8	39 %params{i} = { 'prior', 1 };
matthiasm@8	40 params{i} = { 'prior_type', 'dirichlet', 'dirichlet_weight', 1 };
matthiasm@8	41 end
matthiasm@8	42 scoring_fn = 'bayesian';
matthiasm@8	43 discrete = 1:n;
matthiasm@8	44 clamped = zeros(n, ncases);
matthiasm@8	45
matthiasm@8	46 max_fan_in = n;
matthiasm@8	47 verbose = 0;
matthiasm@8	48
matthiasm@8	49 args = varargin;
matthiasm@8	50 nargs = length(args);
matthiasm@8	51 if length(args) > 0
matthiasm@8	52 if isstr(args{1})
matthiasm@8	53 for i=1:2:nargs
matthiasm@8	54 switch args{i},
matthiasm@8	55 case 'verbose', verbose = strcmp(args{i+1}, 'yes');
matthiasm@8	56 case 'max_fan_in', max_fan_in = args{i+1};
matthiasm@8	57 case 'scoring_fn', scoring_fn = args{i+1};
matthiasm@8	58 case 'type', type = args{i+1};
matthiasm@8	59 case 'discrete', discrete = args{i+1};
matthiasm@8	60 case 'clamped', clamped = args{i+1};
matthiasm@8	61 case 'params', if isempty(args{i+1}), params = cell(1,n); else params = args{i+1}; end
matthiasm@8	62 end
matthiasm@8	63 end
matthiasm@8	64 else
matthiasm@8	65 max_fan_in = args{1};
matthiasm@8	66 end
matthiasm@8	67 end
matthiasm@8	68
matthiasm@8	69 dag = zeros(n,n);
matthiasm@8	70
matthiasm@8	71 for i=1:n
matthiasm@8	72 ps = [];
matthiasm@8	73 j = order(i);
matthiasm@8	74 u = find(clamped(j,:)==0);
matthiasm@8	75 score = score_family(j, ps, type{j}, scoring_fn, ns, discrete, data(:,u), params{j});
matthiasm@8	76 if verbose, fprintf('\nnode %d, empty score %6.4f\n', j, score); end
matthiasm@8	77 done = 0;
matthiasm@8	78 while ~done & (length(ps) <= max_fan_in)
matthiasm@8	79 pps = mysetdiff(order(1:i-1), ps); % potential parents
matthiasm@8	80 nps = length(pps);
matthiasm@8	81 pscore = zeros(1, nps);
matthiasm@8	82 for pi=1:nps
matthiasm@8	83 p = pps(pi);
matthiasm@8	84 pscore(pi) = score_family(j, [ps p], type{j}, scoring_fn, ns, discrete, data(:,u), params{j});
matthiasm@8	85 if verbose, fprintf('considering adding %d to %d, score %6.4f\n', p, j, pscore(pi)); end
matthiasm@8	86 end
matthiasm@8	87 [best_pscore, best_p] = max(pscore);
matthiasm@8	88 best_p = pps(best_p);
matthiasm@8	89 if best_pscore > score
matthiasm@8	90 score = best_pscore;
matthiasm@8	91 ps = [ps best_p];
matthiasm@8	92 if verbose, fprintf('* adding %d to %d, score %6.4f\n', best_p, j, best_pscore); end
matthiasm@8	93 else
matthiasm@8	94 done = 1;
matthiasm@8	95 end
matthiasm@8	96 end
matthiasm@8	97 if ~isempty(ps) % need this check for matlab 5.2
matthiasm@8	98 dag(ps, j) = 1;
matthiasm@8	99 end
matthiasm@8	100 end
matthiasm@8	101
matthiasm@8	102
matthiasm@8	103
matthiasm@8	104

Mercurial > hg > mauch-mirex-2010

annotate _FullBNT/BNT/learning/learn_struct_K2.m @ 9:4ea6619cb3f5 tip