toolboxes/FullBNT-1.0.7/bnt/learning/learn_struct_K2.m @ 0:cc4b1211e677 (tip)

description: initial commit to HG from Changeset: 646 (e263d8a21543) added further path and more save "camirversion.m"
author:      Daniel Wolff
date:        Fri, 19 Aug 2016 13:07:06 +0200
function dag = learn_struct_K2(data, ns, order, varargin)
% LEARN_STRUCT_K2 Greedily learn the best structure compatible with a fixed node ordering
% best_dag = learn_struct_K2(data, node_sizes, order, ...)
%
% data(i,m) = value of node i in case m (can be a cell array).
% node_sizes(i) is the size of node i.
% order(i) is the i'th node in the topological ordering.
%
% The following optional arguments can be specified in the form of name/value pairs:
% [default value in brackets]
%
% max_fan_in - this is the largest number of parents we allow per node [N]
% scoring_fn - 'bayesian' or 'bic' [ 'bayesian' ]
%              Currently, only networks with all tabular nodes support Bayesian scoring.
% type       - type{i} is the type of CPD to use for node i, where the type is a string
%              of the form 'tabular', 'noisy_or', 'gaussian', etc. [ all cells contain 'tabular' ]
% params     - params{i} contains optional arguments passed to the CPD constructor for node i,
%              or [] if none. [ all cells contain {'prior_type', 'dirichlet', 'dirichlet_weight', 1},
%              meaning use uniform Dirichlet priors ]
% discrete   - the list of discrete nodes [ 1:N ]
% clamped    - clamped(i,m) = 1 if node i is clamped in case m [ zeros(N, ncases) ]
% verbose    - 'yes' means display output while running [ 'no' ]
%
% e.g., dag = learn_struct_K2(data, ns, order, 'scoring_fn', 'bic', 'params', [])
%
% To be backwards compatible with BNT2, you can also specify the arguments as follows:
% dag = learn_struct_K2(data, node_sizes, order, max_fan_in)
%
% This algorithm is described in
% - Cooper and Herskovits, "A Bayesian method for the induction of probabilistic
%   networks from data", Machine Learning, 9:309--347, 1992.
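%
% Example (a minimal usage sketch, assuming BNT is on the path; the sprinkler
% network, variable names, and parameter values below are illustrative, not
% part of this file):
%
%   N = 4; C = 1; S = 2; R = 3; W = 4;
%   true_dag = zeros(N,N);
%   true_dag(C,[S R]) = 1; true_dag([S R], W) = 1;
%   ns = 2*ones(1,N);                                   % all nodes binary
%   bnet = mk_bnet(true_dag, ns);
%   for k=1:N, bnet.CPD{k} = tabular_CPD(bnet, k); end  % random CPTs
%   ncases = 500;
%   data = zeros(N, ncases);
%   for m=1:ncases, data(:,m) = cell2num(sample_bnet(bnet)); end
%   order = [C S R W];                                  % topological order of true_dag
%   dag = learn_struct_K2(data, ns, order, 'max_fan_in', 2, 'verbose', 'yes');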

[n ncases] = size(data);

% set default params
type = cell(1,n);
params = cell(1,n);
for i=1:n
  type{i} = 'tabular';
  %params{i} = { 'prior', 1 };
  params{i} = { 'prior_type', 'dirichlet', 'dirichlet_weight', 1 };
end
scoring_fn = 'bayesian';
discrete = 1:n;
clamped = zeros(n, ncases);

max_fan_in = n;
verbose = 0;

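% Parse the optional arguments: either name/value pairs or, for backwards
% compatibility with BNT2, a single positional max_fan_in.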
args = varargin;
nargs = length(args);
if ~isempty(args)
  if ischar(args{1})
    % name/value pairs
    for i=1:2:nargs
      switch args{i}
        case 'verbose',    verbose = strcmp(args{i+1}, 'yes');
        case 'max_fan_in', max_fan_in = args{i+1};
        case 'scoring_fn', scoring_fn = args{i+1};
        case 'type',       type = args{i+1};
        case 'discrete',   discrete = args{i+1};
        case 'clamped',    clamped = args{i+1};
        case 'params',     if isempty(args{i+1}), params = cell(1,n); else params = args{i+1}; end
      end
    end
  else
    % old BNT2 calling convention: learn_struct_K2(data, ns, order, max_fan_in)
    max_fan_in = args{1};
  end
end

dag = zeros(n,n);

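% K2 main loop: visit the nodes in the given topological order. For node
% j = order(i), the only candidate parents are the nodes that precede j in
% the order; greedily add the single candidate that most improves the
% family score, and stop when no addition helps or the fan-in limit is hit.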
for i=1:n
  ps = [];
  j = order(i);
  u = find(clamped(j,:)==0);   % only use cases where node j is not clamped
  score = score_family(j, ps, type{j}, scoring_fn, ns, discrete, data(:,u), params{j});
  if verbose, fprintf('\nnode %d, empty score %6.4f\n', j, score); end
  done = 0;
  while ~done && (length(ps) < max_fan_in) % use < so a node never exceeds max_fan_in parents
    pps = mysetdiff(order(1:i-1), ps); % potential parents
    nps = length(pps);
    pscore = zeros(1, nps);
    for pi=1:nps
      p = pps(pi);
      pscore(pi) = score_family(j, [ps p], type{j}, scoring_fn, ns, discrete, data(:,u), params{j});
      if verbose, fprintf('considering adding %d to %d, score %6.4f\n', p, j, pscore(pi)); end
    end
    [best_pscore, best_p] = max(pscore);
    best_p = pps(best_p);
    if best_pscore > score
      score = best_pscore;
      ps = [ps best_p];
      if verbose, fprintf('* adding %d to %d, score %6.4f\n', best_p, j, best_pscore); end
    else
      done = 1;
    end
  end
  if ~isempty(ps) % need this check for matlab 5.2
    dag(ps, j) = 1;
  end
end