diff toolboxes/FullBNT-1.0.7/bnt/learning/learn_struct_K2.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/FullBNT-1.0.7/bnt/learning/learn_struct_K2.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,110 @@
+function dag = learn_struct_K2(data, ns, order, varargin)
+% LEARN_STRUCT_K2 Greedily learn the best structure compatible with a fixed node ordering
+% best_dag = learn_struct_K2(data, node_sizes, order, ...)
+%
+% data(i,m) = value of node i in case m (can be a cell array).
+% node_sizes(i) is the size of node i.
+% order(i) is the i'th node in the topological ordering.
+%
+% The following optional arguments can be specified in the form of name/value pairs:
+% [default value in brackets]
+%
+% max_fan_in - the largest number of parents we allow per node [N]
+% scoring_fn - 'bayesian' or 'bic' [ 'bayesian' ]
+%              Currently, only networks with all tabular nodes support Bayesian scoring.
+% type       - type{i} is the type of CPD to use for node i, where the type is a string
+%              of the form 'tabular', 'noisy_or', 'gaussian', etc. [ all cells contain 'tabular' ]
+% params     - params{i} contains optional arguments passed to the CPD constructor for node i,
+%              or [] if none.  [ all cells contain {'prior', 1}, meaning use uniform Dirichlet priors ]
+% discrete   - the list of discrete nodes [ 1:N ]
+% clamped    - clamped(i,m) = 1 if node i is clamped in case m [ zeros(N, ncases) ]
+% verbose    - 'yes' means display output while running [ 'no' ]
+%
+% e.g., dag = learn_struct_K2(data, ns, order, 'scoring_fn', 'bic', 'params', [])
+%
+% To be backwards compatible with BNT2, you can also specify arguments as follows
+%   dag = learn_struct_K2(data, node_sizes, order, max_fan_in)    
+%
+% This algorithm is described in
+% - Cooper and Herskovits,  "A Bayesian method for the induction of probabilistic
+%      networks from data", Machine Learning Journal 9:308--347, 1992
+
+[n ncases] = size(data);
+
+% set default params
+type = cell(1,n);
+params = cell(1,n);
+for i=1:n
+  type{i} = 'tabular';
+  params{i} = { 'prior_type', 'dirichlet', 'dirichlet_weight', 1 };
+end
+scoring_fn = 'bayesian';
+discrete = 1:n;
+clamped = zeros(n, ncases);
+max_fan_in = n;
+verbose = 0;
+
+% Parse optional arguments: either name/value pairs, or the legacy BNT2
+% calling convention where the 4th (non-string) argument is max_fan_in.
+args = varargin;
+nargs = length(args);
+if nargs > 0
+  if ischar(args{1})  % ischar replaces the deprecated isstr
+    for i=1:2:nargs
+      switch args{i}
+       case 'verbose',    verbose = strcmp(args{i+1}, 'yes');
+       case 'max_fan_in', max_fan_in = args{i+1};
+       case 'scoring_fn', scoring_fn = args{i+1};
+       case 'type',       type = args{i+1};
+       case 'discrete',   discrete = args{i+1};
+       case 'clamped',    clamped = args{i+1};
+       case 'params',     if isempty(args{i+1}), params = cell(1,n); else params = args{i+1}; end
+       otherwise, error(['invalid argument name ' args{i}]);  % don't silently ignore typo'd options
+      end
+    end
+  else
+    max_fan_in = args{1};
+  end
+end
+
+dag = zeros(n,n);
+
+% Visit nodes in the given order; each node may only take parents that
+% appear earlier in the ordering, so the result is guaranteed acyclic.
+for i=1:n
+  ps = [];
+  j = order(i);
+  u = find(clamped(j,:)==0);  % only score cases where node j is unclamped
+  score = score_family(j, ps, type{j}, scoring_fn, ns, discrete, data(:,u), params{j});
+  if verbose, fprintf('\nnode %d, empty score %6.4f\n', j, score); end
+  done = 0;
+  % Greedily add the single best parent until no candidate improves the
+  % score or the fan-in limit is reached. (Using < rather than <= here
+  % ensures we never exceed max_fan_in parents per node.)
+  while ~done & (length(ps) < max_fan_in)
+    pps = mysetdiff(order(1:i-1), ps); % potential parents
+    nps = length(pps);
+    if nps == 0
+      done = 1;  % no candidates left to try
+    else
+      pscore = zeros(1, nps);
+      for pi=1:nps
+        p = pps(pi);
+        pscore(pi) = score_family(j, [ps p], type{j}, scoring_fn, ns, discrete, data(:,u), params{j});
+        if verbose, fprintf('considering adding %d to %d, score %6.4f\n', p, j, pscore(pi)); end
+      end
+      [best_pscore, best_p] = max(pscore);
+      best_p = pps(best_p);
+      if best_pscore > score
+        score = best_pscore;
+        ps = [ps best_p];
+        if verbose, fprintf('* adding %d to %d, score %6.4f\n', best_p, j, best_pscore); end
+      else
+        done = 1;
+      end
+    end
+  end
+  if ~isempty(ps) % need this check for matlab 5.2
+    dag(ps, j) = 1;
+  end
+end