function CPD = tabular_CPD(bnet, self, varargin)
% TABULAR_CPD Make a multinomial conditional prob. distrib. (CPT)
%
% CPD = tabular_CPD(bnet, node) creates a random CPT.
% CPD = tabular_CPD(bnet, node, T) is the old positional syntax, equivalent
%   to tabular_CPD(bnet, node, 'CPT', T); any further arguments are ignored.
% CPD = tabular_CPD() returns an empty object (used when loading from a file).
% CPD = tabular_CPD(obj), where obj is already a tabular_CPD, returns obj.
%
% The following arguments can be specified [default in brackets]
%
% CPT - specifies the params ['rnd']
%   - T means use table T; it will be reshaped to the size of node's family.
%   - 'rnd' creates rnd params (drawn from uniform)
%   - 'unif' creates a uniform distribution
%   - 'leftright' only transitions from i to i/i+1 are allowed, for each non-self parent context.
%       The non-self parents are all parents except old_self.
% selfprob - The prob of transition from i to i if CPT = 'leftright' [0.1]
% old_self - id of the node corresponding to self in the previous slice [self-ss]
% adjustable - 0 means don't adjust the parameters during learning [1]
% clamped - logical negation of adjustable (1 means don't adjust the parameters)
% prior_type - defines type of prior ['none']
%  - 'none' means do ML estimation
%  - 'dirichlet' means add pseudo-counts to every cell
%  - 'entropic' means use a prior P(theta) propto exp(-H(theta)) (see Brand)
% dirichlet_weight - equivalent sample size (ess) of the dirichlet prior [1]
% dirichlet_type - defines the type of Dirichlet prior ['BDeu']
%  - 'unif' means put dirichlet_weight in every cell
%  - 'BDeu' means we put 'dirichlet_weight/(r q)' in every cell
%     where r = self_sz and q = prod(parent_sz) (see Heckerman)
% trim - 1 means trim redundant params (rows in CPT) when using entropic prior [0]
%
% Options are processed in the order given, so if 'CPT' is supplied more than
% once the last occurrence wins. 'old_self' and 'selfprob' may appear anywhere
% in the list; they are read before the CPT is built.
%
% e.g., tabular_CPD(bnet, i, 'CPT', T)
% e.g., tabular_CPD(bnet, i, 'CPT', 'unif', 'dirichlet_weight', 2, 'dirichlet_type', 'unif')
% e.g., tabular_CPD(bnet, i, 'CPT', 'leftright', 'selfprob', 0.9)
%
% REFERENCES
% M. Brand - "Structure learning in conditional probability models via an entropic prior
%   and parameter extinction", Neural Computation 11 (1999): 1155--1182
% M. Brand - "Pattern discovery via entropy minimization" [covers annealing]
%   AI & Statistics 1999. Equation numbers refer to this paper, which is available from
%   www.merl.com/reports/docs/TR98-21.pdf
% D. Heckerman, D. Geiger and M. Chickering,
%   "Learning Bayesian networks: the combination of knowledge and statistical data",
%   Microsoft Research Tech Report, 1994


if nargin==0
  % This occurs if we are trying to load an object from a file.
  CPD = init_fields;
  CPD = class(CPD, 'tabular_CPD', discrete_CPD(0, []));
  return;
elseif isa(bnet, 'tabular_CPD')
  % This might occur if we are copying an object.
  CPD = bnet;
  return;
end
CPD = init_fields;

ns = bnet.node_sizes;
ps = parents(bnet.dag, self);
fam_sz = ns([ps self]);   % sizes of the parents followed by the size of self
CPD.sizes = fam_sz;
CPD.leftright = 0;

% set defaults
CPD.CPT = mk_stochastic(myrand(fam_sz));
CPD.adjustable = 1;
CPD.prior_type = 'none';
dirichlet_type = 'BDeu';
dirichlet_weight = 1;
CPD.trim = 0;
selfprob = 0.1;

% extract optional args
args = varargin;
% check for old syntax CPD(bnet, i, CPT) as opposed to CPD(bnet, i, 'CPT', CPT)
if ~isempty(args) && ~ischar(args{1})
  CPD.CPT = myreshape(args{1}, fam_sz);
  args = {};
end

% Every option name must be followed by a value; fail with a clear message
% instead of an index-out-of-range error further down.
if mod(length(args), 2) ~= 0
  error('optional arguments must be specified as name/value pairs');
end

% old_self and selfprob are needed while the 'leftright' CPT is being built,
% so read them before the main pass, wherever they appear in the list.
old_self = [];
for i=1:2:length(args)
  switch args{i},
    case 'old_self', old_self = args{i+1};
    case 'selfprob', selfprob = args{i+1};
  end
end

for i=1:2:length(args)
  switch args{i},
    case 'CPT',
      T = args{i+1};
      if ischar(T)
        switch T
          case 'unif', CPD.CPT = mk_stochastic(myones(fam_sz));
          case 'rnd',  CPD.CPT = mk_stochastic(myrand(fam_sz));
          case 'leftright',
            % we just initialise the CPT to leftright - this structure will
            % be maintained by EM, assuming we don't use a prior...
            CPD.leftright = 1;
            if isempty(old_self) % we assume the network is a DBN
              ss = bnet.nnodes_per_slice;
              old_self = self-ss;
            end
            other_ps = mysetdiff(ps, old_self);
            Qps = prod(ns(other_ps));   % number of non-self parent contexts
            Q = ns(self);
            p = selfprob;
            LR = mk_leftright_transmat(Q, p);
            transprob = repmat(reshape(LR, [1 Q Q]), [Qps 1 1]); % transprob(k,i,j)
            transprob = permute(transprob, [2 1 3]); % now transprob(i,k,j)
            CPD.CPT = myreshape(transprob, fam_sz);
          otherwise, error(['invalid CPT ' T]);
        end
      else
        CPD.CPT = myreshape(T, fam_sz);
      end

    case 'prior_type', CPD.prior_type = args{i+1};
    case 'dirichlet_type', dirichlet_type = args{i+1};
    case 'dirichlet_weight', dirichlet_weight = args{i+1};
    case 'adjustable', CPD.adjustable = args{i+1};
    case 'clamped', CPD.adjustable = ~args{i+1};
    case 'trim', CPD.trim = args{i+1};
    case {'old_self', 'selfprob'},
      % already read in by the pre-pass above
    otherwise, error(['invalid argument name: ' args{i}]);
  end
end

switch CPD.prior_type
  case 'dirichlet',
    switch dirichlet_type
      case 'unif', CPD.dirichlet = dirichlet_weight * myones(fam_sz);
      % NOTE(review): the header promises dirichlet_weight/(r q) per cell;
      % whether this expression yields that depends on how mk_stochastic
      % normalises its argument - confirm against mk_stochastic.
      case 'BDeu', CPD.dirichlet = dirichlet_weight * mk_stochastic(myones(fam_sz));
      otherwise, error(['invalid dirichlet_type ' dirichlet_type])
    end
  case {'entropic', 'none'}
    CPD.dirichlet = [];
  otherwise, error(['invalid prior_type ' CPD.prior_type])
end

% fields to do with learning
if ~CPD.adjustable
  CPD.counts = [];
  CPD.nparams = 0;
  CPD.nsamples = [];
else
  CPD.counts = zeros(size(CPD.CPT));
  parent_sizes = fam_sz(1:end-1);
  self_size = fam_sz(end);
  if CPD.leftright
    % For each of the Qps contexts, we specify Q elements on the diagonal.
    % Qps and Q were set when the 'leftright' CPT was built above, which is
    % the only place CPD.leftright becomes 1.
    CPD.nparams = Qps * Q;
  else
    % sum-to-1 constraint reduces the effective arity of the node by 1
    CPD.nparams = prod([parent_sizes self_size-1]);
  end
  CPD.nsamples = 0;
end

% One row per joint parent configuration, one column per value of self.
num_parent_configs = prod(fam_sz(1:end-1));
num_self_values = fam_sz(end);
CPD.trimmed_trans = zeros(num_parent_configs, num_self_values); % must declare before reading

CPD = class(CPD, 'tabular_CPD', discrete_CPD(~CPD.adjustable, fam_sz));


%%%%%%%%%%%

function CPD = init_fields()
% INIT_FIELDS Create the bare struct underlying a tabular_CPD object.
%
% Every field is created empty, always in the same order, so that an object
% built from scratch and one loaded from a file have an identical field
% layout. (Matlab requires this.)

CPD = struct( ...
    'CPT',           [], ...
    'sizes',         [], ...
    'prior_type',    [], ...
    'dirichlet',     [], ...
    'adjustable',    [], ...
    'counts',        [], ...
    'nparams',       [], ...
    'nsamples',      [], ...
    'trim',          [], ...
    'trimmed_trans', [], ...
    'leftright',     []);