wolffd@0
|
function [transmat, initState] = transmat_train_observed(labels, nstates, varargin)
% TRANSMAT_TRAIN_OBSERVED ML/MAP estimation of a Markov chain from fully observed state sequences
% function [transmat, initState] = transmat_train_observed(labels, nstates, varargin)
%
% INPUTS
%   labels  - observed state sequences, each entry an integer in 1..nstates.
%             If all sequences have the same length:
%               labels(ex,t)   (one sequence per row)
%             If sequences have different lengths, use a cell array:
%               labels{ex}(t)
%             A numeric column vector is treated as ONE sequence, not as
%             many sequences of length 1.
%   nstates - number of discrete states.
%
% OPTIONAL ARGUMENTS (name/value pairs, parsed by process_options)
%   'dirichletPriorWeight' - pseudo count added to every transition count and
%                            every initial-state count [0]
%   'mkSymmetric'          - if true, the transition counts are symmetrised
%                            (counts + counts') before normalisation [0]
%
% OUTPUTS
%   transmat  - nstates x nstates matrix produced by mk_stochastic from the
%               smoothed transition counts (presumably row-stochastic, i.e.
%               transmat(i,j) = P(next=j | current=i); confirm against
%               mk_stochastic).
%   initState - nstates x 1 vector of initial-state probabilities (sums to 1,
%               or is all zeros when there is no data and no prior).

% The third output is kept on purpose: asking process_options for one more
% output than there are named options changes the number of outputs it sees.
% NOTE(review): presumably that makes it return unrecognised options instead
% of raising an error - confirm against process_options.
[dirichletPriorWeight, mkSymmetric, unusedOpts] = process_options(...
    varargin, 'dirichletPriorWeight', 0, 'mkSymmetric', 0); %#ok<ASGLU>

if ~iscell(labels)
  if size(labels, 2) == 1
    % A column vector is a single sequence laid out vertically.
    labels = labels';
  end
  labels = num2cell(labels, 2); % each row gets its own cell
end
numex = length(labels);

counts  = zeros(nstates, nstates); % counts(i,j)  = number of i -> j transitions
counts1 = zeros(nstates, 1);       % counts1(i)   = number of sequences starting in i
for s = 1:numex
  labs = labels{s};
  labs = labs(:)';
  if isempty(labs)
    % An empty sequence carries no information; skipping it avoids an
    % index-out-of-bounds error on labs(1) below.
    continue;
  end
  % Column k of dat is the k-th (from, to) pair of consecutive states.
  dat = [labs(1:end-1); labs(2:end)];
  counts = counts + compute_counts(dat, [nstates nstates]);
  q = labs(1);
  counts1(q) = counts1(q) + 1;
end

if mkSymmetric
  counts = counts + counts';
end

% The scalar prior weight is added to every cell (uniform Dirichlet smoothing).
transmat = mk_stochastic(counts + dirichletPriorWeight);

% Normalise explicitly rather than calling normalize(): MATLAB R2018a and
% later ship a built-in normalize() that z-scores its input by default, so
% depending on path order the old call could silently return a vector that
% is not a probability distribution.
initCounts = counts1 + dirichletPriorWeight;
total = sum(initCounts);
if total == 0
  total = 1; % no data and no prior: return zeros instead of NaNs
end
initState = initCounts / total;
|
wolffd@0
|
38
|
wolffd@0
|
39
|