wolffd@0
|
1 function [bnet, Qnodes, Fnodes, Onode] = mk_motif_hhmm(varargin)
|
wolffd@0
|
2 % [bnet, Qnodes, Fnodes, Onode] = mk_motif_hhmm(...)
|
wolffd@0
|
3 %
|
wolffd@0
|
4 % Make the following HHMM
|
wolffd@0
|
5 %
|
wolffd@0
|
6 % S2 <----------------------> S1
|
wolffd@0
|
7 % | |
|
wolffd@0
|
8 % | |
|
wolffd@0
|
9 % M1 -> M2 -> M3 -> end B1 -> end
|
wolffd@0
|
10 %
|
wolffd@0
|
11 % where Mi represents the i'th letter in the motif
|
wolffd@0
|
12 % and B is the background state.
|
wolffd@0
|
13 % Si chooses between running the motif or the background.
|
wolffd@0
|
14 % The Si and B states have self loops (not shown).
|
wolffd@0
|
15 %
|
wolffd@0
|
16 % The transition params are defined to respect the above topology.
|
wolffd@0
|
17 % The background is uniform; each motif state has a random obs. distribution.
|
wolffd@0
|
18 %
|
wolffd@0
|
19 % Optional params:
|
wolffd@0
|
20 % motif_length - required, unless we specify motif_pattern
|
wolffd@0
|
21 % motif_pattern - if specified, we make the motif submodel deterministically
|
wolffd@0
|
22 % emit this pattern
|
wolffd@0
|
23 % background - if specified, we make the background submodel
|
wolffd@0
|
24 % deterministically emit this (makes the motif easier to see!)
|
wolffd@0
|
25
|
wolffd@0
|
26
|
wolffd@0
|
27 args = varargin;
|
wolffd@0
|
28 nargs = length(args);
|
wolffd@0
|
29
|
wolffd@0
|
30 % extract pattern, if any
|
wolffd@0
|
31 motif_pattern = [];
|
wolffd@0
|
32 for i=1:2:nargs
|
wolffd@0
|
33 switch args{i},
|
wolffd@0
|
34 case 'motif_pattern', motif_pattern = args{i+1};
|
wolffd@0
|
35 end
|
wolffd@0
|
36 end
|
wolffd@0
|
37
|
wolffd@0
|
38 % set defaults
|
wolffd@0
|
39 motif_length = length(motif_pattern);
|
wolffd@0
|
40 background_char = [];
|
wolffd@0
|
41
|
wolffd@0
|
42 % get params
|
wolffd@0
|
43 for i=1:2:nargs
|
wolffd@0
|
44 switch args{i},
|
wolffd@0
|
45 case 'motif_length', motif_length = args{i+1};
|
wolffd@0
|
46 case 'background', background_char = args{i+1};
|
wolffd@0
|
47 end
|
wolffd@0
|
48 end
|
wolffd@0
|
49
|
wolffd@0
|
50
|
wolffd@0
|
51 chars = ['a', 'c', 'g', 't'];
|
wolffd@0
|
52 Osize = length(chars);
|
wolffd@0
|
53
|
wolffd@0
|
54 Qsize = [2 motif_length];
|
wolffd@0
|
55 Qnodes = 1:2;
|
wolffd@0
|
56 D = 2;
|
wolffd@0
|
57 transprob = cell(1,D);
|
wolffd@0
|
58 termprob = cell(1,D);
|
wolffd@0
|
59 startprob = cell(1,D);
|
wolffd@0
|
60
|
wolffd@0
|
61 % startprob{d}(k,j), startprob{1}(1,j)
|
wolffd@0
|
62 % transprob{d}(i,k,j), transprob{1}(i,j)
|
wolffd@0
|
63 % termprob{d}(k,j)
|
wolffd@0
|
64
|
wolffd@0
|
65
|
wolffd@0
|
66 % LEVEL 1
|
wolffd@0
|
67
|
wolffd@0
|
68 startprob{1} = zeros(1, 2);
|
wolffd@0
|
69 startprob{1} = [1 0]; % always start in the background model
|
wolffd@0
|
70
|
wolffd@0
|
71 % When in the background state, we stay there with high prob
|
wolffd@0
|
72 % When in the motif state, we immediately return to the background state.
|
wolffd@0
|
73 transprob{1} = [0.8 0.2;
|
wolffd@0
|
74 1.0 0.0];
|
wolffd@0
|
75
|
wolffd@0
|
76
|
wolffd@0
|
77 % LEVEL 2
|
wolffd@0
|
78 startprob{2} = 'leftstart'; % both submodels start in substate 1
|
wolffd@0
|
79 transprob{2} = zeros(motif_length, 2, motif_length);
|
wolffd@0
|
80 termprob{2} = zeros(2, motif_length);
|
wolffd@0
|
81
|
wolffd@0
|
82 % In the background model, we only use state 1.
|
wolffd@0
|
83 transprob{2}(1,1,1) = 1; % self loop
|
wolffd@0
|
84 termprob{2}(1,1) = 0.2; % prob transition to end state
|
wolffd@0
|
85
|
wolffd@0
|
86 % Motif model
|
wolffd@0
|
87 transprob{2}(:,2,:) = mk_leftright_transmat(motif_length, 0); % no self loops
|
wolffd@0
|
88 termprob{2}(2,end) = 1.0; % last state immediately terminates
|
wolffd@0
|
89
|
wolffd@0
|
90
|
wolffd@0
|
91 % OBS LEVEl
|
wolffd@0
|
92
|
wolffd@0
|
93 obsprob = zeros([Qsize Osize]);
|
wolffd@0
|
94 if isempty(background_char)
|
wolffd@0
|
95 % uniform background model
|
wolffd@0
|
96 %obsprob(1,1,:) = normalise(ones(Osize,1));
|
wolffd@0
|
97 obsprob(1,1,:) = normalise(rand(Osize,1));
|
wolffd@0
|
98 else
|
wolffd@0
|
99 % deterministic background model (easy to see!)
|
wolffd@0
|
100 m = find(chars==background_char);
|
wolffd@0
|
101 obsprob(1,1,m) = 1.0;
|
wolffd@0
|
102 end
|
wolffd@0
|
103
|
wolffd@0
|
104 if ~isempty(motif_pattern)
|
wolffd@0
|
105 % initialise with true motif (cheating)
|
wolffd@0
|
106 for i=1:motif_length
|
wolffd@0
|
107 m = find(chars == motif_pattern(i));
|
wolffd@0
|
108 obsprob(2,i,m) = 1.0;
|
wolffd@0
|
109 end
|
wolffd@0
|
110 else
|
wolffd@0
|
111 obsprob(2,:,:) = mk_stochastic(rand(motif_length, Osize));
|
wolffd@0
|
112 end
|
wolffd@0
|
113
|
wolffd@0
|
114 if 0
|
wolffd@0
|
115 Oargs = {'CPT', obsprob};
|
wolffd@0
|
116 else
|
wolffd@0
|
117 % We use a minent prior for the emission distribution for the states in the motif model
|
wolffd@0
|
118 % (but not the background model). This encourages nearly deterministic distributions.
|
wolffd@0
|
119 % We create an index matrix (where M = motif length)
|
wolffd@0
|
120 % [2 1
|
wolffd@0
|
121 % 2 2
|
wolffd@0
|
122 % ...
|
wolffd@0
|
123 % 2 M]
|
wolffd@0
|
124 % and then convert this to a list of integers, which
|
wolffd@0
|
125 % specifies when to use the minent prior (Q1=2 specifies motif model).
|
wolffd@0
|
126 M = motif_length;
|
wolffd@0
|
127 ndx = [2*ones(M,1) (1:M)'];
|
wolffd@0
|
128 pcases = subv2ind([2 motif_length], ndx);
|
wolffd@0
|
129 Oargs = {'CPT', obsprob, 'prior_type', 'entropic', 'entropic_pcases', pcases};
|
wolffd@0
|
130 end
|
wolffd@0
|
131
|
wolffd@0
|
132
|
wolffd@0
|
133
|
wolffd@0
|
134 [bnet, Qnodes, Fnodes, Onode] = mk_hhmm('Qsizes', Qsize, 'Osize', Osize, 'discrete_obs', 1, ...
|
wolffd@0
|
135 'Oargs', Oargs, 'Ops', Qnodes(1:2), ...
|
wolffd@0
|
136 'startprob', startprob, 'transprob', transprob, 'termprob', termprob);
|
wolffd@0
|
137
|