wolffd@0
|
1
|
wolffd@0
|
2 seed = 0;
|
wolffd@0
|
3 rand('state', seed);
|
wolffd@0
|
4 randn('state', seed);
|
wolffd@0
|
5
|
wolffd@0
|
6 chars = ['a', 'c', 'g', 't'];
|
wolffd@0
|
7 motif = 'accca';
|
wolffd@0
|
8 motif_length = length(motif);
|
wolffd@0
|
9 motif_code = zeros(1, motif_length);
|
wolffd@0
|
10 for i=1:motif_length
|
wolffd@0
|
11 motif_code(i) = find(chars == motif(i));
|
wolffd@0
|
12 end
|
wolffd@0
|
13
|
wolffd@0
|
14 [bnet_init, Qnodes, Fnodes, Onode] = mk_motif_hhmm('motif_length', length(motif));
|
wolffd@0
|
15 %[bnet_init, Qnodes, Fnodes, Onode] = mk_motif_hhmm('motif_pattern', motif);
|
wolffd@0
|
16 ss = bnet_init.nnodes_per_slice;
|
wolffd@0
|
17
|
wolffd@0
|
18
|
wolffd@0
|
19
|
wolffd@0
|
20 % We generate a training set by creating uniform sequences,
|
wolffd@0
|
21 % and inserting a single motif at a random location.
|
wolffd@0
|
22 ntrain = 100;
|
wolffd@0
|
23 T = 20;
|
wolffd@0
|
24 cases = cell(1, ntrain);
|
wolffd@0
|
25
|
wolffd@0
|
26 if 1
|
wolffd@0
|
27 % uniform background
|
wolffd@0
|
28 background_dist = normalise(ones(1, length(chars)));
|
wolffd@0
|
29 end
|
wolffd@0
|
30 if 0
|
wolffd@0
|
31 % use a constant background
|
wolffd@0
|
32 background_dist = zeros(1, length(chars));
|
wolffd@0
|
33 m = find(chars=='t');
|
wolffd@0
|
34 background_dist(m) = 1.0;
|
wolffd@0
|
35 end
|
wolffd@0
|
36 if 0
|
wolffd@0
|
37 % use a background skewed away from the motif
|
wolffd@0
|
38 p = 0.01; q = (1-(2*p))/2;
|
wolffd@0
|
39 background_dist = [p p q q];
|
wolffd@0
|
40 end
|
wolffd@0
|
41
|
wolffd@0
|
42 unif_pos = normalise(ones(1, T-length(motif)));
|
wolffd@0
|
43 cases = cell(1, ntrain);
|
wolffd@0
|
44 data = zeros(1,T);
|
wolffd@0
|
45 for i=1:ntrain
|
wolffd@0
|
46 data = sample_discrete(background_dist, 1, T);
|
wolffd@0
|
47 L = sample_discrete(unif_pos, 1, 1);
|
wolffd@0
|
48 data(L:L+length(motif)-1) = motif_code;
|
wolffd@0
|
49 cases{i} = cell(ss, T);
|
wolffd@0
|
50 cases{i}(Onode,:) = num2cell(data);
|
wolffd@0
|
51 end
|
wolffd@0
|
52 disp('sample training cases')
|
wolffd@0
|
53 for i=1:5
|
wolffd@0
|
54 chars(cell2num(cases{i}(Onode,:)))
|
wolffd@0
|
55 end
|
wolffd@0
|
56
|
wolffd@0
|
57 engine_init = hmm_inf_engine(bnet_init);
|
wolffd@0
|
58
|
wolffd@0
|
59 [bnet_learned, LL, engine_learned] = ...
|
wolffd@0
|
60 learn_params_dbn_em(engine_init, cases, 'max_iter', 100, 'thresh', 1e-2);
|
wolffd@0
|
61 % 'anneal', 1, 'anneal_rate', 0.7);
|
wolffd@0
|
62
|
wolffd@0
|
63 % extract the learned motif profile
|
wolffd@0
|
64 eclass = bnet_learned.equiv_class;
|
wolffd@0
|
65 CPDO=struct(bnet_learned.CPD{eclass(Onode,1)});
|
wolffd@0
|
66 fprintf('columns = chars, rows = states\n');
|
wolffd@0
|
67 profile_learned = squeeze(CPDO.CPT(2,:,:))
|
wolffd@0
|
68 [m,ndx] = max(profile_learned, [], 2);
|
wolffd@0
|
69 map_motif_learned = chars(ndx)
|
wolffd@0
|
70 back_learned = squeeze(CPDO.CPT(1,1,:))'
|
wolffd@0
|
71 %map_back_learned = chars(argmax(back_learned))
|
wolffd@0
|
72
|
wolffd@0
|
73 CPDO_init = struct(bnet_init.CPD{eclass(Onode,1)});
|
wolffd@0
|
74 profile_init = squeeze(CPDO_init.CPT(2,:,:));
|
wolffd@0
|
75 back_init = squeeze(CPDO_init.CPT(1,1,:))';
|