diff toolboxes/FullBNT-1.0.7/bnt/examples/dynamic/HHMM/Motif/learn_motif_hhmm.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/FullBNT-1.0.7/bnt/examples/dynamic/HHMM/Motif/learn_motif_hhmm.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,75 @@
+
+seed = 0;
+rand('state', seed);
+randn('state', seed);
+
+chars = ['a', 'c', 'g', 't'];
+motif = 'accca';
+motif_length = length(motif);
+motif_code = zeros(1, motif_length);
+for i=1:motif_length
+  motif_code(i) = find(chars == motif(i));
+end
+
+[bnet_init, Qnodes, Fnodes, Onode] = mk_motif_hhmm('motif_length', length(motif));
+%[bnet_init, Qnodes, Fnodes, Onode] = mk_motif_hhmm('motif_pattern', motif);
+ss = bnet_init.nnodes_per_slice;
+
+
+
+% We generate a training set by creating uniform sequences,
+% and inserting a single motif at a random location.
+ntrain = 100;
+T = 20;
+cases = cell(1, ntrain);
+
+if 1
+  % uniform background 
+  background_dist = normalise(ones(1, length(chars)));
+end
+if 0
+  % use a constant background
+  background_dist = zeros(1, length(chars));
+  m = find(chars=='t');
+  background_dist(m) = 1.0;
+end
+if 0
+  % use a background skewed away from the motif
+  p = 0.01; q = (1-(2*p))/2;
+  background_dist = [p p q q];
+end
+
+unif_pos = normalise(ones(1, T-length(motif)));
+cases = cell(1, ntrain);
+data = zeros(1,T);
+for i=1:ntrain
+  data = sample_discrete(background_dist, 1, T);
+  L = sample_discrete(unif_pos, 1, 1);
+  data(L:L+length(motif)-1) = motif_code;
+  cases{i} = cell(ss, T);
+  cases{i}(Onode,:) = num2cell(data);
+end
+disp('sample training cases')
+for i=1:5
+  chars(cell2num(cases{i}(Onode,:)))
+end
+
+engine_init = hmm_inf_engine(bnet_init);
+
+[bnet_learned, LL, engine_learned] = ...
+    learn_params_dbn_em(engine_init, cases, 'max_iter', 100, 'thresh', 1e-2);
+%			'anneal', 1, 'anneal_rate', 0.7);
+
+% extract the learned motif profile
+eclass = bnet_learned.equiv_class;
+CPDO=struct(bnet_learned.CPD{eclass(Onode,1)});
+fprintf('columns = chars, rows = states\n');
+profile_learned = squeeze(CPDO.CPT(2,:,:))
+[m,ndx] = max(profile_learned, [], 2);
+map_motif_learned = chars(ndx)
+back_learned = squeeze(CPDO.CPT(1,1,:))'
+%map_back_learned = chars(argmax(back_learned))
+
+CPDO_init = struct(bnet_init.CPD{eclass(Onode,1)});
+profile_init = squeeze(CPDO_init.CPT(2,:,:));
+back_init = squeeze(CPDO_init.CPT(1,1,:))';