annotate toolboxes/FullBNT-1.0.7/bnt/examples/dynamic/HHMM/Motif/learn_motif_hhmm.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1
wolffd@0 2 seed = 0;
wolffd@0 3 rand('state', seed);
wolffd@0 4 randn('state', seed);
wolffd@0 5
wolffd@0 6 chars = ['a', 'c', 'g', 't'];
wolffd@0 7 motif = 'accca';
wolffd@0 8 motif_length = length(motif);
wolffd@0 9 motif_code = zeros(1, motif_length);
wolffd@0 10 for i=1:motif_length
wolffd@0 11 motif_code(i) = find(chars == motif(i));
wolffd@0 12 end
wolffd@0 13
wolffd@0 14 [bnet_init, Qnodes, Fnodes, Onode] = mk_motif_hhmm('motif_length', length(motif));
wolffd@0 15 %[bnet_init, Qnodes, Fnodes, Onode] = mk_motif_hhmm('motif_pattern', motif);
wolffd@0 16 ss = bnet_init.nnodes_per_slice;
wolffd@0 17
wolffd@0 18
wolffd@0 19
wolffd@0 20 % We generate a training set by creating uniform sequences,
wolffd@0 21 % and inserting a single motif at a random location.
wolffd@0 22 ntrain = 100;
wolffd@0 23 T = 20;
wolffd@0 24 cases = cell(1, ntrain);
wolffd@0 25
wolffd@0 26 if 1
wolffd@0 27 % uniform background
wolffd@0 28 background_dist = normalise(ones(1, length(chars)));
wolffd@0 29 end
wolffd@0 30 if 0
wolffd@0 31 % use a constant background
wolffd@0 32 background_dist = zeros(1, length(chars));
wolffd@0 33 m = find(chars=='t');
wolffd@0 34 background_dist(m) = 1.0;
wolffd@0 35 end
wolffd@0 36 if 0
wolffd@0 37 % use a background skewed away from the motif
wolffd@0 38 p = 0.01; q = (1-(2*p))/2;
wolffd@0 39 background_dist = [p p q q];
wolffd@0 40 end
wolffd@0 41
wolffd@0 42 unif_pos = normalise(ones(1, T-length(motif)));
wolffd@0 43 cases = cell(1, ntrain);
wolffd@0 44 data = zeros(1,T);
wolffd@0 45 for i=1:ntrain
wolffd@0 46 data = sample_discrete(background_dist, 1, T);
wolffd@0 47 L = sample_discrete(unif_pos, 1, 1);
wolffd@0 48 data(L:L+length(motif)-1) = motif_code;
wolffd@0 49 cases{i} = cell(ss, T);
wolffd@0 50 cases{i}(Onode,:) = num2cell(data);
wolffd@0 51 end
wolffd@0 52 disp('sample training cases')
wolffd@0 53 for i=1:5
wolffd@0 54 chars(cell2num(cases{i}(Onode,:)))
wolffd@0 55 end
wolffd@0 56
wolffd@0 57 engine_init = hmm_inf_engine(bnet_init);
wolffd@0 58
wolffd@0 59 [bnet_learned, LL, engine_learned] = ...
wolffd@0 60 learn_params_dbn_em(engine_init, cases, 'max_iter', 100, 'thresh', 1e-2);
wolffd@0 61 % 'anneal', 1, 'anneal_rate', 0.7);
wolffd@0 62
wolffd@0 63 % extract the learned motif profile
wolffd@0 64 eclass = bnet_learned.equiv_class;
wolffd@0 65 CPDO=struct(bnet_learned.CPD{eclass(Onode,1)});
wolffd@0 66 fprintf('columns = chars, rows = states\n');
wolffd@0 67 profile_learned = squeeze(CPDO.CPT(2,:,:))
wolffd@0 68 [m,ndx] = max(profile_learned, [], 2);
wolffd@0 69 map_motif_learned = chars(ndx)
wolffd@0 70 back_learned = squeeze(CPDO.CPT(1,1,:))'
wolffd@0 71 %map_back_learned = chars(argmax(back_learned))
wolffd@0 72
wolffd@0 73 CPDO_init = struct(bnet_init.CPD{eclass(Onode,1)});
wolffd@0 74 profile_init = squeeze(CPDO_init.CPT(2,:,:));
wolffd@0 75 back_init = squeeze(CPDO_init.CPT(1,1,:))';