wolffd@0: wolffd@0: seed = 0; wolffd@0: rand('state', seed); wolffd@0: randn('state', seed); wolffd@0: wolffd@0: chars = ['a', 'c', 'g', 't']; wolffd@0: motif = 'accca'; wolffd@0: motif_length = length(motif); wolffd@0: motif_code = zeros(1, motif_length); wolffd@0: for i=1:motif_length wolffd@0: motif_code(i) = find(chars == motif(i)); wolffd@0: end wolffd@0: wolffd@0: [bnet_init, Qnodes, Fnodes, Onode] = mk_motif_hhmm('motif_length', length(motif)); wolffd@0: %[bnet_init, Qnodes, Fnodes, Onode] = mk_motif_hhmm('motif_pattern', motif); wolffd@0: ss = bnet_init.nnodes_per_slice; wolffd@0: wolffd@0: wolffd@0: wolffd@0: % We generate a training set by creating uniform sequences, wolffd@0: % and inserting a single motif at a random location. wolffd@0: ntrain = 100; wolffd@0: T = 20; wolffd@0: cases = cell(1, ntrain); wolffd@0: wolffd@0: if 1 wolffd@0: % uniform background wolffd@0: background_dist = normalise(ones(1, length(chars))); wolffd@0: end wolffd@0: if 0 wolffd@0: % use a constant background wolffd@0: background_dist = zeros(1, length(chars)); wolffd@0: m = find(chars=='t'); wolffd@0: background_dist(m) = 1.0; wolffd@0: end wolffd@0: if 0 wolffd@0: % use a background skewed away from the motif wolffd@0: p = 0.01; q = (1-(2*p))/2; wolffd@0: background_dist = [p p q q]; wolffd@0: end wolffd@0: wolffd@0: unif_pos = normalise(ones(1, T-length(motif))); wolffd@0: cases = cell(1, ntrain); wolffd@0: data = zeros(1,T); wolffd@0: for i=1:ntrain wolffd@0: data = sample_discrete(background_dist, 1, T); wolffd@0: L = sample_discrete(unif_pos, 1, 1); wolffd@0: data(L:L+length(motif)-1) = motif_code; wolffd@0: cases{i} = cell(ss, T); wolffd@0: cases{i}(Onode,:) = num2cell(data); wolffd@0: end wolffd@0: disp('sample training cases') wolffd@0: for i=1:5 wolffd@0: chars(cell2num(cases{i}(Onode,:))) wolffd@0: end wolffd@0: wolffd@0: engine_init = hmm_inf_engine(bnet_init); wolffd@0: wolffd@0: [bnet_learned, LL, engine_learned] = ... wolffd@0: learn_params_dbn_em(engine_init, cases, 'max_iter', 100, 'thresh', 1e-2); wolffd@0: % 'anneal', 1, 'anneal_rate', 0.7); wolffd@0: wolffd@0: % extract the learned motif profile wolffd@0: eclass = bnet_learned.equiv_class; wolffd@0: CPDO=struct(bnet_learned.CPD{eclass(Onode,1)}); wolffd@0: fprintf('columns = chars, rows = states\n'); wolffd@0: profile_learned = squeeze(CPDO.CPT(2,:,:)) wolffd@0: [m,ndx] = max(profile_learned, [], 2); wolffd@0: map_motif_learned = chars(ndx) wolffd@0: back_learned = squeeze(CPDO.CPT(1,1,:))' wolffd@0: %map_back_learned = chars(argmax(back_learned)) wolffd@0: wolffd@0: CPDO_init = struct(bnet_init.CPD{eclass(Onode,1)}); wolffd@0: profile_init = squeeze(CPDO_init.CPT(2,:,:)); wolffd@0: back_init = squeeze(CPDO_init.CPT(1,1,:))';