Mercurial > hg > camir-aes2014
view toolboxes/FullBNT-1.0.7/bnt/examples/static/HME/hmemenu.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line source
% dataset -> (1=>user data) or (2=>toy example) % type -> (1=> Regression model) or (2=>Classification model) % num_glevel -> number of hidden nodes in the net (gating levels) % num_exp -> number of experts in the net % branch_fact -> dimension of the hidden nodes in the net % cov_dim -> root node dimension % res_dim -> output node dimension % nodes_info -> 4 x num_glevel+2 matrix that contain all the info about the nodes: % nodes_info(1,:) = nodes type: (0=>gaussian)or(1=>softmax)or(2=>mlp) % nodes_info(2,:) = nodes size: [cov_dim num_glevel x branch_fact res_dim] % nodes_info(3,:) = hidden units number (for mlp nodes) % |- optimizer iteration number (for softmax & mlp CPD) % nodes_info(4,:) =|- covariance type (for gaussian CPD)-> % | (1=>Full)or(2=>Diagonal)or(3=>Full&Tied)or(4=>Diagonal&Tied) % fh1 -> Figure: data & decizion boundaries; fh2 -> confusion matrix; fh3 -> LL trace % test_data -> test data matrix % train_data -> training data matrix % ntrain -> size(train_data,2) % ntest -> size(test_data,2) % cases -> (cell array) training data formatted for the learning engine % bnet -> bayesian net before learning % bnet2 -> bayesian net after learning % ll -> log-likelihood before learning % LL2 -> log-likelihood trace % onodes -> obs nodes in bnet & bnet2 % max_em_iter -> maximum number of interations of the EM algorithm % train_result -> prediction on the training set (as test_result) % % IMPORTANT: CHECK the loading path (lines 64 & 364) % ---------------------------------------------------------------------------------------------------- % -> pierpaolo_b@hotmail.com or -> pampo@interfree.it % ---------------------------------------------------------------------------------------------------- error('this no longer works with the latest version of BNT') clear all; clc; disp('---------------------------------------------------'); disp(' Hierarchical Mixtures of Experts models builder '); disp('---------------------------------------------------'); disp(' ') disp(' Using this script you can build both an HME model') disp('as in [Wat94] and [Jor94] i.e. with ''softmax'' gating') disp('nodes and ''gaussian'' ( for regression ) or ''softmax''') disp('( for classification ) expert node, and its variants') disp('called ''gated nets'' where we use ''mlp'' models in') disp('place of a number of ''softmax'' ones [Mor98], [Wei95].') disp(' You can decide to train and test the model on your') disp('datasets or to evaluate its performance on a toy') disp('example.') disp(' ') disp('Reference') disp('[Mor98] P. Moerland (1998):') disp(' Localized mixtures of experts. (http://www.idiap.ch/~perry/)') disp('[Jor94] M.I. Jordan, R.A. Jacobs (1994):') disp(' HME and the EM algorithm. (http://www.cs.berkeley.edu/~jordan/)') disp('[Wat94] S.R. Waterhouse, A.J. Robinson (1994):') disp(' Classification using HME. (http://www.oigeeza.com/steve/)') disp('[Wei95] A.S. Weigend, M. Mangeas (1995):') disp(' Nonlinear gated experts for time series.') disp(' ') if 0 disp('(See the figure)') pause(5); %%%%%WARNING!%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% im_path=which('HMEforMatlab.jpg'); fig=imread(im_path, 'jpg'); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% figure('Units','pixels','MenuBar','none','NumberTitle','off', 'Name', 'HME model'); image(fig); axis image; axis off; clear fig; set(gca,'Position',[0 0 1 1]) disp('(Press any key to continue)') pause end clc disp('---------------------------------------------------'); disp(' Specify the Architecture '); disp('---------------------------------------------------'); disp(' '); disp('What kind of model do you need?') disp(' ') disp('1) Regression ') disp('2) Classification') disp(' ') type=input('1 or 2?: '); if (isempty(type)|(~ismember(type,[1 2]))), error('Invalid value'); end clc disp('----------------------------------------------------'); disp(' Specify the Architecture '); disp('----------------------------------------------------'); disp(' ') disp('Now you have to set the number of experts and gating') disp('levels in the net. This script builds only balanced') disp('hierarchy with the same branching factor (>1)at each') disp('(gating) level. So remember that: ') disp(' ') disp(' num_exp = branch_fact^num_glevel ') disp(' ') disp('with branch_fact >=2.') disp('You can also set to zeros the number of gating level') disp('in order to obtain a classical GLM model. ') disp(' ') disp('----------------------------------------------------'); disp(' ') num_glevel=input('Insert the number of gating levels {0,...,20}: '); if (isempty(num_glevel)|(~ismember(num_glevel,[0:20]))), error('Invalid value'); end nodes_info=zeros(4,num_glevel+2); if num_glevel>0, %------------------------------------------------------------------------------------ for i=2:num_glevel+1, clc disp('----------------------------------------------------'); disp(' Specify the Architecture '); disp('----------------------------------------------------'); disp(' ') disp(['-> Gating network ', num2str(i-1), ' is a: ']) disp(' ') disp(' 1) Softmax model'); disp(' 2) Two layer perceptron model') disp(' ') nodes_info(1,i)=input('1 or 2?: '); if (isempty(nodes_info(1,i))|(~ismember(nodes_info(1,i),[1 2]))), error('Invalid value'); end disp(' ') if nodes_info(1,i)==2, nodes_info(3,i)=input('Insert the number of units in the hidden layer: '); if (isempty(nodes_info(3,i))|(floor(nodes_info(3,i))~=nodes_info(3,i))|(nodes_info(3,i)<=0)), error(['Invalid value: ', num2str(nodes_info(3,i)), ' is not a positive integer!']); end disp(' ') end nodes_info(4,i)=input('Insert the optimizer iteration number: '); if (isempty(nodes_info(4,i))|(floor(nodes_info(4,i))~=nodes_info(4,i))|(nodes_info(4,i)<=0)), error(['Invalid value: ', num2str(nodes_info(4,i)), ' is not a positive integer!']); end end clc disp('---------------------------------------------------------'); disp(' Specify the Architecture '); disp('---------------------------------------------------------'); disp(' ') disp('Now you have to set the number of experts in the network'); disp('The value will be adjusted in order to obtain a hierarchy'); disp('as said above.') disp(' '); num_exp=input(['Insert the approximative number of experts (>=', num2str(2^num_glevel), '): ']); if (isempty(num_exp)|(num_exp<=0)|(num_exp<2^num_glevel)), error('Invalid value'); end app1=0; base=2; while app1<num_exp, app1=base^num_glevel; base=base+1; end app2=(base-2)^num_glevel; branch_fact=base-1; if app2>=(2^num_glevel)&(abs(app2-num_exp)<abs(app1-num_exp)), branch_fact=base-2; end clear app1 app2 base; disp(' ') disp(['The effective number of experts in the net is: ', num2str(branch_fact^num_glevel), '.']) disp(' '); else clc disp('---------------------------------------------------------'); disp(' Specify the Architecture (GLM model) '); disp('---------------------------------------------------------'); disp(' ') end % END of: if num_glevel>0------------------------------------------------------------------------- if type==2, disp(['-> Expert node is a: ']) disp(' ') disp(' 1) Softmax model'); disp(' 2) Two layer perceptron model') disp(' ') nodes_info(1,end)=input('1 or 2?: '); if (isempty(nodes_info(1,end))|(~ismember(nodes_info(1,end),[1 2]))), error('Invalid value'); end disp(' ') if nodes_info(1,end)==2, nodes_info(3,end)=input('Insert the number of units in the hidden layer: '); if (isempty(nodes_info(3,end))|(floor(nodes_info(3,end))~=nodes_info(3,end))|(nodes_info(3,end)<=0)), error(['Invalid value: ', num2str(nodes_info(3,end)), ' is not a positive integer!']); end disp(' ') end nodes_info(4,end)=input('Insert the optimizer iteration number: '); if (isempty(nodes_info(4,end))|(floor(nodes_info(4,end))~=nodes_info(4,end))|(nodes_info(4,end)<=0)), error(['Invalid value: ', num2str(nodes_info(4,end)), ' is not a positive integer!']); end elseif type==1, disp('What kind of covariance matrix structure do you want?') disp(' ') disp(' 1) Full'); disp(' 2) Diagonal') disp(' 3) Full & Tied'); disp(' 4) Diagonal & Tied') disp(' ') nodes_info(4,end)=input('1, 2, 3 or 4?: '); if (isempty(nodes_info(4,end))|(~ismember(nodes_info(4,end),[1 2 3 4]))), error('Invalid value'); end end clc disp('----------------------------------------------------'); disp(' Specify the Input '); disp('----------------------------------------------------'); disp(' ') disp('Do you want to...') disp(' ') disp('1) ...use your own dataset?') disp('2) ...apply the model on a toy example?') disp(' ') dataset=input('1 or 2?: '); if (isempty(dataset)|(~ismember(dataset,[1 2]))), error('Invalid value'); end if dataset==1, if type==1, clc disp('-------------------------------------------------------'); disp(' Specify the Input - Regression problem '); disp('-------------------------------------------------------'); disp(' ') disp('Be sure that each row of your data matrix is an example'); disp('with the covariate values that precede the respond ones') disp(' ') disp('-------------------------------------------------------'); disp(' ') cov_dim=input('Insert the covariate space dimension: '); if (isempty(cov_dim)|(floor(cov_dim)~=cov_dim)|(cov_dim<=0)), error(['Invalid value: ', num2str(cov_dim), ' is not a positive integer!']); end disp(' ') res_dim=input('Insert the dimension of the respond variable: '); if (isempty(res_dim)|(floor(res_dim)~=res_dim)|(res_dim<=0)), error(['Invalid value: ', num2str(res_dim), ' is not a positive integer!']); end disp(' '); elseif type==2 clc disp('-------------------------------------------------------'); disp(' Specify the Input - Classification problem '); disp('-------------------------------------------------------'); disp(' ') disp('Be sure that each row of your data matrix is an example'); disp('with the covariate values that precede the class labels'); disp('(integer value >=1). '); disp(' ') disp('-------------------------------------------------------'); disp(' ') cov_dim=input('Insert the covariate space dimension: '); if (isempty(cov_dim)|(floor(cov_dim)~=cov_dim)|(cov_dim<=0)), error(['Invalid value: ', num2str(cov_dim), ' is not a positive integer!']); end disp(' ') res_dim=input('Insert the number of classes: '); if (isempty(res_dim)|(floor(res_dim)~=res_dim)|(res_dim<=0)), error(['Invalid value: ', num2str(res_dim), ' is not a positive integer!']); end disp(' ') end % ------------------------------------------------------------------------------------------------ % Loading training data -------------------------------------------------------------------------- % ------------------------------------------------------------------------------------------------ train_path=input('Insert the complete (with extension) path of the training data file:\n >> ','s'); if isempty(train_path), error('You must specify a data set for training!'); end if ~isempty(findstr('.mat',train_path)), ap=load(train_path); app=fieldnames(ap); train_data=eval(['ap.', app{1,1}]); clear ap app; elseif ~isempty(findstr('.txt',train_path)), train_data=load(train_path, '-ascii'); else error('Invalid data format: not a .mat or a .txt file') end if (size(train_data,2)~=cov_dim+res_dim)&(type==1), error(['Invalid data matrix size: ', num2str(size(train_data,2)), ' columns rather than ',... num2str(cov_dim+res_dim),'!']); elseif (size(train_data,2)~=cov_dim+1)&(type==2), error(['Invalid data matrix size: ', num2str(size(train_data,2)), ' columns rather than ',... num2str(cov_dim+1),'!']); elseif (~isempty(find(ismember(intersect([train_data(:,end)' 1:res_dim],... train_data(:,end)'),[1:res_dim])==0)))&(type==2), error('Invalid class label'); end ntrain=size(train_data,1); train_d=train_data(:,1:cov_dim); if type==2, train_t=zeros(ntrain, res_dim); for m=1:res_dim, train_t((find(train_data(:,end)==m))',m)=1; end else train_t=train_data(:,cov_dim+1:end); end disp(' ') % ------------------------------------------------------------------------------------------------ % Loading test data ------------------------------------------------------------------------------ % ------------------------------------------------------------------------------------------------ disp('(If you don''t want to specify a test-set press ''return'' only)'); test_path=input('Insert the complete (with extension) path of the test data file:\n >> ','s'); if ~isempty(test_path), if ~isempty(findstr('.mat',test_path)), ap=load(test_path); app=fieldnames(ap); test_data=eval(['ap.', app{1,1}]); clear ap app; elseif ~isempty(findstr('.txt',test_path)), test_data=load(test_path, '-ascii'); else error('Invalid data format: not a .mat or a .txt file') end if (size(test_data,2)~=cov_dim)&(size(test_data,2)~=cov_dim+res_dim)&(type==1), error(['Invalid data matrix size: ', num2str(size(test_data,2)), ' columns rather than ',... num2str(cov_dim+res_dim), ' or ', num2str(cov_dim), '!']); elseif (size(test_data,2)~=cov_dim)&(size(test_data,2)~=cov_dim+1)&(type==2), error(['Invalid data matrix size: ', num2str(size(test_data,2)), ' columns rather than ',... num2str(cov_dim+1), ' or ', num2str(cov_dim), '!']); elseif (~isempty(find(ismember(intersect([test_data(:,end)' 1:res_dim],... test_data(:,end)'),[1:res_dim])==0)))&(type==2)&(size(test_data,2)==cov_dim+1), error('Invalid class label'); end ntest=size(test_data,1); test_d=test_data(:,1:cov_dim); if (type==2)&(size(test_data,2)>cov_dim), test_t=zeros(ntest, res_dim); for m=1:res_dim, test_t((find(test_data(:,end)==m))',m)=1; end elseif (type==1)&(size(test_data,2)>cov_dim), test_t=test_data(:,cov_dim+1:end); end disp(' '); end else clc disp('----------------------------------------------------'); disp(' Specify the Input '); disp('----------------------------------------------------'); disp(' ') ntrain = input('Insert the number of examples in training (<500): '); if (isempty(ntrain)|(floor(ntrain)~=ntrain)|(ntrain<=0)|(ntrain>500)), error(['Invalid value: ', num2str(ntrain), ' is not a positive integer <500!']); end disp(' ') test_path='toy'; ntest = input('Insert the number of examples in test (<500): '); if (isempty(ntest)|(floor(ntest)~=ntest)|(ntest<=0)|(ntest>500)), error(['Invalid value: ', num2str(ntest), ' is not a positive integer <500!']); end if type==2, cov_dim=2; res_dim=3; seed = 42; [train_d, ntrain1, ntrain2, train_t]=gen_data(ntrain, seed); for m=1:ntrain q=[]; q = find(train_t(m,:)==1); train_data(m,:)=[train_d(m,:) q]; end [test_d, ntest1, ntest2, test_t]=gen_data(ntest); for m=1:ntest q=[]; q = find(test_t(m,:)==1); test_data(m,:)=[test_d(m,:) q]; end else cov_dim=1; res_dim=1; global HOME %%%%%WARNING!%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% load([HOME '/examples/static/Misc/mixexp_data.txt'], '-ascii'); %%%%%WARNING!%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% train_data = mixexp_data(1:ntrain, :); train_d=train_data(:,1:cov_dim); train_t=train_data(:,cov_dim+1:end); test_data = mixexp_data(ntrain+1:ntrain+ntest, :); test_d=test_data(:,1:cov_dim); if size(test_data,2)>cov_dim, test_t=test_data(:,cov_dim+1:end); end end end % Set the nodes dimension----------------------------------- if num_glevel>0, nodes_info(2,2:num_glevel+1)=branch_fact; end nodes_info(2,1)=cov_dim; nodes_info(2,end)=res_dim; %----------------------------------------------------------- % Prepare the training data for the learning engine--------- %----------------------------------------------------------- cases = cell(size(nodes_info,2), ntrain); for m=1:ntrain, cases{1,m}=train_data(m,1:cov_dim)'; cases{end,m}=train_data(m,cov_dim+1:end)'; end %----------------------------------------------------------------------------------------------------- [bnet onodes]=hme_topobuilder(nodes_info); engine = jtree_inf_engine(bnet, onodes); clc disp('---------------------------------------------------------------------'); disp(' L E A R N I N G '); disp('---------------------------------------------------------------------'); disp(' ') ll = 0; for l=1:ntrain scritta=['example number: ', int2str(l),'---------------------------------------------']; disp(scritta); ev = cases(:,l); [engine, loglik] = enter_evidence(engine, ev); ll = ll + loglik; end disp(' ') disp(['Log-likelihood before learning: ', num2str(ll)]); disp(' ') disp('(Press any key to continue)'); pause %----------------------------------------------------------- clc disp('---------------------------------------------------------------------'); disp(' L E A R N I N G '); disp('---------------------------------------------------------------------'); disp(' ') max_em_iter=input('Insert the maximum number of the EM algorithm iterations: '); if (isempty(max_em_iter)|(floor(max_em_iter)~=max_em_iter)|(max_em_iter<=1)), error(['Invalid value: ', num2str(ntest), ' is not a positive integer >1!']); end disp(' ') disp(['Log-likelihood before learning: ', num2str(ll)]); disp(' ') [bnet2, LL2] = learn_params_em(engine, cases, max_em_iter); disp(' ') fprintf('HME: loglik before learning %f, after %d iters %f\n', ll, length(LL2), LL2(end)); disp(' ') disp('(Press any key to continue)'); pause %----------------------------------------------------------------------------------- % Classification problem: plot data & decision boundaries if the input data size = 2 % Regression problem: plot data & prediction if the input data size = 1 %----------------------------------------------------------------------------------- if (type==2)&(nodes_info(2,1)==2)&(~isempty(test_path)), fh1=hme_class_plot(bnet2, nodes_info, train_data, test_data); disp(' '); disp('(See the figure)'); elseif (type==2)&(nodes_info(2,1)==2)&(isempty(test_path)), fh1=hme_class_plot(bnet2, nodes_info, train_data); disp(' '); disp('(See the figure)'); elseif (type==1)&(nodes_info(2,1)==1)&(~isempty(test_path)), fh1=hme_reg_plot(bnet2, nodes_info, train_data, test_data); disp(' '); disp('(See the figure)'); elseif (type==1)&(nodes_info(2,1)==1)&(isempty(test_path)), fh1=hme_reg_plot(bnet2, nodes_info, train_data); disp(' ') disp('(See the figure)'); end %----------------------------------------------------------------------------------- % Classification problem: plot confusion matrix %----------------------------------------------------------------------------------- if (type==2) ztrain=fhme(bnet2, nodes_info, train_d, size(train_d,1)); [Htrain, trainRate]=confmat(ztrain, train_t); % CM on the training set fh2=figure('Name','Confusion matrix', 'MenuBar', 'none', 'NumberTitle', 'off'); if (~isempty(test_path))&(size(test_data,2)>cov_dim), ztest=fhme(bnet2, nodes_info, test_d, size(test_d,1)); [Htest, testRate]=confmat(ztest, test_t); % CM on the test set subplot(1,2,1); end plotmat(Htrain,'b','k',12) tick=[0.5:1:(0.5+nodes_info(2,end)-1)]; set(gca,'XTick',tick) set(gca,'YTick',tick) grid('off') ylabel('True') xlabel('Prediction') title(['Confusion Matrix: training set (' num2str(trainRate(1)) '%)']) if (~isempty(test_path))&(size(test_data,2)>cov_dim), subplot(1,2,2) plotmat(Htest,'b','k',12) set(gca,'XTick',tick) set(gca,'YTick',tick) grid('off') ylabel('True') xlabel('Prediction') title(['Confusion Matrix: test set (' num2str(testRate(1)) '%)']) end disp(' ') disp('(Press any key to continue)'); pause end %----------------------------------------------------------------------------------- % Regression & Classification problem: calculate the predictions & plot the LL trace %----------------------------------------------------------------------------------- train_result=fhme(bnet2,nodes_info,train_d,size(train_d,1)); if ~isempty(test_path), test_result=fhme(bnet2,nodes_info,test_d,size(test_d,1)); end fh3=figure('Name','Log-likelihood trace', 'MenuBar', 'none', 'NumberTitle', 'off') plot(LL2,'-ro',... 'MarkerEdgeColor','k',... 'MarkerFaceColor',[1 1 0],... 'MarkerSize',4) title('Log-likelihood trace') %----------------------------------------------------------------------------------- % Regression & Classification problem: save the predictions %----------------------------------------------------------------------------------- clc disp('------------------------------------------------------------------'); disp(' Save the results '); disp('------------------------------------------------------------------'); disp(' ') %----------------------------------------------------------------------------------- save_quest_m=input('Do you want to save the HME model (Y/N)? [Y default]: ', 's'); if isempty(save_quest_m), save_quest_m='Y'; end if ~findstr(save_quest_m, ['Y', 'N']), error('Invalid input'); end if save_quest_m=='Y', disp(' '); m_save=input('Insert the complete path for save the HME model (.mat):\n >> ', 's'); if isempty(m_save), error('You must specify a path!'); end save(m_save, 'bnet2'); end %----------------------------------------------------------------------------------- disp(' ') save_quest=input('Do you want to save the HME predictions (Y/N)? [Y default]: ', 's'); disp(' ') if isempty(save_quest), save_quest='Y'; end if ~findstr(save_quest, ['Y', 'N']), error('Invalid input'); end if save_quest=='Y', tr_save=input('Insert the complete path for save the training data prediction (.mat):\n >> ', 's'); if isempty(tr_save), error('You must specify a path!'); end save(tr_save, 'train_result'); if ~isempty(test_path), disp(' ') te_save=input('Insert the complete path for save the test data prediction (.mat):\n >> ', 's'); if isempty(te_save), error('You must specify a path!'); end save(te_save, 'test_result'); end end clc disp('----------------------------------------------------'); disp(' B Y E ! '); disp('----------------------------------------------------'); pause(2) %clear clc