Mercurial > hg > camir-aes2014
diff toolboxes/FullBNT-1.0.7/bnt/examples/static/HME/hmemenu.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolboxes/FullBNT-1.0.7/bnt/examples/static/HME/hmemenu.m Tue Feb 10 15:05:51 2015 +0000 @@ -0,0 +1,552 @@ +% dataset -> (1=>user data) or (2=>toy example) +% type -> (1=> Regression model) or (2=>Classification model) +% num_glevel -> number of hidden nodes in the net (gating levels) +% num_exp -> number of experts in the net +% branch_fact -> dimension of the hidden nodes in the net +% cov_dim -> root node dimension +% res_dim -> output node dimension +% nodes_info -> 4 x num_glevel+2 matrix that contain all the info about the nodes: +% nodes_info(1,:) = nodes type: (0=>gaussian)or(1=>softmax)or(2=>mlp) +% nodes_info(2,:) = nodes size: [cov_dim num_glevel x branch_fact res_dim] +% nodes_info(3,:) = hidden units number (for mlp nodes) +% |- optimizer iteration number (for softmax & mlp CPD) +% nodes_info(4,:) =|- covariance type (for gaussian CPD)-> +% | (1=>Full)or(2=>Diagonal)or(3=>Full&Tied)or(4=>Diagonal&Tied) +% fh1 -> Figure: data & decizion boundaries; fh2 -> confusion matrix; fh3 -> LL trace +% test_data -> test data matrix +% train_data -> training data matrix +% ntrain -> size(train_data,2) +% ntest -> size(test_data,2) +% cases -> (cell array) training data formatted for the learning engine +% bnet -> bayesian net before learning +% bnet2 -> bayesian net after learning +% ll -> log-likelihood before learning +% LL2 -> log-likelihood trace +% onodes -> obs nodes in bnet & bnet2 +% max_em_iter -> maximum number of interations of the EM algorithm +% train_result -> prediction on the training set (as test_result) +% +% IMPORTANT: CHECK the loading path (lines 64 & 364) +% ---------------------------------------------------------------------------------------------------- +% -> pierpaolo_b@hotmail.com or -> pampo@interfree.it +% ---------------------------------------------------------------------------------------------------- + +error('this no longer works with the latest version of BNT') + +clear all; +clc; +disp('---------------------------------------------------'); +disp(' Hierarchical Mixtures of Experts models builder '); +disp('---------------------------------------------------'); +disp(' ') +disp(' Using this script you can build both an HME model') +disp('as in [Wat94] and [Jor94] i.e. with ''softmax'' gating') +disp('nodes and ''gaussian'' ( for regression ) or ''softmax''') +disp('( for classification ) expert node, and its variants') +disp('called ''gated nets'' where we use ''mlp'' models in') +disp('place of a number of ''softmax'' ones [Mor98], [Wei95].') +disp(' You can decide to train and test the model on your') +disp('datasets or to evaluate its performance on a toy') +disp('example.') +disp(' ') +disp('Reference') +disp('[Mor98] P. Moerland (1998):') +disp(' Localized mixtures of experts. (http://www.idiap.ch/~perry/)') +disp('[Jor94] M.I. Jordan, R.A. Jacobs (1994):') +disp(' HME and the EM algorithm. (http://www.cs.berkeley.edu/~jordan/)') +disp('[Wat94] S.R. Waterhouse, A.J. Robinson (1994):') +disp(' Classification using HME. (http://www.oigeeza.com/steve/)') +disp('[Wei95] A.S. Weigend, M. Mangeas (1995):') +disp(' Nonlinear gated experts for time series.') +disp(' ') + +if 0 +disp('(See the figure)') +pause(5); +%%%%%WARNING!%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +im_path=which('HMEforMatlab.jpg'); +fig=imread(im_path, 'jpg'); +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +figure('Units','pixels','MenuBar','none','NumberTitle','off', 'Name', 'HME model'); +image(fig); +axis image; +axis off; +clear fig; +set(gca,'Position',[0 0 1 1]) +disp('(Press any key to continue)') +pause +end + +clc +disp('---------------------------------------------------'); +disp(' Specify the Architecture '); +disp('---------------------------------------------------'); +disp(' '); +disp('What kind of model do you need?') +disp(' ') +disp('1) Regression ') +disp('2) Classification') +disp(' ') +type=input('1 or 2?: '); +if (isempty(type)|(~ismember(type,[1 2]))), error('Invalid value'); end +clc +disp('----------------------------------------------------'); +disp(' Specify the Architecture '); +disp('----------------------------------------------------'); +disp(' ') +disp('Now you have to set the number of experts and gating') +disp('levels in the net. This script builds only balanced') +disp('hierarchy with the same branching factor (>1)at each') +disp('(gating) level. So remember that: ') +disp(' ') +disp(' num_exp = branch_fact^num_glevel ') +disp(' ') +disp('with branch_fact >=2.') +disp('You can also set to zeros the number of gating level') +disp('in order to obtain a classical GLM model. ') +disp(' ') +disp('----------------------------------------------------'); +disp(' ') +num_glevel=input('Insert the number of gating levels {0,...,20}: '); +if (isempty(num_glevel)|(~ismember(num_glevel,[0:20]))), error('Invalid value'); end +nodes_info=zeros(4,num_glevel+2); +if num_glevel>0, %------------------------------------------------------------------------------------ + for i=2:num_glevel+1, + clc + disp('----------------------------------------------------'); + disp(' Specify the Architecture '); + disp('----------------------------------------------------'); + disp(' ') + disp(['-> Gating network ', num2str(i-1), ' is a: ']) + disp(' ') + disp(' 1) Softmax model'); + disp(' 2) Two layer perceptron model') + disp(' ') + nodes_info(1,i)=input('1 or 2?: '); + if (isempty(nodes_info(1,i))|(~ismember(nodes_info(1,i),[1 2]))), error('Invalid value'); end + disp(' ') + if nodes_info(1,i)==2, + nodes_info(3,i)=input('Insert the number of units in the hidden layer: '); + if (isempty(nodes_info(3,i))|(floor(nodes_info(3,i))~=nodes_info(3,i))|(nodes_info(3,i)<=0)), + error(['Invalid value: ', num2str(nodes_info(3,i)), ' is not a positive integer!']); + end + disp(' ') + end + nodes_info(4,i)=input('Insert the optimizer iteration number: '); + if (isempty(nodes_info(4,i))|(floor(nodes_info(4,i))~=nodes_info(4,i))|(nodes_info(4,i)<=0)), + error(['Invalid value: ', num2str(nodes_info(4,i)), ' is not a positive integer!']); + end + end + clc + disp('---------------------------------------------------------'); + disp(' Specify the Architecture '); + disp('---------------------------------------------------------'); + disp(' ') + disp('Now you have to set the number of experts in the network'); + disp('The value will be adjusted in order to obtain a hierarchy'); + disp('as said above.') + disp(' '); + num_exp=input(['Insert the approximative number of experts (>=', num2str(2^num_glevel), '): ']); + if (isempty(num_exp)|(num_exp<=0)|(num_exp<2^num_glevel)), + error('Invalid value'); + end + app1=0; base=2; + while app1<num_exp, + app1=base^num_glevel; + base=base+1; + end + app2=(base-2)^num_glevel; + branch_fact=base-1; + if app2>=(2^num_glevel)&(abs(app2-num_exp)<abs(app1-num_exp)), + branch_fact=base-2; + end + clear app1 app2 base; + disp(' ') + disp(['The effective number of experts in the net is: ', num2str(branch_fact^num_glevel), '.']) + disp(' '); +else + clc + disp('---------------------------------------------------------'); + disp(' Specify the Architecture (GLM model) '); + disp('---------------------------------------------------------'); + disp(' ') +end % END of: if num_glevel>0------------------------------------------------------------------------- + +if type==2, + disp(['-> Expert node is a: ']) + disp(' ') + disp(' 1) Softmax model'); + disp(' 2) Two layer perceptron model') + disp(' ') + nodes_info(1,end)=input('1 or 2?: '); + if (isempty(nodes_info(1,end))|(~ismember(nodes_info(1,end),[1 2]))), + error('Invalid value'); + end + disp(' ') + if nodes_info(1,end)==2, + nodes_info(3,end)=input('Insert the number of units in the hidden layer: '); + if (isempty(nodes_info(3,end))|(floor(nodes_info(3,end))~=nodes_info(3,end))|(nodes_info(3,end)<=0)), + error(['Invalid value: ', num2str(nodes_info(3,end)), ' is not a positive integer!']); + end + disp(' ') + end + nodes_info(4,end)=input('Insert the optimizer iteration number: '); + if (isempty(nodes_info(4,end))|(floor(nodes_info(4,end))~=nodes_info(4,end))|(nodes_info(4,end)<=0)), + error(['Invalid value: ', num2str(nodes_info(4,end)), ' is not a positive integer!']); + end +elseif type==1, + disp('What kind of covariance matrix structure do you want?') + disp(' ') + disp(' 1) Full'); + disp(' 2) Diagonal') + disp(' 3) Full & Tied'); + disp(' 4) Diagonal & Tied') + + disp(' ') + nodes_info(4,end)=input('1, 2, 3 or 4?: '); + if (isempty(nodes_info(4,end))|(~ismember(nodes_info(4,end),[1 2 3 4]))), + error('Invalid value'); + end +end +clc +disp('----------------------------------------------------'); +disp(' Specify the Input '); +disp('----------------------------------------------------'); +disp(' ') +disp('Do you want to...') +disp(' ') +disp('1) ...use your own dataset?') +disp('2) ...apply the model on a toy example?') +disp(' ') +dataset=input('1 or 2?: '); +if (isempty(dataset)|(~ismember(dataset,[1 2]))), error('Invalid value'); end +if dataset==1, + if type==1, + clc + disp('-------------------------------------------------------'); + disp(' Specify the Input - Regression problem '); + disp('-------------------------------------------------------'); + disp(' ') + disp('Be sure that each row of your data matrix is an example'); + disp('with the covariate values that precede the respond ones') + disp(' ') + disp('-------------------------------------------------------'); + disp(' ') + cov_dim=input('Insert the covariate space dimension: '); + if (isempty(cov_dim)|(floor(cov_dim)~=cov_dim)|(cov_dim<=0)), + error(['Invalid value: ', num2str(cov_dim), ' is not a positive integer!']); + end + disp(' ') + res_dim=input('Insert the dimension of the respond variable: '); + if (isempty(res_dim)|(floor(res_dim)~=res_dim)|(res_dim<=0)), + error(['Invalid value: ', num2str(res_dim), ' is not a positive integer!']); + end + disp(' '); + elseif type==2 + clc + disp('-------------------------------------------------------'); + disp(' Specify the Input - Classification problem '); + disp('-------------------------------------------------------'); + disp(' ') + disp('Be sure that each row of your data matrix is an example'); + disp('with the covariate values that precede the class labels'); + disp('(integer value >=1). '); + disp(' ') + disp('-------------------------------------------------------'); + disp(' ') + cov_dim=input('Insert the covariate space dimension: '); + if (isempty(cov_dim)|(floor(cov_dim)~=cov_dim)|(cov_dim<=0)), + error(['Invalid value: ', num2str(cov_dim), ' is not a positive integer!']); + end + disp(' ') + res_dim=input('Insert the number of classes: '); + if (isempty(res_dim)|(floor(res_dim)~=res_dim)|(res_dim<=0)), + error(['Invalid value: ', num2str(res_dim), ' is not a positive integer!']); + end + disp(' ') + end + % ------------------------------------------------------------------------------------------------ + % Loading training data -------------------------------------------------------------------------- + % ------------------------------------------------------------------------------------------------ + train_path=input('Insert the complete (with extension) path of the training data file:\n >> ','s'); + if isempty(train_path), error('You must specify a data set for training!'); end + if ~isempty(findstr('.mat',train_path)), + ap=load(train_path); app=fieldnames(ap); train_data=eval(['ap.', app{1,1}]); + clear ap app; + elseif ~isempty(findstr('.txt',train_path)), + train_data=load(train_path, '-ascii'); + else + error('Invalid data format: not a .mat or a .txt file') + end + if (size(train_data,2)~=cov_dim+res_dim)&(type==1), + error(['Invalid data matrix size: ', num2str(size(train_data,2)), ' columns rather than ',... + num2str(cov_dim+res_dim),'!']); + elseif (size(train_data,2)~=cov_dim+1)&(type==2), + error(['Invalid data matrix size: ', num2str(size(train_data,2)), ' columns rather than ',... + num2str(cov_dim+1),'!']); + elseif (~isempty(find(ismember(intersect([train_data(:,end)' 1:res_dim],... + train_data(:,end)'),[1:res_dim])==0)))&(type==2), + error('Invalid class label'); + end + ntrain=size(train_data,1); + train_d=train_data(:,1:cov_dim); + if type==2, + train_t=zeros(ntrain, res_dim); + for m=1:res_dim, + train_t((find(train_data(:,end)==m))',m)=1; + end + else + train_t=train_data(:,cov_dim+1:end); + end + disp(' ') + % ------------------------------------------------------------------------------------------------ + % Loading test data ------------------------------------------------------------------------------ + % ------------------------------------------------------------------------------------------------ + disp('(If you don''t want to specify a test-set press ''return'' only)'); + test_path=input('Insert the complete (with extension) path of the test data file:\n >> ','s'); + if ~isempty(test_path), + if ~isempty(findstr('.mat',test_path)), + ap=load(test_path); app=fieldnames(ap); test_data=eval(['ap.', app{1,1}]); + clear ap app; + elseif ~isempty(findstr('.txt',test_path)), + test_data=load(test_path, '-ascii'); + else + error('Invalid data format: not a .mat or a .txt file') + end + if (size(test_data,2)~=cov_dim)&(size(test_data,2)~=cov_dim+res_dim)&(type==1), + error(['Invalid data matrix size: ', num2str(size(test_data,2)), ' columns rather than ',... + num2str(cov_dim+res_dim), ' or ', num2str(cov_dim), '!']); + elseif (size(test_data,2)~=cov_dim)&(size(test_data,2)~=cov_dim+1)&(type==2), + error(['Invalid data matrix size: ', num2str(size(test_data,2)), ' columns rather than ',... + num2str(cov_dim+1), ' or ', num2str(cov_dim), '!']); + elseif (~isempty(find(ismember(intersect([test_data(:,end)' 1:res_dim],... + test_data(:,end)'),[1:res_dim])==0)))&(type==2)&(size(test_data,2)==cov_dim+1), + error('Invalid class label'); + end + ntest=size(test_data,1); + test_d=test_data(:,1:cov_dim); + if (type==2)&(size(test_data,2)>cov_dim), + test_t=zeros(ntest, res_dim); + for m=1:res_dim, + test_t((find(test_data(:,end)==m))',m)=1; + end + elseif (type==1)&(size(test_data,2)>cov_dim), + test_t=test_data(:,cov_dim+1:end); + end + disp(' '); + end +else + clc + disp('----------------------------------------------------'); + disp(' Specify the Input '); + disp('----------------------------------------------------'); + disp(' ') + ntrain = input('Insert the number of examples in training (<500): '); + if (isempty(ntrain)|(floor(ntrain)~=ntrain)|(ntrain<=0)|(ntrain>500)), + error(['Invalid value: ', num2str(ntrain), ' is not a positive integer <500!']); + end + disp(' ') + test_path='toy'; + ntest = input('Insert the number of examples in test (<500): '); + if (isempty(ntest)|(floor(ntest)~=ntest)|(ntest<=0)|(ntest>500)), + error(['Invalid value: ', num2str(ntest), ' is not a positive integer <500!']); + end + + if type==2, + cov_dim=2; + res_dim=3; + seed = 42; + [train_d, ntrain1, ntrain2, train_t]=gen_data(ntrain, seed); + for m=1:ntrain + q=[]; q = find(train_t(m,:)==1); + train_data(m,:)=[train_d(m,:) q]; + end + [test_d, ntest1, ntest2, test_t]=gen_data(ntest); + for m=1:ntest + q=[]; q = find(test_t(m,:)==1); + test_data(m,:)=[test_d(m,:) q]; + end + else + cov_dim=1; + res_dim=1; + global HOME + %%%%%WARNING!%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + load([HOME '/examples/static/Misc/mixexp_data.txt'], '-ascii'); + %%%%%WARNING!%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + train_data = mixexp_data(1:ntrain, :); + train_d=train_data(:,1:cov_dim); train_t=train_data(:,cov_dim+1:end); + test_data = mixexp_data(ntrain+1:ntrain+ntest, :); + test_d=test_data(:,1:cov_dim); + if size(test_data,2)>cov_dim, + test_t=test_data(:,cov_dim+1:end); + end + end +end +% Set the nodes dimension----------------------------------- +if num_glevel>0, + nodes_info(2,2:num_glevel+1)=branch_fact; +end +nodes_info(2,1)=cov_dim; nodes_info(2,end)=res_dim; +%----------------------------------------------------------- +% Prepare the training data for the learning engine--------- +%----------------------------------------------------------- +cases = cell(size(nodes_info,2), ntrain); +for m=1:ntrain, + cases{1,m}=train_data(m,1:cov_dim)'; + cases{end,m}=train_data(m,cov_dim+1:end)'; +end +%----------------------------------------------------------------------------------------------------- +[bnet onodes]=hme_topobuilder(nodes_info); +engine = jtree_inf_engine(bnet, onodes); +clc +disp('---------------------------------------------------------------------'); +disp(' L E A R N I N G '); +disp('---------------------------------------------------------------------'); +disp(' ') +ll = 0; +for l=1:ntrain + scritta=['example number: ', int2str(l),'---------------------------------------------']; + disp(scritta); + ev = cases(:,l); + [engine, loglik] = enter_evidence(engine, ev); + ll = ll + loglik; +end +disp(' ') +disp(['Log-likelihood before learning: ', num2str(ll)]); +disp(' ') +disp('(Press any key to continue)'); +pause +%----------------------------------------------------------- +clc +disp('---------------------------------------------------------------------'); +disp(' L E A R N I N G '); +disp('---------------------------------------------------------------------'); +disp(' ') +max_em_iter=input('Insert the maximum number of the EM algorithm iterations: '); +if (isempty(max_em_iter)|(floor(max_em_iter)~=max_em_iter)|(max_em_iter<=1)), + error(['Invalid value: ', num2str(ntest), ' is not a positive integer >1!']); +end +disp(' ') +disp(['Log-likelihood before learning: ', num2str(ll)]); +disp(' ') + +[bnet2, LL2] = learn_params_em(engine, cases, max_em_iter); +disp(' ') +fprintf('HME: loglik before learning %f, after %d iters %f\n', ll, length(LL2), LL2(end)); +disp(' ') +disp('(Press any key to continue)'); +pause +%----------------------------------------------------------------------------------- +% Classification problem: plot data & decision boundaries if the input data size = 2 +% Regression problem: plot data & prediction if the input data size = 1 +%----------------------------------------------------------------------------------- +if (type==2)&(nodes_info(2,1)==2)&(~isempty(test_path)), + fh1=hme_class_plot(bnet2, nodes_info, train_data, test_data); + disp(' '); + disp('(See the figure)'); +elseif (type==2)&(nodes_info(2,1)==2)&(isempty(test_path)), + fh1=hme_class_plot(bnet2, nodes_info, train_data); + disp(' '); + disp('(See the figure)'); +elseif (type==1)&(nodes_info(2,1)==1)&(~isempty(test_path)), + fh1=hme_reg_plot(bnet2, nodes_info, train_data, test_data); + disp(' '); + disp('(See the figure)'); +elseif (type==1)&(nodes_info(2,1)==1)&(isempty(test_path)), + fh1=hme_reg_plot(bnet2, nodes_info, train_data); + disp(' ') + disp('(See the figure)'); +end +%----------------------------------------------------------------------------------- +% Classification problem: plot confusion matrix +%----------------------------------------------------------------------------------- +if (type==2) + ztrain=fhme(bnet2, nodes_info, train_d, size(train_d,1)); + [Htrain, trainRate]=confmat(ztrain, train_t); % CM on the training set + fh2=figure('Name','Confusion matrix', 'MenuBar', 'none', 'NumberTitle', 'off'); + if (~isempty(test_path))&(size(test_data,2)>cov_dim), + ztest=fhme(bnet2, nodes_info, test_d, size(test_d,1)); + [Htest, testRate]=confmat(ztest, test_t); % CM on the test set + subplot(1,2,1); + end + plotmat(Htrain,'b','k',12) + tick=[0.5:1:(0.5+nodes_info(2,end)-1)]; + set(gca,'XTick',tick) + set(gca,'YTick',tick) + grid('off') + ylabel('True') + xlabel('Prediction') + title(['Confusion Matrix: training set (' num2str(trainRate(1)) '%)']) + if (~isempty(test_path))&(size(test_data,2)>cov_dim), + subplot(1,2,2) + plotmat(Htest,'b','k',12) + set(gca,'XTick',tick) + set(gca,'YTick',tick) + grid('off') + ylabel('True') + xlabel('Prediction') + title(['Confusion Matrix: test set (' num2str(testRate(1)) '%)']) + end + disp(' ') + disp('(Press any key to continue)'); + pause +end +%----------------------------------------------------------------------------------- +% Regression & Classification problem: calculate the predictions & plot the LL trace +%----------------------------------------------------------------------------------- +train_result=fhme(bnet2,nodes_info,train_d,size(train_d,1)); +if ~isempty(test_path), + test_result=fhme(bnet2,nodes_info,test_d,size(test_d,1)); +end +fh3=figure('Name','Log-likelihood trace', 'MenuBar', 'none', 'NumberTitle', 'off') +plot(LL2,'-ro',... + 'MarkerEdgeColor','k',... + 'MarkerFaceColor',[1 1 0],... + 'MarkerSize',4) +title('Log-likelihood trace') +%----------------------------------------------------------------------------------- +% Regression & Classification problem: save the predictions +%----------------------------------------------------------------------------------- +clc +disp('------------------------------------------------------------------'); +disp(' Save the results '); +disp('------------------------------------------------------------------'); +disp(' ') +%----------------------------------------------------------------------------------- +save_quest_m=input('Do you want to save the HME model (Y/N)? [Y default]: ', 's'); +if isempty(save_quest_m), + save_quest_m='Y'; +end +if ~findstr(save_quest_m, ['Y', 'N']), error('Invalid input'); end +if save_quest_m=='Y', + disp(' '); + m_save=input('Insert the complete path for save the HME model (.mat):\n >> ', 's'); + if isempty(m_save), error('You must specify a path!'); end + save(m_save, 'bnet2'); +end +%----------------------------------------------------------------------------------- +disp(' ') +save_quest=input('Do you want to save the HME predictions (Y/N)? [Y default]: ', 's'); +disp(' ') +if isempty(save_quest), + save_quest='Y'; +end +if ~findstr(save_quest, ['Y', 'N']), error('Invalid input'); end +if save_quest=='Y', + tr_save=input('Insert the complete path for save the training data prediction (.mat):\n >> ', 's'); + if isempty(tr_save), error('You must specify a path!'); end + save(tr_save, 'train_result'); + if ~isempty(test_path), + disp(' ') + te_save=input('Insert the complete path for save the test data prediction (.mat):\n >> ', 's'); + if isempty(te_save), error('You must specify a path!'); end + save(te_save, 'test_result'); + end +end +clc +disp('----------------------------------------------------'); +disp(' B Y E ! '); +disp('----------------------------------------------------'); +pause(2) +%clear +clc