function [score,outputs] = evaluate(CPD, fam, data, ns, cnodes)
% EVALUATE Evaluate the performance of the classification/regression tree on given complete data
% [score, outputs] = evaluate(CPD, fam, data, ns, cnodes)
%
% Inputs
%   CPD         tree CPD; this function reads CPD.tree.root, CPD.tree.num_node and
%               CPD.tree.nodes(k) with fields is_leaf, probs, mean, split_id,
%               split_threshhold and children
%   fam(i)      is the node id of the i-th node in the family of nodes, self node is the last one
%               (row or column vector)
%   data(i,m)   is the value of node i in case m (can be cell array)
%   ns(i)       is the node size for the i-th node in the whole bnet (row or column vector)
%   cnodes(i)   is the node id for the i-th continuous node in the whole bnet
%
% Outputs
%   score       is the classification accuracy (for classification)
%               or mean square deviation (for regression);
%               for every case the mean value at the tree leaf node is used as its predicted value.
%               With zero cases the score is 0/0, i.e. NaN.
%   outputs(i)  is the predicted output value for case i (1 x num_cases)
%
% An error is raised if, while descending the tree, a child index falls outside
% 1..CPD.tree.num_node (corrupted tree structure).
%
% Author: yimin.zhang@intel.com
% Last updated: Jan. 19, 2002

% Restrict the data to the rows of this family; the last row is the output node.
if iscell(data)
  local_data = cell2num(data(fam,:));
else
  local_data = data(fam, :);
end

% Get local node types: 0 = discrete, 1 = continuous.
% numel (rather than size(.,2)) keeps this correct for column-vector ns / fam.
node_types = zeros(1, numel(ns));   % all nodes are discrete by default
node_types(cnodes) = 1;
node_types = node_types(fam);

fam_size = numel(fam);
output_type = node_types(fam_size);

num_cases = size(local_data, 2);
total_error = 0;

outputs = zeros(1, num_cases);
for i = 1:num_cases
  % classify one case using the tree, starting at the root node
  cur_node = CPD.tree.root;
  while (1)
    if (CPD.tree.nodes(cur_node).is_leaf == 1)
      if (output_type == 0)   % output is discrete
        % use the class with max probability as the output
        [unused_max, class_id] = max(CPD.tree.nodes(cur_node).probs); %#ok<ASGLU>
        outputs(i) = class_id;
        % update the classification error count
        if (class_id ~= local_data(fam_size, i))
          total_error = total_error + 1;
        end
      else                    % output is continuous
        % use the leaf mean as the predicted value
        outputs(i) = CPD.tree.nodes(cur_node).mean;
        cur_deviation = CPD.tree.nodes(cur_node).mean - local_data(fam_size, i);
        total_error = total_error + cur_deviation * cur_deviation;
      end
      break;
    end

    % split_id is a position within the family (it indexes local_data rows)
    cur_attr = CPD.tree.nodes(cur_node).split_id;
    attr_val = local_data(cur_attr, i);
    if (node_types(cur_attr) == 0)   % discrete attribute
      % go to the attr_val-th child
      cur_node = CPD.tree.nodes(cur_node).children(attr_val);
    else                             % continuous attribute: binary threshold split
      if (attr_val <= CPD.tree.nodes(cur_node).split_threshhold)
        cur_node = CPD.tree.nodes(cur_node).children(1);
      else
        cur_node = CPD.tree.nodes(cur_node).children(2);
      end
    end

    % Fail loudly instead of returning with 'score' unassigned.
    if (cur_node < 1 || cur_node > CPD.tree.num_node)
      error('tree_CPD:evaluate:corruptTree', 'Fatal error: Tree structure corrupted.');
    end
  end
end

if (output_type == 0)
  score = 1 - total_error / num_cases;   % classification accuracy
else
  score = total_error / num_cases;       % mean square deviation
end