function [score,outputs] = evaluate(CPD, fam, data, ns, cnodes)
% EVALUATE Evaluate the performance of a classification/regression tree on given complete data
% [score, outputs] = evaluate(CPD, fam, data, ns, cnodes)
%
% Inputs
% CPD        holds the tree in CPD.tree, with fields root (index of the root
%            node), nodes (array of tree nodes) and num_node (number of nodes).
%            Each tree node provides is_leaf, probs (discrete leaf), mean
%            (continuous leaf), split_id, split_threshhold and children.
% fam(i)     is the node id of the i-th node in the family of nodes; the self
%            (output) node is the last one. Row or column vector.
% data(i,m)  is the value of node i in case m (can be a cell array).
% ns(i)      is the node size of the i-th node in the whole bnet. Row or column vector.
% cnodes(i)  is the node id of the i-th continuous node in the whole bnet.
%
% Outputs
% score      is the classification accuracy (discrete output node), or the
%            mean squared deviation (continuous output node), where the mean
%            stored at the reached leaf is used as the predicted value.
%            With zero cases the result is 0/0 = NaN.
% outputs(i) is the predicted output value for case i.
%
% An error is raised if a child index falls outside 1..CPD.tree.num_node.
%
% Author: yimin.zhang@intel.com
% Last updated: Jan. 19, 2002

% Restrict the data to the family; rows of local_data follow the order of fam.
if iscell(data)
  local_data = cell2num(data(fam,:));
else
  local_data = data(fam,:);
end

% Local node types: 0 = discrete, 1 = continuous.
% numel (not size(.,2)) so that column-vector ns/fam are handled as well.
node_types = zeros(1, numel(ns)); % all nodes are discrete by default
node_types(cnodes) = 1;
node_types = node_types(fam);

fam_size = numel(fam);                 % the self node is the last family member
output_type = node_types(fam_size);

num_cases = size(local_data, 2);
total_error = 0;                       % misclassification count or sum of squared deviations

outputs = zeros(1, num_cases);
for i = 1:num_cases
  % Classify one case by walking from the root down to a leaf.
  cur_node = CPD.tree.root;
  while (1)
    node = CPD.tree.nodes(cur_node);
    if (node.is_leaf == 1)
      if (output_type == 0) % output is discrete
        % use the class with max probability as the output
        [maxvalue, class_id] = max(node.probs);
        outputs(i) = class_id;
        if (class_id ~= local_data(fam_size, i))
          total_error = total_error + 1;
        end
      else % output is continuous
        % use the leaf mean as the predicted value
        outputs(i) = node.mean;
        cur_deviation = node.mean - local_data(fam_size, i);
        total_error = total_error + cur_deviation*cur_deviation;
      end
      break;
    end

    % Internal node: split_id indexes into the family (a row of local_data).
    cur_attr = node.split_id;
    attr_val = local_data(cur_attr, i);
    if (node_types(cur_attr) == 0) % discrete attribute
      % go to the attr_val-th child
      cur_node = node.children(attr_val);
    else % continuous attribute: binary split on the threshold
      if (attr_val <= node.split_threshhold)
        cur_node = node.children(1);
      else
        cur_node = node.children(2);
      end
    end

    % Fail loudly instead of returning with 'score' unassigned.
    if (cur_node < 1 || cur_node > CPD.tree.num_node)
      error('evaluate:corruptedTree', 'Fatal error: Tree structure corrupted.');
    end
  end
end

if (output_type == 0)
  score = 1 - total_error/num_cases;   % classification accuracy
else
  score = total_error/num_cases;       % mean squared deviation
end