comparison toolboxes/FullBNT-1.0.7/bnt/examples/static/dtree/transform_data_into_bnt_format.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
function [bnt_data, old_values] = transform_data_into_bnt_format(data,cnodes)
% TRANSFORM_DATA_INTO_BNT_FORMAT Ensures discrete variables have values 1,2,..,k
% e.g., if the values of a discrete node are [0 1 6], they are mapped to [1 2 3]
%
% [bnt_data, old_values] = transform_data_into_bnt_format(data, cnodes)
%
% data(i,j) is the value for i-th node in j-th case.
% cnodes is the list of all continuous nodes, e.g. [3 5] means the 3rd and
%   5th node are continuous. Optional; defaults to [] (every node discrete).
% bnt_data(i,j) is the new value: unchanged for continuous nodes, and for
%   discrete nodes the rank (1..k) of data(i,j) among that node's distinct values.
% old_values{i} are the sorted distinct original values for discrete node i,
%   so that old_values{i}(bnt_data(i,j)) == data(i,j). It stays empty for
%   continuous nodes.
%
% If a discrete node contains NaN (which can never be matched against the
% set of distinct values), a message is printed and the function returns
% early: the rows of bnt_data from that node onwards are left at zero and
% the corresponding old_values cells are left empty.
%
% Author: yimin.zhang@intel.com
% Last updated: Jan. 22, 2002 by Kevin Murphy.

if nargin < 2
  cnodes = [];
end

num_nodes = size(data,1);
num_cases = size(data,2);
old_values = cell(1,num_nodes);
% Preallocate so that the output is defined even when data has no rows and
% is not regrown on every assignment.
bnt_data = zeros(num_nodes,num_cases);
% Membership mask computed once instead of one lookup per node.
is_cts = ismember(1:num_nodes, cnodes);

for i=1:num_nodes
  values = data(i,:);
  if is_cts(i)
    % cts nodes need not be transformed: just copy the data
    bnt_data(i,:) = values;
    continue;
  end
  if isnumeric(values) && any(isnan(values))
    % NaN never compares equal to anything, so it has no well defined rank.
    fprintf('value not found in tranforming data to bnt format.\n');
    return;
  end
  % unique returns the sorted distinct values and, as third output, the
  % position of every element of values inside that sorted set.
  [v_set, unused, codes] = unique(values);
  % The orientation of codes depends on the MATLAB release, so force a row.
  bnt_data(i,:) = reshape(codes, 1, num_cases);
  old_values{i} = v_set;
end
39
40
41 %%%%%%%%%%%%
42
function index=binary_search(vector, value)
% BINARY_SEARCH Do a binary search for value in a vector sorted in ascending order
%
% index = binary_search(vector, value)
%
% vector is a row or column vector sorted in ascending order. Numeric and
%   char vectors are both accepted; characters are compared by character code.
% value is the scalar to look for.
% index is a position such that vector(index)==value, or -1 when value does
%   not occur in vector (this includes an empty vector and a NaN value).
%
% Author: yimin.zhang@intel.com
% Last updated: Jan. 19, 2002

index = -1;
lo = 1;
% numel rather than size(vector,2) so that column vectors are searched in full.
hi = numel(vector);
while (lo <= hi)
  mid = floor((lo+hi)/2);
  probe = vector(mid);
  % The relational operators work on char as well as on numbers, so no
  % special string branch is needed (an empty one would never move the
  % bounds and the loop would not terminate).
  if (value == probe)
    index = mid;
    return;
  elseif (value > probe)
    lo = mid+1;
  else
    hi = mid-1;
  end
end