function featureVector = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees, featureVector)
%% rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees, featureVector)
%
% Uses random forests (TreeBagger) to perform feature selection on a data set.
%
% Inputs:
%   data          - matrix of size (x,y), where x is the number of labelled
%                   observations (rows) and y is the number of features
%                   (columns).
%   labels        - the labels for the data; length(labels) must equal
%                   size(data,1).
%   numFeatures   - the dimension of the output vector (default 5).
%   iterMethod    - the method by which the features are cut down
%                   (default 'onePass'):
%       * 'onePass' simply selects the top (numFeatures) features and
%         reports them.
%       * 'cutX' iteratively cuts the bottom X percent of features
%         (always at least one feature per pass) and re-runs random forest
%         feature selection on the remaining set, until the desired number
%         of features is left. Example: 'cut10'.
%       * 'featureDeltaErr' iteratively cuts the features whose
%         OOBPermutedVarDeltaError is negative (i.e. those that negatively
%         impact the results). If no feature is negative in a pass, the
%         single lowest-ranked feature is cut so that every pass makes
%         progress.
%   numTrees      - number of trees grown per forest (default 200).
%   featureVector - list of the column indices of data to consider
%                   (default 1:size(data,2)); used for the recursive calls.
%
% Output:
%   featureVector - indices of the selected features. When a forest has
%                   been trained they are ordered from most to least
%                   important according to that forest. If the input
%                   featureVector already has numFeatures or fewer entries
%                   it is returned unchanged and no forest is trained.
%
% Errors:
%   'must pass data and labels into function' - fewer than two arguments.
%   'labels and data do not match up'         - length(labels) differs from
%                                               the number of rows in data.

    % The argument count must be checked before labels is touched,
    % otherwise a missing argument fails with a generic error instead of
    % the message below.
    if(nargin < 2)
        error('must pass data and labels into function');
    end
    if(length(labels) ~= size(data,1))
        error('labels and data do not match up');
    end

    if(nargin < 3)
        numFeatures = 5;
    end
    if(nargin < 4)
        iterMethod = 'onePass';
    end
    if(nargin < 5)
        numTrees = 200;
    end
    % featureVector is the SIXTH argument, so its default must apply for
    % any call with fewer than six arguments (including a five-argument
    % call that supplies numTrees only).
    if(nargin < 6)
        featureVector = 1:size(data,2);
    end

    % Nothing to select if we already have few enough features.
    if(length(featureVector) > numFeatures)
        % Train a forest on the current feature subset and rank the
        % features by out-of-bag permuted-variable delta error.
        options = statset('UseParallel', true);
        b = TreeBagger(numTrees, data(:,featureVector), labels,'OOBVarImp','On',...
            'SampleWithReplacement', 'Off','FBoot', 0.632,'Options', options);
        [FI,I] = sort(b.OOBPermutedVarDeltaError,'descend');
        featureVector = featureVector(I);

        % Largest number of features that may be dropped without going
        % below numFeatures; always >= 1 inside this branch.
        maxCut = length(featureVector) - numFeatures;

        if(strcmp(iterMethod,'onePass'))
            featureVector = featureVector(1:numFeatures);
        elseif(strncmp(iterMethod,'cut',3))
            % strncmp is safe for names shorter than three characters,
            % unlike indexing iterMethod(1:3).
            cutPercentage = str2double(iterMethod(4:end));
            % max(...,1) guarantees progress (and maps a NaN percentage
            % to a single-feature cut, since max ignores NaN).
            cutSize = max(floor(length(featureVector)*cutPercentage/100),1);
            cutSize = min(cutSize, maxCut);
            featureVector = featureVector(1:end-cutSize);
            featureVector = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees, featureVector);
        elseif(strcmp(iterMethod,'featureDeltaErr'))
            % Cut every feature with negative importance, but at least
            % one: with a zero cut the recursive call would re-train on
            % the identical feature set with no guarantee of terminating.
            cutSize = max(sum(FI<0),1);
            cutSize = min(cutSize, maxCut);
            featureVector = featureVector(1:end-cutSize);
            featureVector = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees, featureVector);
        else
            % Return value is unchanged from before (the full ranked
            % list); the warning just makes the fall-through visible.
            warning('rfFeatureSelection:unknownIterMethod', ...
                'unrecognised iterMethod; returning all %d features ranked by importance', ...
                length(featureVector));
        end
    end
end