changeset 4:7ec9bd8df111

document random forests and create iteration options
author DaveM
date Thu, 09 Feb 2017 18:11:51 +0000
parents 3ff93b42d454
children 7848d183c7ab
files phase2/rfFeatureSelection.m
diffstat 1 files changed, 28 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/phase2/rfFeatureSelection.m	Thu Feb 09 17:54:49 2017 +0000
+++ b/phase2/rfFeatureSelection.m	Thu Feb 09 18:11:51 2017 +0000
@@ -1,9 +1,22 @@
 function features = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees)
-%% rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees)
+% rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees)
 %
-%
-%
-%
+% Uses random forests to perform feature selection for a given data set.
+% data has size (x,y), where x is the number of observations (one per
+% label) and y is the number of features.
+% labels is the set of labels for the data
+% numFeatures is the dimension of the output vector (default 5)
+% iterMethod is the method by which the features are cut down
+%       'onePass' will simply select the top (numFeatures) features and
+%       report them
+%       'cutX' will iteratively cut the bottom X percent of features out,
+%       and perform random forest feature selection on the new set, until
+%       the desired number of features has been returned
+%       'oobErr' is intended to use the out-of-bag error; it is not yet
+%       implemented and falls back to 'onePass' with a warning.
+%       'featureDeltaErr' is intended to use the feature importance
+%       prediction error; it is not yet implemented and falls back to
+%       'onePass' with a warning.
 
 if(length(labels) ~= size(data,1))
     error('labels and data do not match up');
@@ -30,19 +43,20 @@
 features = I;
 
 if(strcmp(iterMethod,'onePass'))
-    disp('cut')
+    disp('onePass')
     features = features(1:numFeatures);
-elseif(strcmp(iterMethod,'cut10'))
-    disp('cut10')
-    cutSize = max(floor(length(features)/10),1);
+elseif(strcmp(iterMethod(1:3),'cut'))
+    disp(iterMethod)
+    cutPercentage = str2int(iterMethod(4:end));
+    cutSize = max(floor(length(features)*cutPercentage/100),1);
     features = features(1:end-cutSize);
     data = data(:,I);
     features = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees);
-elseif(strcmp(iterMethod,'cut5'))
-    disp('cut5')
-    cutSize = max(floor(length(features)/20),1);
-    features = features(1:end-cutSize);
-    data = data(:,I);
-    features = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees);
+elseif(strcmp(iterMethod,'oobErr'))
+    warning('This method has not been implemented yet, using onePass to return results')
+	features = features(1:numFeatures);
+elseif(strcmp(iterMethod,'featureDeltaErr'))
+    warning('This method has not been implemented yet, using onePass to return results')
+	features = features(1:numFeatures);
 end
 end
\ No newline at end of file