# HG changeset patch # User DaveM # Date 1486663911 0 # Node ID 7ec9bd8df11100416e4971001e67d1713a9dc4db # Parent 3ff93b42d454ad8d46b0827a441e02f6378d3d49 document random forests and create iteration options diff -r 3ff93b42d454 -r 7ec9bd8df111 phase2/rfFeatureSelection.m --- a/phase2/rfFeatureSelection.m Thu Feb 09 17:54:49 2017 +0000 +++ b/phase2/rfFeatureSelection.m Thu Feb 09 18:11:51 2017 +0000 @@ -1,9 +1,22 @@ function features = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees) -%% rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees) +% rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees) % -% -% -% +% Uses random forests to perform feature selection for a given data set. +% data has size (x,y), where x is the number of labelled samples (rows) and +% y is the number of features (columns). +% labels is the set of labels for the data (one per row of data) +% numFeatures is the dimension of the output vector (default 5) +% iterMethod is the method by which the features are cut down +% 'onePass' will simply select the top (numFeatures) features and +% report them +% 'cutX' (e.g. 'cut10') will iteratively cut the bottom X percent of features out, +% and perform random forest feature selection on the new set, until +% the desired number of features has been returned +% 'oobErr' is intended to use the out-of-bag error, but has not been implemented +% yet; it currently issues a warning and returns the 'onePass' result. +% 'featureDeltaErr' is intended to use the feature importance +% prediction error, but has not been implemented yet; it currently +% issues a warning and returns the 'onePass' result. 
if(length(labels) ~= size(data,1)) error('labels and data do not match up'); @@ -30,19 +43,21 @@ features = I; if(strcmp(iterMethod,'onePass')) - disp('cut') + disp('onePass') features = features(1:numFeatures); -elseif(strcmp(iterMethod,'cut10')) - disp('cut10') - cutSize = max(floor(length(features)/10),1); +elseif(strncmp(iterMethod,'cut',3)) + disp(iterMethod) + % the text after 'cut' is the percentage of features to drop, e.g. 'cut10' -> 10 + cutPercentage = str2double(iterMethod(4:end)); + cutSize = max(floor(length(features)*cutPercentage/100),1); features = features(1:end-cutSize); data = data(:,I); features = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees); -elseif(strcmp(iterMethod,'cut5')) - disp('cut5') - cutSize = max(floor(length(features)/20),1); - features = features(1:end-cutSize); - data = data(:,I); - features = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees); +elseif(strcmp(iterMethod,'oobErr')) + warning('This method has not been implemented yet, using onePass to return results') + features = features(1:numFeatures); +elseif(strcmp(iterMethod,'featureDeltaErr')) + warning('This method has not been implemented yet, using onePass to return results') + features = features(1:numFeatures); end end \ No newline at end of file