Mercurial > hg > sfx-subgrouping
changeset 4:7ec9bd8df111
document random forests and create iteration options
author | DaveM |
---|---|
date | Thu, 09 Feb 2017 18:11:51 +0000 |
parents | 3ff93b42d454 |
children | 7848d183c7ab |
files | phase2/rfFeatureSelection.m |
diffstat | 1 files changed, 28 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/phase2/rfFeatureSelection.m Thu Feb 09 17:54:49 2017 +0000
+++ b/phase2/rfFeatureSelection.m Thu Feb 09 18:11:51 2017 +0000
@@ -1,9 +1,22 @@
 function features = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees)
-%% rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees)
+% rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees)
 %
-%
-%
-%
+% using random forests to perform feature selection for a given data set
+% data has size (x,y), where x is the number of labels and y, the number of
+% features.
+% labels is the set of labels for the data
+% numFeatures is the dimension of the output vector (default 5)
+% iterMethod is the method for which the features are cut down
+% 'onePass' will simply select the top (numFeatures) features and
+% report them
+% 'cutX' will iteratively cut the bottom X percent of features out,
+% and perform random forest feature selection on the new set, until
+% the desired number of features has been returned
+% 'oobErr' will do something with the out-of-bag error, and return
+% that in some way, but this has not been implemented yet.
+% 'featureDeltaErr' will do something with the feature importance
+% prediction error, and return that in some way, but this has not
+% been implemented yet.
 if(length(labels) ~= size(data,1))
     error('labels and data do not match up');
@@ -30,19 +43,20 @@
 features = I;
 if(strcmp(iterMethod,'onePass'))
-    disp('cut')
+    disp('onePass')
     features = features(1:numFeatures);
-elseif(strcmp(iterMethod,'cut10'))
-    disp('cut10')
-    cutSize = max(floor(length(features)/10),1);
+elseif(strcmp(iterMethod(1:3),'cut'))
+    disp(iterMethod)
+    cutPercentage = str2int(iterMethod(4:end));
+    cutSize = max(floor(length(features)*cutPercentage/100),1);
     features = features(1:end-cutSize);
     data = data(:,I);
     features = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees);
-elseif(strcmp(iterMethod,'cut5'))
-    disp('cut5')
-    cutSize = max(floor(length(features)/20),1);
-    features = features(1:end-cutSize);
-    data = data(:,I);
-    features = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees);
+elseif(strcmp(iterMethod,'oobErr'))
+    warning('This method has not been implemented yet, using onePass to return results')
+    features = features(1:numFeatures);
+elseif(strcmp(iterMethod,'featureDeltaErr'))
+    warning('This method has not been implemented yet, using onePass to return results')
+    features = features(1:numFeatures);
 end
 end
\ No newline at end of file