view phase2/rfFeatureSelection.m @ 5:7848d183c7ab

fixing comments
author DaveM
date Thu, 09 Feb 2017 18:14:44 +0000
parents 7ec9bd8df111
children 54446ca7e6cb
line wrap: on
line source
function features = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees)
% rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees)
%
% Uses a random forest (TreeBagger) to perform feature selection for a
% given data set.
%
% Inputs:
%   data        matrix of size (x,y), where x is the number of observations
%               and y is the number of features.
%   labels      the set of labels for the data, one per row of data.
%   numFeatures the number of feature indices to return (default 5). If it
%               exceeds the number of columns in data, a warning is issued
%               and all features are returned.
%   iterMethod  the method by which the features are cut down
%               (default 'onePass'):
%       * 'onePass' will simply select the top (numFeatures) features and
%       report them.
%       * 'cutX' (e.g. 'cut10') will iteratively cut the bottom X percent
%       of features out, and perform random forest feature selection on
%       the new set, until the desired number of features is left. At
%       least one feature is removed per iteration, and a cut never goes
%       below numFeatures.
%       * 'oobErr' will do something with the out-of-bag error, but this
%       has not been implemented yet; it currently warns and behaves like
%       'onePass'.
%       * 'featureDeltaErr' will do something with the feature importance
%       prediction error, but this has not been implemented yet; it
%       currently warns and behaves like 'onePass'. The
%       OOBPermutedVarDeltaError property is a numeric array of size
%       1-by-Nvars containing a measure of importance for each predictor
%       variable (feature). For any variable, the measure is the increase
%       in prediction error if the values of that variable are permuted
%       across the out-of-bag observations. This measure is computed for
%       every tree, then averaged over the entire ensemble and divided by
%       the standard deviation over the entire ensemble.
%       Any other value produces a warning and the full ranking of all
%       features is returned.
%   numTrees    the number of trees grown in the forest (default 200).
%
% Output:
%   features    row vector of column indices into data, ordered from most
%               to least important.

% The argument count has to be checked before labels is touched, otherwise
% a call with a single argument fails with a generic MATLAB error instead
% of the message below.
if(nargin < 2)
    error('must pass data and labels into function')
end
if(length(labels) ~= size(data,1))
    error('labels and data do not match up');
end
if(nargin < 3)
    numFeatures = 5;
end
if(nargin < 4)
    iterMethod = 'onePass';
end
if(nargin < 5)
    numTrees = 200;
end

options = statset('UseParallel', true);
b = TreeBagger(numTrees, data, labels,'OOBVarImp','On',...
    'SampleWithReplacement', 'Off','FBoot', 0.632,'Options', options);
% Only the ordering is needed for now; the sorted importance values
% themselves will be required once 'featureDeltaErr' is implemented.
[~, ranking] = sort(b.OOBPermutedVarDeltaError,'descend');

% Never ask for more features than the data actually has.
numAvailable = numel(ranking);
if(numFeatures > numAvailable)
    warning('numFeatures (%d) exceeds the number of available features (%d), returning all of them', ...
        numFeatures, numAvailable);
    numFeatures = numAvailable;
end

if(strcmp(iterMethod,'onePass'))
    disp('onePass')
    features = ranking(1:numFeatures);
elseif(strncmp(iterMethod,'cut',3))
    % strncmp is safe for names shorter than three characters, unlike
    % indexing iterMethod(1:3) directly.
    disp(iterMethod)
    cutPercentage = str2double(iterMethod(4:end));
    if(isnan(cutPercentage) || cutPercentage <= 0)
        error('iterMethod ''%s'' must be ''cut'' followed by a positive percentage, e.g. ''cut10''', iterMethod);
    end
    if(numAvailable > numFeatures)
        % Drop at least one feature per pass so the recursion always makes
        % progress, but never drop below the requested number.
        cutSize = max(floor(numAvailable*cutPercentage/100),1);
        cutSize = min(cutSize, numAvailable - numFeatures);
        kept = ranking(1:end-cutSize);
        if(numel(kept) > numFeatures)
            % Re-rank using only the surviving columns. The recursive call
            % returns indices into data(:,kept), so map them back to
            % column indices of the data passed to this call.
            subIdx = rfFeatureSelection(data(:,kept), labels, numFeatures, iterMethod, numTrees);
            features = kept(subIdx);
        else
            features = kept;
        end
    else
        % Already at the requested size, nothing left to cut.
        features = ranking;
    end
elseif(strcmp(iterMethod,'oobErr'))
    warning('This method has not been implemented yet, using onePass to return results')
    features = ranking(1:numFeatures);
elseif(strcmp(iterMethod,'featureDeltaErr'))
    warning('This method has not been implemented yet, using onePass to return results')
    % this will use the sorted importance values from the forest
    features = ranking(1:numFeatures);
else
    % Unrecognised method: keep returning the full ranking, but say so.
    warning('Unknown iterMethod, returning the full ranking of all features')
    features = ranking;
end
end