view phase2/rfFeatureSelection.m @ 4:7ec9bd8df111

document random forests and create iteration options
author DaveM
date Thu, 09 Feb 2017 18:11:51 +0000
parents 3ff93b42d454
children 7848d183c7ab
line wrap: on
line source
function features = rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees)
% rfFeatureSelection(data, labels, numFeatures, iterMethod, numTrees)
%
% Uses a random forest (TreeBagger) to rank the columns of data by their
% out-of-bag permuted variable importance and returns the indices of the
% selected columns, most important first.
%
% Inputs:
%   data        matrix of size (x,y), where x is the number of observations
%               (one per label) and y is the number of features.
%   labels      the labels for the data; length(labels) must equal
%               size(data,1).
%   numFeatures number of feature indices to return (default 5). If it
%               exceeds the number of available features, all features
%               are returned.
%   iterMethod  how the feature set is cut down (default 'onePass'):
%       'onePass' ranks the features once and reports the top
%       (numFeatures) of them.
%       'cutX' (e.g. 'cut10') iteratively removes the bottom X percent of
%       features (at least one per iteration), re-running the random
%       forest on the remaining columns, until numFeatures features are
%       left.
%       'oobErr' is intended to use the out-of-bag error, but has not
%       been implemented yet; it warns and behaves like 'onePass'.
%       'featureDeltaErr' is intended to use the feature importance
%       prediction error, but has not been implemented yet; it warns and
%       behaves like 'onePass'.
%       Any other value returns the full ranking of all features.
%   numTrees    number of trees grown in the forest (default 200).
%
% Output:
%   features    column indices into the data matrix passed to this call,
%               ordered by decreasing importance.

% Check the argument count first: touching labels before this check would
% raise a generic "not enough input arguments" error instead.
if(nargin < 2)
    error('must pass data and labels into function');
end
if(length(labels) ~= size(data,1))
    error('labels and data do not match up');
end
if(nargin < 3)
    numFeatures = 5;
end
if(nargin < 4)
    iterMethod = 'onePass';
end
if(nargin < 5)
    numTrees = 200;
end

options = statset('UseParallel', true);
b = TreeBagger(numTrees, data, labels,'OOBVarImp','On',...
    'SampleWithReplacement', 'Off','FBoot', 0.632,'Options', options);
% Only the sort permutation is needed: it lists column indices of data in
% order of decreasing importance.
[~,I] = sort(b.OOBPermutedVarDeltaError,'descend');
features = I;

% Never ask for more features than the ranking contains.
numKeep = min(numFeatures, length(features));

if(strcmp(iterMethod,'onePass'))
    disp('onePass')
    features = features(1:numKeep);
elseif(strncmp(iterMethod,'cut',3))
    % strncmp is safe for strings shorter than three characters.
    disp(iterMethod)
    cutPercentage = str2double(iterMethod(4:end));
    if(isnan(cutPercentage) || cutPercentage <= 0)
        error('cut method must be followed by a positive percentage, e.g. ''cut10''');
    end
    numRanked = length(features);
    if(numRanked > numFeatures)
        % Drop at least one feature per iteration so the recursion always
        % makes progress, but never cut below the requested count.
        cutSize = max(floor(numRanked*cutPercentage/100),1);
        cutSize = min(cutSize, numRanked - numFeatures);
        kept = features(1:end-cutSize);
        if(length(kept) > numFeatures)
            % Re-rank on the surviving columns only. The recursive call
            % returns indices into data(:,kept), so map them back to the
            % column indices of this call's data.
            subIdx = rfFeatureSelection(data(:,kept), labels, numFeatures, iterMethod, numTrees);
            features = kept(subIdx);
        else
            features = kept;
        end
    end
elseif(strcmp(iterMethod,'oobErr'))
    warning('This method has not been implemented yet, using onePass to return results')
    features = features(1:numKeep);
elseif(strcmp(iterMethod,'featureDeltaErr'))
    warning('This method has not been implemented yet, using onePass to return results')
    features = features(1:numKeep);
end
end