% Recursive feature elimination driven by Random Forest variable importance.
%
% Each pass of the outer loop:
%   1. restricts DataTrain / FeatureNames to the features kept so far,
%   2. grows a 200-tree TreeBagger ensemble and records its OOB error,
%   3. searches for a Gaussian-mixture cluster count by increasing the
%      number of components until the mean AIC over 10 folds stops improving,
%   4. ranks the features by OOB permuted-variable importance,
%   5. stores everything in featuredata(count) and saves it to disk,
%   6. drops the lowest-ranked ~1% of features (always at least one).
% The loop stops once only two features remain.
%
% The workspace is cleared first, so AdobeStratified.mat must supply
% DataTrain, LabelsTrain and FeatureNames.

clearvars;
load('AdobeStratified.mat');

morefeatures = true;
idxvar = (1:1450);   % columns of DataTrain used on the first pass
count = 1;           % index of the current pass in featuredata

% Empty (0x0) struct array; one element is appended per pass.
featuredata = struct('IdxVar', [], 'FeatureNamesRanked', {}, ...
    'FeatureImportance', [], 'OOBError', [], 'LastOOBError', [], ...
    'EMClusters', [], 'AIC', [], 'PreviousAIC', []);

while (morefeatures)
    % Keep only the surviving features. After this, idxvar is renumbered
    % relative to the *reduced* DataTrain, not the original column order.
    DataTrain = DataTrain(:, idxvar);
    FeatureNames = FeatureNames(idxvar);
    idxvar = (1:length(FeatureNames));
    fprintf('\n Growing a Random Forest of 200 trees using %i features\n', length(idxvar))

    % Fixed seed so every pass draws the same random stream.
    rng(1945, 'twister')
    tic
    options = statset('UseParallel', true);
    % Each tree is trained on 63.2% of the rows sampled without replacement;
    % OOB variable importance is requested for the ranking step below.
    b = TreeBagger(200, DataTrain, LabelsTrain, 'OOBVarImp', 'On', ...
        'SampleWithReplacement', 'Off', 'FBoot', 0.632, 'Options', options);
    toc

    oobErr = oobError(b);
    LastoobErr = oobErr(end);   % error with all 200 trees

    fprintf('\n The cumulative OOB Error at 200 trees is %f\n', LastoobErr);

    % Fold assignment (1..10) for every training row.
    Indices = crossvalind('Kfold', size(DataTrain, 1), 10);

    % Sentinels guarantee the search loop is entered and that the first
    % evaluated model (2 clusters) is always accepted as an improvement.
    AICInitial = 1e16;
    AICNext = -1e16;
    AICAvg = zeros(10, 1);
    NumClusters = 1;

    % Increase the cluster count until the mean AIC gets worse. On exit,
    % NumClusters / AICNext describe the first model that did NOT improve,
    % and AICInitial holds the mean AIC of NumClusters - 1 clusters.
    while (AICNext <= AICInitial)

        if (NumClusters ~= 1)
            AICInitial = AICNext;
        end
        NumClusters = NumClusters + 1;

        fprintf('\n Performing EM using 10 fold CV and %i clusters and %i features\n', NumClusters, length(idxvar))

        for i = 1:10
            % Fit on every row that is NOT in fold i.
            emidx = ~(Indices == i);

            EMDataTrain = DataTrain(emidx, :);
            GMModelCV = fitgmdist(EMDataTrain, NumClusters, 'RegularizationValue', 1e-5);
            AICAvg(i) = GMModelCV.AIC;
        end

        AICNext = mean(AICAvg);
        fprintf('The average AIC was %f\n', AICNext);
    end

    % Rank features from most to least important.
    FI = b.OOBPermutedVarDeltaError;

    [FI, I] = sort(FI, 'descend');
    idxvar = idxvar(I);
    FeatureNamesRanked = FeatureNames(I);

    featuredata(count).IdxVar = idxvar;
    featuredata(count).FeatureNamesRanked = FeatureNamesRanked;
    featuredata(count).FeatureImportance = FI;
    featuredata(count).OOBError = oobErr;
    featuredata(count).LastOOBError = LastoobErr;
    featuredata(count).EMClusters = NumClusters;
    featuredata(count).AIC = AICNext;
    featuredata(count).PreviousAIC = AICInitial;

    % Drop the least important 1% of features. round() yields 0 once fewer
    % than 50 features remain, which previously left idxvar unchanged and
    % made this loop run forever; always remove at least one feature.
    numRemove = max(1, round((length(idxvar) / 100) * 1));
    fprintf('\n %i features will be removed.\n', numRemove)
    idxRemove = (length(idxvar) - numRemove);   % number of features kept
    idxvar = idxvar(1:idxRemove);
    count = count + 1;

    % Checkpoint after every pass so partial results survive an interruption.
    save('Results1Percent.mat', 'featuredata');

    % "<=" rather than "==" so the stop condition cannot be stepped over.
    if (length(idxvar) <= 2)
        morefeatures = false;
    end
end