changeset 9:d09b21e43345

add code to create folds
author Dan Stowell <dan.stowell@elec.qmul.ac.uk>
date Thu, 10 Oct 2013 09:18:23 +0100
parents 9becdb4e659b
children 507300d2ed66
files scene_classification/create_folds/RandomString1.m scene_classification/create_folds/sceneClassificationMetrics_createFolds.m
diffstat 2 files changed, 81 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scene_classification/create_folds/RandomString1.m	Thu Oct 10 09:18:23 2013 +0100
@@ -0,0 +1,7 @@
+function String = RandomString1(n)
+% RANDOMSTRING1  Build a 1-by-n char vector of randomly chosen lower-case letters.
+%   n is the requested length; each position is drawn independently via rand.
+alphabet = char(97:122);                      % 'a'..'z', the only characters allowed
+picks = ceil(numel(alphabet) .* rand(1, n));  % scale rand's (0,1) draws up to indices 1..26
+String = alphabet(picks);
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scene_classification/create_folds/sceneClassificationMetrics_createFolds.m	Thu Oct 10 09:18:23 2013 +0100
@@ -0,0 +1,106 @@
+function [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds(wavsourcedir, targetdir, numfolds)
+
+% Copies every WAV file in a folder into each of several cross-validation fold
+% folders (under scrambled names) and writes train/test list files per fold.
+%
+% Inputs:
+%   wavsourcedir - folder whose '*.wav' files are used; the class label is the
+%                  file name minus its last 6 characters (eg 'bus09.wav' -> 'bus')
+%   targetdir    - folder in which the fold folders and ground-truth lists are created
+%   numfolds     - optional number of folds, a positive integer (default 5)
+% Outputs (each a 1-by-numfolds cell array of file paths):
+%   trainlists   - list files of 'path<TAB>class' lines for each fold's training items
+%   testlists    - list files of 'path' lines for each fold's test items
+%   testgtlists  - list files of 'path<TAB>class' lines giving the test ground truth
+%
+% [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds('~/data/aasp_chall_FROMRDR2/scenes_stereo/scenes_stereo', 'tmp')
+% [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds('scenes_stereo', 'tmp');
+
+if nargin < 3
+    numfolds = 5;
+end
+if ~(isscalar(numfolds) && numfolds >= 1 && numfolds == round(numfolds))
+    error('sceneClassificationMetrics_createFolds:badNumFolds', 'numfolds must be a positive integer');
+end
+
+wavpaths = dir(fullfile(wavsourcedir, '*.wav'));
+
+% Slightly strange way to index class labels; it's because containers.Map not present in Octave
+classIDs.bus           =  1;
+classIDs.busystreet    =  2;
+classIDs.office        =  3;
+classIDs.openairmarket =  4;
+classIDs.park          =  5;
+classIDs.quietstreet   =  6;
+classIDs.restaurant    =  7;
+classIDs.supermarket   =  8;
+classIDs.tube          =  9;
+classIDs.tubestation   = 10;
+classcollections = cell(numel(fieldnames(classIDs)), 1);  % one bucket per known class
+
+% For every file, we create an entry in classcollections{id}{}, also allocating its fold number
+for index = 1:length(wavpaths)
+    wavname = wavpaths(index).name;
+    wavitem.origpath  = fullfile(wavsourcedir, wavname);
+    wavitem.classstr  = wavname(1:end-6);  % rm 6 chars eg '09.wav'
+    if ~isfield(classIDs, wavitem.classstr)  % fail clearly rather than with a cryptic field-access error
+        error('sceneClassificationMetrics_createFolds:unknownClass', 'No known class label in file name "%s"', wavname);
+    end
+    wavitem.classID   = classIDs.(wavitem.classstr);
+    numsofar = length(classcollections{wavitem.classID});
+    wavitem.whichfold = mod(numsofar, numfolds) + 1;  % deal files to folds round-robin within each class
+    classcollections{wavitem.classID}{numsofar+1} = wavitem;
+end
+
+[foldfolders, trainlists, testlists, testgtlists] = deal(cell(1, numfolds));
+fids = -ones(numfolds, 3);  % columns: train, test, testgt handles; -1 means "not open"
+failure = [];               % holds any error raised below until the files have been closed
+try
+    % for each fold, we'll create a folder with a scrambley name, and we'll initialise output files
+    for whichfold = 1:numfolds
+        foldfolders{whichfold} = fullfile(targetdir, sprintf('%d_%s', whichfold, RandomString1(6)));
+        mkdir(foldfolders{whichfold});
+        trainlists{whichfold}  = fullfile(foldfolders{whichfold}, sprintf('fold%d_train.txt', whichfold));
+        testlists{whichfold}   = fullfile(foldfolders{whichfold}, sprintf('fold%d_test.txt', whichfold));
+        testgtlists{whichfold} = fullfile(targetdir,              sprintf('fold%d_testgt_%s.txt', whichfold, RandomString1(6)));
+        listpaths = {trainlists{whichfold}, testlists{whichfold}, testgtlists{whichfold}};
+        for k = 1:3
+            fids(whichfold, k) = fopen(listpaths{k}, 'w');
+            if fids(whichfold, k) < 0
+                error('sceneClassificationMetrics_createFolds:openFailed', 'Cannot open "%s" for writing', listpaths{k});
+            end
+        end
+    end
+
+    for whichclass = 1:length(classcollections)
+        thisclass = classcollections{whichclass};
+        for whichitem = 1:length(thisclass)
+            wavitem = thisclass{whichitem};
+            for whichfold = 1:numfolds
+                newfilename = fullfile(foldfolders{whichfold}, sprintf('%d_%s.wav', whichitem, RandomString1(6)));
+                copyfile(wavitem.origpath, newfilename); % BE CAREFUL WITH THIS!!!
+                if whichfold == wavitem.whichfold
+                    % this fold is TESTING for this item
+                    fprintf(fids(whichfold, 3), '%s\t%s\n', newfilename, wavitem.classstr);
+                    fprintf(fids(whichfold, 2), '%s\n', newfilename);
+                else
+                    % this fold is TRAINING for this item
+                    fprintf(fids(whichfold, 1), '%s\t%s\n', newfilename, wavitem.classstr);
+                end
+            end
+        end
+    end
+catch caught
+    failure = caught;
+end
+
+% close the files on success and on failure alike (so no handles leak), then re-raise any error
+for k = 1:numel(fids)
+    if fids(k) >= 0
+        fclose(fids(k));
+    end
+end
+if ~isempty(failure)
+    rethrow(failure);
+end
+