# HG changeset patch
# User Dan Stowell
# Date 1381393127 -3600
# Node ID 507300d2ed661ae8306bab1a7b3ce05b417a5d21
# Parent  d09b21e43345076f17c68dde9435b22fe78b41c5
# Parent  f562f99dbd47fc3a421fde1d102029758a6edfcc
merge

diff -r f562f99dbd47 -r 507300d2ed66 scene_classification/create_folds/RandomString1.m
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scene_classification/create_folds/RandomString1.m	Thu Oct 10 09:18:47 2013 +0100
@@ -0,0 +1,11 @@
+function String = RandomString1(n)
+% RANDOMSTRING1  Generate a random string of lower case letters of length n.
+%
+%   String = RandomString1(n) returns a 1-by-n char row vector; each
+%   character is picked from 'a'..'z' using a separate draw from rand.
+
+LetterStore = char(97:122); % string containing all allowable letters (in this case lower case only)
+
+% scale each rand draw to the alphabet size and round up to get an index into LetterStore
+String = LetterStore(ceil(length(LetterStore).*rand(1,n)));
+
diff -r f562f99dbd47 -r 507300d2ed66 scene_classification/create_folds/sceneClassificationMetrics_createFolds.m
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scene_classification/create_folds/sceneClassificationMetrics_createFolds.m	Thu Oct 10 09:18:47 2013 +0100
@@ -0,0 +1,141 @@
+function [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds(wavsourcedir, targetdir, numfolds)
+% SCENECLASSIFICATIONMETRICS_CREATEFOLDS  Build cross-validation folds from a folder of scene recordings.
+%
+%   [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds(wavsourcedir, targetdir)
+%   [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds(wavsourcedir, targetdir, numfolds)
+%
+%   Every '*.wav' file found in WAVSOURCEDIR is copied, under a randomised
+%   name, into each of NUMFOLDS fold folders created inside TARGETDIR. The
+%   class label of a file is its name minus the last 6 characters (e.g.
+%   'bus09.wav' -> 'bus'). Within each class the files are dealt round-robin
+%   to the folds: a file is TEST data in the fold it was dealt to and
+%   TRAINING data in every other fold.
+%
+%   Inputs:
+%     wavsourcedir - folder containing the source wav files
+%     targetdir    - folder in which fold folders and list files are created
+%     numfolds     - (optional) number of folds, a positive integer; default 5
+%
+%   Outputs (1-by-numfolds cell arrays of file paths):
+%     trainlists  - per fold, text file of 'path\tclass' lines
+%     testlists   - per fold, text file of 'path' lines (no labels)
+%     testgtlists - per fold, text file of 'path\tclass' lines for the test
+%                   items; written into TARGETDIR, not into the fold folder
+%
+%   An error is raised if a file name does not map to a known class or if a
+%   list file cannot be opened; list files already open are closed first.
+%
+%   Examples:
+%     [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds('~/data/aasp_chall_FROMRDR2/scenes_stereo/scenes_stereo', 'tmp')
+%     [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds('scenes_stereo', 'tmp');
+
+if nargin < 3 || isempty(numfolds)
+    numfolds = 5;
+end
+if ~(isnumeric(numfolds) && isscalar(numfolds) && numfolds >= 1 && numfolds == floor(numfolds))
+    error('sceneClassificationMetrics_createFolds:badNumFolds', 'numfolds must be a positive integer');
+end
+
+wavpaths = dir(fullfile(wavsourcedir, '*.wav'));
+
+% Slightly strange way to index class labels; it's because containers.Map not present in Octave
+classIDs.bus = 1;
+classIDs.busystreet = 2;
+classIDs.office = 3;
+classIDs.openairmarket = 4;
+classIDs.park = 5;
+classIDs.quietstreet = 6;
+classIDs.restaurant = 7;
+classIDs.supermarket = 8;
+classIDs.tube = 9;
+classIDs.tubestation = 10;
+
+% One initially-empty item list per class, sized from the table above
+classcollections = cell(numel(fieldnames(classIDs)), 1);
+for whichclass = 1:length(classcollections)
+    classcollections{whichclass} = {};
+end
+
+% For every file, we create an entry in classcollections{id}{}, also allocating its fold number
+for index = 1:length(wavpaths)
+    wavname = wavpaths(index).name;
+    wavitem = struct();
+    wavitem.origpath = fullfile(wavsourcedir, wavname);
+    wavitem.classstr = wavname(1:max(length(wavname)-6, 0)); % rm 6 chars eg '09.wav'
+    if ~isfield(classIDs, wavitem.classstr)
+        error('sceneClassificationMetrics_createFolds:unknownClass', ...
+              'Cannot derive a known class label from file name "%s"', wavname);
+    end
+    wavitem.classID = classIDs.(wavitem.classstr);
+    numsofar = length(classcollections{wavitem.classID});
+    wavitem.whichfold = mod(numsofar, numfolds) + 1; % deal round-robin within the class
+    classcollections{wavitem.classID}{numsofar+1} = wavitem;
+end
+
+foldfolders = cell(1, numfolds);
+trainlists  = cell(1, numfolds);
+testlists   = cell(1, numfolds);
+testgtlists = cell(1, numfolds);
+
+% File identifiers, one row per fold; columns are train, test, test ground truth.
+% -1 marks "not open" so that the cleanup below knows which ones to close.
+fids = -ones(numfolds, 3);
+
+try
+    % for each fold, we'll create a folder with a scrambley name, and we'll initialise output files
+    for whichfold = 1:numfolds
+        foldfolders{whichfold} = fullfile(targetdir, sprintf('%d_%s', whichfold, RandomString1(6)));
+        mkdir(foldfolders{whichfold});
+        trainlists{whichfold}  = fullfile(foldfolders{whichfold}, sprintf('fold%d_train.txt', whichfold));
+        testlists{whichfold}   = fullfile(foldfolders{whichfold}, sprintf('fold%d_test.txt', whichfold));
+        testgtlists{whichfold} = fullfile(targetdir, sprintf('fold%d_testgt_%s.txt', whichfold, RandomString1(6)));
+        fids(whichfold, 1) = openForWriting(trainlists{whichfold});
+        fids(whichfold, 2) = openForWriting(testlists{whichfold});
+        fids(whichfold, 3) = openForWriting(testgtlists{whichfold});
+    end
+
+    for whichclass = 1:length(classcollections)
+        thisclass = classcollections{whichclass};
+
+        for whichitem = 1:length(thisclass)
+            wavitem = thisclass{whichitem};
+            for whichfold = 1:numfolds
+                newfilename = fullfile(foldfolders{whichfold}, sprintf('%d_%s.wav', whichitem, RandomString1(6)));
+                copyfile(wavitem.origpath, newfilename); % BE CAREFUL WITH THIS!!!
+                if whichfold == wavitem.whichfold
+                    % this fold is TESTING for this item
+                    fprintf(fids(whichfold, 3), '%s\t%s\n', newfilename, wavitem.classstr);
+                    fprintf(fids(whichfold, 2), '%s\n', newfilename);
+                else
+                    % this fold is TRAINING for this item
+                    fprintf(fids(whichfold, 1), '%s\t%s\n', newfilename, wavitem.classstr);
+                end
+            end
+        end
+    end
+catch err
+    % do not leak open list files when something above fails
+    closeAll(fids);
+    rethrow(err);
+end
+
+% close the files
+closeAll(fids);
+
+
+function fid = openForWriting(filepath)
+% Open FILEPATH for writing; raise an error instead of returning -1 on failure.
+[fid, msg] = fopen(filepath, 'w');
+if fid < 0
+    error('sceneClassificationMetrics_createFolds:fopenFailed', ...
+          'Could not open "%s" for writing: %s', filepath, msg);
+end
+
+
+function closeAll(fids)
+% Close every identifier in FIDS that refers to an opened file (i.e. is not -1).
+openfids = fids(fids >= 0);
+for k = 1:numel(openfids)
+    fclose(openfids(k));
+end
+