function [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds(wavsourcedir, targetdir)
% SCENECLASSIFICATIONMETRICS_CREATEFOLDS  Build stratified cross-validation folds of scene recordings.
%
%   [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds(wavsourcedir, targetdir)
%
%   Every '*.wav' file directly inside WAVSOURCEDIR is assigned a class from
%   its filename (the name minus its last 6 characters, e.g. 'bus09.wav' ->
%   'bus') and a test fold, allocated round-robin within its class. For each
%   fold a sub-folder with a randomised name is created in TARGETDIR, and
%   EVERY wav file is copied into EVERY fold folder under a randomised name.
%
%   Inputs:
%     wavsourcedir - folder containing the source wav files
%     targetdir    - folder in which fold folders and list files are created
%
%   Outputs (1-by-numfolds cell arrays of file paths):
%     trainlists   - per fold, text file with lines 'path<TAB>classlabel'
%     testlists    - per fold, text file with lines 'path' (no labels)
%     testgtlists  - per fold, text file with lines 'path<TAB>classlabel'
%                    for the test items (ground truth; stored in TARGETDIR
%                    itself rather than in the fold folder)
%
%   An error is raised if a filename does not map to a known class, or if a
%   fold folder or list file cannot be created. List files opened so far are
%   closed before the error is propagated.
%
%   Examples:
%     [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds('~/data/aasp_chall_FROMRDR2/scenes_stereo/scenes_stereo', 'tmp')
%     [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds('scenes_stereo', 'tmp');

numfolds = 5;

wavpaths = dir(fullfile(wavsourcedir, '*.wav'));

% Slightly strange way to index class labels; it's because containers.Map not present in Octave
classIDs.bus           = 1;
classIDs.busystreet    = 2;
classIDs.office        = 3;
classIDs.openairmarket = 4;
classIDs.park          = 5;
classIDs.quietstreet   = 6;
classIDs.restaurant    = 7;
classIDs.supermarket   = 8;
classIDs.tube          = 9;
classIDs.tubestation   = 10;
% Size the per-class store from the table above so the two cannot drift apart.
classcollections = cell(numel(fieldnames(classIDs)), 1);

% For every file, we create an entry in classcollections{id}{}, also allocating its fold number
for index = 1:length(wavpaths)
    wavname = wavpaths(index).name;
    wavitem.origpath = fullfile(wavsourcedir, wavname);
    wavitem.classstr = wavname(1:end-6); % rm 6 chars eg '09.wav'
    if ~isfield(classIDs, wavitem.classstr)
        error('sceneClassificationMetrics_createFolds:unknownClass', ...
              'Cannot derive a known class label from file name "%s" (got "%s").', ...
              wavname, wavitem.classstr);
    end
    wavitem.classID = classIDs.(wavitem.classstr);
    numsofar = length(classcollections{wavitem.classID});
    % Round-robin within the class keeps the folds stratified.
    wavitem.whichfold = mod(numsofar, numfolds) + 1;
    classcollections{wavitem.classID}{numsofar+1} = wavitem;
end

foldfolders = cell(1, numfolds);
trainlists  = cell(1, numfolds);
testlists   = cell(1, numfolds);
testgtlists = cell(1, numfolds);

% Rows: 1 = train list, 2 = test list, 3 = test ground-truth list.
% -1 marks "not opened", so cleanup can run safely at any point.
fids = -ones(3, numfolds);

try
    % for each fold, we'll create a folder with a scrambley name, and we'll initialise output files
    for whichfold = 1:numfolds
        foldfolders{whichfold} = fullfile(targetdir, sprintf('%d_%s', whichfold, RandomString1(6)));
        [mkstatus, mkmsg] = mkdir(foldfolders{whichfold});
        if ~mkstatus
            error('sceneClassificationMetrics_createFolds:mkdirFailed', ...
                  'Could not create fold folder "%s": %s', foldfolders{whichfold}, mkmsg);
        end
        trainlists{whichfold}  = fullfile(foldfolders{whichfold}, sprintf('fold%d_train.txt', whichfold));
        testlists{whichfold}   = fullfile(foldfolders{whichfold}, sprintf('fold%d_test.txt', whichfold));
        testgtlists{whichfold} = fullfile(targetdir, sprintf('fold%d_testgt_%s.txt', whichfold, RandomString1(6)));
        fids(1, whichfold) = openForWriting(trainlists{whichfold});
        fids(2, whichfold) = openForWriting(testlists{whichfold});
        fids(3, whichfold) = openForWriting(testgtlists{whichfold});
    end

    for whichclass = 1:length(classcollections)
        thisclass = classcollections{whichclass};

        for whichitem = 1:length(thisclass)
            wavitem = thisclass{whichitem};
            for whichfold = 1:numfolds
                newfilename = fullfile(foldfolders{whichfold}, sprintf('%d_%s.wav', whichitem, RandomString1(6)));
                copyfile(wavitem.origpath, newfilename);  % BE CAREFUL WITH THIS!!!
                if whichfold == wavitem.whichfold
                    % this fold is TESTING for this item
                    fprintf(fids(3, whichfold), '%s\t%s\n', newfilename, wavitem.classstr);
                    fprintf(fids(2, whichfold), '%s\n', newfilename);
                else
                    % this fold is TRAINING for this item
                    fprintf(fids(1, whichfold), '%s\t%s\n', newfilename, wavitem.classstr);
                end
            end
        end
    end
catch err
    % Do not leak file handles if folder creation, copying or writing fails.
    closeFoldFiles(fids);
    rethrow(err);
end

% close the files
closeFoldFiles(fids);


function fid = openForWriting(filepath)
% Open FILEPATH for writing, raising a descriptive error on failure.
[fid, msg] = fopen(filepath, 'w');
if fid == -1
    error('sceneClassificationMetrics_createFolds:fopenFailed', ...
          'Could not open "%s" for writing: %s', filepath, msg);
end


function closeFoldFiles(fids)
% Close every file identifier in FIDS that was actually opened (i.e. is not -1).
for k = 1:numel(fids)
    if fids(k) ~= -1
        fclose(fids(k));
    end
end