view scene_classification/create_folds/sceneClassificationMetrics_createFolds.m @ 10:507300d2ed66 tip

merge
author Dan Stowell <dan.stowell@elec.qmul.ac.uk>
date Thu, 10 Oct 2013 09:18:47 +0100
parents d09b21e43345
children
line wrap: on
line source
function [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds(wavsourcedir, targetdir)
% SCENECLASSIFICATIONMETRICS_CREATEFOLDS  Build 5 train/test folds from a folder of labelled WAV files.
%
%   [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds(wavsourcedir, targetdir)
%
% Inputs:
%   wavsourcedir - folder holding the source '*.wav' files. Each file name must be
%                  '<classname>NN.wav', i.e. a known class label followed by exactly
%                  six trailing characters such as '09.wav'.
%   targetdir    - folder under which the fold folders and list files are created.
%
% Outputs (each a 1-by-5 cell array of file paths, one entry per fold):
%   trainlists   - per-fold training list; each line is '<copied wav path>\t<class label>'.
%   testlists    - per-fold test list; each line is '<copied wav path>' (no label).
%   testgtlists  - per-fold test ground truth; each line is '<copied wav path>\t<class label>'.
%                  These files are written directly in targetdir, not inside the fold folder.
%
% Within each class the files are dealt round-robin to folds 1..5 (in the order returned
% by DIR); the fold a file is dealt to is the fold in which it is a TEST item, and it is a
% TRAINING item in every other fold. Every file is copied into every fold folder under an
% obfuscated name '<index within class>_<random>.wav'.
%
% Errors are raised if a file name does not map to a known class, or if a folder, list
% file or copy cannot be created. Any list files already opened are closed first.
%
% Examples:
%   [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds('~/data/aasp_chall_FROMRDR2/scenes_stereo/scenes_stereo', 'tmp')
%   [trainlists, testlists, testgtlists] = sceneClassificationMetrics_createFolds('scenes_stereo', 'tmp');

numfolds = 5;
suffixlen = 6;  % number of trailing characters stripped from a file name, eg '09.wav'

wavpaths = dir(fullfile(wavsourcedir, '*.wav'));
if isempty(wavpaths)
    warning('sceneClassificationMetrics_createFolds:noWavFiles', ...
            'No .wav files found in "%s"; the fold lists will be empty.', wavsourcedir);
end

% Slightly strange way to index class labels; it's because containers.Map not present in Octave
classIDs.bus           =  1;
classIDs.busystreet    =  2;
classIDs.office        =  3;
classIDs.openairmarket =  4;
classIDs.park          =  5;
classIDs.quietstreet   =  6;
classIDs.restaurant    =  7;
classIDs.supermarket   =  8;
classIDs.tube          =  9;
classIDs.tubestation   = 10;
% One (initially empty) collection per class; sized from the table above so the two cannot drift apart.
classcollections = cell(numel(fieldnames(classIDs)), 1);

% For every file, we create an entry in classcollections{id}{}, also allocating its fold number
for index = 1:length(wavpaths)
    wavname = wavpaths(index).name;
    classstr = wavname(1:length(wavname)-suffixlen);  % empty if the name is too short
    if isempty(classstr) || ~isfield(classIDs, classstr)
        error('sceneClassificationMetrics_createFolds:unknownClass', ...
              'Cannot derive a known class label from file name "%s" (got "%s").', wavname, classstr);
    end
    wavitem = struct();
    wavitem.origpath  = fullfile(wavsourcedir, wavname);
    wavitem.classstr  = classstr;
    wavitem.classID   = classIDs.(classstr);
    numsofar = length(classcollections{wavitem.classID});
    wavitem.whichfold = mod(numsofar, numfolds) + 1;  % round-robin within the class
    classcollections{wavitem.classID}{numsofar+1} = wavitem;
end

foldfolders = cell(1, numfolds);
trainlists  = cell(1, numfolds);
testlists   = cell(1, numfolds);
testgtlists = cell(1, numfolds);
% File handles, one row per fold; columns are [train, test, testgt]. -1 means "not open".
fids = -ones(numfolds, 3);
failure = [];

try
    % for each fold, we'll create a folder with a scrambley name, and we'll initialise output files
    for whichfold = 1:numfolds
        foldfolders{whichfold} = fullfile(targetdir, sprintf('%d_%s', whichfold, RandomString1(6)));
        [ok, msg] = mkdir(foldfolders{whichfold});
        if ~ok
            error('sceneClassificationMetrics_createFolds:mkdirFailed', ...
                  'Could not create folder "%s": %s', foldfolders{whichfold}, msg);
        end
        trainlists{whichfold}  = fullfile(foldfolders{whichfold}, sprintf('fold%d_train.txt', whichfold));
        testlists{whichfold}   = fullfile(foldfolders{whichfold}, sprintf('fold%d_test.txt', whichfold));
        testgtlists{whichfold} = fullfile(targetdir,              sprintf('fold%d_testgt_%s.txt', whichfold, RandomString1(6)));

        listpaths = {trainlists{whichfold}, testlists{whichfold}, testgtlists{whichfold}};
        for col = 1:3
            [fid, msg] = fopen(listpaths{col}, 'w');
            if fid < 0
                error('sceneClassificationMetrics_createFolds:openFailed', ...
                      'Could not open "%s" for writing: %s', listpaths{col}, msg);
            end
            fids(whichfold, col) = fid;
        end
    end

    for whichclass = 1:length(classcollections)
        thisclass = classcollections{whichclass};

        for whichitem = 1:length(thisclass)
            wavitem = thisclass{whichitem};
            for whichfold = 1:numfolds
                newfilename = fullfile(foldfolders{whichfold}, sprintf('%d_%s.wav', whichitem, RandomString1(6)));
                % whichitem restarts at 1 for every class, so only the random part keeps
                % names apart; re-draw on a clash rather than overwrite another item's copy.
                while exist(newfilename, 'file')
                    newfilename = fullfile(foldfolders{whichfold}, sprintf('%d_%s.wav', whichitem, RandomString1(6)));
                end
                [ok, msg] = copyfile(wavitem.origpath, newfilename); % BE CAREFUL WITH THIS!!!
                if ~ok
                    error('sceneClassificationMetrics_createFolds:copyFailed', ...
                          'Could not copy "%s" to "%s": %s', wavitem.origpath, newfilename, msg);
                end
                if whichfold == wavitem.whichfold
                    % this fold is TESTING for this item
                    fprintf(fids(whichfold, 3), '%s\t%s\n', newfilename, wavitem.classstr);
                    fprintf(fids(whichfold, 2), '%s\n', newfilename);
                else
                    % this fold is TRAINING for this item
                    fprintf(fids(whichfold, 1), '%s\t%s\n', newfilename, wavitem.classstr);
                end
            end
        end
    end
catch err
    failure = err;  % remember it; the list files must be closed before re-raising
end

% close the files (whether or not an error occurred)
openfids = fids(fids >= 0).';
for fid = openfids
    fclose(fid);
end

if ~isempty(failure)
    rethrow(failure);
end