function mrxoutput = collect_all_mirex_algo_output_data(base_directory, dsets, algos)
% function mrxoutput = collect_all_mirex_algo_output_data(base_directory, dsets, algos)
%
% Collects the output data (predicted onset locations and segment labels) of the
% algorithms in the MIREX evaluation.
%
% BASE_DIRECTORY should be the "mirex_path" specified in "get_mirex_estimates.rb",
% or whatever directory contains all the downloaded MIREX data. For example:
% "/Users/jordan/Desktop/MIREX_data"
%
% DSETS should be a cell array with the names of the datasets. The default value is
% all of them:
%   {'mrx09','mrx10_1','mrx10_2','sal'}
% Keep the DSETS in a consistent order across your work, because the index of the
% dataset is important for some of the other functions.
%
% ALGOS should be a cell array with the names of all the algorithms. The default value
% is all of them:
%   {'KSP1','KSP2','KSP3','MHRAF1','OYZS1','SBV1','SMGA1','SMGA2','SP1'}
% As with the DSETS, keep these names consistent across all work.
%
% The song names of each dataset are read from the second column (header row skipped)
% of the file
%   BASE_DIRECTORY/<dataset>/<first algorithm>/per_track_results.csv
% and each prediction is then loaded from
%   BASE_DIRECTORY/<dataset>/<algorithm>/<song name>_pred.txt
%
% The output MRXOUTPUT is a 1-by-numel(DSETS) struct array:
%
% MRXOUTPUT(k).ALGO(j).SONG(i) gives the structure of the ith song of the kth dataset
% as predicted by the jth algorithm.
%
% SONG(i).TIM = predicted onset times, as returned by load_annotation
% SONG(i).LAB = predicted section labels, as returned by load_annotation
% SONG(i).FILE = file from which the above information derives
%
% If the CSV file of a dataset cannot be opened or parsed, a message is printed and
% that dataset is treated as having no songs: MRXOUTPUT(k).ALGO is left empty, but
% MRXOUTPUT(k) still exists so that dataset indices stay aligned with DSETS.
%
% Dependencies:
%   - load_annotation.m

if nargin<2
    dsets = {'mrx09','mrx10_1','mrx10_2','sal'};
end
if nargin<3
    algos = {'KSP1','KSP2','KSP3','MHRAF1','OYZS1','SBV1','SMGA1','SMGA2','SP1'};
end

n_dsets = numel(dsets);

% Use the CSV files to discover the names of all the songs. Every dataset gets an entry
% up front (empty by default), so that a missing or unreadable CSV file cannot leave a
% hole that breaks the indexing in the main loop below.
song_names = repmat({{}}, 1, n_dsets);
fprintf('About to open some CSV files to extract the names of the songs in MIREX. If you see lots of errors, please ensure that the files exist in the correct location.\n')
for i = 1:n_dsets
    % The per-track results of the first algorithm are used as the song list.
    csv_file = fullfile(base_directory, dsets{i}, algos{1}, 'per_track_results.csv');
    fid = fopen(csv_file);
    if fid == -1
        fprintf('Error opening or reading the following CSV file:\n   %s\n', csv_file);
        continue
    end
    try
        % Keep the first two comma-separated columns; discard the rest of each line.
        names_tmp = textscan(fid, '%s%s%*[^\n]', 'Delimiter', ',');
        % Song names are in the second column; the first row is the header.
        song_names{i} = names_tmp{2}(2:end);
    catch
        fprintf('Error opening or reading the following CSV file:\n   %s\n', csv_file);
    end
    % Close the file whether or not parsing succeeded, so the handle is never leaked.
    fclose(fid);
end
fprintf('OK, done with that.\n\n')


fprintf('About to go through all the algorithm outputs and load all the predicted song descriptions. If you see lots of errors, please ensure that the files exist in the correct location.\n')

% One element per dataset, so MRXOUTPUT(k) always corresponds to DSETS{k}.
mrxoutput = repmat(struct('algo', []), 1, n_dsets);

% For each dataset (DSET), and for each algorithm (ALGO), look in turn at each song.
for k = 1:n_dsets
    dset = dsets{k};
    for j = 1:numel(algos)
        algo = algos{j};
        for i = 1:numel(song_names{k})
            % FYI: PRED stands for 'prediction', in contrast to GT for 'ground truth'.
            pred = fullfile(base_directory, dset, algo, strcat(song_names{k}{i}, '_pred.txt'));
            [tim, lab] = load_annotation(pred, 'two_column');
            mrxoutput(k).algo(j).song(i).tim = tim;
            mrxoutput(k).algo(j).song(i).lab = lab;
            mrxoutput(k).algo(j).song(i).file = pred;
            % An empty set of onset times means the prediction file yielded nothing.
            if isempty(tim)
                fprintf('Screw up on %s?\n', pred)
            end
        end
    end
end
fprintf('OK, done with that.\n\n')