function [data, dset_origin] = collect_all_mirex_annotations(base_directory, dsets, algos)
% function [data dset_origin] = collect_all_mirex_annotations(base_directory, dsets, algos)
%
% GET ALL THE DATA!
% This function collects annotations from all years of MIREX evaluation.
% Annotation data (onsets and labels) all go in a single structure, including file
% locations.
%
% BASE_DIRECTORY should be the "mirex_path" specified in "get_mirex_estimates.rb",
% or whatever directory contains all the downloaded MIREX data. For example:
% "/Users/me/Desktop/MIREX_data"
%
% DSETS should contain the names of the datasets. If it is omitted or empty, the
% default value is all of them:
%     {'mrx09','mrx10_1','mrx10_2','sal'}
% Keep the DSETS in a consistent order across your work, because the index of the dataset
% is important for some of the other functions.
%
% ALGOS should contain the name of at least one algorithm, so that the data can be
% accessed correctly. (Only the first algo is used, since the annotation is the same
% for each.) If it is omitted or empty, the default value is {'KSP1'}.
%
% A single name given as a plain string is accepted for DSETS and ALGOS and is
% treated as a one-element cell array.
%
% The output DATA structure contains the following fields:
%
%     DATA(i).TIM = onset times of annotation (sorted in ascending order)
%     DATA(i).LAB = labels of sections
%     DATA(i).FILE = file from which the above information derives
%     DATA(i).DSET = numerical index of the dataset (index into DSETS)
%
% The optional output DSET_ORIGIN is a column vector with DSET_ORIGIN(i) = DATA(i).DSET.
%
% Files that cannot be opened or read are reported on the console and skipped, and
% annotations that come back empty are reported and left out of DATA.
%
% Dependencies:
%   - load_annotation.m

% Apply the documented defaults only when the caller did not supply a value.
if nargin < 2 || isempty(dsets)
    dsets = {'mrx09','mrx10_1','mrx10_2','sal'};
elseif ischar(dsets)
    dsets = {dsets};
end
if nargin < 3 || isempty(algos)
    algos = {'KSP1'};
elseif ischar(algos)
    algos = {algos};
end

% The song names and ground truth files are identical for each algorithm, so it is
% only necessary to have one valid ALGO here.
algo = algos{1};

% YEAR(i).NAMES will contain the NAMES of all the individual song files from YEAR i.
% Every dataset gets an (initially empty) entry up front, so that a CSV file that
% fails to load simply contributes no songs instead of breaking the indexing below.
year = struct('names', cell(1, numel(dsets)));
for i = 1:numel(dsets)
    year(i).names = {};
end

% Use one CSV file from each dataset to discover the names of all the songs.
fprintf('About to open some CSV files to extract the names of the songs in MIREX. If you see lots of errors, please ensure that the files exist in the correct location.\n')
for i = 1:numel(dsets)
    csv_file = fullfile(base_directory, dsets{i}, algo, 'per_track_results.csv');
    fid = fopen(csv_file);
    if fid == -1
        fprintf('Error opening or reading the following CSV file:\n %s\n', csv_file);
        continue
    end
    try
        % Keep the second column and drop its first entry (the header row).
        names_tmp = textscan(fid, '%s%s%*[^\n]', 'Delimiter', ',');
        year(i).names = names_tmp{2}(2:end);
    catch
        fprintf('Error opening or reading the following CSV file:\n %s\n', csv_file);
    end
    % Close the file whether or not the read succeeded.
    fclose(fid);
end

fprintf('OK, done with that.\n\n')

% MRXTRUTH will contain an entry for every song in the MIREX evaluation, containing
% the annotation (onset times and labels), the source file, and the dataset index.
mrxtruth = struct('tim', {}, 'lab', {}, 'file', {}, 'dset', {});

% For every dataset (DSET), look through all the names (YEAR(k).NAMES), and load the annotation.
fprintf('About to load all the ground truth files published by MIREX. If you see lots of errors, please ensure that the files exist in the correct location, and that the function ''load_annotation'' exists.\n')
for k = 1:numel(dsets)
    dset = dsets{k};
    for i = 1:numel(year(k).names)
        % FYI: GT stands for 'ground truth', in contrast to PRED for 'prediction'.
        gt = fullfile(base_directory, dset, algo, strcat(year(k).names{i}, '_gt.txt'));
        try
            % Load into temporaries first, so that a failure never leaves a
            % half-filled entry in MRXTRUTH.
            [tim, lab] = load_annotation(gt, 'two_column');
        catch
            fprintf('Error opening the following ground truth file:\n %s\n', gt);
            continue
        end
        if isempty(tim)
            % Sometimes the annotation might be empty! This can be bad news.
            % If this happens, print out the name of the offending file, and leave
            % it out of the structure of annotations.
            % The name is printed through '%s' so that '%' or '\' characters in the
            % path are not interpreted as format directives.
            fprintf('%s\n', gt);
            continue
        end
        n = numel(mrxtruth) + 1;
        mrxtruth(n).tim = tim;
        mrxtruth(n).lab = lab;
        mrxtruth(n).file = gt;
        mrxtruth(n).dset = k;
    end
end

fprintf('OK, done with that.\n\n')

% It can be useful to have a separate structure pointing to the index of the dataset.
% This is an optional output of the function.

data = mrxtruth;
dset_origin = reshape([data.dset], [], 1);

% Did this actually happen? That some of the onset times are in the incorrect order?
% Why yes, it did. It happens probably due to some floating point error...
% What you would see is two boundaries a tiny distance apart, where the later one
% appeared first, like: "145.0000468 silence; 145.0000000 end".
% In such cases, a perfectly acceptable fix is to just resort the times. They should
% be in sorted order anyway!
% Only the times are sorted; the labels keep their original order.

for i = 1:numel(data)
    data(i).tim = sort(data(i).tim);
end