function [publictruth, dset_origin] = collect_all_public_annotations(base_directory)
% function [publictruth, dset_origin] = collect_all_public_annotations(base_directory)
%
% GET ALL THE DATA!
% This function collects annotations from many public repositories of structural analyses.
% Annotation data (onsets and labels) all go in a single structure, including file
% locations. Refer to the README file to see what these repositories are and how to
% download them.
%
% BASE_DIRECTORY should be the "mirex_path" specified in "get_mirex_estimates.rb",
% or whatever directory contains all the downloaded MIREX data. For example:
%   "/Users/me/Desktop/MIREX_data"
%
% Before running this script, you must have downloaded the original repositories
% to the "mirex_path" directory, and unzipped them. If you did that, then this script
% should be able to find and interpret all the annotations. (Except for those in .xml
% format, which should be pre-processed. Again, refer to the README.)
% A repository that cannot be found is reported and skipped; it does not abort the run.
%
% The output PUBLICTRUTH is a struct array with the following fields for the ith song:
%
%   PUBLICTRUTH(i).TIM  = onset times of annotation
%   PUBLICTRUTH(i).LAB  = labels of sections
%   PUBLICTRUTH(i).FILE = file from which the above information derives
%
% The output DSET_ORIGIN has one row per song:
%
%   DSET_ORIGIN(i,:) = [dataset subset], where DATASET is the hard-coded index of the
%       main dataset (1=RWC, 2=QM/Isophonics, 3=EP, 4=IRISA, 5=TUT, 6=SALAMI) and SUBSET
%       is the index of the directory within that dataset's directory list (always 1
%       for EP and SALAMI).
%
% Dependencies:
%   - load_annotation.m

public_dir = fullfile(base_directory,'public_data');

% Assemble lists of all the directories where the data live. This section is very hacky!!!

% RWC
rwc_dirs = {fullfile(public_dir,'AIST.RWC-MDB-C-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-G-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-J-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-P-2001.CHORUS')};

% QM, i.e., Isophonics data from Queen Mary
qm_dirs = {fullfile(public_dir,'Carole%20King%20Annotations'), ...
           fullfile(public_dir,'Michael%20Jackson%20Annotations'), ...
           fullfile(public_dir,'Queen%20Annotations'), ...
           fullfile(public_dir,'The%20Beatles%20Annotations'), ...
           fullfile(public_dir,'Zweieck%20Annotations')};

% EP, i.e., data released by Ewald Peiszer
ep_dir = fullfile(public_dir,'ep_groundtruth_txt/groundtruth');
% Or, you could download the original data, and convert the XML files to LAB files using
% the included Ruby script xml2lab.rb.
% ep_dir = fullfile(public_dir,'ep_groundtruth/groundtruth');

% IRISA
irisa_dirs = {fullfile(public_dir,'IRISA.RWC-MDB-P-2001.BLOCKS'), ...
              fullfile(public_dir,'IRISA.RWC-MDB-P-2012.SEMLAB_v003_reduced'), ...
              fullfile(public_dir,'IRISA.RWC-MDB-P-2012.SEMLAB_v003_full')};

% TUT Beatles: every sub-directory of public_data/TUT is treated as one subset.
tut_entries = list_entries(fullfile(public_dir,'TUT','*'));
tut_dirs = {};
for i=1:length(tut_entries),
    if tut_entries(i).directory==1,
        tut_dirs{end+1} = tut_entries(i).Name;
    end
end

% UPF Beatles
% NOTE(review): this list is NOT loaded anywhere below; it is kept for reference only.
% The entries used to be absolute paths on the author's machine (one with a leading
% space, two with shell escapes) appended to PUBLIC_DIR. They are now relative to
% PUBLIC_DIR; the on-disk names of the two White Album directories are assumed --
% confirm against the actual download before adding a loader.
upf_dirs = {fullfile(public_dir,'01_-_Please_please_me_1963'), ...
            fullfile(public_dir,'02_-_With_The_Beatles_1963'), ...
            fullfile(public_dir,'03_-_A_hard_days_night_1964'), ...
            fullfile(public_dir,'04_-_Beatles_for_sale_1964'), ...
            fullfile(public_dir,'05_-_Help_1965'), ...
            fullfile(public_dir,'06_-_Rubber_Soul'), ...
            fullfile(public_dir,'07_-_Revolver'), ...
            fullfile(public_dir,'08_-_Sgt._Pepper''s_Lonely_Hearts_Club_Band'), ...
            fullfile(public_dir,'09_-_Magical_Mystery_Tour'), ...
            fullfile(public_dir,'10_-_The_Beatles (White Album) CD1'), ...
            fullfile(public_dir,'10_-_The_Beatles (White Album) CD2'), ...
            fullfile(public_dir,'11_-_Abbey_Road'), ...
            fullfile(public_dir,'12_-_Let_it_Be')}; %#ok<NASGU>

% SALAMI
salami_dir = fullfile(public_dir,'SALAMI_data_v1.2/data');

dset_origin = [];

% Start from an empty struct array (not a cell) so that the output always has the
% documented fields, even when no annotation could be loaded.
publictruth = struct('tim',{},'lab',{},'file',{});

% Load RWC data
% NB: many flags will be thrown here because many of the RWC files are empty.
% NB: unlike the other datasets, RWC files are not filtered on the GroupRead attribute.
for i=1:length(rwc_dirs),
    entries = list_entries(fullfile(rwc_dirs{i},'*'));
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
        entries, 'lab', 1, i, false);   % NB: This '1' is HARD-CODED.
end

% Load Isophonics data (only the .lab files in each 'seglab' directory)
for i=1:length(qm_dirs),
    entries = list_entries(fullfile(qm_dirs{i},'seglab','*'));
    is_lab = false(1,length(entries));
    for j=1:length(entries),
        [unused_path, unused_name, file_extension] = fileparts(entries(j).Name);
        is_lab(j) = isequal(file_extension,'.lab');
    end
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
        entries(is_lab), 'lab', 2, i, true);   % NB: This '2' is HARD-CODED.
end

% Load EP data
entries = list_entries(fullfile(ep_dir,'*.txt'));
[publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
    entries, 'two_column', 3, 1, true);   % NB: This '3' is HARD-CODED.

% Load IRISA data
for i=1:length(irisa_dirs),
    entries = list_entries(fullfile(irisa_dirs{i},'*.lab'));
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
        entries, 'lab', 4, i, true);   % NB: This '4' is HARD-CODED.
end

% Load TUT data
for i=1:length(tut_dirs),
    entries = list_entries(fullfile(tut_dirs{i},'*.lab'));
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
        entries, 'lab', 5, i, true);   % NB: This '5' is HARD-CODED.
end

% Load SALAMI data (only the files whose name ends in 'uppercase.txt').
% Each entry is tested on its own name; the test used to look at entry 3 for every file.
entries = list_entries(fullfile(salami_dir,'*'));
is_uppercase = false(1,length(entries));
for j=1:length(entries),
    is_uppercase(j) = has_suffix(entries(j).Name,'uppercase.txt');
end
[publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
    entries(is_uppercase), 'two_column', 6, 1, true);   % NB: This '6' is HARD-CODED.

% Would you believe that in some of the annotations, two times are in the wrong order? It is simply appalling.
% We fix this here.
% NOTE(review): only the time points are sorted; the labels keep their original order.
for i=1:length(publictruth),
    sorted_tim = sort(publictruth(i).tim);
    if ~isequal(publictruth(i).tim,sorted_tim),
        publictruth(i).tim = sorted_tim;
        fprintf('Fixed order of time points in this file:%s\n',publictruth(i).file)
    end
end


function entries = list_entries(pattern)
% List the file-system entries matching PATTERN as a FILEATTRIB struct array.
% When nothing matches, FILEATTRIB reports failure and returns a message instead of a
% struct array; in that case the pattern is reported and an empty struct array is
% returned so that callers can loop over the result unconditionally.
[status, entries] = fileattrib(pattern);
if ~status || ~isstruct(entries),
    fprintf('No files found matching the following pattern:\n %s\n',pattern);
    entries = struct('Name',{},'directory',{},'GroupRead',{});
end


function tf = has_suffix(name, suffix)
% True when the string NAME ends with SUFFIX; safe for names shorter than SUFFIX.
n = length(suffix);
tf = length(name)>=n && strcmp(name(end-n+1:end),suffix);


function [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, entries, format, dset_id, subset_id, require_group_read)
% Load every regular file in ENTRIES with LOAD_ANNOTATION(file,FORMAT) and append the
% non-empty results to PUBLICTRUTH, adding a matching row [DSET_ID SUBSET_ID] to
% DSET_ORIGIN. Directories are skipped; when REQUIRE_GROUP_READ is true, entries whose
% GroupRead attribute is not 1 are skipped too. Files that fail to load, or that load
% as empty, are reported and skipped.
for j=1:length(entries),
    if entries(j).directory~=0,
        continue
    end
    if require_group_read && entries(j).GroupRead~=1,
        continue
    end
    % Load into temporaries first, so that nothing is appended on failure and the
    % result does not depend on how 'end+1' is evaluated across multiple outputs.
    try
        [tim, lab] = load_annotation(entries(j).Name,format);
    catch
        fprintf('Error opening or reading the following file. (It might be empty, or not a song file.)\n %s\n',entries(j).Name);
        continue
    end
    if isempty(tim),
        fprintf('The following file appears to be empty:\n %s\n',entries(j).Name)
        continue
    end
    publictruth(end+1).tim = tim;
    publictruth(end).lab = lab;
    publictruth(end).file = entries(j).Name;
    dset_origin = [dset_origin; dset_id subset_id];
end