function [publictruth dset_origin] = collect_all_public_annotations(base_directory)
% function [publictruth dset_origin] = collect_all_public_annotations(base_directory)
%
% GET ALL THE DATA!
% This function collects annotations from many public repositories of structural analyses.
% Annotation data (onsets and labels) all go in a single structure array, including file
% locations. Refer to the README file to see what these repositories are and how to
% download them.
%
% BASE_DIRECTORY should be the "mirex_path" specified in "get_mirex_estimates.rb",
% or whatever directory contains all the downloaded MIREX data. For example:
%     "/Users/me/Desktop/MIREX_data"
% The annotations are looked for in the "public_data" subdirectory of BASE_DIRECTORY.
%
% Before running this script, you must have downloaded the original repositories
% to the "mirex_path" directory, and unzipped them. If you did that, then this script
% should be able to find and interpret all the annotations. (Except for those in .xml
% format, which should be pre-processed. Again, refer to the README.)
%
% Outputs:
%
%   PUBLICTRUTH is a struct array with one element per successfully loaded,
%   non-empty annotation file:
%
%     PUBLICTRUTH(i).TIM  = onset times of annotation (re-sorted if out of order)
%     PUBLICTRUTH(i).LAB  = labels of sections
%     PUBLICTRUTH(i).FILE = file from which the above information derives
%
%   If nothing could be loaded, PUBLICTRUTH is an empty struct array with those fields.
%
%   DSET_ORIGIN is an N-by-2 matrix (empty if nothing was loaded); row i describes
%   PUBLICTRUTH(i):
%     column 1 = index of the main dataset:
%                1 = RWC (AIST), 2 = QM (Isophonics), 3 = EP, 4 = IRISA, 5 = TUT, 6 = SALAMI
%     column 2 = index of the subset within that dataset, i.e., the position of the
%                source directory in the corresponding directory list below (always 1
%                for EP and SALAMI, which have a single directory).
%
% A dataset directory that cannot be listed produces a warning and is skipped.
%
% Dependencies:
%   - load_annotation.m

public_dir = fullfile(base_directory,'public_data');

fprintf('OK, now we will be collecting all the public annotations into a data structure. You SHOULD expect a number of errors here, because some of the source annotations you just downloaded are actually empty. Namely, a bunch of the AIST ones: \nAIST.RWC-MDB-C-2001.CHORUS/RM-C025_A.CHORUS.TXT\nAIST.RWC-MDB-C-2001.CHORUS/RM-C025_D.CHORUS.TXT\nAIST.RWC-MDB-G-2001.CHORUS/RM-G040.CHORUS.TXT\nAIST.RWC-MDB-G-2001.CHORUS/RM-G042.CHORUS.TXT\nEtc... a whole bunch.\n')
fprintf('It might also tell you it cannot read some README files. Do not worry about this. Finally, you will also see the script notify you that it has fixed some of the points in some Beatles annotations. This is because sometimes the numbers in the file are actually out of order. You can look up one of the songs to see an example. It is not really a big issue.\n\n')

% Assemble lists of all the directories where the data live. This section is very hacky!!!

% RWC
rwc_dirs = {fullfile(public_dir,'AIST.RWC-MDB-C-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-G-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-J-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-P-2001.CHORUS')};

% QM, i.e., Isophonics data from Queen Mary
qm_dirs = {fullfile(public_dir,'Carole%20King%20Annotations'), ...
           fullfile(public_dir,'Michael%20Jackson%20Annotations'), ...
           fullfile(public_dir,'Queen%20Annotations'), ...
           fullfile(public_dir,'The%20Beatles%20Annotations'), ...
           fullfile(public_dir,'Zweieck%20Annotations')};

% EP, i.e., data released by Ewald Peiszer
ep_dir = fullfile(public_dir,'ep_groundtruth_txt/groundtruth');
% Or, you could download the original data, and convert the XML files to LAB files using
% the included Ruby script xml2lab.rb.
% ep_dir = fullfile(public_dir,'ep_groundtruth/groundtruth');

% IRISA
irisa_dirs = {fullfile(public_dir,'IRISA.RWC-MDB-P-2001.BLOCKS'), ...
              fullfile(public_dir,'IRISA.RWC-MDB-P-2012.SEMLAB_v003_reduced'), ...
              fullfile(public_dir,'IRISA.RWC-MDB-P-2012.SEMLAB_v003_full')};

% TUT Beatles: every directory reported under public_data/TUT is a subset.
tut_dirs = {};
tut_pattern = fullfile(public_dir,'TUT','*');
[tut_ok, tut_entries] = fileattrib(tut_pattern);
if tut_ok && isstruct(tut_entries)
    for i = 1:numel(tut_entries)
        if tut_entries(i).directory == 1
            tut_dirs{end+1} = tut_entries(i).Name;
        end
    end
else
    % On failure the second output of fileattrib is a message, not a struct array.
    warning('collect_all_public_annotations:noFiles', ...
            'Could not list any files matching:\n %s\n', tut_pattern);
end

% NOTE: the UPF Beatles annotations are not loaded by this function. An earlier
% revision built a list of UPF album directories here, but that list was never read
% and its entries were absolute paths on one developer's machine, so it was dropped.

% SALAMI
salami_dir = fullfile(public_dir,'SALAMI_data_v1.2/data');

dset_origin = [];

% Start from an empty struct array so the output type does not depend on whether
% any file was loaded.
publictruth = struct('tim', {}, 'lab', {}, 'file', {});

% NB: the dataset indices (1..6) passed to append_annotations below are HARD-CODED.

% Load RWC data
% NB: many messages will be printed here because many of the RWC files are empty.
% Unlike the other datasets, no read-permission filter is applied here.
for i = 1:length(rwc_dirs)
    names = list_annotation_files(fullfile(rwc_dirs{i},'*'), false, '', '');
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, names, 'lab', 1, i);
end

% Load Isophonics data (only the .lab files in each 'seglab' folder)
for i = 1:length(qm_dirs)
    names = list_annotation_files(fullfile(qm_dirs{i},'seglab','*'), true, '.lab', '');
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, names, 'lab', 2, i);
end

% Load EP data
names = list_annotation_files(fullfile(ep_dir,'*.txt'), true, '', ...
    'Are you sure you moved the file ep_groundtruth_txt.zip to your public_data directory and unzipped it?');
[publictruth, dset_origin] = append_annotations(publictruth, dset_origin, names, 'two_column', 3, 1);

% Load IRISA data
for i = 1:length(irisa_dirs)
    names = list_annotation_files(fullfile(irisa_dirs{i},'*.lab'), true, '', '');
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, names, 'lab', 4, i);
end

% Load TUT data
for i = 1:length(tut_dirs)
    names = list_annotation_files(fullfile(tut_dirs{i},'*.lab'), true, '', '');
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, names, 'lab', 5, i);
end

% Load SALAMI data (only files whose name ends in 'uppercase.txt').
% The suffix is tested on each file's own name.
names = list_annotation_files(fullfile(salami_dir,'*'), true, 'uppercase.txt', ...
    'Are you sure you unzipped the data.zip file *within* the SALAMI data file?');
[publictruth, dset_origin] = append_annotations(publictruth, dset_origin, names, 'two_column', 6, 1);

% Would you believe that in some of the annotations, two times are in the wrong order? It is simply appalling.
% We fix this here. Only the time points are re-sorted; the labels are left as they are.
for i = 1:length(publictruth)
    sorted_tim = sort(publictruth(i).tim);
    if ~isequal(publictruth(i).tim, sorted_tim)
        publictruth(i).tim = sorted_tim;
        fprintf('Fixed order of time points in this file:%s\n',publictruth(i).file)
    end
end

fprintf('Phew, OK! If you are worried about these errors, be sure to read the comments printed just before it.\n\n')


function names = list_annotation_files(pattern, require_group_read, required_suffix, hint)
% List the non-directory entries that fileattrib reports for PATTERN.
%
%   PATTERN            - path (may contain a wildcard) handed to fileattrib.
%   REQUIRE_GROUP_READ - if true, keep only entries whose GroupRead attribute equals 1.
%   REQUIRED_SUFFIX    - if non-empty, keep only entries whose name ends with this
%                        string (case-sensitive).
%   HINT               - extra text appended to the warning issued when PATTERN
%                        cannot be listed; may be ''.
%
% Returns NAMES, a cell array of the entries' Name values (empty if nothing
% qualifies or the listing fails).
names = {};
[ok, entries] = fileattrib(pattern);
if ~ok || ~isstruct(entries)
    % On failure the second output is a message rather than a struct array, so
    % indexing into it as a file list would error; warn and skip instead.
    warning('collect_all_public_annotations:noFiles', ...
            'Could not list any files matching:\n %s\n%s', pattern, hint);
    return
end
for k = 1:numel(entries)
    if entries(k).directory ~= 0
        continue
    end
    % Written as ~(x == 1) so that a NaN attribute is treated as "not readable".
    if require_group_read && ~(entries(k).GroupRead == 1)
        continue
    end
    if ~has_suffix(entries(k).Name, required_suffix)
        continue
    end
    names{end+1} = entries(k).Name;
end


function tf = has_suffix(str, suffix)
% True if STR ends with SUFFIX (case-sensitive). An empty SUFFIX always matches,
% and a STR shorter than SUFFIX never does.
n = numel(suffix);
tf = (n == 0) || (numel(str) >= n && strcmp(str(end-n+1:end), suffix));


function [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, names, annotation_format, dset_id, subset_id)
% Load each file in NAMES with load_annotation and append the non-empty results.
%
%   PUBLICTRUTH, DSET_ORIGIN - accumulators; returned with one element / row added
%                              per file that loads and has at least one time point.
%   NAMES                    - cell array of file paths.
%   ANNOTATION_FORMAT        - second argument passed to load_annotation
%                              ('lab' or 'two_column' in this file).
%   DSET_ID, SUBSET_ID       - the two values stored in the new DSET_ORIGIN row.
%
% Files that cannot be read, or that yield no time points, are reported with
% fprintf and skipped.
for k = 1:numel(names)
    this_file = names{k};
    try
        [tim, lab] = load_annotation(this_file, annotation_format);
    catch
        fprintf('Error opening or reading the following file. (It might be empty, or not a song file.)\n %s\n', this_file);
        continue
    end
    if isempty(tim)
        fprintf('The following file appears to be empty:\n %s\n', this_file)
        continue
    end
    % Use one explicit index so all three fields land in the same element.
    idx = numel(publictruth) + 1;
    publictruth(idx).tim = tim;
    publictruth(idx).lab = lab;
    publictruth(idx).file = this_file;
    dset_origin = [dset_origin; dset_id subset_id];
end