annotate collect_all_public_annotations.m @ 1:818a4b5f3384

Initial version, includes all files Deleted previous stuff which was junk
author Jordan Smith <jordan.smith@eecs.qmul.ac.uk>
date Fri, 20 Sep 2013 16:36:45 +0100
parents
children 624231da830b
rev   line source
function [publictruth, dset_origin] = collect_all_public_annotations(base_directory)
% [publictruth, dset_origin] = collect_all_public_annotations(base_directory)
%
% GET ALL THE DATA!
% This function collects annotations from many public repositories of structural
% analyses. Annotation data (onsets and labels) all go in a single structure array,
% including file locations. Refer to the README file to see what these repositories
% are and how to download them.
%
% BASE_DIRECTORY should be the "mirex_path" specified in "get_mirex_estimates.rb",
% or whatever directory contains all the downloaded MIREX data. For example:
%   "/Users/jordan/Desktop/MIREX_data"
% (This example path is also the default used when no argument is given.)
% The annotations are looked for in the "public_data" subdirectory of BASE_DIRECTORY.
%
% Before running this script, you must have downloaded the original repositories
% to the "mirex_path" directory, and unzipped them. If you did that, then this script
% should be able to find and interpret all the annotations. (Except for those in .xml
% format, which should be pre-processed. Again, refer to the README.)
%
% Outputs:
%
%   PUBLICTRUTH is a structure array with the following fields for the ith song:
%     PUBLICTRUTH(i).TIM  = onset times of annotation (sorted into ascending order)
%     PUBLICTRUTH(i).LAB  = labels of sections
%     PUBLICTRUTH(i).FILE = file from which the above information derives
%
%   DSET_ORIGIN is a matrix with one row per song, [dataset subset], where:
%     dataset = 1 (RWC), 2 (QM/Isophonics), 3 (EP), 4 (IRISA), 5 (TUT), 6 (SALAMI)
%     subset  = index of the directory within that dataset's directory list
%               (always 1 for EP and SALAMI, which have a single directory)
%
% Files that cannot be read, or whose annotation is empty, are reported on the
% console and skipped. A warning is issued for any directory in which no files
% can be listed.
%
% Dependencies:
%   - load_annotation.m

if nargin < 1,
    base_directory = '/Users/jordan/Desktop/MIREX_data';
end

public_dir = fullfile(base_directory,'public_data');

% Assemble lists of all the directories where the data live. This section is very hacky!!!

% RWC
rwc_dirs = {fullfile(public_dir,'AIST.RWC-MDB-C-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-G-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-J-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-P-2001.CHORUS')};

% QM, i.e., Isophonics data from Queen Mary
qm_dirs = {fullfile(public_dir,'Carole%20King%20Annotations'), ...
           fullfile(public_dir,'Michael%20Jackson%20Annotations'), ...
           fullfile(public_dir,'Queen%20Annotations'), ...
           fullfile(public_dir,'The%20Beatles%20Annotations'), ...
           fullfile(public_dir,'Zweieck%20Annotations')};

% EP, i.e., data released by Ewald Peiszer
ep_dir = fullfile(public_dir,'ep_groundtruth_txt/groundtruth');
% Or, you could download the original data, and convert the XML files to LAB files using
% the Ruby script xml2lab.rb.
% ep_dir = fullfile(public_dir,'ep_groundtruth/groundtruth');

% IRISA
irisa_dirs = {fullfile(public_dir,'IRISA.RWC-MDB-P-2001.BLOCKS'), ...
              fullfile(public_dir,'IRISA.RWC-MDB-P-2012.SEMLAB_v003_reduced'), ...
              fullfile(public_dir,'IRISA.RWC-MDB-P-2012.SEMLAB_v003_full')};

% TUT Beatles: every subdirectory found under public_data/TUT.
tut_dirs = {};
tut_pattern = fullfile(public_dir,'TUT','*');
[tut_status, tutfiles] = fileattrib(tut_pattern);
if tut_status && isstruct(tutfiles),
    for i = 1:length(tutfiles),
        if tutfiles(i).directory == 1,
            tut_dirs{end+1} = tutfiles(i).Name;
        end
    end
else
    % On failure the second output of fileattrib is a message, not a struct array.
    warning('collect_all_public_annotations:noFiles', ...
        'Could not list any files matching:\n %s', tut_pattern);
end

% UPF Beatles
% NOTE(review): this list is assembled but never loaded below; no UPF annotations
% end up in the output. The folder names are now taken relative to public_dir
% (they used to be joined onto a hard-coded absolute path), and the White Album
% names are written without shell-style backslash escapes -- confirm these
% against the unzipped data before adding a loader for them.
upf_albums = {'01_-_Please_please_me_1963', ...
              '02_-_With_The_Beatles_1963', ...
              '03_-_A_hard_days_night_1964', ...
              '04_-_Beatles_for_sale_1964', ...
              '05_-_Help_1965', ...
              '06_-_Rubber_Soul', ...
              '07_-_Revolver', ...
              '08_-_Sgt._Pepper''s_Lonely_Hearts_Club_Band', ...
              '09_-_Magical_Mystery_Tour', ...
              '10_-_The_Beatles (White Album) CD1', ...
              '10_-_The_Beatles (White Album) CD2', ...
              '11_-_Abbey_Road', ...
              '12_-_Let_it_Be'};
upf_dirs = cell(size(upf_albums));
for i = 1:length(upf_albums),
    upf_dirs{i} = fullfile(public_dir, upf_albums{i});
end

% SALAMI
salami_dir = fullfile(public_dir,'SALAMI_data_v1.2/data');

% Start with empty outputs. PUBLICTRUTH is an empty structure array that already
% has its three fields, so songs can be appended one at a time.
dset_origin = [];
publictruth = struct('tim', {}, 'lab', {}, 'file', {});

% NB: the dataset indices (1 to 6) passed below are HARD-CODED.

% Load RWC data. Every regular file in each directory is tried.
% NB: many flags will be thrown here because many of the RWC files are empty.
for i = 1:length(rwc_dirs),
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
        strcat(rwc_dirs{i},'/*'), 'lab', [1 i], false, '');
end

% Load Isophonics data: group-readable .lab files in each "seglab" subdirectory.
for i = 1:length(qm_dirs),
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
        fullfile(qm_dirs{i},'seglab','*'), 'lab', [2 i], true, '.lab');
end

% Load EP data
[publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
    strcat(ep_dir,'/*.txt'), 'two_column', [3 1], true, '');

% Load IRISA data
for i = 1:length(irisa_dirs),
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
        strcat(irisa_dirs{i},'/*.lab'), 'lab', [4 i], true, '');
end

% Load TUT data
for i = 1:length(tut_dirs),
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
        strcat(tut_dirs{i},'/*.lab'), 'lab', [5 i], true, '');
end

% Load SALAMI data: only the files whose names end in "uppercase.txt".
[publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
    strcat(salami_dir,'/*'), 'two_column', [6 1], true, 'uppercase.txt');

% Would you believe that in some of the annotations, two times are in the wrong order?
% It is simply appalling. We fix this here.
% NOTE(review): only the times are sorted; the labels are left in their original
% order. Confirm that this is the intended repair for the affected files.
for i = 1:length(publictruth),
    sorted_tim = sort(publictruth(i).tim);
    if ~isequal(publictruth(i).tim, sorted_tim),
        publictruth(i).tim = sorted_tim;
        fprintf('Fixed order of time points in this file:%s\n',publictruth(i).file)
    end
end


function [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, file_pattern, annotation_format, origin_row, require_group_read, required_suffix)
% Load every annotation file matching FILE_PATTERN and append it to the outputs.
%
%   FILE_PATTERN       = path (may contain wildcards) handed to FILEATTRIB
%   ANNOTATION_FORMAT  = format name handed to LOAD_ANNOTATION ('lab', 'two_column')
%   ORIGIN_ROW         = [dataset subset] row appended to DSET_ORIGIN for each song
%   REQUIRE_GROUP_READ = if true, skip files whose GroupRead attribute is not 1
%   REQUIRED_SUFFIX    = if non-empty, skip files whose name does not end with it
%
% Directories are always skipped. A file that makes LOAD_ANNOTATION fail, or
% that yields no onset times, is reported on the console and not appended.

[status, all_files] = fileattrib(file_pattern);
if ~status || ~isstruct(all_files),
    % On failure the second output of fileattrib is a message, not a struct array.
    warning('collect_all_public_annotations:noFiles', ...
        'Could not list any files matching:\n %s', file_pattern);
    return
end

for j = 1:length(all_files),
    file_name = all_files(j).Name;
    if all_files(j).directory ~= 0,
        continue
    end
    if require_group_read && ~(all_files(j).GroupRead == 1),
        continue
    end
    if ~has_suffix(file_name, required_suffix),
        continue
    end
    try
        [tim, lab] = load_annotation(file_name, annotation_format);
        if isempty(tim),
            fprintf('The following file appears to be empty:\n %s\n',file_name)
        else
            k = length(publictruth) + 1;
            publictruth(k).tim = tim;
            publictruth(k).lab = lab;
            publictruth(k).file = file_name;
            dset_origin = [dset_origin; origin_row];
        end
    catch
        fprintf('Error opening or reading the following file. (It might be empty, or not a song file.)\n %s\n',file_name);
    end
end


function tf = has_suffix(str, suffix)
% True if STR ends with SUFFIX. An empty SUFFIX matches every string.
n = length(suffix);
if n == 0,
    tf = true;
else
    tf = length(str) >= n && strcmp(str(end-n+1:end), suffix);
end