function [publictruth dset_origin] = collect_all_public_annotations(base_directory)
% COLLECT_ALL_PUBLIC_ANNOTATIONS  Gather public structural annotations into one struct array.
%
%   [publictruth dset_origin] = collect_all_public_annotations(base_directory)
%
% GET ALL THE DATA!
% This function collects annotations from many public repositories of structural
% analyses. Annotation data (onsets and labels) all go in a single structure array,
% including file locations. Refer to the README file to see what these repositories
% are and how to download them.
%
% BASE_DIRECTORY should be the "mirex_path" specified in "get_mirex_estimates.rb",
% or whatever directory contains all the downloaded MIREX data. For example:
%   "/Users/me/Desktop/MIREX_data"
% The annotations are looked for in the "public_data" subdirectory of BASE_DIRECTORY.
%
% Before running this script, you must have downloaded the original repositories
% to the "mirex_path" directory, and unzipped them. If you did that, then this script
% should be able to find and interpret all the annotations. (Except for those in .xml
% format, which should be pre-processed. Again, refer to the README.)
% A dataset whose files cannot be listed is reported and skipped; it does not abort
% the collection of the remaining datasets.
%
% Outputs:
%   PUBLICTRUTH      struct array with the following fields for the ith annotation:
%       PUBLICTRUTH(i).TIM  = onset times of annotation
%       PUBLICTRUTH(i).LAB  = labels of sections
%       PUBLICTRUTH(i).FILE = file from which the above information derives
%   DSET_ORIGIN      N-by-2 matrix; row i holds the numerical index of the main
%                    dataset (1=RWC, 2=QM/Isophonics, 3=EP, 4=IRISA, 5=TUT, 6=SALAMI)
%                    and the index of the subset (directory) within that dataset
%                    for PUBLICTRUTH(i).
%
% Dependencies:
%   - load_annotation.m

public_dir = fullfile(base_directory,'public_data');

fprintf(['OK, now we will be collecting all the public annotations into a data structure. ' ...
    'You SHOULD expect a number of errors here, because some of the source annotations you just downloaded are actually empty. ' ...
    'Namely, a bunch of the AIST ones: \n' ...
    'AIST.RWC-MDB-C-2001.CHORUS/RM-C025_A.CHORUS.TXT\n' ...
    'AIST.RWC-MDB-C-2001.CHORUS/RM-C025_D.CHORUS.TXT\n' ...
    'AIST.RWC-MDB-G-2001.CHORUS/RM-G040.CHORUS.TXT\n' ...
    'AIST.RWC-MDB-G-2001.CHORUS/RM-G042.CHORUS.TXT\n' ...
    'Etc... a whole bunch.\n']);
fprintf(['It might also tell you it cannot read some README files. Do not worry about this. ' ...
    'Finally, you will also see the script notify you that it has fixed some of the points in some Beatles annotations. ' ...
    'This is because sometimes the numbers in the file are actually out of order. ' ...
    'You can look up one of the songs to see an example. It is not really a big issue.\n\n']);

% Assemble lists of all the directories where the data live. This section is very hacky!!!

% RWC
rwc_dirs = {fullfile(public_dir,'AIST.RWC-MDB-C-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-G-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-J-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-P-2001.CHORUS')};

% QM, i.e., Isophonics data from Queen Mary
qm_dirs = {fullfile(public_dir,'Carole%20King%20Annotations'), ...
           fullfile(public_dir,'Michael%20Jackson%20Annotations'), ...
           fullfile(public_dir,'Queen%20Annotations'), ...
           fullfile(public_dir,'The%20Beatles%20Annotations'), ...
           fullfile(public_dir,'Zweieck%20Annotations')};

% EP, i.e., data released by Ewald Peiszer
ep_dir = fullfile(public_dir,'ep_groundtruth_txt/groundtruth');
% Or, you could download the original data, and convert the XML files to LAB files using
% the included Ruby script xml2lab.rb.
% ep_dir = fullfile(public_dir,'ep_groundtruth/groundtruth');

% IRISA
irisa_dirs = {fullfile(public_dir,'IRISA.RWC-MDB-P-2001.BLOCKS'), ...
              fullfile(public_dir,'IRISA.RWC-MDB-P-2012.SEMLAB_v003_reduced'), ...
              fullfile(public_dir,'IRISA.RWC-MDB-P-2012.SEMLAB_v003_full')};

% TUT Beatles: every subdirectory found under public_data/TUT is one subset.
tut_entries = list_files(fullfile(public_dir,'TUT','*'));
tut_dirs = {};
for i=1:length(tut_entries),
    if tut_entries(i).directory==1,
        tut_dirs{end+1} = tut_entries(i).Name;
    end
end

% UPF Beatles
% NOTE(review): this list is not used anywhere below, i.e., no UPF annotations are
% loaded by this function. The original entries were absolute paths from the author's
% machine (with shell escapes) joined onto public_dir; they are kept here as names
% relative to public_dir. The unescaped White Album names are an assumption - confirm
% against the unzipped data before using this list.
upf_albums = {'01_-_Please_please_me_1963', '02_-_With_The_Beatles_1963', ...
              '03_-_A_hard_days_night_1964', '04_-_Beatles_for_sale_1964', ...
              '05_-_Help_1965', '06_-_Rubber_Soul', '07_-_Revolver', ...
              '08_-_Sgt._Pepper''s_Lonely_Hearts_Club_Band', '09_-_Magical_Mystery_Tour', ...
              '10_-_The_Beatles (White Album) CD1', '10_-_The_Beatles (White Album) CD2', ...
              '11_-_Abbey_Road', '12_-_Let_it_Be'};
upf_dirs = cellfun(@(album) fullfile(public_dir,album), upf_albums, 'UniformOutput', false); %#ok<NASGU>

% SALAMI
salami_dir = fullfile(public_dir,'SALAMI_data_v1.2/data');

% Start from properly typed empty containers so that appending always works.
dset_origin = zeros(0,2);
publictruth = struct('tim', {}, 'lab', {}, 'file', {});

% NB: the main-dataset indices (1 to 6) below are HARD-CODED.

% Load RWC data
% NB: many flags will be thrown here because many of the RWC files are empty.
% (No read-permission or file-name filter is applied to this dataset.)
for i=1:length(rwc_dirs),
    [publictruth dset_origin] = append_annotations(publictruth, dset_origin, ...
        fullfile(rwc_dirs{i},'*'), 'lab', [1 i], false, '');
end

% Load Isophonics data (only the .lab files in each "seglab" directory)
for i=1:length(qm_dirs),
    [publictruth dset_origin] = append_annotations(publictruth, dset_origin, ...
        fullfile(qm_dirs{i},'seglab','*'), 'lab', [2 i], true, '.lab');
end

% Load EP data
% NOTE WELL: if you encounter an error here, are you sure you moved the file
% ep_groundtruth_txt.zip to your public_data directory and unzipped it?
[publictruth dset_origin] = append_annotations(publictruth, dset_origin, ...
    fullfile(ep_dir,'*.txt'), 'two_column', [3 1], true, '');

% Load IRISA data
for i=1:length(irisa_dirs),
    [publictruth dset_origin] = append_annotations(publictruth, dset_origin, ...
        fullfile(irisa_dirs{i},'*.lab'), 'lab', [4 i], true, '');
end

% Load TUT data
for i=1:length(tut_dirs),
    [publictruth dset_origin] = append_annotations(publictruth, dset_origin, ...
        fullfile(tut_dirs{i},'*.lab'), 'lab', [5 i], true, '');
end

% Load SALAMI data (only the files whose names end in "uppercase.txt")
% NOTE WELL: if you encounter an error here, are you sure you unzipped the data.zip
% file *within* the SALAMI data file?
[publictruth dset_origin] = append_annotations(publictruth, dset_origin, ...
    fullfile(salami_dir,'*'), 'two_column', [6 1], true, 'uppercase.txt');

% Would you believe that in some of the annotations, two times are in the wrong order?
% It is simply appalling. We fix this here. (Only the times are sorted; the labels
% keep their original order.)
for i=1:length(publictruth),
    sorted_tim = sort(publictruth(i).tim);
    if ~isequal(publictruth(i).tim,sorted_tim),
        publictruth(i).tim = sorted_tim;
        fprintf('Fixed order of time points in this file:%s\n',publictruth(i).file)
    end
end

fprintf('Phew, OK! If you are worried about these errors, be sure to read the comments printed just before it.\n\n')


function [publictruth dset_origin] = append_annotations(publictruth, dset_origin, pattern, format, dset_id, require_group_read, name_suffix)
% Load every annotation file matching PATTERN and append it to the outputs.
%   FORMAT              format name handed to load_annotation ('lab' or 'two_column')
%   DSET_ID             [main_dataset subset] row appended to DSET_ORIGIN per loaded file
%   REQUIRE_GROUP_READ  if true, skip entries whose GroupRead attribute is not 1
%   NAME_SUFFIX         if non-empty, skip files whose name does not end with it
% Files that cannot be read, or that yield no onset times, are reported and skipped.
files = list_files(pattern);
for j=1:length(files),
    name = files(j).Name;
    if files(j).directory~=0,
        continue
    end
    if require_group_read && ~isequal(files(j).GroupRead,1),
        continue
    end
    if ~has_suffix(name,name_suffix),
        continue
    end
    % Load into temporaries first, so that a failed or empty read never leaves a
    % half-filled record behind.
    try
        [tim lab] = load_annotation(name,format);
    catch
        fprintf('Error opening or reading the following file. (It might be empty, or not a song file.)\n %s\n',name);
        continue
    end
    if isempty(tim),
        fprintf('The following file appears to be empty:\n %s\n',name)
        continue
    end
    publictruth(end+1).tim = tim;
    publictruth(end).lab = lab;
    publictruth(end).file = name;
    dset_origin(end+1,:) = dset_id;
end


function files = list_files(pattern)
% Return the fileattrib records for PATTERN, or an empty struct array (after printing
% a notice) when nothing can be listed. On failure fileattrib returns a message string
% instead of a struct array, which must not be indexed like one.
[status attributes] = fileattrib(pattern);
if status && isstruct(attributes),
    files = attributes;
else
    fprintf('Could not list any files matching the following pattern. (Was this dataset downloaded and unzipped? Refer to the README.)\n %s\n',pattern);
    files = struct('Name', {}, 'directory', {}, 'GroupRead', {});
end


function tf = has_suffix(str, suffix)
% True if SUFFIX is empty or STR ends with SUFFIX (safe for STR shorter than SUFFIX).
n = length(suffix);
tf = (n==0) || (length(str)>=n && strcmp(str(end-n+1:end),suffix));