function [publictruth, dset_origin] = collect_all_public_annotations(base_directory)
% function [publictruth, dset_origin] = collect_all_public_annotations(base_directory)
%
% GET ALL THE DATA!
% This function collects annotations from many public repositories of structural analyses.
% Annotation data (onsets and labels) all go in a single structure array, including file
% locations. Refer to the README file to see what these repositories are and how to
% download them.
%
% BASE_DIRECTORY should be the "mirex_path" specified in "get_mirex_estimates.rb",
% or whatever directory contains all the downloaded MIREX data. For example:
%   "/Users/jordan/Desktop/MIREX_data"
% (That example path is also the default when no argument is given.)
%
% Before running this script, you must have downloaded the original repositories
% to the "mirex_path" directory, and unzipped them. If you did that, then this script
% should be able to find and interpret all the annotations. (Except for those in .xml
% format, which should be pre-processed. Again, refer to the README.)
% A repository that cannot be listed is reported on the console and skipped.
%
% Outputs, for the ith successfully loaded annotation:
%
%   PUBLICTRUTH(i).TIM  = onset times of annotation (sorted into ascending order)
%   PUBLICTRUTH(i).LAB  = labels of sections
%   PUBLICTRUTH(i).FILE = file from which the above information derives
%   DSET_ORIGIN(i,:)    = [dataset subset], numerical indices of the main dataset
%                         (1=RWC, 2=QM, 3=EP, 4=IRISA, 5=TUT, 6=SALAMI) and of the
%                         subset, i.e., the position of the source folder in that
%                         dataset's folder list below (always 1 for EP and SALAMI).
%
% PUBLICTRUTH is always a struct array (0x0 with the three fields if nothing loads).
%
% Dependencies:
%   - load_annotation.m

if nargin<1,
    base_directory = '/Users/jordan/Desktop/MIREX_data';
end

public_dir = fullfile(base_directory,'public_data');

% Assemble lists of all the directories where the data live. This section is very hacky!!!

% RWC
rwc_dirs = {fullfile(public_dir,'AIST.RWC-MDB-C-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-G-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-J-2001.CHORUS'), ...
            fullfile(public_dir,'AIST.RWC-MDB-P-2001.CHORUS')};

% QM, i.e., Isophonics data from Queen Mary
% (The '%20' sequences are literal characters in the folder names.)
qm_dirs = {fullfile(public_dir,'Carole%20King%20Annotations'), ...
           fullfile(public_dir,'Michael%20Jackson%20Annotations'), ...
           fullfile(public_dir,'Queen%20Annotations'), ...
           fullfile(public_dir,'The%20Beatles%20Annotations'), ...
           fullfile(public_dir,'Zweieck%20Annotations')};

% EP, i.e., data released by Ewald Peiszer
ep_dir = fullfile(public_dir,'ep_groundtruth_txt/groundtruth');
% Or, you could download the original data, and convert the XML files to LAB files using
% the Ruby script xml2lab.rb.
% ep_dir = fullfile(public_dir,'ep_groundtruth/groundtruth');

% IRISA
irisa_dirs = {fullfile(public_dir,'IRISA.RWC-MDB-P-2001.BLOCKS'), ...
              fullfile(public_dir,'IRISA.RWC-MDB-P-2012.SEMLAB_v003_reduced'), ...
              fullfile(public_dir,'IRISA.RWC-MDB-P-2012.SEMLAB_v003_full')};

% TUT Beatles: every sub-directory found under <public_dir>/TUT is one subset.
tut_dirs = {};
tut_entries = list_entries(fullfile(public_dir,'TUT','*'));
for i=1:length(tut_entries),
    if tut_entries(i).directory==1,
        tut_dirs{end+1} = tut_entries(i).Name;
    end
end

% NOTE: the UPF Beatles annotations are NOT loaded by this function. An earlier list of
% UPF album folders was built from machine-specific absolute paths and was never read,
% so it has been removed rather than kept as misleading dead code.

% SALAMI
salami_dir = fullfile(public_dir,'SALAMI_data_v1.2/data');

% Start from an empty struct array so the output type does not depend on whether any
% file was actually loaded.
publictruth = struct('tim',{},'lab',{},'file',{});
dset_origin = [];

% NB: The dataset indices (first element of each [dataset subset] pair) are HARD-CODED.

% Load RWC data (every non-directory entry is tried, whatever its extension).
% NB: many flags will be thrown here because many of the RWC files are empty.
for i=1:length(rwc_dirs),
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
        fullfile(rwc_dirs{i},'*'), 'lab', [1 i], false, '');
end

% Load Isophonics data (group-readable '.lab' files inside each 'seglab' folder).
for i=1:length(qm_dirs),
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
        fullfile(qm_dirs{i},'seglab','*'), 'lab', [2 i], true, '.lab');
end

% Load EP data
[publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
    fullfile(ep_dir,'*.txt'), 'two_column', [3 1], true, '');

% Load IRISA data
for i=1:length(irisa_dirs),
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
        fullfile(irisa_dirs{i},'*.lab'), 'lab', [4 i], true, '');
end

% Load TUT data
for i=1:length(tut_dirs),
    [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
        fullfile(tut_dirs{i},'*.lab'), 'lab', [5 i], true, '');
end

% Load SALAMI data (only the files whose name ends in 'uppercase.txt').
[publictruth, dset_origin] = append_annotations(publictruth, dset_origin, ...
    fullfile(salami_dir,'*'), 'two_column', [6 1], true, 'uppercase.txt');

% Would you believe that in some of the annotations, two times are in the wrong order? It is simply appalling.
% We fix this here. (Only the times are sorted; the labels keep their original order.)
for i=1:length(publictruth),
    sorted_tim = sort(publictruth(i).tim);
    if ~isequal(publictruth(i).tim,sorted_tim),
        publictruth(i).tim = sorted_tim;
        fprintf('Fixed order of time points in this file:%s\n',publictruth(i).file)
    end
end


function [publictruth, dset_origin] = append_annotations(publictruth, dset_origin, pattern, format, origin, need_group_read, suffix)
% Load every annotation file matching PATTERN and append it to PUBLICTRUTH.
%
%   PATTERN          wildcard path handed to fileattrib
%   FORMAT           format name forwarded to load_annotation ('lab' or 'two_column')
%   ORIGIN           1x2 row [dataset subset] appended to DSET_ORIGIN per loaded file
%   NEED_GROUP_READ  if true, skip entries whose GroupRead attribute is not 1
%                    (NOTE(review): fileattrib documents GroupRead as NaN on Windows,
%                    so such datasets would load nothing there -- confirm if needed)
%   SUFFIX           if non-empty, skip entries whose full name does not end with it
%
% Files that fail to load, or that yield no onset times, are reported and skipped, so
% PUBLICTRUTH and DSET_ORIGIN always stay the same length.

entries = list_entries(pattern);
for j=1:length(entries),
    name = entries(j).Name;

    if entries(j).directory~=0,
        continue
    end
    if need_group_read && ~isequal(entries(j).GroupRead,1),
        continue
    end
    if ~isempty(suffix),
        % Guard the length first so short names cannot cause an indexing error.
        if length(name)<length(suffix) || ~strcmp(name(end-length(suffix)+1:end),suffix),
            continue
        end
    end

    % Read into temporaries first, so that nothing is appended unless loading succeeds.
    try
        [tim, lab] = load_annotation(name,format);
    catch
        fprintf('Error opening or reading the following file. (It might be empty, or not a song file.)\n %s\n',name);
        continue
    end

    if isempty(tim),
        fprintf('The following file appears to be empty:\n %s\n',name)
        continue
    end

    % Values are wrapped in cells so that a cell-valued field (e.g. a list of labels)
    % is stored in ONE struct element instead of expanding into a struct array.
    publictruth(end+1) = struct('tim',{tim},'lab',{lab},'file',{name});
    dset_origin = [dset_origin; origin];
end


function entries = list_entries(pattern)
% Return the fileattrib attribute structs for everything matching PATTERN, or [] when
% the listing fails (e.g. the dataset was not downloaded). On failure fileattrib
% returns a message string instead of a struct array, which must not be indexed by field.

[status, entries] = fileattrib(pattern);
if ~status || ~isstruct(entries),
    fprintf('Could not list any files matching the following pattern (is the dataset downloaded?):\n %s\n',pattern);
    entries = [];
end