changeset 2:624231da830b

Removed name from comments. Updated readme extensively. Renamed 2 files without significant changes. Added EP data as a bonus.
author Jordan Smith <jordan.smith@eecs.qmul.ac.uk>
date Fri, 20 Sep 2013 17:05:34 +0100
parents 818a4b5f3384
children c5c97558fb2f
files 1-get_mirex_estimates.rb 2-generate_smith2013_ismir.m collect_all_mirex_algo_output_data.m collect_all_mirex_annotations.m collect_all_mirex_results.m collect_all_public_annotations.m do_correlation.m do_correlation3.m do_correlation3_fig3_only.m do_correlation_analyses.m do_correlation_fig3_only.m mirex_data/ep_groundtruth_txt.zip readme.txt xml2txt.rb
diffstat 14 files changed, 302 insertions(+), 304 deletions(-) [+]
line wrap: on
line diff
--- a/1-get_mirex_estimates.rb	Fri Sep 20 16:36:45 2013 +0100
+++ b/1-get_mirex_estimates.rb	Fri Sep 20 17:05:34 2013 +0100
@@ -1,7 +1,8 @@
 require "CSV"
 require "open-uri"
 # require "simplexml"
-mirex_path = "/Users/jordan/Desktop/MIREX_data"    # EDIT THIS TO BE YOUR OWN DESIRED PATH
+mirex_path = "/Users/me/Desktop/MIREX_data"    # EDIT THIS TO BE YOUR OWN DESIRED PATH.
+                                               # IT WILL NEED TO HOLD ROUGHLY 70 MB OF DATA.
 
 def url_download(uri, filename=".")
     tmp = File.open(filename,'w')
--- a/2-generate_smith2013_ismir.m	Fri Sep 20 16:36:45 2013 +0100
+++ b/2-generate_smith2013_ismir.m	Fri Sep 20 17:05:34 2013 +0100
@@ -17,12 +17,26 @@
 % Name the MIREX datasets and algorithms desired.
 dsets = {'mrx09','mrx10_1','mrx10_2','sal'};
 algos = {'KSP1','KSP2','KSP3','MHRAF1','OYZS1','SBV1','SMGA1','SMGA2','SP1'};
-base_directory = '/Users/jordan/Documents/classes/mirex_data/2012';
-base_directory = '/Users/jordan/Desktop/MIREX_data';
+
+% YOU MUST SET THE FOLLOWING PATH YOURSELF!
+% Set it to be the same as the path given at the top of '1-get_mirex_estimates.rb'.
+base_directory = '/Users/me/Desktop/MIREX_data';
+
 % You should get a copy of the evalution scripts in the Code.SoundSoftware
-% repository. Again, please see the README...
-addpath('/Users/jordan/Documents/structural_analysis_evaluation')
+% repository. Wherever you put it, set the following path accordingly:
+addpath('/Users/me/Desktop/whereiputmymatlabfiles/structural_analysis_evaluation')
 
+% Check that we have access to the correct dependencies.
+
+if exist('compare_structures.m')~=2,
+    fprintf('I could not locate ''compare_structures.m'', part of the Structural Analysis Evaluation project. Please read the help for this file before proceeding.\n')
+end
+if exist('load_annotation.m')~=2,
+    fprintf('I could not locate ''load_annotation.m'', part of the Structural Analysis Evaluation project. Please read the help for this file before proceeding.\n')
+end
+if exist('collect_all_mirex_annotations')~=2,
+    fprintf('I could not locate ''collect_all_mirex_annotations.m'', which should be in the same folder as this file. Something really screwed up has happened, clearly! Please read the help for this file before proceeding.\n')
+end
 
 %%
 % STEP 1: Download data from MIREX website:
--- a/collect_all_mirex_algo_output_data.m	Fri Sep 20 16:36:45 2013 +0100
+++ b/collect_all_mirex_algo_output_data.m	Fri Sep 20 17:05:34 2013 +0100
@@ -30,10 +30,6 @@
 % Dependencies:
 %   - load_annotation.m
 
-
-if nargin<1,
-    base_directory = '/Users/jordan/Desktop/MIREX_data';
-end
 if nargin<2,
     dsets = {'mrx09','mrx10_1','mrx10_2','sal'};
 end
--- a/collect_all_mirex_annotations.m	Fri Sep 20 16:36:45 2013 +0100
+++ b/collect_all_mirex_annotations.m	Fri Sep 20 17:05:34 2013 +0100
@@ -8,7 +8,7 @@
 %
 % BASE_DIRECTORY should be the "mirex_path" specified in "get_mirex_estimates.rb",
 % or whatever directory contains all the downloaded MIREX data. For example:
-% "/Users/jordan/Desktop/MIREX_data"
+% "/Users/me/Desktop/MIREX_data"
 %
 % DSETS should contain the names of the datasets. The default value is all of them:
 %   {'mrx09','mrx10_1','mrx10_2','sal'}
@@ -29,10 +29,6 @@
 % Dependencies:
 %   - load_annotation.m
 
-if nargin<1,
-    base_directory = '/Users/jordan/Desktop/MIREX_data';
-end
-
 % YEAR(i).NAMES will contain the NAMES of all the individual song files from YEAR i.
 year = {};
 
--- a/collect_all_mirex_results.m	Fri Sep 20 16:36:45 2013 +0100
+++ b/collect_all_mirex_results.m	Fri Sep 20 17:05:34 2013 +0100
@@ -6,7 +6,7 @@
 %
 % BASE_DIRECTORY should be the "mirex_path" specified in "get_mirex_estimates.rb",
 % or whatever directory contains all the downloaded MIREX data. For example:
-% "/Users/jordan/Desktop/MIREX_data"
+% "/Users/me/Desktop/MIREX_data"
 %
 % DSETS should contain the names of the datasets. The default value is all of them:
 %   {'mrx09','mrx10_1','mrx10_2','sal'}
@@ -22,9 +22,6 @@
 % DATA(k).ALGO(j).RESULTS is a matrix giving the results for the kth dataset and
 % the jth algorithm.
 
-if nargin<1,
-    base_directory = '/Users/jordan/Desktop/MIREX_data';
-end
 if nargin<2,
     dsets = {'mrx09','mrx10_1','mrx10_2','sal'};
 end
--- a/collect_all_public_annotations.m	Fri Sep 20 16:36:45 2013 +0100
+++ b/collect_all_public_annotations.m	Fri Sep 20 17:05:34 2013 +0100
@@ -9,7 +9,7 @@
 %
 % BASE_DIRECTORY should be the "mirex_path" specified in "get_mirex_estimates.rb",
 % or whatever directory contains all the downloaded MIREX data. For example:
-% "/Users/jordan/Desktop/MIREX_data"
+% "/Users/me/Desktop/MIREX_data"
 %
 % Before running this script, you must have downloaded the original repositories
 % to the "mirex_path" directory, and unzipped them. If you did that, then this script
@@ -27,10 +27,6 @@
 % Dependencies:
 %   - load_annotation.m
 
-if nargin<1,
-    base_directory = '/Users/jordan/Desktop/MIREX_data'
-end
-
 public_dir = fullfile(base_directory,'public_data');
 
 % Assemble lists of all the directories where the data live. This section is very hacky!!!
@@ -44,7 +40,7 @@
 % EP, i.e., data released by Ewald Peiszer
 ep_dir = fullfile(public_dir,'ep_groundtruth_txt/groundtruth');
 % Or, you could download the original data, and convert the XML files to LAB files using
-% the Ruby script xml2lab.rb.
+% the included Ruby script xml2lab.rb.
 % ep_dir = fullfile(public_dir,'ep_groundtruth/groundtruth');
 
 % IRISA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/do_correlation.m	Fri Sep 20 17:05:34 2013 +0100
@@ -0,0 +1,111 @@
+function [asig pval a a_] = do_correlation(megacube, songs, metrics, algos, algo_groups, merge_algos, merge_songs, merge_dsets, metric_labels, bonferroni)
+
+% function [asig pval a a_] = do_correlation(megacube, songs, metrics, algos, algo_groups, merge_algos, merge_songs, merge_dsets, metric_labels, bonferroni)
+%
+% Function to compute, analyze and plot the Kendall tau correlations between evaluation metrics.
+% Example usage:
+%    To run your first experiment (Fig 1a) request:
+%    do_correlation(megacube, lab_measures, sind_manual1, [1:9], -1, 0, 1, -1, s_manual1)
+%
+% MEGACUBE is the giant (N songs) x (M metrics) x (L algorithms) matrix of evaluation results.
+% SONGS, METRICS and ALGOS are the indices into these three dimensions desired.
+% ALGO_GROUPS indicates groups of algorithms that should be averaged together rather than counted separately.
+%    (this has not yet been implemented)
+% Set MERGE_ALGOS > 0 in order to compute the median score across algorithms.
+% Set MERGE_SONGS > 0 in order to compute the median score across songs (only consulted when MERGE_ALGOS <= 0).
+% MERGE_DSETS is also not yet implemented.
+% METRIC_LABELS is a matrix of strings, one for each of the METRICS, for use in plotting.
+% Set BONFERRONI > 0 to apply a Bonferroni correction, with BONFERRONI as the significance level. (Default value: 0.05.)
+% Note a few hard-coded decisions, such as:
+%   - uncorrected significance level hard coded as 0.05.
+%   - in the image, decision that |tau| >= 0.8 is strong, |tau| >= 0.33 is weak, and |tau| < 0.33 is nothing.
+
+% Defaults and hard coding values:
+if nargin<10,
+    bonferroni = 0.05;
+end
+significant_p = 0.05;
+maxtau = 0.8;
+mintau = 0.33;
+
+
+% Keep only the requested songs, metrics and algorithms:
+tmpcube = megacube(songs,metrics,algos);
+
+% if exist('algo_groups'),
+%     for i=1:length(algo_groups),
+%         merge the groups somehow...
+%     end
+% end
+
+if merge_algos>0,                   % If we merge algorithms, take the median score across algorithms.
+    tmpcube = median(tmpcube,3);
+elseif merge_songs>0,               % If we merge songs, take the median score across songs.
+    tmpcube = median(tmpcube,1);    % Then, resize the matrix to be 2-d:
+    tmpcube = transpose(reshape(tmpcube,size(tmpcube,2),size(tmpcube,3)));
+end
+
+% Compute Kendall tau correlation:
+[a pval] = corr(tmpcube,'type','Kendall');
+% Apply bonferroni correction (M is the number of distinct pairs of metrics):
+m = length(a)*(length(a)-1)/2;
+asig = pval<significant_p;
+if bonferroni>0,
+    fprintf('Bonferroni applied.\n')
+    asig = (pval*m)<bonferroni; % This is the matrix of values that are significant.
+end
+a_ = (abs(a)>=maxtau) + (abs(a)>=mintau);
+a_ = tril(a_,-1);
+
+% A contains the correlation values themselves.
+% ASIG is a binary matrix that states whether the correlation is statistically significant.
+% A_ is a matrix of 0s, 1s and 2s (built from abs(A), strictly lower triangle only) that says whether a correlation is qualitatively strong (2), qualitatively weak (1), or nada (0).
+% Sometimes values will be statistically significant, but qualitatively insignificant. We do not want to bother looking at these, so
+% let us make our pretty picture carefully.
+
+% The values we display will always be straight from A. The colour we display, to emphasize the strong correlations,
+% should be the element-wise product of A, ASIG, and A_ (so the sign of the colour comes from A).
+% Also:
+%   Iff |tau|>=0.33 (a_>0), include text.
+%   Iff tau is significant (asig=1), include background.
+%   Iff |tau|>=0.8 (a_=2), put in bold.
+%   Iff |tau|>=0.8 AND tau is significant, invert the color of the text (because the colour will be darker).
+
+img = a_.*a.*asig;
+img = img(2:end,1:end-1);   % ignore the diagonal
+clf
+imagesc(img, [-1 1])
+for i=1:length(a_),
+    for j=1:length(a_),
+        if a_(i,j)>0,
+            % |tau| is >=0.33 so we definitely write the value. need to determine fontface and colour.
+            % if |tau|>=.8, put in bold
+            if abs(a_(i,j))>1,
+                fontw = 'bold';
+            else
+                fontw = 'normal';
+            end
+            if abs(a_(i,j))>1 & asig(i,j)==1,
+                textcolor = [1 1 1];
+            else
+                textcolor = [0 0 0];
+            end
+            % h = text(j-.35,i-1,num2str(a(i,j),2),'Color',textcolor);
+            h = text(j,i-1,sprintf('%.2f',a(i,j)),'Color',textcolor,'FontWeight',fontw,'FontSize',12,'HorizontalAlignment','center');
+            set(h,'HorizontalAlignment','center')
+        end
+    end
+end
+cmap_el = transpose([linspace(.3,1,50)]);
+cmap = repmat(cmap_el,1,3);
+cmap = [cmap; flipud(cmap)];
+% Alternatively (this assignment replaces the greyscale map built just above):
+cmap = [ones(size(cmap_el)) cmap_el cmap_el; flipud([cmap_el cmap_el ones(size(cmap_el))])];
+colormap(cmap);
+
+set(gca,'YTickLabel',metric_labels(2:end),'YTick',(1:length(a)-1),'FontAngle','italic','FontSize',12)
+set(gca,'XTickLabel',metric_labels(1:end-1),'XTick',(1:length(a)-1),'FontAngle','italic','FontSize',12)
+% set(gcf,'Position',[1000,1000,700,300])
+% set(gca,'XTickLabel',metric_labels(2:2:end),'YTick',(1:length(a)/2))
+
+% axis([0.5, length(a)-.5, 1.5, length(a)+.5])
\ No newline at end of file
--- a/do_correlation3.m	Fri Sep 20 16:36:45 2013 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,111 +0,0 @@
-function [asig pval a a_] = do_correlation(megacube, songs, metrics, algos, algo_groups, merge_algos, merge_songs, merge_dsets, metric_labels, bonferroni)
-
-% function [asig pval a a_] = do_correlation(megacube, songs, metrics, algos, algo_groups, merge_algos, merge_songs, merge_dsets, metric_labels, bonferroni)
-%
-% Script to make and analyze correlation plot.
-% Example usage:
-%    To run your first experiment (Fig 1a) request:
-%    do_correlation(megacube, lab_measures, sind_manual1, [1:9], -1, 0, 1, -1, s_manual1)
-%
-% MEGACUBE is the giant (N songs) x (M metrics) x (L algorithms) matrix of evaluation results.
-% SONGS, METRICS and ALGOS are the indices into these three dimensions desired.
-% ALGO_GROUPS indicates groups of algorithms that should be averaged together rather than counted separately.
-%    (this has not yet been implemented)
-% Set MERGE_ALGOS > 0 in order to compute the median score across algorithms.
-% Set MERGE_SONGS > 0 in order to compute the median score across songs.
-% MERGE_DSETS is also not yet implemented.
-% METRIC_LABELS is a matrix of strings, one for each of the METRICS, for use in plotting.
-% Set BONFERRONI > 0 in order to apply a bonferroni correction of BONFERRONI. (Default value: 0.05.)
-% Note a few hard-coded decisions, such as:
-%   - significance level hard coded as 0.05.
-%   - in the image, decision that tau > 0.8 is strong, tau > 0.33 is weak, and tau < 0.33 is nothing.
-
-% Defaults and hard coding values:
-if nargin<10,
-    bonferroni = 0.05;
-end
-significant_p = 0.05;
-maxtau = 0.8;
-mintau = 0.33;
-
-
-
-tmpcube = megacube(songs,metrics,algos);
-
-% if exist('algo_groups'),
-%     for i=1:length(algo_groups),
-%         merge the groups somehow...
-%     end
-% end
-
-if merge_algos>0,                   % If we merge algorithms, take the median score across algorithms.
-    tmpcube = median(tmpcube,3);
-elseif merge_songs>0,               % If we merge songs, take the median score across songs.
-    tmpcube = median(tmpcube,1);    % Then, resize the matrix to be 2-d:
-    tmpcube = transpose(reshape(tmpcube,size(tmpcube,2),size(tmpcube,3)));
-end
-
-% Compute Kendall tau correlation:
-[a pval] = corr(tmpcube,'type','Kendall');
-% Apply bonferroni correction:
-m = length(a)*(length(a)-1)/2;
-asig = pval<significant_p;
-if bonferroni>0,
-    fprintf('Bonferroni applied.\n')
-    asig = (pval*m)<bonferroni; % This is the matrix of values that are significant.
-end
-a_ = (abs(a)>=maxtau) + (abs(a)>=mintau);
-a_ = tril(a_,-1);
-
-% A contains the correlation values themselves.
-% ASIG is a binary matrix that states whether the correlation is statistically significant.
-% A_ is a matrix of -2, -1, 0, 1 and 2s that says whether a correlation is qualitatively strong (2), qualitatively weak (1), or nada (0).
-% Sometimes values will be statistically significant, but qualitatively insignificant. We do not want to bother looking at these, so
-% let us make our pretty picture carefully.
-
-% The values we display will always be straight from A. The colour we display, to emphasize the strong correlations,
-% should be the element-wise product of A, ASIG, and A_.
-% Also:
-%   Iff tau>0.33 (a_>0), include text.
-%   Iff tau is significant (asig=1), include background.
-%   Iff tau>0.8 (a_=2), put in bold.
-%   Iff tau>0.8 AND tau is significant, invert the color of the text (because the colour will be darker).
-
-img = a_.*a.*asig;
-img = img(2:end,1:end-1);   % ignore the diagonal
-clf
-imagesc(img, [-1 1])
-for i=1:length(a_),
-    for j=1:length(a_),
-        if a_(i,j)>0,
-            % tau is >0.33 so we definitely write the value. need to determine fontface and colour.
-            % if tau>.8, put in bold
-            if abs(a_(i,j))>1,
-                fontw = 'bold';
-            else
-                fontw = 'normal';
-            end
-            if abs(a_(i,j))>1 & asig(i,j)==1,
-                textcolor = [1 1 1];
-            else
-                textcolor = [0 0 0];
-            end
-            % h = text(j-.35,i-1,num2str(a(i,j),2),'Color',textcolor);
-            h = text(j,i-1,sprintf('%.2f',a(i,j)),'Color',textcolor,'FontWeight',fontw,'FontSize',12,'HorizontalAlignment','center');
-            set(h,'HorizontalAlignment','center')
-        end
-    end
-end
-cmap_el = transpose([linspace(.3,1,50)]);
-cmap = repmat(cmap_el,1,3);
-cmap = [cmap; flipud(cmap)];
-% Alternatively:
-cmap = [ones(size(cmap_el)) cmap_el cmap_el; flipud([cmap_el cmap_el ones(size(cmap_el))])];
-colormap(cmap);
-
-set(gca,'YTickLabel',metric_labels(2:end),'YTick',(1:length(a)-1),'FontAngle','italic','FontSize',12)
-set(gca,'XTickLabel',metric_labels(1:end-1),'XTick',(1:length(a)-1),'FontAngle','italic','FontSize',12)
-% set(gcf,'Position',[1000,1000,700,300])
-% set(gca,'XTickLabel',metric_labels(2:2:end),'YTick',(1:length(a)/2))
-
-% axis([0.5, length(a)-.5, 1.5, length(a)+.5])
\ No newline at end of file
--- a/do_correlation3_fig3_only.m	Fri Sep 20 16:36:45 2013 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
-function [asig pval a a_] = do_correlation3_fig3_only(megacube, songs, metrics, algos, algo_groups, merge_algos, merge_songs, merge_dsets, metric_labels, bonferroni, metrics2, metric_labels2)
-
-% Script to make and analyze correlation plot.
-% Example usage:
-%    To run your first experiment (Fig 1a) request:
-%    do_correlation(megacube, lab_measures, sind_manual1, [1:9], -1, 0, 1, -1, s_manual1)
-%
-% Note a few hard-coded decisions, such as:
-%   - use of 0.05 significance level with Bonferroni correction
-%   - in the image, decision that tau > 0.8 is strong, tau > 0.4 is weak, and tau < 0.4 is nothing.
-
-maxtau = 0.8;
-mintau = 0.33;
-
-
-
-tmpcube1 = megacube(songs,[metrics metrics2],algos);
-tmpcube2 = megacube(songs,metrics2,algos);
-
-
-if merge_algos>0,                   % If we merge algorithms, take the median score across algorithms.
-    tmpcube1 = median(tmpcube1,3);
-    tmpcube2 = median(tmpcube2,3);
-elseif merge_songs>0,               % If we merge songs, take the median score across songs.
-    tmpcube1 = median(tmpcube1,1);
-    tmpcube2 = median(tmpcube2,1);
-    tmpcube1 = transpose(reshape(tmpcube1,size(tmpcube1,2),size(tmpcube1,3)));
-    tmpcube2 = transpose(reshape(tmpcube2,size(tmpcube2,2),size(tmpcube2,3)));
-end
-
-
-% Accept a matrix and its pvalues, determine which values are significant.
-% Matrix is A, pvalues are PVAL
-tic
-[a pval] = corr(tmpcube2, tmpcube1,'type','Kendall');
-toc
-% Apply bonferroni correction:
-m = sum(sum(tril(ones(size(a)), length(metrics)-1)))
-asig = pval<0.05;
-if bonferroni==1,
-    fprintf('Bonferroni applied.\n')
-    asig = (pval*m)<0.05; % This is the matrix of values that are significant.
-end
-
-% Make a pretty picture:
-a_ = (abs(a)>=maxtau) + (abs(a)>=mintau);
-a_ = tril(a_,length(metrics)-1);
-% bg = 2*triu(ones(size(a_)));
-
-
-% A contains the correlation values themselves.
-% ASIG is a binary matrix that states whether the correlation is statistically significant.
-% A_ is a matrix of -2, -1, 0, 1 and 2s that says whether a correlation is qualitatively strong (2), qualitatively weak (1), or nada (0).
-
-% The values we display will always be straight from A. The image we display, though, to emphasize the strong correlations,
-% should be the element-wise product of A, ASIG, and A_.
-
-% So we will only display colours for values that are statistically significant.
-% In addition, we will only put in inverted text those that are qualitatively large (>0.8).
-% However, this leaves the possibility of large correlations (>0.8) that are insignificant, which show up as white text on white background.
-% Therefore, let us change tacks:
-%
-% If tau>0.33 (a_>0), include text.
-% If tau is significant (asig=1), include background.
-% If tau>0.8 (a_=2), put in bold.
-% If tau>0.8 AND significant, invert the color of the text.
-
-img = a_.*a.*asig;
-img = img(:,1:end-1);
-figure,imagesc(img, [-1 1])
-for i=1:size(a_,1),
-    for j=1:size(a_,2),
-        if a_(i,j)>0,
-            % tau is >0.33 so we definitely write the value. need to determine fontface and colour.
-            % if tau>.8, put in bold
-            if abs(a_(i,j))>1,
-                fontw = 'bold';
-            else
-                fontw = 'normal';
-            end
-            if abs(a_(i,j))>1 & asig(i,j)==1,
-                textcolor = [1 1 1];
-            else
-                textcolor = [0 0 0];
-            end
-            % h = text(j-.35,i,num2str(a(i,j),2),'Color',textcolor);
-            h = text(j,i,sprintf('%.2f',a(i,j)),'Color',textcolor,'FontWeight',fontw,'FontSize',8,'HorizontalAlignment','center');
-            set(h,'HorizontalAlignment','center','Rotation',90)
-        end
-    end
-end
-cmap_el = transpose([linspace(.3,1,50)]);
-cmap = repmat(cmap_el,1,3);
-cmap = [cmap; flipud(cmap)];
-% Alternatively:
-cmap = [ones(size(cmap_el)) cmap_el cmap_el; flipud([cmap_el cmap_el ones(size(cmap_el))])];
-colormap(cmap);
-
-set(gca,'YTickLabel',metric_labels2,'YTick',1:size(a,1),'FontAngle','italic','FontSize',10)
-% set(gca,'XTickLabel',[metric_labels metric_labels2],'XTick',1:size(a,2),'FontAngle','italic','FontSize',10)
-
-set(gca,'XTickLabel',[],'XTick',1:size(a,2)-1);
-t = text((1:size(a,2)-1)-.5,size(a,1)*ones(1,size(a,2)-1)+.7,[metric_labels metric_labels2(1:end-1)]);
-set(t,'HorizontalAlignment','right','VerticalAlignment','top', 'Rotation',90,'FontAngle','italic');
-
-
-% set(gcf,'Position',[1000,1000,700,300])
-% set(gca,'XTickLabel',metric_labels(2:2:end),'YTick',(1:length(a)/2))
-
-% axis([0.5, length(a)-.5, 1.5, length(a)+.5])
\ No newline at end of file
--- a/do_correlation_analyses.m	Fri Sep 20 16:36:45 2013 +0100
+++ b/do_correlation_analyses.m	Fri Sep 20 17:05:34 2013 +0100
@@ -12,23 +12,23 @@
 % is that we select the songs, metrics and algorithms to compare, and then choose
 % whether to take the median across all songs or across all algorithms.
 
-[asig pval a a_] = do_correlation3(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9],...
+[asig pval a a_] = do_correlation(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9],...
     0, 0, 1, 0, indexing_info(1).labels, 0.05);
 saveas(gcf,'./plots/fig1a.jpg')
 
-[asig pval a a_] = do_correlation3(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9],...
+[asig pval a a_] = do_correlation(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9],...
     0, 1, 0, 0, indexing_info(1).labels, 0.05);
 saveas(gcf,'./plots/fig1b.jpg')
 
-[asig pval a a_] = do_correlation3(megadatacube, seg_measures, indexing_info(2).manual_set, [1:9],...
+[asig pval a a_] = do_correlation(megadatacube, seg_measures, indexing_info(2).manual_set, [1:9],...
     0, 0, 1, 0, indexing_info(2).labels, 0.05);
 saveas(gcf,'./plots/fig2a.jpg')
 
-[asig pval a a_] = do_correlation3(megadatacube, seg_measures, indexing_info(2).manual_set, [1:9],...
+[asig pval a a_] = do_correlation(megadatacube, seg_measures, indexing_info(2).manual_set, [1:9],...
     0, 1, 0, 0, indexing_info(2).labels, 0.05);
 saveas(gcf,'./plots/fig2b.jpg')
 
-[asig pval a a_] = do_correlation3_fig3_only(megadatacube, lab_measures, [indexing_info(1).manual_set indexing_info(2).manual_set], [1:9], 0, 1, 0, 0, indexing_info(2).all_labels([indexing_info(1).manual_set indexing_info(2).manual_set]), 1, indexing_info(3).manual_set, indexing_info(3).labels);
+[asig pval a a_] = do_correlation_fig3_only(megadatacube, lab_measures, [indexing_info(1).manual_set indexing_info(2).manual_set], [1:9], 0, 1, 0, 0, indexing_info(2).all_labels([indexing_info(1).manual_set indexing_info(2).manual_set]), 1, indexing_info(3).manual_set, indexing_info(3).labels);
 saveas(gcf,'./plots/fig3.jpg')
 
 
@@ -37,27 +37,27 @@
 
 % Are the trends qualitatively similar across datasets?
 % Fig 1a
-figure,[asig pval a a_] = do_correlation3(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
-figure,[asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,1), indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
-figure,[asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,3), indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
-figure,[asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,4), indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
+figure,[asig pval a a_] = do_correlation(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
+figure,[asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,1), indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
+figure,[asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,3), indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
+figure,[asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,4), indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
 % Fig 1b
-figure, [asig pval a a_] = do_correlation3(megadatacube, lab_measures, sind_manual1, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
-figure, [asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,1), indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
-figure, [asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,3), indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
-figure, [asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,4), indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, lab_measures, sind_manual1, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,1), indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,3), indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,4), indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
 % Fig 2a
-figure, [asig pval a a_] = do_correlation3(megadatacube, seg_measures, sind_manual2, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
-figure, [asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,1), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
-figure, [asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,2), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
-figure, [asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,3), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
-figure, [asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,4), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, seg_measures, sind_manual2, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,1), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,2), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,3), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,4), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
 % Fig 2b
-figure, [asig pval a a_] = do_correlation3(megadatacube, seg_measures, sind_manual2, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
-figure, [asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,1), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
-figure, [asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,2), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
-figure, [asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,3), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
-figure, [asig pval a a_] = do_correlation3(megadatacube, ismember(mirex_dset_origin,4), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, seg_measures, sind_manual2, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,1), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,2), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,3), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
+figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,4), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
 
 
 % "Does this indicate that the algorithms are better at boundary precision than recall? In fact, the opposite is the case: average bp6 bp.5 was simply consistently worse for most algorithms."
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/do_correlation_fig3_only.m	Fri Sep 20 17:05:34 2013 +0100
@@ -0,0 +1,110 @@
+function [asig pval a a_] = do_correlation_fig3_only(megacube, songs, metrics, algos, algo_groups, merge_algos, merge_songs, merge_dsets, metric_labels, bonferroni, metrics2, metric_labels2)
+
+% Make and analyze the Fig. 3 correlation plot: Kendall tau between the METRICS2 columns of MEGACUBE
+% (plot rows) and the [METRICS METRICS2] columns (plot columns).
+% Example usage (as called from do_correlation_analyses.m):
+%    do_correlation_fig3_only(megadatacube, lab_measures, metrics, [1:9], 0, 1, 0, 0, labels, 1, metrics2, labels2)
+% ALGO_GROUPS and MERGE_DSETS are accepted but not used; BONFERRONI must equal 1 for the correction to be applied.
+% Note a few hard-coded decisions, such as:
+%   - use of 0.05 significance level with Bonferroni correction
+%   - in the image, decision that |tau| >= 0.8 is strong, |tau| >= 0.33 is weak, and |tau| < 0.33 is nothing.
+
+maxtau = 0.8;
+mintau = 0.33;
+
+
+% TMPCUBE1 holds both metric sets (plot columns); TMPCUBE2 holds only METRICS2 (plot rows).
+tmpcube1 = megacube(songs,[metrics metrics2],algos);
+tmpcube2 = megacube(songs,metrics2,algos);
+
+
+if merge_algos>0,                   % If we merge algorithms, take the median score across algorithms.
+    tmpcube1 = median(tmpcube1,3);
+    tmpcube2 = median(tmpcube2,3);
+elseif merge_songs>0,               % If we merge songs, take the median score across songs.
+    tmpcube1 = median(tmpcube1,1);
+    tmpcube2 = median(tmpcube2,1);
+    tmpcube1 = transpose(reshape(tmpcube1,size(tmpcube1,2),size(tmpcube1,3)));
+    tmpcube2 = transpose(reshape(tmpcube2,size(tmpcube2,2),size(tmpcube2,3)));
+end
+
+
+% Correlate every METRICS2 column with every [METRICS METRICS2] column, then determine which values are significant.
+% Matrix is A, pvalues are PVAL
+tic
+[a pval] = corr(tmpcube2, tmpcube1,'type','Kendall');
+toc
+% Apply bonferroni correction (M counts the entries kept by the TRIL mask that is applied to A_ below):
+m = sum(sum(tril(ones(size(a)), length(metrics)-1)));
+asig = pval<0.05;
+if bonferroni==1,
+    fprintf('Bonferroni applied.\n')
+    asig = (pval*m)<0.05; % This is the matrix of values that are significant.
+end
+
+% Make a pretty picture:
+a_ = (abs(a)>=maxtau) + (abs(a)>=mintau);
+a_ = tril(a_,length(metrics)-1);
+% bg = 2*triu(ones(size(a_)));
+
+
+% A contains the correlation values themselves.
+% ASIG is a binary matrix that states whether the correlation is statistically significant.
+% A_ is a matrix of 0s, 1s and 2s (built from abs(A)) that says whether a correlation is qualitatively strong (2), qualitatively weak (1), or nada (0).
+
+% The values we display will always be straight from A. The image we display, though, to emphasize the strong correlations,
+% should be the element-wise product of A, ASIG, and A_.
+
+% So we will only display colours for values that are statistically significant.
+% In addition, we will only put in inverted text those that are qualitatively large (>=0.8).
+% However, this leaves the possibility of large correlations (>=0.8) that are insignificant, which show up as white text on white background.
+% Therefore, let us change tacks:
+%
+% If |tau|>=0.33 (a_>0), include text.
+% If tau is significant (asig=1), include background.
+% If |tau|>=0.8 (a_=2), put in bold.
+% If |tau|>=0.8 AND significant, invert the color of the text.
+
+img = a_.*a.*asig;
+img = img(:,1:end-1);
+figure,imagesc(img, [-1 1])
+for i=1:size(a_,1),
+    for j=1:size(a_,2),
+        if a_(i,j)>0,
+            % |tau| is >=0.33 so we definitely write the value. need to determine fontface and colour.
+            % if |tau|>=.8, put in bold
+            if abs(a_(i,j))>1,
+                fontw = 'bold';
+            else
+                fontw = 'normal';
+            end
+            if abs(a_(i,j))>1 && asig(i,j)==1,
+                textcolor = [1 1 1];
+            else
+                textcolor = [0 0 0];
+            end
+            % h = text(j-.35,i,num2str(a(i,j),2),'Color',textcolor);
+            h = text(j,i,sprintf('%.2f',a(i,j)),'Color',textcolor,'FontWeight',fontw,'FontSize',8,'HorizontalAlignment','center');
+            set(h,'HorizontalAlignment','center','Rotation',90)
+        end
+    end
+end
+cmap_el = transpose([linspace(.3,1,50)]);
+% Diverging colormap: rows run from red (most negative) through white (zero) to blue (most positive).
+% (The greyscale map that used to be built here was dead code: it was overwritten by the very next
+% assignment, so it has been dropped.)
+cmap = [ones(size(cmap_el)) cmap_el cmap_el; flipud([cmap_el cmap_el ones(size(cmap_el))])];
+colormap(cmap);
+
+set(gca,'YTickLabel',metric_labels2,'YTick',1:size(a,1),'FontAngle','italic','FontSize',10)
+% set(gca,'XTickLabel',[metric_labels metric_labels2],'XTick',1:size(a,2),'FontAngle','italic','FontSize',10)
+
+set(gca,'XTickLabel',[],'XTick',1:size(a,2)-1);
+t = text((1:size(a,2)-1)-.5,size(a,1)*ones(1,size(a,2)-1)+.7,[metric_labels metric_labels2(1:end-1)]);
+set(t,'HorizontalAlignment','right','VerticalAlignment','top', 'Rotation',90,'FontAngle','italic');
+
+
+% set(gcf,'Position',[1000,1000,700,300])
+% set(gca,'XTickLabel',metric_labels(2:2:end),'YTick',(1:length(a)/2))
+
+% axis([0.5, length(a)-.5, 1.5, length(a)+.5])
\ No newline at end of file
Binary file mirex_data/ep_groundtruth_txt.zip has changed
--- a/readme.txt	Fri Sep 20 16:36:45 2013 +0100
+++ b/readme.txt	Fri Sep 20 17:05:34 2013 +0100
@@ -8,54 +8,32 @@
 
 1: You will need Ruby and Matlab and a connection to the Internet.
 
-2: You will need to download a version of the Structural Analysis Evaluation project, also hosted on SoundSoftware. You can donwload it here:
+2: You will need to download a version of the Structural Analysis Evaluation project, also hosted on SoundSoftware. You can do so here:
 <https://code.soundsoftware.ac.uk/projects/structural_analysis_evaluation/repository>
 
-3: You will need to edit some of the Ruby and Matlab files you have downloaded, in order to point the program to the desired folders.
+3: You will need to edit some of the Ruby and Matlab files you have downloaded, in order to point the program to the desired folders:
+   >> In "1-get_mirex_estimates.rb", set the path to download all the data
+      NOTE: We recommend making this the "./mirex_data" path, since some of the data is already there!
+   >> In "2-generate_smith2013_ismir.m", set the exact same path.
+   >> In "2-generate_smith2013_ismir.m", set the path to the "structural analysis evaluation" repository.
 
+4. Run the Ruby script "1-get_mirex_estimates.rb" and wait a while for all the data to download.
 
+5. Unzip all the folders that you obtained.
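+	Note: in this version, one of the repositories, the Ewald Peiszer repository, is already included as a zip file ("mirex_data/ep_groundtruth_txt.zip"). Please move this to the data folder you set in step 3 (if it is not already there) and unzip it along with the others.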
 
-2. 
-- Download data from MIREX website:
-	- Ground truth files
-	- Algorithm output
-	- Reported evaluation results
-
-File: get_mirex_estimates.rb
-Instructions: set local directory for download. Download repositories, unzip, set directories. [Can I write the script to do this?]
-
-(Pre-process this data.)
-
-- Assemble MIREX ground truth file data in Matlab.
-- Assemble MIREX algorithm output data in Matlab.
-- Assemble MIREX evaluation results in Matlab.
-
-- Download public repositories of annotations.
-
-- Assemble public ground truth data in Matlab.
-
-- Compute extra evaluation measures using MIREX algorithm output.
-
-- Compute extra features of the annotations (song length, mean segment length, etc.).
-
-- Compute correlations between all these parameters.
-
-- Display correlation figures.
-
-- Search for matches between MIREX and public ground truth.
-
-- Display analysis result figure.
+6. Run the Matlab script "2-generate_smith2013_ismir" and wait for all the data to be assembled, and for the figures to be generated.
 
+7. You're done! Hey, that wasn't so bad.
 
 
 
+===== The MIT License (MIT) =====
 
-The MIT License (MIT)
-
-Copyright (c) 2013 DDMAL
+Copyright (c) 2013 Jordan B. L. Smith
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
+The software is provided "as is", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. In no event shall the authors or copyright holders be liable for any claim, damages or other liability, whether in an action of contract, tort or otherwise, arising from, out of or in connection with the software or the use or other dealings in the software.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xml2txt.rb	Fri Sep 20 17:05:34 2013 +0100
@@ -0,0 +1,20 @@
+require 'xmlsimple'
+
+# Script to convert Ewald Peiszer's XML annotations to my TXT format annotations.
+
+a = XmlSimple.xml_in(ARGV[0])
+rows = []
+a["segmentation"][0]["segment"].each do |item|
+    rows << [item["start_sec"],item["label"]].join("\t")
+end
+rows << [a["segmentation"][0]["segment"][-1]["end_sec"],"end"].join("\t")
+
+if ARGV[1].nil? then
+    filename = ARGV[0].split(".")[0..-2].join(".")
+else
+    filename = ARGV[1].chomp
+end
+
+c = File.open(filename+".txt",'w')
+c.write(rows)
+c.close
\ No newline at end of file