annotate do_correlation_analyses.m @ 6:e2337cd691b1 tip

Finished writing the MATLAB code to replicate all observations made in the article. Added the article to the repository. Renamed the two main scripts ("1-get_mirex_estimates.rb" and "2-generate_smith2013_ismir.m") to not have dashes (since this was annoying within MATLAB). Added new Michael Jackson figure.
author Jordan Smith <jordan.smith@eecs.qmul.ac.uk>
date Wed, 05 Mar 2014 01:02:26 +0000
parents 92b5a46bc67b
children
rev   line source
jordan@1 1 % Assuming we have followed the factory settings so far, we now have four datasets,
jordan@1 2 % and a whole lot of evaluation metrics. But note that in one evaluation (no. 2, mrx10_1),
jordan@1 3 % we do not want to consider any metrics related to labels, since the ground truth in this
jordan@1 4 % case had arbitrary labels. (It was done using boundary-only IRISA annotations.)
jordan@1 5 % So, we make two logical masks over MIREX_DSET_ORIGIN, LAB_MEASURES and SEG_MEASURES. They are handy.
jordan@1 6 lab_measures = ismember(mirex_dset_origin,[1 3 4]); % true where the origin is dataset 1, 3 or 4 (dataset 2 is left out of label-based measures)
jordan@1 7 seg_measures = ismember(mirex_dset_origin,[1 2 3 4]); % true where the origin is any of the four datasets
jordan@1 8
jordan@1 9 % Now we can do our correlation studies!
jordan@1 10 % First, generate figure 1a. For that, we call the function DO_CORRELATION.
jordan@1 11 % Type HELP DO_CORRELATION to understand what all the arguments mean... The short of it
jordan@1 12 % is that we select the songs, metrics and algorithms to compare, and then choose
jordan@1 13 % whether to take the median across all songs or across all algorithms.
jordan@1 14
jordan@4 15 fprintf('We are making Figure 1a now.\n')
jordan@4 16
jordan@2 17 [asig pval a a_] = do_correlation(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9],...
jordan@1 18 0, 0, 1, 0, indexing_info(1).labels, 0.05); % the "a" and "b" variants differ only in arguments 6 and 7 (0,1 here vs 1,0 below) -- presumably the median-direction switches; see HELP DO_CORRELATION
jordan@1 19 saveas(gcf,'./plots/fig1a.jpg') % saves whatever figure is current after the call; NOTE(review): presumably requires ./plots to exist already
jordan@1 20
jordan@4 21 fprintf('We are making Figure 1b now.\n')
jordan@4 22
jordan@2 23 [asig pval a a_] = do_correlation(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9],...
jordan@1 24 0, 1, 0, 0, indexing_info(1).labels, 0.05);
jordan@1 25 saveas(gcf,'./plots/fig1b.jpg')
jordan@1 26
jordan@4 27 fprintf('We are making Figure 2a now. (This one usually takes a while.)\n')
jordan@4 28
jordan@2 29 [asig pval a a_] = do_correlation(megadatacube, seg_measures, indexing_info(2).manual_set, [1:9],...
jordan@1 30 0, 0, 1, 0, indexing_info(2).labels, 0.05); % Figure 2 uses SEG_MEASURES and the second metric set, INDEXING_INFO(2)
jordan@1 31 saveas(gcf,'./plots/fig2a.jpg')
jordan@1 32
jordan@4 33 fprintf('We are making Figure 2b now.\n')
jordan@4 34
jordan@2 35 [asig pval a a_] = do_correlation(megadatacube, seg_measures, indexing_info(2).manual_set, [1:9],...
jordan@1 36 0, 1, 0, 0, indexing_info(2).labels, 0.05);
jordan@1 37 saveas(gcf,'./plots/fig2b.jpg')
jordan@1 38
jordan@4 39 fprintf('We are making Figure 3 now.\n')
jordan@4 40
jordan@2 41 [asig pval a a_] = do_correlation_fig3_only(megadatacube, lab_measures, [indexing_info(1).manual_set indexing_info(2).manual_set], [1:9], 0, 1, 0, 0, indexing_info(2).all_labels([indexing_info(1).manual_set indexing_info(2).manual_set]), 1, indexing_info(3).manual_set, indexing_info(3).labels);
jordan@1 42 saveas(gcf,'./plots/fig3.jpg')
jordan@1 43
jordan@1 44
jordan@4 45
jordan@4 46 % Now we are done making figures. The following sequences of commands generate output to validate some of the statements in the article.
jordan@4 47
jordan@4 48
jordan@4 49
jordan@6 50 fprintf('Section 3.1: ''Does this indicate that the algorithms are better at boundary precision than recall? In fact, the opposite is the case: average bp6 bp.5 was simply consistently worse for most algorithms.''\n')
jordan@6 51 fprintf('For all algos:\n')
jordan@4 52 mean(median(megadatacube(:,indexing_info(2).manual_set([3 4 7 8]),:),3),1) % median over dimension 3, then mean over dimension 1; no semicolon, so the result is displayed (presumably on purpose)
jordan@6 53 fprintf('For each algo:\n')
jordan@4 54 mean(megadatacube(:,indexing_info(2).manual_set([3 4 7 8]),:),1) % mean over dimension 1 only, keeping one page per index of dimension 3; also displayed
jordan@6 55 fprintf('Recall (the second pair of values) surpass precision (the first pair of values) for most of the algorithm runs. There are two exceptions: algorithms 4 (R a little less than P) and 5 (P much better than R).\n')
jordan@4 56
jordan@6 57 fprintf('Are the trends qualitatively similar across datasets? (Section 3.1: ''...the findings of this section were consistent across the datasets, albeit with some variation in significance levels.'')\n')
jordan@6 58 fprintf('Fig 1a\n')
jordan@6 59 fprintf('All the datasets:\n')
jordan@6 60 figure(1),[asig pval a a_] = do_correlation(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1); % unlike the saved figures, arguments 5 and 8 are -1 and the last argument is 1 instead of 0.05; see HELP DO_CORRELATION for their meaning
jordan@6 61 fprintf('Isophonics et al.:\n')
jordan@6 62 figure(2),[asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,1), indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1); % same call restricted to songs whose origin is dataset 1
jordan@6 63 fprintf('RWC (AIST):\n')
jordan@6 64 figure(3),[asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,3), indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
jordan@6 65 fprintf('SALAMI:\n')
jordan@6 66 figure(4),[asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,4), indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
jordan@6 67 fprintf('Fig 1b\n')
jordan@6 68 fprintf('All the datasets:\n')
jordan@6 69 figure(1), [asig pval a a_] = do_correlation(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1); % NOTE(review): figure(1)-(4) are selected again here, so anything drawn into them by the Fig 1a pass is presumably replaced; nothing in this section is saved to disk
jordan@6 70 fprintf('Isophonics et al.:\n')
jordan@6 71 figure(2), [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,1), indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
jordan@6 72 fprintf('RWC (AIST):\n')
jordan@6 73 figure(3), [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,3), indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
jordan@6 74 fprintf('SALAMI:\n')
jordan@6 75 figure(4), [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,4), indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
jordan@6 76 fprintf('Fig 2a\n')
jordan@6 77 fprintf('All the datasets:\n')
jordan@6 78 figure(1), [asig pval a a_] = do_correlation(megadatacube, seg_measures, indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1); % the Fig 2 passes use the second metric set and also include dataset 2, hence five figures instead of four
jordan@6 79 fprintf('Isophonics et al.:\n')
jordan@6 80 figure(2), [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,1), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
jordan@6 81 fprintf('RWC (INRIA):\n')
jordan@6 82 figure(3), [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,2), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
jordan@6 83 fprintf('RWC (AIST):\n')
jordan@6 84 figure(4), [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,3), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
jordan@6 85 fprintf('SALAMI:\n')
jordan@6 86 figure(5), [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,4), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
jordan@6 87 fprintf('Fig 2b\n')
jordan@6 88 fprintf('All the datasets:\n')
jordan@6 89 figure(1), [asig pval a a_] = do_correlation(megadatacube, seg_measures, indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
jordan@6 90 fprintf('Isophonics et al.:\n')
jordan@6 91 figure(2), [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,1), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
jordan@6 92 fprintf('RWC (INRIA):\n')
jordan@6 93 figure(3), [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,2), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
jordan@6 94 fprintf('RWC (AIST):\n')
jordan@6 95 figure(4), [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,3), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
jordan@6 96 fprintf('SALAMI:\n')
jordan@6 97 figure(5), [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,4), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
jordan@4 98
jordan@4 99
jordan@6 100 fprintf('Section 3.2: ''While the middle half of the values of nsa [number of segments in annotation] ranges from 7 to 13 segments, the middle values for nse [number of segments for estimated description] for most algorithms range from 11 to 20 segments. The two exceptions are MHRAF and OYZS [algorithms 4 and 5], for which both msle and nse match the distributions seen in the annotations.''\n')
jordan@4 101
jordan@6 102 fprintf('Index 17 gives the number of segments in the annotation; 21 gives the number of segments in the estimated description of the algorithm.\n')
jordan@6 103 fprintf('Boxplot shows general trend of overestimating number of segments.\n')
jordan@4 104 H = boxplot(megadatacube(:,[17 21],:))
jordan@6 105 fprintf('Take the middle half of the data for annotated and estimated segments. Look at the range.\n')
jordan@4 106
jordan@4 107 tmp = sort(megadatacube(:,17,:));
jordan@4 108 tmp = sort(tmp(:)); % pool every value of metric 17 into one sorted column
jordan@4 109 tmp(round(length(tmp)/4)), tmp(round(length(tmp)*3/4)) % lower / upper quartile positions; upper index now computed the same way as for TMP2 below
jordan@6 110 fprintf('The middle half of the annotated descriptions have 7 to 13 segments.\n')
jordan@4 111
jordan@4 112 tmp2 = sort(megadatacube(:,21,:)); % sorted along dimension 1, separately for each index of dimension 3
jordan@4 113 [tmp2(round(length(tmp2)/4),:,:), tmp2(round(length(tmp2)*3/4),:,:)]
jordan@6 114 fprintf('Setting aside algorithms 4 and 5, the others all have middle ranges of roughly 11 to 24.\n')
jordan@4 115 tmp2 = sort(tmp2(:)); % now pool all values of metric 21 together
jordan@4 116 tmp2(round(length(tmp2)/4)), tmp2(round(length(tmp2)*3/4)) % round(n*3/4) rather than 3*round(n/4): avoids compounding the rounding error and overshooting the end of very short vectors
jordan@6 117 fprintf('Averaging the other algorithms together, the middle range is exactly 10 to 20.\n')
jordan@4 118
jordan@4 119
jordan@6 120 % Of all the songs that have been pinpointed in one dataset or another, sort them by pw_f, and look at the best and worst performing songs.
jordan@6 121 % PW_F is the 3rd metric (index 3 along the second dimension) of the megadatacube.
jordan@6 122 % Take the mean PW_F across all the algorithms.
jordan@6 123 tmp = mean(megadatacube(:,3,:),3); % NOTE(review): TMP is not read again in the visible part of this script
jordan@6 124 % Now we will rank by song, and look at the origin of the top and bottom PW_Fs.
jordan@4 125
jordan@6 126 find(mirex_dset_origin==1)
jordan@1 127
jordan@4 128 % Look at the best and worst songs of the first dataset, according to PW_F metric (20 of each are listed further down).
jordan@4 129 % Average results across algorithms for this one.
jordan@6 130
jordan@6 131 % First, we will not let the fact that many versions of some algorithms exist skew the results.
jordan@6 132 % So, we replace algo no. 3 with the mean of algos 1, 2, 3 and 9 (KSP1, KSP2, KSP3, and SP1)
jordan@6 133 tmp_datacube = datacube;
jordan@6 134 tmp_datacube(:,:,3) = mean(tmp_datacube(:,:,[1:3,9]),3);
jordan@6 135 % And replace algo 7 with the mean of algos 7 and 8 (SMGA1 and SMGA2)
jordan@6 136 tmp_datacube(:,:,7) = mean(tmp_datacube(:,:,7:8),3);
jordan@6 137 % Now there are just 5 unique algorithms:
jordan@4 138 unique_algorithms = [3 4 5 6 7];
jordan@6 139 % Let TMP_DC_RESULTS be the average performance across the algorithms of the main set of metrics (those in DATACUBE) for all the songs in the first dataset, i.e., Isophonics and Beatles.
jordan@6 140 tmp_dc_results = mean(tmp_datacube(mirex_dset_origin==1,:,unique_algorithms),3);
jordan@6 141 % Sort the songs (rows) in decreasing order of the third metric (which is PW_F)
jordan@6 142 [tmp_dc_results order] = sortrows(tmp_dc_results,-3);
jordan@6 143 % order1 = lab_measures(order);
jordan@6 144 pub_songids = mir2pub(order); % These are the matched IDs of the songs. NOTE(review): ORDER indexes rows of the dataset-1 subset selected above; this lookup is only right if those rows coincide with the leading entries of MIR2PUB -- confirm
jordan@6 145 values = tmp_dc_results((pub_songids>0),3); % We want the match to be >0 --- i.e., we only care about positively identified songs
jordan@6 146 % Now scoop up all the filenames of the songs.
jordan@4 147 filenames = {};
jordan@4 148 for i=1:length(pub_songids),
jordan@4 149 if pub_songids(i)>0, % skip songs without a positive match, so FILENAMES lines up with VALUES
jordan@4 150 filenames{end+1} = public_truth(pub_songids(i)).file;
jordan@4 151 end
jordan@4 152 end
jordan@1 153
jordan@1 154
jordan@6 155 fprintf('Section 4: ''The piece with the highest median pwf is The Beatles'' ''''Her Majesty''''...''\n')
jordan@6 156 fprintf('''The next-best Beatles song, ''''I Will'''', is an instance where both the states and sequences hypotheses apply well...''\n')
jordan@6 157 fprintf('(Note: due to a change in the script, the song ''''Her Majesty'''' is no longer identified properly, and hence does not show up here in the results. Instead, the top two songs are ''''I Will'''' and ''''Penny Lane''''.)\n')
jordan@6 158 fprintf('%f, %s\n',values(1),filenames{1}) % VALUES and FILENAMES both hold matched songs only, so entry k of one belongs to entry k of the other (TMP_DC_RESULTS also contains unmatched songs)
jordan@6 159 fprintf('%f, %s\n',values(2),filenames{2})
jordan@1 160
jordan@6 161 fprintf('Section 4: ''At the bottom is Jackson''s ''''They Don''t Care About Us''''.''\n')
jordan@6 162 fprintf('%f, %s\n',values(end),filenames{end}) % lowest-scoring matched song
jordan@1 163
jordan@6 164 fprintf('Section 4: ''Conspicuously, 17 of the easiest 20 songs (again, with respect to pwf) are Beatles tunes, while only 2 of the most difficult 20 songs are---the rest being Michael Jackson, Queen and Carole King songs.''\n')
jordan@6 165 fprintf('The easiest 20 songs:\n')
jordan@6 166 for i=1:20, % assumes at least 20 matched songs
jordan@6 167 fprintf('%s\n',filenames{i})
jordan@6 168 end
jordan@6 169 fprintf('The hardest 20 songs:\n')
jordan@6 170 for i=1:20,
jordan@6 171 fprintf('%s\n',filenames{end+1-i}) % walk backwards from the worst song
jordan@6 172 end
jordan@6 173
jordan@6 174 values = tmp_dc_results(:,3); % PW_F (third column) for every ranked song ...
jordan@6 175 values = values(pub_songids>0); % ... reduced to the positively matched songs
jordan@6 176 groups = public_dset_origin(pub_songids(pub_songids>0),:); % origin rows for the same matched songs, in the same order
jordan@6 177 artists = zeros(size(values));
jordan@6 178 for i=1:length(values),
jordan@6 179 if groups(i,1)~=2, % NOTE(review): every song whose first origin column is not 2 is put in group 4 -- confirm against the coding of PUBLIC_DSET_ORIGIN
jordan@6 180 artists(i) = 4; % Beatles
jordan@6 181 else
jordan@6 182 artists(i) = groups(i,2); % otherwise the second origin column is used as the group label
jordan@4 183 end
jordan@4 184 end
jordan@6 185 % Kruskal-Wallis test:
jordan@6 186 fprintf('Section 5: ''Taking the median pwf across the algorithms and comparing this value for the 274 annotations identified as one of these four artists, a Kruskal-Wallis test confirms that the groups differ.''\n')
jordan@6 187 [P, anovatab, stats] = kruskalwallis(values, artists);
jordan@6 188 fprintf('Section 5: ''A multiple comparison test reveals that pwf is significantly greater for the Beatles group than the three others.''\n')
jordan@6 189 multcompare(stats) % no semicolon, so the comparison result is displayed
jordan@6 190 fprintf('Note: in the version created for the article, the Zweick songs were not identified, and these sentences refer to 4 artists when in fact these comparisons refer to 5 artists.\n')
jordan@1 191
jordan@6 192 % Create composite structure diagram for Michael Jackson song (the lowest-ranked matched song):
jordan@6 193 make_structure_image(pub2mir(pub_songids(find(pub_songids>0,1,'last'))),mirex_truth, mirex_output, mirex_results, mirex_dset_origin) % take the last positively matched ID (the song reported as filenames{end}); pub_songids(end) itself may be an unmatched, non-positive ID and thus an invalid index
jordan@1 194 saveas(gcf,'./plots/MJ_dont_care.jpg')