% Assuming we have followed the factory settings so far, we now have four datasets,
% and a whole lot of evaluation metrics. But note that in one evaluation (no. 2, mrx10_1),
% we do not want to consider any metrics related to labels, since the ground truth in this
% case had arbitrary labels. (It was done using boundary-only IRISA annotations.)
% So, we make two sets of indices, LAB_MEASURES and SEG_MEASURES. They are handy.
lab_measures = ismember(mirex_dset_origin,[1 3 4]);
seg_measures = ismember(mirex_dset_origin,[1 2 3 4]);

% Now we can do our correlation studies!
% First, generate figure 1a. For that, we call the function DO_CORRELATION.
% Type HELP DO_CORRELATION to understand what all the arguments mean. The short of it
% is that we select the songs, metrics and algorithms to compare, and then choose
% whether to take the median across all songs or across all algorithms.

[asig pval a a_] = do_correlation(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9],...
    0, 0, 1, 0, indexing_info(1).labels, 0.05);
saveas(gcf,'./plots/fig1a.jpg')

[asig pval a a_] = do_correlation(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9],...
    0, 1, 0, 0, indexing_info(1).labels, 0.05);
saveas(gcf,'./plots/fig1b.jpg')

[asig pval a a_] = do_correlation(megadatacube, seg_measures, indexing_info(2).manual_set, [1:9],...
    0, 0, 1, 0, indexing_info(2).labels, 0.05);
saveas(gcf,'./plots/fig2a.jpg')

[asig pval a a_] = do_correlation(megadatacube, seg_measures, indexing_info(2).manual_set, [1:9],...
    0, 1, 0, 0, indexing_info(2).labels, 0.05);
saveas(gcf,'./plots/fig2b.jpg')

[asig pval a a_] = do_correlation_fig3_only(megadatacube, lab_measures, [indexing_info(1).manual_set indexing_info(2).manual_set], [1:9], 0, 1, 0, 0, indexing_info(2).all_labels([indexing_info(1).manual_set indexing_info(2).manual_set]), 1, indexing_info(3).manual_set, indexing_info(3).labels);
saveas(gcf,'./plots/fig3.jpg')


error('The rest of this script is still under construction.')
% % % % % % % % % % % % The rest of this is still under construction, so the error() call above halts the script here.

% Are the trends qualitatively similar across datasets?
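% A compact sketch of one way to check: regenerate the Fig 1a correlation for each
% label-bearing dataset in turn. This assumes do_correlation accepts a logical song mask
% as its second argument (as in the calls above); the remaining arguments copy the
% per-dataset calls below, which do the same thing one figure at a time.
for dset = [1 3 4]    % datasets with usable labels (dataset 2 is boundary-only)
    figure
    do_correlation(megadatacube, ismember(mirex_dset_origin, dset), ...
        indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
end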
% Fig 1a
figure, [asig pval a a_] = do_correlation(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,1), indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,3), indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,4), indexing_info(1).manual_set, [1:9], -1, 0, 1, -1, indexing_info(1).labels, 1);
% Fig 1b
figure, [asig pval a a_] = do_correlation(megadatacube, lab_measures, indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,1), indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,3), indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,4), indexing_info(1).manual_set, [1:9], -1, 1, 0, -1, indexing_info(1).labels, 1);
% Fig 2a
figure, [asig pval a a_] = do_correlation(megadatacube, seg_measures, indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,1), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,2), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,3), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,4), indexing_info(2).manual_set, [1:9], -1, 0, 1, -1, indexing_info(2).labels, 1);
% Fig 2b
figure, [asig pval a a_] = do_correlation(megadatacube, seg_measures, indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,1), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,2), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,3), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);
figure, [asig pval a a_] = do_correlation(megadatacube, ismember(mirex_dset_origin,4), indexing_info(2).manual_set, [1:9], -1, 1, 0, -1, indexing_info(2).labels, 1);


% "Does this indicate that the algorithms are better at boundary precision than recall? In fact, the opposite is the case: average bp6 and bp.5 were simply consistently worse for most algorithms."
% For all algos:
mean(median(megadatacube(:,sind_manual2,:),3),1)
% For each algo:
mean(megadatacube(:,sind_manual2,:),1)


H = boxplot(megadatacube(:,[17 21],:))

tmp = sort(megadatacube(:,17,:));
tmp2 = sort(megadatacube(:,21,:));
tmp2(round(length(tmp2)/4),:,:), tmp2(round(length(tmp2)*3/4),:,:)   % lower and upper quartiles, per algorithm

tmp2 = sort(tmp2(:));
tmp2(round(length(tmp2)/4)), tmp2(3*round(length(tmp2)/4))   % lower and upper quartiles, pooled across algorithms


% % % % % % % % % % % END OF REAL WORK AREA % % % % % % % % % % % % %


clf,imagesc(a.*(abs(a)>.7))
set(gca,'XTickLabel',[],'XTick',(1:50)-.5)
set(gca,'YTickLabel',s,'YTick',(1:50))
t = text((1:50)-.5,51*ones(1,50),s);
set(t,'HorizontalAlignment','right','VerticalAlignment','top', 'Rotation',90);
hold on
for i=1:9,
    plot([0 50],[i*5 i*5],'w')
    plot([i*5 i*5],[0 50],'w')
end

% a = corr([datacube(1:300,:,1) newcube(1:300,:,1) newmetriccube(1:300,:,1)]);

a = corr([datacube(lab_measures,:,1) newcube(lab_measures,:,1) newmetriccube(lab_measures,:,1)]);
b = corr([datacube(seg_measures,:,1) newcube(seg_measures,:,1) newmetriccube(seg_measures,:,1)]);

% Look at label measures only in this case.
imagesc(sortrows(transpose(sortrows((abs(a)>0.7)))))
[t1 t2] = (sortrows(transpose(sortrows((abs(a)>0.7)))));


b = zeros(size(a));
for j=[3,4,5,6,7,9],
    b = b+corr([datacube(:,:,j) newcube(:,:,j) newmetriccube(:,:,j)]);
end
b = b/6;


% Look at correlations among all metrics, but pay attention to p-values too.
% Only plot those less than 0.05, with a conservative Bonferroni correction.
megadatacube_l = [datacube(lab_measures,:,:) newcube(lab_measures,:,:) newmetriccube(lab_measures,:,:)];
megadatacube_s = [datacube(seg_measures,:,:) newcube(seg_measures,:,:) newmetriccube(seg_measures,:,:)];
% megadatacube_l = median(megadatacube_l(:,use_these_labels,:),3);
% megadatacube_s = median(megadatacube_s(:,use_these_segs,:),3);


megadatacube_all = median(megadatacube_l(:,[use_these_labels use_these_segs use_these_extras],:),3);
megadatacube_all(:,16:17) = 1 - megadatacube_all(:,16:17);
[al pval] = corr(megadatacube_all);
m = length(al)*(length(al)-1)/2;
imagesc(al.*((pval*m)<0.05))
al_ = al.*((pval*m)<0.05);
al_ = tril(al_ .* (abs(al_)>.5));
imagesc(al_)
for i=1:length(al_),
    for j=1:length(al_),
        if (al_(i,j)~=0) && (i~=j),
            text(j-.35,i,num2str(al_(i,j),2))
        end
    end
end
[bl pvbl] = corr(megadatacube_all,'type','Kendall');   % needed below (bl, pvbl)
m = length(bl)*(length(bl)-1)/2;
imagesc(bl.*((pvbl*m)<0.05))
bl_ = bl.*((pvbl*m)<0.05);
bl_ = tril(bl_) % .* (abs(bl_)>.0));
imagesc(bl_)
for i=1:length(bl_),
    for j=1:length(bl_),
        if (bl_(i,j)~=0) && (i~=j),
            text(j-.35,i,num2str(bl_(i,j),2))
        end
    end
end

% Or, we could do this: take all the computed Kendall taus, i.e., the non-diagonal elements of bl,
% z-score them, and keep only those that stand out (normal-theory p <= 0.05):
taus = bl(find(bl<1));
taus = taus-mean(taus);
taus = taus/std(taus);
P = normcdf(-abs(taus));
ind = find(P<=0.05);
taus = bl(find(bl<1));   % restore the raw taus before listing the significant ones
taus(ind)

c = colormap;
c(32,:) = [1 1 1];
c(31,:) = [1 1 1];
c = min(1,c*1.6);
colormap(c)
set(gca,'XTickLabel',[],'XTick',(1:length(al_))-.4)
set(gca,'YTickLabel',s([use_these_labels use_these_segs use_these_extras]),'YTick',(1:length(al_)))
t = text((1:length(al_))-.3,(length(al_)+1)*ones(1,length(al_))+.3,s([use_these_labels use_these_segs use_these_extras]));
set(t,'HorizontalAlignment','right','VerticalAlignment','top', 'Rotation',90);
axis([0 31 0 31])
saveas(gcf,'./plots/all_correlations.jpg')

s = {'S_o','S_u','pw_f','pw_p','pw_r','rand','bf1','bp1','br1','bf6','bp6','br6','mt2c','mc2t','ds','len','nsa','nla','msla','nspla','nse','nle','msle','nsple','ob','ol','pw_f_x','pw_p_x','pw_r_x','K','asp','acp','I_AE_x','H_EA_x','H_AE_x','S_o_x','S_u_x','rand','mt2c_x','mc2t_x','m','f','d_ae_x','d_ea_x','b_f1_x','b_p1_x','b_r1_x','b_f6_x','b_p6_x','b_r6_x'};
s_type = [1,2,3,1,2,3,6,4,5,6,4,5,4,5, 7,7,7,7,7,7,7,7,7,7,7,7,3,1,2,3,2,1,3,1,2,1,2, 3,4,5,5,4,7,7,3,1,2,3,1,2];
megadatacube_s(:,40:41,:) = 1 - megadatacube_s(:,40:41,:);
megadatacube_s(:,51,:) = 2*megadatacube_s(:,38,:).*megadatacube_s(:,39,:)./(megadatacube_s(:,38,:)+megadatacube_s(:,39,:));
% This makes a new 51st metric, a combination of m and f (their harmonic mean).
s_type(51) = 6;
s{51} = 'mf';


% [a pval] = corr(median([datacube(lab_measures,:,1) newcube(lab_measures,:,1) newmetriccube(lab_measures,:,1)],3));
[a pval] = corr(mean(megadatacube_l,3));
m = length(a)*(length(a)-1)/2;
imagesc(a.*((pval*m)<0.05))
a_ = a.*((pval*m)<0.05);
c = colormap;
c(32,:) = [1 1 1];
colormap(c)

% I want to make a claim about whether song length correlates with the algorithms' results.
% Let us make sure the claim is valid across all algorithms, and not just for the median:
for j=1:9,
    a = corr([datacube(lab_measures,:,j) newcube(lab_measures,:,j) newmetriccube(lab_measures,:,j)]);
    a(16,[17 19 21 23])
end

% Boxplot of the number of segments in each algorithm's output
boxplot(reshape(newcube(:,7,:),[length(newcube),9,1]))

% Look at the best 10 and worst 10 songs in each dataset, according to the PW_F metric.
% Average results across algorithms for this one.
unique_algorithms = [3 4 5 6 7];
tmp = datacube;
tmp(:,:,3) = mean(tmp(:,:,[1:3,9]),3);
tmp(:,:,7) = mean(tmp(:,:,7:8),3);
tmp = mean(tmp(lab_measures,:,unique_algorithms),3);
[tmp1 order] = sortrows(tmp,-3);
order1 = lab_measures(order);
pub_songids = X.mir2pub(order1);
values = tmp1((pub_songids>0),3);
filenames = {};
for i=1:length(pub_songids),
    if pub_songids(i)>0,
        filenames{end+1} = X.pubanns(pub_songids(i)).file;
    end
end

mirid = pub2mir(336);
make_structure_image(mirid, miranns, MD, mirdset, X, MR)
saveas(gcf,'./plots/MJ_dont_care.jpg')
make_structure_image(121, miranns, MD, mirdset, X, MR)
saveas(gcf,'./plots/play_the_game.jpg')

% Plot difficulty by album:


genres = {};
subgenres = {};
issalami = zeros(length(filenames),1);
for i=1:length(filenames),
    file = filenames{i};
    if strfind(file,'SALAMI_data'),
        issalami(i)=1;
        salami_id = file(79:85);
        salami_id = salami_id(1:strfind(salami_id,'/')-1);
        salami_row = find(aaux.metadata{1}==str2num(salami_id));
        genres{end+1} = cell2mat(aaux.metadata{15}(salami_row));
        subgenres{end+1} = cell2mat(aaux.metadata{16}(salami_row));
    end
end
gs = grp2idx(genres);
subgs = grp2idx(subgenres);
boxplot(values(find(issalami)),transpose(genres))
axis([0.5 5.5 0 1])
saveas(gcf,'salami_breakdown.png')
boxplot(values(find(issalami)),transpose(subgenres),'colors',cmap(round(gs*63/6),:),'orientation','horizontal')

[tmp1 tmp2] = hist(subgs,max(subgs)-1);
tmp1 = find(tmp1>5); % keep only subgenres with more than 5 songs
tmp1 = ismember(subgs,tmp1);
tmp2 = find(issalami);
boxplot(values(tmp2(tmp1)),transpose(subgenres(tmp1)),'colors',cmap(round(gs(tmp1)*63/6),:),'orientation','horizontal')


% Look at scatter plots so that we can qualitatively attribute the correlations to things (e.g., low-precision variance).
tmpcube = mean(datacube,3);
for i=1:4,
    for j=i+1:5,
        subplot(5,5,i+(j-1)*5)
        scatter(tmpcube(:,i),tmpcube(:,j),'x')
    end
end


% Now again, we will want to run the correlation study by taking medians across algorithms
% (do the metrics rank the songs the same way?) and medians across songs
% (do the metrics rank the algorithms the same way?). A compact sketch of both directions follows.
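% A sketch condensing the blocks below, assuming megadatacube_l is songs x metrics x algorithms
% and sind_manual1 selects the label metrics of interest (output names here are just for illustration):

% (1) Median across algorithms (3rd dim): do the metrics rank the SONGS the same way?
tmpcube_songs = median(megadatacube_l(:,sind_manual1,:),3);          % songs x metrics
[a_songs p_songs] = corr(tmpcube_songs);

% (2) Median across songs (1st dim): do the metrics rank the ALGORITHMS the same way?
tmpcube_algos = median(megadatacube_l(:,sind_manual1,:),1);          % 1 x metrics x algorithms
tmpcube_algos = transpose(reshape(tmpcube_algos,size(tmpcube_algos,2),size(tmpcube_algos,3)));  % algorithms x metrics
[a_algos p_algos] = corr(tmpcube_algos,'type','Kendall');            % rank correlation: only 9 rows, so use Kendall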

% Take the label metrics only, take the median across songs:
% tmpcube = median(megadatacube_l(:,sind_manual1,:),1);
% tmpcube = transpose(reshape(tmpcube,size(tmpcube,2),size(tmpcube,3)));
% [a pval] = corr(tmpcube,'type','Kendall');
% m = length(a)*(length(a)-1)/2;
% a.*((pval*m)<0.05); % This is the matrix of values that are significant.
% Alternatively, we can plot all the metrics, treat them as random normal variables, and select only those that stand out.


% [asig pval a] = do_correlation(megadatacube, songs, metrics, algos, algo_groups, merge_algos (1 = do, 0 = do not), merge_songs, merge_dsets, metric_labels)
[asig pval a] = do_correlation(megadatacube, lab_measures, sind_manual1, [1:9], -1, 0, 1, -1, s_manual1)
[asig pval a] = do_correlation(megadatacube, lab_measures, [use_these_labels use_these_segs], [1:9], -1, 0, 1, -1, s([use_these_labels use_these_segs]))

[asig pval a] = do_correlation(megadatacube, lab_measures, [1:12], [1:9], -1, 0, 1, -1, s(1:12))


[a pval] = corr(megadatacube_l(:,:,1),'type','Kendall');


% Take the label metrics only, take the median across algorithms:
tmpcube = median(megadatacube_l(:,sind_manual1,:),3);
[a pval] = corr(tmpcube); %,'type','Kendall');
m = length(a)*(length(a)-1)/2;
a.*((pval*m)<0.05); % This is the matrix of values that are significant.
% However, with so many data points (over 1400), it is very easy for a correlation to be significant...


imagesc(a.*((pval*m)<0.05))
al_ = a.*((pval*m)<0.05);
al_ = tril(al_ .* (abs(al_)>.5));
imagesc(al_)
for i=1:length(al_),
    for j=1:length(al_),
        if (al_(i,j)~=0) && (i~=j),
            text(j-.35,i,num2str(al_(i,j),2))
        end
    end
end


clf,imagesc(a.*(abs(a)>.7))
set(gca,'XTickLabel',[],'XTick',(1:50)-.5)
set(gca,'YTickLabel',s,'YTick',(1:50))
t = text((1:50)-.5,51*ones(1,50),s);
set(t,'HorizontalAlignment','right','VerticalAlignment','top', 'Rotation',90);
hold on
for i=1:9,
    plot([0 50],[i*5 i*5],'w')
    plot([i*5 i*5],[0 50],'w')
end

% a = corr([datacube(1:300,:,1) newcube(1:300,:,1) extracube(1:300,:,1)]);

a = corr([datacube(lab_measures,:,1) newcube(lab_measures,:,1) extracube(lab_measures,:,1)]);
b = corr([datacube(seg_measures,:,1) newcube(seg_measures,:,1) extracube(seg_measures,:,1)]);

% Look at label measures only in this case.
imagesc(sortrows(transpose(sortrows((abs(a)>0.7)))))
[t1 t2] = (sortrows(transpose(sortrows((abs(a)>0.7)))));


b = zeros(size(a));
for j=[3,4,5,6,7,9],
    b = b+corr([datacube(:,:,j) newcube(:,:,j) extracube(:,:,j)]);
end
b = b/6;


% Look at correlations among all metrics, but pay attention to p-values too.
% Only plot those less than 0.05, with a conservative Bonferroni correction.
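% A minimal sketch of the Bonferroni logic used below (variable names here are just for
% illustration, and the lines are left commented so they do not run out of order): with K
% metrics there are K*(K-1)/2 pairwise correlations, so each p-value is compared against
% 0.05 divided by that count, which is exactly what the (pval*m) < 0.05 test below does.
% [rho pval] = corr(some_songs_by_metrics_matrix);   % hypothetical input
% m = size(rho,1)*(size(rho,1)-1)/2;                 % number of pairwise tests
% significant = (pval*m) < 0.05;                     % same as pval < 0.05/m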
megadatacube_l = [datacube(lab_measures,:,:) newcube(lab_measures,:,:) extracube(lab_measures,:,:)];
megadatacube_s = [datacube(seg_measures,:,:) newcube(seg_measures,:,:) extracube(seg_measures,:,:)];
% megadatacube_l = median(megadatacube_l(:,use_these_labels,:),3);
% megadatacube_s = median(megadatacube_s(:,use_these_segs,:),3);


megadatacube_all = median(megadatacube_l(:,[use_these_labels use_these_segs use_these_extras],:),3);
megadatacube_all(:,16:17) = 1 - megadatacube_all(:,16:17);
[al pval] = corr(megadatacube_all);
m = length(al)*(length(al)-1)/2;
imagesc(al.*((pval*m)<0.05))
al_ = al.*((pval*m)<0.05);
al_ = tril(al_ .* (abs(al_)>.5));
imagesc(al_)
for i=1:length(al_),
    for j=1:length(al_),
        if (al_(i,j)~=0) && (i~=j),
            text(j-.35,i,num2str(al_(i,j),2))
        end
    end
end
[bl pvbl] = corr(megadatacube_all,'type','Kendall');   % needed below (bl, pvbl)
m = length(bl)*(length(bl)-1)/2;
imagesc(bl.*((pvbl*m)<0.05))
bl_ = bl.*((pvbl*m)<0.05);
bl_ = tril(bl_) % .* (abs(bl_)>.0));
imagesc(bl_)
for i=1:length(bl_),
    for j=1:length(bl_),
        if (bl_(i,j)~=0) && (i~=j),
            text(j-.35,i,num2str(bl_(i,j),2))
        end
    end
end

% Or, we could do this: take all the computed Kendall taus, i.e., the non-diagonal elements of bl,
% z-score them, and keep only those that stand out (normal-theory p <= 0.05):
taus = bl(find(bl<1));
taus = taus-mean(taus);
taus = taus/std(taus);
P = normcdf(-abs(taus));
ind = find(P<=0.05);
taus = bl(find(bl<1));   % restore the raw taus before listing the significant ones
taus(ind)

c = colormap;
c(32,:) = [1 1 1];
c(31,:) = [1 1 1];
c = min(1,c*1.6);
colormap(c)
set(gca,'XTickLabel',[],'XTick',(1:length(al_))-.4)
set(gca,'YTickLabel',s([use_these_labels use_these_segs use_these_extras]),'YTick',(1:length(al_)))
t = text((1:length(al_))-.3,(length(al_)+1)*ones(1,length(al_))+.3,s([use_these_labels use_these_segs use_these_extras]));
set(t,'HorizontalAlignment','right','VerticalAlignment','top', 'Rotation',90);
axis([0 31 0 31])
saveas(gcf,'./plots/all_correlations.jpg')

s = {'S_o','S_u','pw_f','pw_p','pw_r','rand','bf1','bp1','br1','bf6','bp6','br6','mt2c','mc2t','ds','len','nsa','nla','msla','nspla','nse','nle','msle','nsple','ob','ol','pw_f_x','pw_p_x','pw_r_x','K','asp','acp','I_AE_x','H_EA_x','H_AE_x','S_o_x','S_u_x','rand','mt2c_x','mc2t_x','m','f','d_ae_x','d_ea_x','b_f1_x','b_p1_x','b_r1_x','b_f6_x','b_p6_x','b_r6_x'};
s_type = [1,2,3,1,2,3,6,4,5,6,4,5,4,5, 7,7,7,7,7,7,7,7,7,7,7,7,3,1,2,3,2,1,3,1,2,1,2, 3,4,5,5,4,7,7,3,1,2,3,1,2];
megadatacube_s(:,38:39,:) = 1 - megadatacube_s(:,38:39,:);
megadatacube_s(:,51,:) = 2*megadatacube_s(:,38,:).*megadatacube_s(:,39,:)./(megadatacube_s(:,38,:)+megadatacube_s(:,39,:));
% This makes a new 51st metric, a combination of m and f (their harmonic mean).
s_type(51) = 6;
s{51} = 'mf';


% [a pval] = corr(median([datacube(lab_measures,:,1) newcube(lab_measures,:,1) extracube(lab_measures,:,1)],3));
[a pval] = corr(mean(megadatacube_l,3));
m = length(a)*(length(a)-1)/2;
imagesc(a.*((pval*m)<0.05))
a_ = a.*((pval*m)<0.05);
c = colormap;
c(32,:) = [1 1 1];
colormap(c)

% I want to make a claim about whether song length correlates with the algorithms' results.
% Let us make sure the claim is valid across all algorithms, and not just for the median:
for j=1:9,
    a = corr([datacube(lab_measures,:,j) newcube(lab_measures,:,j) extracube(lab_measures,:,j)]);
    a(16,[17 19 21 23])
end

% Boxplot of the number of segments in each algorithm's output
boxplot(reshape(newcube(:,7,:),[length(newcube),9,1]))

% Look at the best 10 and worst 10 songs in each dataset, according to the PW_F metric.
% Average results across algorithms for this one.
unique_algorithms = [3 4 5 6 7];
tmp = datacube;
tmp(:,:,3) = mean(tmp(:,:,[1:3,9]),3);
tmp(:,:,7) = mean(tmp(:,:,7:8),3);
tmp = mean(tmp(lab_measures,:,unique_algorithms),3);
[tmp1 order] = sortrows(tmp,-3);
order1 = lab_measures(order);
pub_songids = X.mir2pub(order1);
values = tmp1((pub_songids>0),3);
filenames = {};
for i=1:length(pub_songids),
    if pub_songids(i)>0,
        filenames{end+1} = public_truth(pub_songids(i)).file;
    end
end

mirid = pub2mir(336);
make_structure_image(mirid, mirex_truth, mirex_output, mirex_dset_origin, X, mirex_results)
saveas(gcf,'./plots/MJ_dont_care.jpg')
make_structure_image(121, mirex_truth, mirex_output, mirex_dset_origin, X, mirex_results)
saveas(gcf,'./plots/play_the_game.jpg')

% Plot difficulty by album:


genres = {};
subgenres = {};
issalami = zeros(length(filenames),1);
for i=1:length(filenames),
    file = filenames{i};
    if strfind(file,'SALAMI_data'),
        issalami(i)=1;
        salami_id = file(79:85);
        salami_id = salami_id(1:strfind(salami_id,'/')-1);
        salami_row = find(aaux.metadata{1}==str2num(salami_id));
        genres{end+1} = cell2mat(aaux.metadata{15}(salami_row));
        subgenres{end+1} = cell2mat(aaux.metadata{16}(salami_row));
    end
end
gs = grp2idx(genres);
subgs = grp2idx(subgenres);
boxplot(values(find(issalami)),transpose(genres))
axis([0.5 5.5 0 1])
saveas(gcf,'salami_breakdown.png')
boxplot(values(find(issalami)),transpose(subgenres),'colors',cmap(round(gs*63/6),:),'orientation','horizontal')

[tmp1 tmp2] = hist(subgs,max(subgs)-1);
tmp1 = find(tmp1>5); % keep only subgenres with more than 5 songs
tmp1 = ismember(subgs,tmp1);
tmp2 = find(issalami);
boxplot(values(tmp2(tmp1)),transpose(subgenres(tmp1)),'colors',cmap(round(gs(tmp1)*63/6),:),'orientation','horizontal')