function [num_compares] = get_comparison_stats()
% [num_compares] = get_comparison_stats()
%
% - retrieves number of comparisons for each pair of data clips
% - extracts genre associations and statistics
%
% Reads the globals db_magnaclips, comparison and comparison_ids.
%
% Returns:
%   num_compares - sparse N-by-N matrix with N = numel(comparison_ids).
%                  The first three columns of every row of comparison are
%                  sorted to (a,b,c) and the entries (a,b), (b,c) and (a,c)
%                  are each incremented by one, so only entries with
%                  row index <= column index are populated.
%
% Side effects:
%   opens three figures (top genres, subset vs. full data set, relative
%   genre loss) and evaluates cgdb.stats without a semicolon at the end,
%   so any result of that call is echoed to the command window.

global db_magnaclips;
global comparison;
global comparison_ids;

num_ids = numel(comparison_ids);

% ---
% count how often each pair of clips shares a comparison triple.
% The matrix is assembled in one call: sparse() sums the values of
% repeated (row,col) subscripts, which avoids the costly element-wise
% update of a sparse matrix inside a loop.
% NOTE: entries of comparison(:,1:3) are used as indices into an
%       N-by-N matrix, so they must lie in 1..N
% ---
if isempty(comparison)
    num_compares = sparse(num_ids, num_ids);
else
    abc = sort(double(comparison(:,1:3)), 2);
    a = abc(:,1);
    b = abc(:,2);
    c = abc(:,3);
    num_compares = sparse([a; b; a], [b; c; c], 1, num_ids, num_ids);
end

% ---
% check for repeated pairs
% ---
% [i,j] = ind2sub(size(num_compares), find(num_compares > 1))
% num = 300;
% a = find((comparison(:,1) == i(num) | comparison(:,2) == i(num) | comparison(:,3) == i(num)) ...
%     & (comparison(:,1) == j(num) | comparison(:,2) == j(num) | comparison(:,3) == j(num)))
%
% comparison(a,:)


% ------------------------------------------------------------
% compare genre frequency
% ---
clips = MTTClip(comparison_ids);

% comparison genres
[genres, scores, gid] = clips.genres();

% all genres
[genres2, scores2, gid2] = db_magnaclips.genredb.stats;

% ---
% plot top genres
% (presumably the genre lists arrive sorted by score - TODO confirm)
% the number of bars is clamped so that short genre lists do not
% cause an out-of-range index
% ---
ntop = min(20, numel(scores));

figure;
bar(scores(1:ntop))
set(gca, 'XTick',1:ntop,'XTickLabel',genres(1:ntop))

ntop = min(10, numel(scores));
ntop2 = min(10, numel(scores2));

figure;
subplot(2,1,1)
bar(scores(1:ntop) / max(scores))
% pin the ticks to the bars so every bar gets its own genre label
set(gca, 'XTick',1:ntop,'XTickLabel',genres(1:ntop))
title 'comparison subset'

subplot(2,1,2)
bar(scores2(1:ntop2) / max(scores2))
set(gca, 'XTick',1:ntop2,'XTickLabel',genres2(1:ntop2),'FontSize', 8)
title 'Full MTT data set'

% ---
% evaluate differences in distribution for all genres
% shown are changes in relation to the whole database(genres2)
%  ( relative to the summed tags )
% ---
ngenres2 = numel(genres2);

% the normalisation constants do not change inside the loop
total = sum(scores);
total2 = sum(scores2);

genrediff = zeros(ngenres2,1);
for i=1:ngenres2
    
%     mgen = strcellfind(genres, genres2(i));
    % position of the i-th database genre within the comparison genres
    mgen = find(gid == gid2(i));
    
    if isempty(mgen)
%         genrediff(i) = inf;
        error('genre ids not consistent');
    end
    
    % genrediff(i,1) = scores(mgen)/total - scores2(i)/total2;
    genrediff(i,1) = 1 - ( (scores2(i)/total2) / (scores(mgen)/total));
end
genrediff = genrediff * 100;

% visualise difference
% tick count and x-range follow the actual number of genres
figure
bar(genrediff);
set(gca, 'XTick',1:ngenres2,'XTickLabel',genres2)
axis([0 ngenres2+1 -200 100]);
title 'relative loss for each genre considering comparison as an excerpt from MTT'
ylabel 'loss(%)'

% ---
% get distinct genre music sets:
% we try the following sets of 'similar' genre tags
% each selected set is excluded from cgdb before the next one is drawn
% ---
cgdb = db_magnaclips.genredb.subset(clips.id);

cids = cgdb.owner({'Classical', 'Baroque'}, 'or');
cgdb = cgdb.exclude(cids);

eids = cgdb.owner({'Electronica', 'New Age', 'Ambient'}, 'or');
cgdb = cgdb.exclude(eids);

rids = cgdb.owner({'Rock', 'Alt Rock', 'Hard Rock', 'Metal'}, 'or');
cgdb = cgdb.exclude(rids);

% get reverse indexing for comparisons:
% rev_compid(comparison_ids(k)) == k
rev_compid = sparse(comparison_ids, 1, 1:numel(comparison_ids));

% NOTE(review): the three fits below are computed but neither returned
% nor displayed; the counts in the comments are the original author's
% observations and are not verified here.

% 8 triples entirely classical 
cfit = get_comparison_linfits(comparison, rev_compid(cids));

% 43 triples entirely electronic
efit = get_comparison_linfits(comparison, rev_compid(eids));

% 6 triples entirely rock
rfit = get_comparison_linfits(comparison, rev_compid(rids));

% statistics of the genres left after removing the three sets
cgdb.stats

end
% out = get_comparison_linfits(comparison, goodset)
%
% For every row of comparison, counts how many distinct values among its
% first three columns also occur in goodset.
%
%   comparison - matrix with at least three columns, one triple per row
%   goodset    - selection of values to look for (transposed before use)
%
%   out        - 1-by-size(comparison,1) row vector of counts
function out = get_comparison_linfits(comparison, goodset)

    n_rows = size(comparison, 1);
    out = zeros(1, n_rows);

    for row = 1:n_rows
        triple = comparison(row, 1:3);

        % intersect returns unique common values, so a value repeated
        % within the triple is counted only once
        shared = intersect(triple, goodset');
        out(row) = numel(shared);
    end
end


