diff core/magnatagatune/get_comparison_stats.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/magnatagatune/get_comparison_stats.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,136 @@
+function [num_compares] = get_comparison_stats()
+% [num_compares] = get_comparison_stats()
+%
+% - retrieves number of comparisons for each pair of data clips
+% - extracts genre associations and statistics (plotted into new figures)
+% Uses the globals db_magnaclips, comparison and comparison_ids; num_compares
+% is sparse, numel(comparison_ids) square, filled only at (a,b) with a <= b.
+
+global db_magnaclips;
+global comparison;
+global comparison_ids;
+
+num_compares = sparse(numel(comparison_ids), numel(comparison_ids));
+
+% count every pair of each triple once, always at the sorted position
+for i = 1:size(comparison,1)
+    abc = sort(comparison(i,1:3));
+    a = abc(1);
+    b = abc(2);
+    c = abc(3);
+    num_compares(a,b) = num_compares(a,b) +1;
+    num_compares(b,c) = num_compares(b,c) +1;
+    num_compares(a,c) = num_compares(a,c) +1;
+end
+
+% --- check for repeated pairs (disabled debugging snippet) ---
+% [i,j] = ind2sub(size(num_compares), find(num_compares > 1))
+% num = 300;
+% a = find((comparison(:,1) == i(num) | comparison(:,2) == i(num) | comparison(:,3) == i(num))...)
+% & (comparison(:,1) == j(num) | comparison(:,2) == j(num) | comparison(:,3) == j(num)))
+%
+% comparison(a,:)
+
+% ------------------------------------------------------------
+% compare genre frequency
+% ---
+clips = MTTClip(comparison_ids);
+
+% comparison genres
+[genres, scores, gid] = clips.genres();
+
+% all genres
+[genres2, scores2, gid2] = db_magnaclips.genredb.stats;
+
+% --- plot top genres ---
+% bar counts are capped at the number of available genres, and ticks are
+% set explicitly so that every bar gets its own genre label
+ntop = min(20, numel(scores));
+figure;
+bar(scores(1:ntop))
+set(gca, 'XTick',1:ntop,'XTickLabel',genres(1:ntop))
+
+ntop = min(10, numel(scores));
+figure;
+subplot(2,1,1)
+bar(scores(1:ntop) / max(scores))
+set(gca, 'XTick',1:ntop,'XTickLabel',genres(1:ntop))
+title 'comparison subset'
+
+ntop2 = min(10, numel(scores2));
+subplot(2,1,2)
+bar(scores2(1:ntop2) / max(scores2))
+set(gca, 'XTick',1:ntop2,'XTickLabel',genres2(1:ntop2),'FontSize', 8)
+title 'Full MTT data set'
+
+% ---
+% evaluate differences in distribution for all genres
+% shown are changes in relation to the whole database(genres2)
+%  ( relative to the summed tags )
+% ---
+genrediff = zeros(numel(genres2),1);
+for i=1:numel(genres2)
+%     mgen = strcellfind(genres, genres2(i));
+    mgen = find(gid == gid2(i));
+    if ~isempty(mgen)
+        % genrediff(i,1) = scores(mgen)/sum(scores) - scores2(i)/sum(scores2);
+        genrediff(i,1) = 1 - ( (scores2(i)/sum(scores2)) / (scores(mgen)/sum(scores)));
+    else
+%         genrediff(i) = inf;
+        error 'genre ids not consistent'
+    end
+end
+genrediff = genrediff * 100;
+
+% visualise difference; ticks and x-range follow the number of genres
+ngen = numel(genres2);
+figure
+bar(genrediff);
+set(gca, 'XTick',1:ngen,'XTickLabel',genres2)
+axis([0 ngen+1 -200 100]);
+title 'relative loss for each genre considering comparison as an excerpt from MTT'
+ylabel 'loss(%)'
+
+% ---
+% get distinct genre music sets:
+% we try the following sets of 'similar' genre tags
+% ---
+cgdb = db_magnaclips.genredb.subset(clips.id);
+
+cids = cgdb.owner({'Classical', 'Baroque'}, 'or');
+cgdb = cgdb.exclude(cids);
+
+eids = cgdb.owner({'Electronica', 'New Age', 'Ambient'}, 'or');
+cgdb = cgdb.exclude(eids);
+
+rids = cgdb.owner({'Rock', 'Alt Rock', 'Hard Rock', 'Metal'}, 'or');
+cgdb = cgdb.exclude(rids);
+
+% get reverse indexing for comparisons
+rev_compid = sparse(comparison_ids, 1, 1:numel(comparison_ids));
+
+% 8 triples entirely classical (cfit/efit/rfit are computed but not returned)
+cfit = get_comparison_linfits(comparison, rev_compid(cids));
+
+% 43 triples entirely electronic
+efit = get_comparison_linfits(comparison, rev_compid(eids));
+
+% 6 triples entirely rock
+rfit = get_comparison_linfits(comparison, rev_compid(rids));
+
+cgdb.stats
+
+end
+% for each comparison line, this function returns the number of distinct
+% clip ids from columns 1-3 that also occur in the given selection goodset
+function out = get_comparison_linfits(comparison, goodset)
+    nrows = size(comparison,1);
+    out = zeros(1, nrows);
+    for row = 1:nrows
+        triple = comparison(row,1:3);
+        shared = intersect(triple, goodset');
+        out(row) = numel(shared);
+    end
+end
+
+