diff core/magnatagatune/genre_stats.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/magnatagatune/genre_stats.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,58 @@
+function out = genre_stats(tagged, names, freqs, childof)
+% out = genre_stats(tagged, names, freqs, childof)
+%
+% Calculates and plots statistics for genre distributions.
+%
+%   tagged  : matrix indexed as tagged(clip, genre); a nonzero entry
+%             marks the clip as belonging to that genre
+%   names   : cell array of genre names, one per genre
+%   freqs   : matrix with one row per genre (summed along dimension 2)
+%   childof : matrix with one row per genre (summed along dimension 2)
+%   out     : always [] (results are only plotted and printed)
+
+% get overall genre frequency (relative to the most frequent genre)
+allapp = sum(freqs,2);
+allapp = allapp/max(allapp);
+[unused, idx] = sort(allapp,'descend');
+
+% get root potential
+rootpot = 1 - sum(childof,2);
+
+figure;
+bar(1:numel(names),[allapp(idx) rootpot(idx)])
+set(gca,'XTick',1:numel(names));
+set(gca,'XTickLabel',names(idx));
+legend('#appearances','root genre possibility');
+title 'genre statistics sorted by frequency of appearances'
+
+% determine genres that include pctl of the whole dataset
+pctl = 0.98; % fraction of clips to include (98 percent)
+nclips = size(tagged,1); % clips are rows; length() gives the larger dimension
+
+% re-sort by appearance and root potential.
+% using the multiplication, we can filter out subgenres
+[unused, idxrt] = sort(rootpot.*allapp,'descend');
+
+% iteratively add 'best' genre according to root potential
+covered = false(nclips,1); % clips already claimed by an earlier genre
+numclips = [];             % number of newly covered clips per genre
+num_included = 0;
+i = 1;
+while i <= numel(names) && num_included < pctl * nclips
+    fprintf('%s \n', char(names{idxrt(i)}));
+    % count only the clips of this genre that are not yet covered
+    isnew = (tagged(:,idxrt(i)) ~= 0) & ~covered;
+    covered = covered | isnew;
+    numclips(i) = nnz(isnew);
+    num_included = num_included + numclips(i);
+    i = i + 1;
+end
+
+sel = numclips > 0; % genres that contributed at least one new clip
+if any(sel) % pie() cannot draw an empty selection
+    figure;
+    pie(numclips(sel) / nclips);
+    legend(names{idxrt(sel)});
+end
+out = [];
+