Mercurial > hg > camir-aes2014
diff core/magnatagatune/genre_stats.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
function out = genre_stats(tagged, names, freqs, childof)
% out = genre_stats(tagged, names, freqs, childof)
%
% Calculates and plots statistics for genre distributions.
%
% Inputs:
%   tagged  - matrix indexed as tagged(clip, genre); a nonzero entry marks
%             the clip as carrying that genre
%   names   - cell array of genre names, one entry per genre
%   freqs   - matrix with one row per genre; rows are summed to obtain the
%             overall number of appearances of each genre
%   childof - matrix with one row per genre; rows are summed to derive the
%             "root potential" (presumably childof(i,j) ~= 0 means genre i
%             is a child of genre j - TODO confirm against caller)
%
% Output:
%   out     - always empty ([]); the results are shown in two figures
%             (bar chart and pie chart) and printed to the console
%

% number of clips: tagged is addressed as tagged(:, genre), so clips are
% rows. length(tagged) would return the larger dimension instead, which is
% wrong whenever there are more genres than clips.
nclips = size(tagged, 1);

% get overall genre frequency and sort accordingly
allapp = sum(freqs, 2);
peak = max(allapp);
if ~isempty(peak) && peak > 0
    % normalise to [0, 1]; skipped for all-zero input to avoid NaNs
    allapp = allapp / peak;
end

[null, idx] = sort(allapp, 'descend');

% get root potential
rootpot = 1 - sum(childof, 2);

figure;
bar(1:numel(names), [allapp(idx) rootpot(idx)])
set(gca, 'XTick', 1:numel(names));
set(gca, 'XTickLabel', names(idx));
legend('#appearances', 'root genre possibility');
title 'genre statistics sorted by frequency of appearances'

% ---
% determine genres that include x% of the whole dataset
% ---
pctl = 0.98; % 98 percent included

% ---
% re-sort by appearance and root potential.
% using the multiplication, we can filter out subgenres
% ---
[null, idxrt] = sort(rootpot .* allapp, 'descend');

% iteratively add 'best' genre according to root potential
gotclips = [];
numclips = [];
num_included = 0;
i = 1;
while i <= numel(names) && num_included < pctl * nclips

    % count clips found for this genre
    fprintf('%s \n', char(names{idxrt(i)}));

    % only clips not already covered by a previously chosen genre count
    newclips = setdiff(find(tagged(:, idxrt(i)))', gotclips);

    gotclips = [gotclips newclips];
    numclips(i) = numel(newclips);

    num_included = num_included + numclips(i);
    i = i + 1;
end

% genres that actually contributed new clips, in selection order
contributed = find(numclips > 0);

% pie/legend fail on empty data, so only plot when something was selected
if ~isempty(contributed)
    figure;
    pie(numclips(contributed) / nclips);
    legend(names{idxrt(contributed)});
end

out = [];
end