function out = genre_stats(tagged, names, freqs, childof)
% out = genre_stats(tagged, names, freqs, childof)
%
% Plots statistics for genre distributions:
%   1) a bar chart of normalised genre appearance counts next to each
%      genre's "root potential", sorted by appearance count, and
%   2) a pie chart showing how many clips each genre newly contributes
%      when genres are added greedily (best appearance * root potential
%      first) until a fixed fraction of all clips is covered.
%
% Inputs (shapes are inferred from how the arguments are indexed below;
% they are not validated here):
%   tagged  - clips x genres matrix; a nonzero entry (c,g) marks clip c
%             as belonging to genre g
%   names   - cell array of genre labels, one per genre
%   freqs   - matrix with one row per genre; its row sums are used as the
%             overall appearance count of each genre
%   childof - matrix with one row per genre; root potential of a genre is
%             1 minus the sum of its row
%
% Output:
%   out     - always [] (placeholder, kept for interface compatibility)
%
% Side effects: opens up to two figures and prints the name of every
% genre visited by the greedy selection.

% fraction of the dataset that the selected genres have to cover
pctl = 0.98; % 98 percent included

ngenres = numel(names);
nclips  = size(tagged, 1);  % clips are rows; length() could pick the genre axis

% labels as plain char arrays, used for printing, tick labels and legend
labels = cellfun(@char, names, 'UniformOutput', false);

% ---
% overall genre frequency, normalised to the most frequent genre
% ---
allapp = sum(freqs, 2);
peak = max(allapp);
if peak > 0
    % skip normalisation for all-zero input to avoid NaN values
    allapp = allapp / peak;
end

[sortedapp, idx] = sort(allapp, 'descend'); %#ok<ASGLU>

% root potential: 1 minus the row sum of childof
rootpot = 1 - sum(childof, 2);

figure;
bar(1:ngenres, [allapp(idx) rootpot(idx)])
set(gca, 'XTick', 1:ngenres);
set(gca, 'XTickLabel', labels(idx));
legend('#appearances', 'root genre possibility');
title 'genre statistics sorted by frequency of appearances'

% ---
% determine genres that include pctl of the whole dataset
%
% re-sort by appearance and root potential.
% using the multiplication, we can filter out subgenres
% ---
[sortedrt, idxrt] = sort(rootpot .* allapp, 'descend'); %#ok<ASGLU>

% iteratively add 'best' genre according to root potential
covered = false(nclips, 1);      % clips already claimed by an earlier genre
numclips = zeros(1, ngenres);    % newly covered clips per visited genre
num_included = 0;
nvisited = 0;
while nvisited < ngenres && num_included < pctl * nclips

    nvisited = nvisited + 1;
    genre = idxrt(nvisited);
    fprintf('%s \n', labels{genre});

    % count clips found for this genre that no earlier genre covered
    hits = full(tagged(:, genre) ~= 0);
    isnew = hits(:) & ~covered;

    covered = covered | isnew;
    numclips(nvisited) = nnz(isnew);
    num_included = num_included + numclips(nvisited);
end

% keep only the genres that were actually visited
numclips = numclips(1:nvisited);
visited = idxrt(1:nvisited);
shown = numclips > 0;

% pie() cannot draw an empty data set, so only plot if something was found
if any(shown)
    figure;
    pie(numclips(shown) / nclips);
    legend(labels{visited(shown)});
end

out = [];