function out = genre_stats(tagged, names, freqs, childof)
% out = genre_stats(tagged, names, freqs, childof)
%
% Calculates and plots statistics for genre distributions.
%
% Inputs:
%   tagged  - matrix with one row per clip and one column per genre;
%             a nonzero entry marks the clip as tagged with that genre.
%   names   - cell array of genre names, indexed like the columns of
%             tagged and the rows of freqs / childof.
%   freqs   - per-genre appearance counts, one row per genre; the rows
%             are summed to get an overall appearance count.
%   childof - one row per genre; 1 minus the row sum is used as the
%             "root potential" of the genre.
%             NOTE(review): presumably childof(i,j) weights "genre i is
%             a child of genre j" - confirm against the caller.
%
% Output:
%   out     - always empty ([]); the results are the two figures and the
%             genre names printed to the command window.
%
% Figure 1: bar chart of normalised appearances and root potential,
%           sorted by appearances.
% Figure 2: pie chart of the fraction of clips contributed by each genre
%           that was greedily selected until pctl of all clips is covered.

% get overall genre frequency, normalised to the most frequent genre
allapp = sum(freqs, 2);
allapp = allapp / max(allapp);

% sort genres by frequency of appearance
[~, idx] = sort(allapp, 'descend');

% get root potential
rootpot = 1 - sum(childof, 2);

ngenres = numel(names);

figure;
bar(1:ngenres, [allapp(idx) rootpot(idx)])
set(gca, 'XTick', 1:ngenres);
set(gca, 'XTickLabel', names(idx));
legend('#appearances', 'root genre possibility');
title 'genre statistics sorted by frequency of appearances'

% ---
% determine genres that include x% of the whole dataset
% ---
pctl = 0.98; % 98 percent included

% ---
% re-sort by appearance and root potential.
% using the multiplication, we can filter out subgenres
% ---
[~, idxrt] = sort(rootpot .* allapp, 'descend');

% Clips are the rows of tagged. length(tagged) would return the largest
% dimension and therefore the number of genres whenever there are more
% genres than clips, so the row count is queried explicitly.
nclips = size(tagged, 1);

% iteratively add 'best' genre according to root potential
covered = false(nclips, 1);      % clips already attributed to a genre
numclips = zeros(1, ngenres);    % newly covered clips per selected genre
num_included = 0;
i = 1;
while i <= ngenres && num_included < pctl * nclips

    genre = idxrt(i);
    fprintf('%s \n', char(names{genre}));

    % count clips found for this genre that no earlier genre covered
    fresh = full(tagged(:, genre) ~= 0) & ~covered;
    covered = covered | fresh;
    numclips(i) = nnz(fresh);

    num_included = num_included + numclips(i);
    i = i + 1;
end

% only genres that contributed at least one new clip get a pie slice;
% skip the plot entirely when there is nothing to show
shown = numclips > 0;
if any(shown)
    figure;
    pie(numclips(shown) / nclips);
    legend(names{idxrt(shown)});
end

out = [];
end