function [num_compares] = get_comparison_stats()
% [num_compares] = get_comparison_stats()
%
% - retrieves number of comparisons for each pair of data clips
% - extracts genre associations and statistics
%
% Inputs are taken from the global workspace:
%   comparison      - matrix whose columns 1:3 hold, per row, three clip
%                     indices (positions within comparison_ids)
%   comparison_ids  - clip ids of all clips taking part in comparisons
%   db_magnaclips   - clip database providing the genre database (genredb)
%
% Output:
%   num_compares    - sparse numel(comparison_ids) x numel(comparison_ids)
%                     matrix; entry (a,b) with a <= b counts the rows of
%                     comparison in which clips a and b appear together
%
% Side effects: opens three figures and prints the statistics of the
% genre database that remains after removing the three genre groups.

global db_magnaclips;
global comparison;
global comparison_ids;

nclips = numel(comparison_ids);

% Sort each triple so that every pair is counted in the upper triangle,
% then let the sparse constructor accumulate repeated (row, col) pairs.
% This replaces element-wise increments of a sparse matrix inside a loop.
% All indices in comparison(:,1:3) must lie in 1..nclips.
triples = sort(double(comparison(:,1:3)), 2);
rows = [triples(:,1); triples(:,2); triples(:,1)];
cols = [triples(:,2); triples(:,3); triples(:,3)];
num_compares = sparse(rows, cols, ones(size(rows)), nclips, nclips);

% ---
% check for repeated pairs
% ---
% [i,j] = ind2sub(size(num_compares), find(num_compares > 1))
% num = 300;
% a = find((comparison(:,1) == i(num) | comparison(:,2) == i(num) | comparison(:,3) == i(num)) ...
%     & (comparison(:,1) == j(num) | comparison(:,2) == j(num) | comparison(:,3) == j(num)))
%
% comparison(a,:)


% ------------------------------------------------------------
% compare genre frequency
% ---
clips = MTTClip(comparison_ids);

% comparison genres
[genres, scores, gid] = clips.genres();

% all genres
[genres2, scores2, gid2] = db_magnaclips.genredb.stats;

% ---
% plot top genres
% ---

% never request more bars than there are genres
ntop = min(20, numel(scores));

figure;
bar(scores(1:ntop))
set(gca, 'XTick',1:ntop,'XTickLabel',genres(1:ntop))

figure;
subplot(2,1,1)
ntop = min(10, numel(scores));
bar(scores(1:ntop) / max(scores))
% set ticks explicitly so that each bar gets exactly its own label
set(gca, 'XTick',1:ntop,'XTickLabel',genres(1:ntop))
title 'comparison subset'

subplot(2,1,2)
ntop2 = min(10, numel(scores2));
bar(scores2(1:ntop2) / max(scores2))
set(gca, 'XTick',1:ntop2,'XTickLabel',genres2(1:ntop2),'FontSize', 8)
title 'Full MTT data set'

% ---
% evaluate differences in distribution for all genres
% shown are changes in relation to the whole database(genres2)
% ( relative to the summed tags )
% ---
ngenres = numel(genres2);
genrediff = zeros(ngenres,1);

% the normalisation constants do not change inside the loop
total = sum(scores);
total2 = sum(scores2);

for i = 1:ngenres

    % match genres via their ids rather than their names
    % mgen = strcellfind(genres, genres2(i));
    mgen = find(gid == gid2(i));

    if ~isempty(mgen)
        % genrediff(i,1) = scores(mgen)/sum(scores) - scores2(i)/sum(scores2);
        genrediff(i,1) = 1 - ( (scores2(i)/total2) / (scores(mgen)/total));

    else
        % genrediff(i) = inf;
        error 'genre ids not consistent'
    end
end
genrediff = genrediff * 100;

% visualise difference
figure
bar(genrediff);
% tick count and x-range follow the actual number of genres
set(gca, 'XTick',1:ngenres,'XTickLabel',genres2)
axis([0 ngenres+1 -200 100]);
title 'relative loss for each genre considering comparison as an excerpt from MTT'
ylabel 'loss(%)'

% ---
% get distinct genre music sets:
% we try the following sets of 'similar' genre tags
% ---
cgdb = db_magnaclips.genredb.subset(clips.id);

% each group is removed from cgdb before the next one is selected,
% so the three id sets do not overlap
cids = cgdb.owner({'Classical', 'Baroque'}, 'or');
cgdb = cgdb.exclude(cids);

eids = cgdb.owner({'Electronica', 'New Age', 'Ambient'}, 'or');
cgdb = cgdb.exclude(eids);

rids = cgdb.owner({'Rock', 'Alt Rock', 'Hard Rock', 'Metal'}, 'or');
cgdb = cgdb.exclude(rids);

% get reverse indexing for comparisons:
% rev_compid(clip id) = position of that id within comparison_ids
rev_compid = sparse(comparison_ids, 1, 1:numel(comparison_ids));

% NOTE: cfit, efit and rfit are neither returned nor used further below.

% 8 triples entirely classical
cfit = get_comparison_linfits(comparison, rev_compid(cids));

% 43 triples entirely electronic
efit = get_comparison_linfits(comparison, rev_compid(eids));

% 6 triples entirely rock
rfit = get_comparison_linfits(comparison, rev_compid(rids));

% display statistics of the genres left after removing the three groups
cgdb.stats

end

% this function returns for each comparison line
% the num. of appearance of given selection of clip ids
%
%   comparison - matrix; columns 1:3 of each row are matched against goodset
%   goodset    - vector of the values to look for
%   out        - 1 x size(comparison,1) vector; out(i) is the number of
%                distinct values of comparison(i,1:3) contained in goodset
function out = get_comparison_linfits(comparison, goodset)

nrows = size(comparison,1);
out = zeros(1, nrows);

% transpose once instead of once per row
wanted = goodset';

% for each line
for i = 1:nrows
    out(i) = numel(intersect(comparison(i,1:3), wanted));
end
end