annotate core/magnatagatune/get_comparison_stats.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
function [num_compares] = get_comparison_stats()
% [num_compares] = get_comparison_stats()
%
% - retrieves the number of comparisons for each pair of data clips
% - extracts genre associations and statistics and plots them
%
% Input is taken from the globals db_magnaclips, comparison and
% comparison_ids. The first three columns of each row of comparison are
% used directly as subscripts into num_compares, i.e. they are treated as
% positions in comparison_ids rather than raw clip ids.
%
% Output:
%   num_compares - sparse matrix; entry (a,b) with a <= b holds the number
%                  of comparison rows in which both a and b occur.
%                  Only the upper triangle is filled.
%
% Side effects: opens three figures (top genres, comparison subset vs.
% full data set, relative genre loss) and displays the genre statistics
% that remain after removing the classical, electronic and rock subsets.

global db_magnaclips;
global comparison;
global comparison_ids;

n_clips = numel(comparison_ids);

% ---
% count co-occurrences of clip pairs
% ---
if isempty(comparison)
    % nothing to count; also covers an uninitialised (0x0) global
    triples = zeros(0, 3);
else
    % sort each triple so every pair lands in the upper triangle
    triples = sort(double(comparison(:, 1:3)), 2);
end

% the three pairs (a,b), (b,c), (a,c) of every sorted triple [a b c]
pair_rows = [triples(:,1); triples(:,2); triples(:,1)];
pair_cols = [triples(:,2); triples(:,3); triples(:,3)];

% sparse() sums the values of repeated subscripts, which replaces the
% element-wise increment loop. The size is at least n_clips x n_clips and
% grows if a subscript exceeds it, as indexed assignment would have done.
n_rows = max([n_clips; pair_rows]);
n_cols = max([n_clips; pair_cols]);
num_compares = sparse(pair_rows, pair_cols, ones(size(pair_rows)), ...
    n_rows, n_cols);

% ---
% check for repeated pairs
% ---
% [i,j] = ind2sub(size(num_compares), find(num_compares > 1))
% num = 300;
% a = find((comparison(:,1) == i(num) | comparison(:,2) == i(num) | comparison(:,3) == i(num))...
%     & (comparison(:,1) == j(num) | comparison(:,2) == j(num) | comparison(:,3) == j(num)))
%
% comparison(a,:)


% ------------------------------------------------------------
% compare genre frequency
% ---
clips = MTTClip(comparison_ids);

% comparison genres
[genres, scores, gid] = clips.genres();

% all genres
[genres2, scores2, gid2] = db_magnaclips.genredb.stats;

% ---
% plot top genres
% ---

% never ask for more bars than there are genres
n_top = min(20, numel(scores));

figure;
bar(scores(1:n_top))
set(gca, 'XTick', 1:n_top, 'XTickLabel', genres(1:n_top))

n_cmp  = min(10, numel(scores));
n_cmp2 = min(10, numel(scores2));

figure;
subplot(2,1,1)
bar(scores(1:n_cmp) / max(scores))
% set ticks and labels together so each bar gets its own genre name
set(gca, 'XTick', 1:n_cmp, 'XTickLabel', genres(1:n_cmp))
title 'comparison subset'

subplot(2,1,2)
bar(scores2(1:n_cmp2) / max(scores2))
set(gca, 'XTick', 1:n_cmp2, 'XTickLabel', genres2(1:n_cmp2), 'FontSize', 8)
title 'Full MTT data set'

% ---
% evaluate differences in distribution for all genres
% shown are changes in relation to the whole database(genres2)
% ( relative to the summed tags )
% ---
n_genres = numel(genres2);
total_scores  = sum(scores);
total_scores2 = sum(scores2);

genrediff = zeros(n_genres, 1);
for i = 1:n_genres

    % mgen = strcellfind(genres, genres2(i));
    mgen = find(gid == gid2(i));

    if ~isempty(mgen)
        % genrediff(i,1) = scores(mgen)/sum(scores) - scores2(i)/sum(scores2);
        genrediff(i,1) = 1 - ( (scores2(i)/total_scores2) / (scores(mgen)/total_scores));

    else
        % genrediff(i) = inf;
        error('genre ids not consistent');
    end
end
genrediff = genrediff * 100;

% visualise difference
figure
bar(genrediff);
% one tick per genre; x-range leaves half a bar of margin on both sides
set(gca, 'XTick', 1:n_genres, 'XTickLabel', genres2)
axis([0 n_genres+1 -200 100]);
title 'relative loss for each genre considering comparison as an excerpt from MTT'
ylabel 'loss(%)'

% ---
% get distinct genre music sets:
% we try the following sets of 'similar' genre tags
% ---
cgdb = db_magnaclips.genredb.subset(clips.id);

cids = cgdb.owner({'Classical', 'Baroque'}, 'or');
cgdb = cgdb.exclude(cids);

eids = cgdb.owner({'Electronica', 'New Age', 'Ambient'}, 'or');
cgdb = cgdb.exclude(eids);

rids = cgdb.owner({'Rock', 'Alt Rock', 'Hard Rock', 'Metal'}, 'or');
cgdb = cgdb.exclude(rids);

% get reverse indexing for comparisons:
% rev_compid(clip_id) is the position of clip_id in comparison_ids
rev_compid = sparse(comparison_ids, 1, 1:numel(comparison_ids));

% The fits below are not returned; they are kept for interactive
% inspection. The triple counts in the comments are the original
% author's notes.

% 8 triples entirely classical
cfit = get_comparison_linfits(comparison, rev_compid(cids)); %#ok<NASGU>

% 43 triples entirely electronic
efit = get_comparison_linfits(comparison, rev_compid(eids)); %#ok<NASGU>

% 6 triples entirely rock
rfit = get_comparison_linfits(comparison, rev_compid(rids)); %#ok<NASGU>

% display the statistics of the genres that are left (no semicolon on purpose)
cgdb.stats

end
% get_comparison_linfits: for every row of comparison, counts how many
% distinct entries of its first three columns are contained in goodset
%
%   comparison - matrix with one comparison per row
%   goodset    - column vector of ids to look for
%   out        - 1 x size(comparison,1) row vector of match counts
function out = get_comparison_linfits(comparison, goodset)

    % the transposed selection is the same for every row
    wanted = goodset';

    % one count per comparison row
    row_idx = 1:size(comparison, 1);
    out = arrayfun(@(r) numel(intersect(comparison(r, 1:3), wanted)), row_idx);
end
wolffd@0 135
wolffd@0 136