annotate core/magnatagatune/get_magnagenre_numeric.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
function [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof] = get_magnagenre_numeric(clip_info_magnagenres)
% [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof] = ...
%     get_magnagenre_numeric(clip_info_magnagenres)
%
% Uses the clip_info magnagenres_final db to build a numerical genre
% representation of the database. The text-based genre annotation of each
% clip is re-parsed and simple statistics are gathered that may reveal an
% underlying genre hierarchy.
%
% Input:
%   clip_info_magnagenres: cell array with one row per clip; column 3 holds
%       the genre annotation of the clip, which is split at ',' by explode.
%
% Output:
%   clip_magnagenres:   sparse (clips x genres) matrix, 1 where clip i is
%                       tagged with genre g
%   magnagenres:        (genres x 1) cell array of genre tags, in order of
%                       first appearance
%   magnagenre_freqs:   (genres x positions) count of genre g appearing at
%                       tag position p. At least 4 columns once a genre has
%                       been seen; widened if a clip carries more tags.
%   magnagenre_childof: (genres x genres) matrix; entry (c, p) is the
%                       fraction (0..1) of all occurrences of genre c in
%                       which c followed p, where p was the first tag of
%                       the clip
%
% Requires the project helpers explode and strcellfind.
% NOTE(review): tags are used exactly as returned by explode; whether
% surrounding whitespace is stripped depends on that helper - confirm.

data = clip_info_magnagenres(:,3);
num_clips = length(data);

% ---
% genre and genre position-frequency list:
% ---

% preallocated so that every clip keeps its row in the output, even if
% no tag could be extracted for it
magnagen_id = cell(num_clips, 1);

genres = {};

% minimum number of tag positions tracked in genre_freqs
min_simul_genres = 4;
genre_freqs = [];
genre_childof = [];

% for each of the genre annotations
for i = 1:num_clips

    % separate genres
    tmp = explode(',', data{i});

    % find and save corresponding genre indices
    for j = 1:length(tmp)

        % strcellfind is expected to return a value < 1 when the tag is
        % not yet contained in the genre list
        genidx = strcellfind(genres, tmp(j));

        % add genre to genre list if not existent
        if genidx < 1
            genidx = size(genres, 1) + 1;
            genres = cat(1, genres, tmp(j));

            % new zero row, as wide as the table currently is
            num_pos = max(min_simul_genres, size(genre_freqs, 2));
            genre_freqs(genidx, 1:num_pos) = 0;

            % keep the parent table square (genres x genres); assigning
            % the new diagonal element zero-pads the new row and column
            genre_childof(genidx, genidx) = 0;
        end

        % ---
        % here, we save the index to a new genre structure
        % ---
        if j == 1
            magnagen_id{i} = genidx;
        else
            magnagen_id{i} = [magnagen_id{i}, genidx];
        end

        % ---
        % further genre statistics, perhaps it is a hierarchy
        % ---

        % widen the position table if this clip has more tags than any
        % clip seen before (reading a missing column would fail)
        if j > size(genre_freqs, 2)
            genre_freqs(genidx, j) = 0;
        end

        % save frequency by position
        genre_freqs(genidx, j) = genre_freqs(genidx, j) + 1;

        % save parent genre if applicable
        if j == 1

            % the first tag of a clip acts as parent of the following tags
            paridx = genidx;
        else

            % count occurrence of this genre below the parent
            genre_childof(genidx, paridx) = genre_childof(genidx, paridx) + 1;
        end
    end
end

% ---
% this should output quite generic data, to ease
% comparison with other genre hierarchies.
%
% thus, we set the parental relation relative to the overall
% appearance of the child genre
% ---

% make values relative to total occurrence of child;
% every listed genre occurred at least once, so the divisor is >= 1
for i = 1:size(genre_childof, 1)
    genre_childof(i, :) = genre_childof(i, :) ./ sum(genre_freqs(i, :));
end

% ---
% reformat genre attribute table as sparse matrix
% ---
clip_magnagenres = sparse(length(magnagen_id), length(genres));
for i = 1:length(magnagen_id)
    clip_magnagenres(i, magnagen_id{i}) = 1;
end

magnagenres = genres;
magnagenre_freqs = genre_freqs;
magnagenre_childof = genre_childof;