comparison core/magnatagatune/get_magnagenre_numeric.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
function [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof] = get_magnagenre_numeric(clip_info_magnagenres)
% GET_MAGNAGENRE_NUMERIC  numeric genre representation of the clip database
%
% [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof] = ...
%     get_magnagenre_numeric(clip_info_magnagenres)
%
% Re-imports the text-based genre tags of the clips and tries to
% determine an underlying structure (e.g. a genre hierarchy).
%
% input:
%   clip_info_magnagenres: cell array with one row per clip; column 3 is
%       read as the genre tag of the clip and split at ','.
%       NOTE(review): presumably each entry is a comma-separated string
%       such as 'Rock,Hard Rock' - confirm against the caller.
%
% output:
%   clip_magnagenres:   sparse (#clips x #genres) matrix, entry (i,g) is 1
%                       if genre g was found in the tag of clip i
%   magnagenres:        column cell array of genre names, in order of
%                       first appearance
%   magnagenre_freqs:   entry (g,p) counts how often genre g appeared
%                       at position p of a tag (at least 4 columns)
%   magnagenre_childof: (#genres x #genres) matrix, entry (c,p) is the
%                       number of times genre c followed the first genre p
%                       in a tag, relative to the total number of
%                       occurrences of c

data = clip_info_magnagenres(:,3);
num_clips = length(data);

% ---
% genre and genre position frequency list:
% ---

% preallocated so that clips without any genre still get a (zero) row
magnagen_id = cell(num_clips, 1);

genres = {};

% minimum number of position columns in genre_freqs
max_simul_genres = 4;
genre_freqs = [];
genre_childof = [];

% for each of the genre tags
for i = 1:num_clips

    % separate genres
    tmp = explode(',', data{i});

    % find and save corresponding genre indices
    for j = 1:length(tmp)
        % NOTE(review): strcellfind is assumed to return a value < 1
        % when the genre is not in the list yet - confirm.
        genidx = strcellfind(genres, tmp(j));

        % add genre to genre list if not existent
        if genidx < 1
            genidx = size(genres, 1) + 1;
            genres = cat(1, genres, tmp(j));

            % new zero row; keeps the current width of the table in
            % case it has already grown beyond max_simul_genres
            num_pos = max(size(genre_freqs, 2), max_simul_genres);
            genre_freqs(genidx, 1:num_pos) = 0;

            % keep the parent table square (#genres x #genres);
            % existing counts are preserved, new cells are zero
            genre_childof(genidx, genidx) = 0;
        end

        % a tag with more genres than seen so far: add a position column
        if j > size(genre_freqs, 2)
            genre_freqs(:, j) = 0;
        end

        % ---
        % here, we save the index to a new genre structure
        % ---
        magnagen_id{i} = [magnagen_id{i}, genidx];

        % ---
        % further genre statistics, perhaps it's a hierarchy
        % ---

        % save frequency by position
        genre_freqs(genidx, j) = genre_freqs(genidx, j) + 1;

        % save parent genre if applicable
        if j == 1

            % remember parent index: the first genre of the tag
            paridx = genidx;
        else

            % count index for this parent
            genre_childof(genidx, paridx) = genre_childof(genidx, paridx) + 1;
        end
    end
end

% ---
% this should output quite generic data, to ease
% comparison with other genre hierarchies.
%
% thus, we set the parental relation relative to overall
% appearance of the child genre
% ---

% make values relative to total occurrence of child;
% every listed genre occurred at least once, so the divisor is nonzero
for i = 1:size(genre_childof, 1)
    genre_childof(i, :) = genre_childof(i, :) ./ sum(genre_freqs(i, :));
end

% ---
% reformat genre attribute table as sparse matrix
% ---
clip_magnagenres = sparse(num_clips, length(genres));
for i = 1:num_clips
    clip_magnagenres(i, magnagen_id{i}) = 1;
end

magnagenres = genres;
magnagenre_freqs = genre_freqs;
magnagenre_childof = genre_childof;