function [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof] = get_magnagenre_numeric(clip_info_magnagenres)
% GET_MAGNAGENRE_NUMERIC  Numeric genre representation of the clip database.
%
%   [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof] = ...
%       get_magnagenre_numeric(clip_info_magnagenres)
%
%   Takes the genre strings stored in the third column of the cell array
%   clip_info_magnagenres, splits each of them at ',' (via the project
%   helper EXPLODE) and assigns every distinct genre a numeric index in
%   order of first appearance.
%
%   Outputs:
%     clip_magnagenres    sparse (clips x genres) matrix; entry (i, g) is 1
%                         if genre g is listed for clip i.
%     magnagenres         column cell array of the genre entries; row g
%                         belongs to genre index g.
%     magnagenre_freqs    (genres x positions) count matrix: how often
%                         genre x appears at position y of a clip's genre
%                         list. Has at least 4 columns and grows if a clip
%                         lists more genres than that.
%     magnagenre_childof  (genres x genres) matrix: number of times genre x
%                         followed first-listed genre y within one clip,
%                         divided by the total number of occurrences of
%                         genre x (a fraction in [0, 1], not a percentage).
%
%   The first genre of each list is treated as the potential parent of all
%   genres listed after it, as a hint towards an underlying hierarchy.
%
%   NOTE(review): STRCELLFIND is a project helper; this code relies on it
%   returning a value < 1 when the genre is not in the list yet.

data = clip_info_magnagenres(:, 3);
num_clips = length(data);

% minimum number of position columns reported in genre_freqs
max_simul_genres = 4;

genres = {};
genre_freqs = [];
genre_childof = [];

% one (possibly empty) index list per clip, so that the rows of the
% resulting sparse matrix always line up with the clips
magnagen_id = cell(1, num_clips);

% ---
% genre list and per-position genre frequencies
% ---
for i = 1:num_clips

    % separate genres
    tmp = explode(',', data{i});

    % find and save corresponding genre indices
    for j = 1:length(tmp)
        genidx = strcellfind(genres, tmp(j));

        % add genre to genre list if not existent
        if genidx < 1
            genidx = size(genres, 1) + 1;
            genres = cat(1, genres, tmp(j));

            % append a zero row; indexed assignment pads with zeros and
            % keeps any columns the table has already grown to
            genre_freqs(genidx, max(size(genre_freqs, 2), max_simul_genres)) = 0;

            % keep the parent table square (genres x genres), so there
            % is no fixed upper limit on the number of genres
            genre_childof(genidx, genidx) = 0;
        end

        % remember the genre index for this clip
        magnagen_id{i} = [magnagen_id{i}, genidx];

        % grow the position table before it is read at a new position
        if j > size(genre_freqs, 2)
            genre_freqs(end, j) = 0;
        end

        % save frequency by position
        genre_freqs(genidx, j) = genre_freqs(genidx, j) + 1;

        if j == 1
            % the first genre acts as parent for the rest of the list
            paridx = genidx;
        else
            % count this genre as a child of the first genre
            genre_childof(genidx, paridx) = genre_childof(genidx, paridx) + 1;
        end
    end
end

% ---
% this should output quite generic data, to ease comparison with other
% genre hierarchies. thus, the parental relation is set relative to the
% overall appearance of the child genre
% ---
for i = 1:size(genre_childof, 1)
    genre_childof(i, :) = genre_childof(i, :) ./ sum(genre_freqs(i, :));
end

% ---
% reformat genre attribute table as sparse matrix
% ---
clip_magnagenres = sparse(num_clips, length(genres));
for i = 1:num_clips
    clip_magnagenres(i, magnagen_id{i}) = 1;
end

magnagenres = genres;
magnagenre_freqs = genre_freqs;
magnagenre_childof = genre_childof;