Mercurial > hg > camir-aes2014
comparison core/magnatagatune/makro_merge_last.fm_data.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
1 % makro_merge_last.fm_data | |
2 | |
3 % --- | |
4 % make sure we just search once for each artist | |
5 % --- | |
6 [artists,null, idx] = unique(clip_info_proper(:,4)); | |
7 % idx maps each row of clip_info_proper(:,4) to its position in artists. | |
8 % NOTE(review): the unused second output is named "null", which shadows MATLAB's null() function. | |
9 % --- | |
10 % this is where we search for our artists in last.fm | |
11 % --- | |
12 [fmartist, fmartist_names] = fm_corresponding_artists(artists); | |
13 | |
14 %% | |
15 % --- | |
16 % add Id's to artist structure | |
17 % --- | |
18 | |
19 % collect clip ids: one cell per unique artist, holding the ids of all clips by that artist | |
20 clip_ids = cell(1, numel(artists)); % start clean so stale workspace entries cannot leak into the merge | |
21 for i = 1:numel(artists) | |
22 clip_ids{i} = annots_ids(idx == i); | |
23 end | |
24 fmartists = cat(2,clip_ids', fmartist); | |
25 fmartist_names = {'clip_ids',fmartist_names{:}}; | |
26 | |
27 % fmartist must NOT be cleared here: the 'Mijo' correction and all code below still index into it. | |
28 [a, b] = fm_retrieve_artist('Mijo'); | |
29 fmartist(strcellfind(fmartist(:,1),'Mijo'),2:3) = {a{1}, b{1}}; | |
30 | |
31 % --- | |
32 % TODO: code manual sorting out of bad associations here, for better | |
33 % reproducibility | |
34 % --- | |
35 | |
36 % | |
37 % indices of artists without a last.fm match ('-1' in column 2) | |
38 bad_artiidx = find(strcmp('-1', fmartist(:,2))); | |
39 % --- | |
40 % we remove the magnatune compilation artist, | |
41 % as tags are not really descriptive for this | |
42 % --- | |
43 magna_idx = substrcellfind(fmartist(:,1), 'Magna', 1); | |
44 bad_artiidx = [bad_artiidx(:); magna_idx(:)]; % stack as columns: find() yields a column here, so [a b] errors once it holds more than one index | |
45 bad_artists = zeros(size(fmartist,1),1); | |
46 bad_artists(bad_artiidx) = 1; | |
47 | |
48 | |
49 % --- | |
50 % NOTE: as we have two categories of reasons for non-existing tags | |
51 % (exclusion above and failure), there are two different data entries for | |
52 % such: '-1' for "artist not found in last.fm database" and | |
53 % [] for excluded items. | |
54 % --- | |
55 | |
56 n_artists = size(fmartist,1); | |
57 fmartist_tags = cell(n_artists, 2); % one row per artist, preallocated so excluded artists keep [] rows even at the end | |
58 for i = 1:n_artists | |
59 if ~bad_artists(i) % excluded / unmatched artists are skipped and stay [] | |
60 fprintf('%d percent: %s\n',floor(i*100/n_artists),fmartist{i,2}); | |
61 [a, b] = fm_retrieve_artist_tags(fmartist{i,2}); | |
62 fmartist_tags(i,:) = {a, b}; | |
63 end | |
64 end | |
65 %% | |
66 % --- | |
67 % now, we access the frequency of all tags, trying to establish a | |
68 % vocabulary suitable for defining similarity measurements | |
69 % --- | |
70 % --- | |
71 % collect all tags and number of occurrences | |
72 % the tag array is allocated beforehand to save time | |
73 % --- | |
74 | |
75 % tag/score matrix: one row per entry of fmartist_tags, one column per distinct tag name. | |
76 % 2000 columns are reserved up front; fmartist_annots_names lists the tag belonging to each used column. | |
77 fmartist_annots = sparse(size(fmartist_tags,1),2000); | |
78 fmartist_annots_names = {}; | |
79 for i = 1:size(fmartist_tags,1) | |
80 | |
81 % --- | |
82 % FIXME: obviously some tags get into the names table but don't get any | |
83 % score associated. | |
84 % --- | |
85 for j = 1:numel(fmartist_tags{i,1}) | |
86 | |
87 % '-1' entries carry no tag; an empty tag list never enters this loop | |
88 if strcmp(fmartist_tags{i,1}{j},'-1') | |
89 continue; | |
90 end | |
91 | |
92 % look the tag up in the names table (strcellfind is expected to answer -1 for "not listed") | |
93 tagidx = strcellfind(fmartist_annots_names, fmartist_tags{i,1}{j}); | |
94 if isempty(tagidx) || any(tagidx(:) == -1) | |
95 | |
96 % not listed yet: append a new tag field at the end of the names table | |
97 tagidx = numel(fmartist_annots_names)+1; | |
98 fmartist_annots_names{tagidx} = fmartist_tags{i,1}{j}; | |
99 end | |
100 | |
101 % --- | |
102 % save tag domination: the score from fmartist_tags{i,2} is converted | |
103 % to double and scaled by 1/100 before it is stored | |
104 % --- | |
105 fmartist_annots(i,tagidx) = double(fmartist_tags{i,2}(j))./100; | |
106 end | |
107 end | |