comparison core/magnatagatune/makro_merge_last.fm_data.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
% makro_merge_last.fm_data
%
% Script: looks up a last.fm counterpart for every artist found in the
% clip table and then collects the last.fm tags of those artists.
% Reads clip_info_proper and annots_ids from the workspace.

% ---
% make sure we just search once for each artist
% ---
% Column 4 of clip_info_proper is used as the artist key. idx maps every
% row of clip_info_proper to its entry in artists (artists(idx) reproduces
% the column). The second output of unique is not needed; it is discarded
% with ~ instead of being stored in a variable called "null", which would
% shadow MATLAB's built-in null() for the rest of the session.
[artists, ~, idx] = unique(clip_info_proper(:,4));


% ---
% this is where we search for our artists in last.fm
% ---
[fmartist, fmartist_names] = fm_corresponding_artists(artists);

%%
% ---
% add Id's to artist structure
% ---

% collect clip ids: clip_ids{i} holds the entries of annots_ids that
% belong to artists{i} (idx is the index vector returned by unique above).
% The cell is reset first so that a clip_ids variable left over in the
% workspace from an earlier run cannot contribute stale entries.
clip_ids = cell(1, numel(artists));
for i = 1:numel(artists)
    clip_ids{i} = annots_ids(idx == i);
end

% fmartists = lookup result with the clip ids prepended as first column;
% fmartist_names gets the matching extra column title.
fmartists = cat(2, clip_ids', fmartist);
fmartist_names = {'clip_ids', fmartist_names{:}};

% NOTE: fmartist must not be cleared here. It is read and updated in the
% very next statement and by the remaining sections of this script; the
% former "clear fmartist;" made a top-to-bottom run fail with an
% undefined-variable error.
% NOTE(review): fmartists is built before the correction below, so it does
% not contain the corrected 'Mijo' entry - confirm whether that is intended.

% manual correction of the last.fm match for the artist 'Mijo':
% columns 2:3 of its row are overwritten with the first results of the
% explicit lookup.
[a, b] = fm_retrieve_artist('Mijo');
fmartist(strcellfind(fmartist(:,1),'Mijo'),2:3) = {a{1}, b{1}};

% ---
% TODO: code manual sorting out of bad associations here, for better
% reproducibility
% ---

% rows whose second column is the '-1' marker
bad_artiidx = find(strcmp('-1', fmartist(:,2)));

% ---
% we remove the magnatune compilation artist,
% as tags are not really descriptive for this
% ---
% Both index lists are forced into column shape before they are joined:
% find() on a column returns a column, and joining that horizontally with
% the result of substrcellfind fails with a dimension mismatch as soon as
% more than one '-1' row exists.
bad_artiidx = [bad_artiidx(:); ...
    reshape(substrcellfind(fmartist(:,1), 'Magna', 1), [], 1)];

% NOTE(review): strcellfind is compared against -1 elsewhere in this
% script; in case substrcellfind uses the same "not found" value, drop
% non-positive entries so they are not used as subscripts - confirm.
bad_artiidx = bad_artiidx(bad_artiidx > 0);

% bad_artists(i) == 1 marks artist i as excluded from tag retrieval
bad_artists = zeros(size(fmartist,1),1);
bad_artists(bad_artiidx) = 1;


% ---
% NOTE: as we have two categories of reasons for non-existing tags
% (exclusion above and failure), there are two different data entries for
% such: '-1' for "artist not found in last.fm database" and
% [] for excluded items.
% ---

% One row per artist, two columns (the two outputs of
% fm_retrieve_artist_tags). Allocating the full size up front keeps the
% row count equal to size(fmartist,1) even when the last artists are
% excluded; their rows simply stay [].
fmartist_tags = cell(size(fmartist,1), 2);
for i = 1:size(fmartist,1)

    if ~bad_artists(i)
        % progress output: percentage of artists visited and current name
        fprintf('%d percent: %s\n',floor(i*100/size(fmartist,1)),fmartist{i,2});
        [a, b] = fm_retrieve_artist_tags(fmartist{i,2});
        fmartist_tags(i,:) = {a, b};
    end
end
%%
% ---
% now, we assess the frequency of all tags, trying to establish a
% vocabulary suitable for defining similarity measurements
% ---
% ---
% collect all tags and number of occurrences
% the tag array is allocated beforehand to save time
% ---

% rows: artists (same order as fmartist_tags), columns: tags in the order
% of fmartist_annots_names. 2000 columns are reserved up front, so the
% matrix can be wider than the number of names actually collected.
fmartist_annots = sparse(size(fmartist_tags,1),2000);
fmartist_annots_names = {};
for i = 1:size(fmartist_tags,1)

    % ---
    % FIXME: obviously some tags get into the names table but dont get any
    % score associated.
    % NOTE(review): a sparse matrix stores no explicit zeros, so a tag
    % whose score is 0 would get a name but no stored entry - possibly
    % the cause; confirm.
    % ---
    for j = 1:numel(fmartist_tags{i,1})

        % skip the '-1' placeholder; rows without tags never enter this
        % loop, so no separate emptiness check is needed
        if strcmp(fmartist_tags{i,1}{j},'-1')
            continue;
        end

        % find tag in new tag array
        tagidx = strcellfind(fmartist_annots_names, fmartist_tags{i,1}{j});

        % unknown tag (strcellfind signals this with -1): append a new
        % name and use its position as column
        if isempty(tagidx) || any(tagidx(:) == -1)
            tagidx = numel(fmartist_annots_names)+1;

            % create new tag field
            fmartist_annots_names{tagidx} = fmartist_tags{i,1}{j};
        end

        % ---
        % NOTE: the popularity values in fmartist_tags{i,2} are converted
        % to double before they are scaled and stored
        % ---

        % save tag domination, scaled by 1/100
        fmartist_annots(i,tagidx) = double(fmartist_tags{i,2}(j))./100;
    end
end