% makro_merge_last.fm_data

% ---
% Make sure we search just once for each artist:
% reduce column 4 of clip_info_proper to its unique entries. idx maps every
% row of clip_info_proper back to its entry in artists.
% The second output of unique is not needed and is discarded with ~, so
% that no workspace variable shadows MATLAB's built-in null() function.
% ---
[artists, ~, idx] = unique(clip_info_proper(:,4));


% ---
% This is where we search for our artists in last.fm.
% ---
[fmartist, fmartist_names] = fm_corresponding_artists(artists);

%%
% ---
% Add ids to the artist structure
% ---

% Collect the clip ids belonging to each unique artist. The cell array is
% allocated fresh so that entries left over from an earlier run of this
% cell cannot leak into (or change the size of) the result.
clip_ids = cell(1, numel(artists));
for i = 1:numel(artists)
    clip_ids{i} = annots_ids(idx == i);
end

% Prepend the clip ids as a new first column and name that column.
fmartists = cat(2, clip_ids', fmartist);
fmartist_names = {'clip_ids', fmartist_names{:}};

% NOTE: fmartist is deliberately NOT cleared here. The statements that
% follow still index fmartist(:,1) and fmartist(:,2); clearing it at this
% point makes them fail with an undefined-variable error.

% Manual correction: query last.fm for 'Mijo' explicitly and overwrite
% columns 2:3 of the matching fmartist row with the first returned entry.
[a, b] = fm_retrieve_artist('Mijo');
mijo_row = strcellfind(fmartist(:,1), 'Mijo');

% strcellfind appears to report "not found" as -1 (the tag lookup later in
% this script tests for exactly that value). -1 is not a valid row index,
% so the correction is skipped with a warning instead of raising an error.
if mijo_row ~= -1
    fmartist(mijo_row, 2:3) = {a{1}, b{1}};
else
    warning('makro_merge_last_fm_data:artistNotFound', ...
        'Artist ''Mijo'' not found in fmartist(:,1); manual correction skipped.');
end

% ---
% TODO: code the manual sorting-out of bad associations here, for better
% reproducibility
% ---

% Rows of fmartist whose second column holds the string '-1'.
bad_artiidx = find(strcmp('-1', fmartist(:,2)));

% ---
% We remove the magnatune compilation artist,
% as tags are not really descriptive for this.
% ---
magna_idx = substrcellfind(fmartist(:,1), 'Magna', 1);

% NOTE(review): non-positive values are dropped on the assumption that
% substrcellfind, like strcellfind, may signal "no match" with -1 - confirm.
magna_idx = magna_idx(magna_idx > 0);

% find() on the column fmartist(:,2) yields a column vector, so both index
% lists are stacked vertically; (:) makes this independent of the
% orientation substrcellfind returns. Horizontal concatenation fails
% whenever the two lists differ in height (e.g. several '-1' rows combined
% with a single 'Magna' match).
bad_artiidx = [bad_artiidx(:); magna_idx(:)];

% Flag vector with one entry per row of fmartist: 1 marks a row to skip.
bad_artists = zeros(size(fmartist,1),1);
bad_artists(bad_artiidx) = 1;


% ---
% NOTE: as we have two categories of reasons for non-existing tags
% (exclusion above and failure), there are two different data entries for
% such: '-1' for "artist not found in last.fm database" and
% [] for excluded items.
% ---

% One row per row of fmartist, allocated up front. Skipped rows keep the
% empty [] cells, and the table always has as many rows as fmartist, even
% when the last artists in the list are flagged in bad_artists.
num_fmartists = size(fmartist,1);
fmartist_tags = cell(num_fmartists, 2);
for i = 1:num_fmartists

    if ~bad_artists(i)
        % progress output: percentage done and the entry being queried
        fprintf('%d percent: %s\n', floor(i*100/num_fmartists), fmartist{i,2});

        % store both outputs of the tag query in row i
        [a, b] = fm_retrieve_artist_tags(fmartist{i,2});
        fmartist_tags(i,:) = {a, b};
    end
end
%%
% ---
% Now we look at the frequency of all tags, trying to establish a
% vocabulary suitable for defining similarity measurements.
% ---
% ---
% Collect all tags and their number of occurrence.
% The tag array is allocated beforehand to save time.
% ---

fmartist_annots = sparse(size(fmartist_tags,1),2000);
fmartist_annots_names = {};
for artist_row = 1:size(fmartist_tags,1)

    % ---
    % FIXME: obviously some tags get into the names table but don't get
    % any score associated.
    % ---
    artist_tags = fmartist_tags{artist_row,1};
    for tag_pos = 1:numel(artist_tags)
        tag_name = artist_tags{tag_pos};

        % Entries equal to '-1' carry no tag and are skipped. (An empty
        % tag list never gets here: the loop body does not run for it.)
        if strcmp(tag_name,'-1')
            continue;
        end

        % Look the tag up in the vocabulary collected so far; -1 is
        % treated as "not present yet".
        tagidx = strcellfind(fmartist_annots_names, tag_name);
        if tagidx ~= -1
            % known tag: reuse its column
            tag_col = tagidx;
        else
            % new tag: append it to the vocabulary, which defines its column
            tag_col = numel(fmartist_annots_names)+1;
            fmartist_annots_names{tag_col} = tag_name;
        end

        % ---
        % NOTE: according to the original remark the tag popularities
        % arrive as an INT type; they are converted to double (and divided
        % by 100) before being stored in the sparse matrix.
        % ---
        fmartist_annots(artist_row,tag_col) = double(fmartist_tags{artist_row,2}(tag_pos))./100;
    end
end