annotate core/magnatagatune/makro_merge_last.fm_data.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 % makro_merge_last.fm_data
wolffd@0 2 % Script: looks up the artists of the clips on last.fm and collects their tags.
wolffd@0 3 % ---
wolffd@0 4 % Make sure we search just once for each artist: reduce column 4 of
wolffd@0 5 % clip_info_proper to its unique values; idx maps every row to its entry in artists.
wolffd@0 6 [artists,null, idx] = unique(clip_info_proper(:,4));
wolffd@0 7 % NOTE(review): the unused second output is named "null", which hides
wolffd@0 8 % MATLAB's built-in null() in this workspace -- confirm this is harmless.
wolffd@0 9 % ---
wolffd@0 10 % This is where we search for our artists in last.fm.
wolffd@0 11 % ---
wolffd@0 12 [fmartist, fmartist_names] = fm_corresponding_artists(artists);
wolffd@0 13
wolffd@0 14 %%
wolffd@0 15 % ---
wolffd@0 16 % add clip ids to the artist structure: one cell per unique artist, holding
wolffd@0 17 % annots_ids(idx == i) (assumes annots_ids is aligned with idx -- TODO confirm)
wolffd@0 18 % start from a fresh cell so entries left over from an earlier run cannot linger
wolffd@0 19 clip_ids = cell(1, numel(artists));
wolffd@0 20 for i = 1:numel(artists)
wolffd@0 21     clip_ids{i} = annots_ids(idx == i);
wolffd@0 22 end
wolffd@0 23 fmartists = cat(2,clip_ids', fmartist);
wolffd@0 24 fmartist_names = {'clip_ids',fmartist_names{:}};
wolffd@0 25 % NOTE: fmartist must stay in the workspace (the former "clear fmartist" broke
wolffd@0 26 % every step below). NOTE(review): fmartists is built before the 'Mijo' fix.
wolffd@0 27 % manual correction of the last.fm entry stored for the artist 'Mijo'
wolffd@0 28 [a, b] = fm_retrieve_artist('Mijo');
wolffd@0 29 fmartist(strcellfind(fmartist(:,1),'Mijo'),2:3) = {a{1}, b{1}};
wolffd@0 30
wolffd@0 31 % ---
wolffd@0 32 % TODO: code manual sorting out of bad associations here, for better
wolffd@0 33 % reproducibility
wolffd@0 34 % ---
wolffd@0 35 % rows of fmartist whose second column is '-1' get excluded from the tag
wolffd@0 36 % retrieval; find on a column returns a column vector of row indices
wolffd@0 37 bad_artiidx = find(strcmp('-1', fmartist(:,2)));
wolffd@0 38
wolffd@0 39 % ---
wolffd@0 40 % we also remove the magnatune compilation artist, as tags are not really
wolffd@0 41 % descriptive for it; both lists are forced to columns so they always join
wolffd@0 42 % ---
wolffd@0 43 bad_artiidx = [bad_artiidx(:); reshape(substrcellfind(fmartist(:,1), 'Magna', 1), [], 1)];
wolffd@0 44 bad_artiidx = bad_artiidx(bad_artiidx > 0); % drop non-index "no match" markers (e.g. -1)
wolffd@0 45 bad_artists = zeros(size(fmartist,1),1);
wolffd@0 46 bad_artists(bad_artiidx) = 1;
wolffd@0 47
wolffd@0 48
wolffd@0 49 % ---
wolffd@0 50 % NOTE: as we have two categories of reasons for non-existing tags
wolffd@0 51 % (exclusion above and failure), there are two different data entries for
wolffd@0 52 % such: '-1' for "artist not found in last.fm database" and
wolffd@0 53 % [] for excluded items.
wolffd@0 54 % ---
wolffd@0 55 % one row per artist, allocated up front so excluded artists at the end keep their row
wolffd@0 56 fmartist_tags = cell(size(fmartist,1), 2);
wolffd@0 57 for i = 1:size(fmartist,1)
wolffd@0 58     % excluded artists are skipped; their row stays {[], []}
wolffd@0 59     if ~bad_artists(i)
wolffd@0 60         fprintf('%d percent: %s\n',floor(i*100/size(fmartist,1)),fmartist{i,2});
wolffd@0 61         [a, b] = fm_retrieve_artist_tags(fmartist{i,2});
wolffd@0 62         fmartist_tags(i,:) = {a, b};
wolffd@0 63     end
wolffd@0 64 end
wolffd@0 65 %%
wolffd@0 66 % ---
wolffd@0 67 % now, we assess the frequency of all tags, trying to establish a
wolffd@0 68 % vocabulary suitable for defining similarity measurements
wolffd@0 69 % ---
wolffd@0 70 % ---
wolffd@0 71 % collect all tags and number of occurrences
wolffd@0 72 % the tag array is allocated beforehand to save time
wolffd@0 73 % ---
wolffd@0 74
wolffd@0 75 fmartist_annots = sparse(size(fmartist_tags,1),2000);
wolffd@0 76 fmartist_annots_names = {};
wolffd@0 77 for i = 1:size(fmartist_tags,1)
wolffd@0 78
wolffd@0 79     % ---
wolffd@0 80     % FIXME: obviously some tags get into the names table but don't get any
wolffd@0 81     % score associated.
wolffd@0 82     % ---
wolffd@0 83     for j = 1:numel(fmartist_tags{i,1})
wolffd@0 84
wolffd@0 85         % '-1' entries are skipped; the loop body only runs for non-empty
wolffd@0 86         % tag lists, so no separate emptiness test is needed here
wolffd@0 87         if strcmp(fmartist_tags{i,1}{j},'-1')
wolffd@0 88             continue;
wolffd@0 89         end
wolffd@0 90
wolffd@0 91         % look the tag up in the vocabulary collected so far
wolffd@0 92         tagidx = strcellfind(fmartist_annots_names, fmartist_tags{i,1}{j});
wolffd@0 93
wolffd@0 94         % lookup gave no valid column: append the tag as a new vocabulary
wolffd@0 95         % entry (condition equals the "else" case of "if tagidx ~= -1")
wolffd@0 96         if isempty(tagidx) || any(tagidx == -1)
wolffd@0 97             tagidx = numel(fmartist_annots_names)+1;
wolffd@0 98             fmartist_annots_names{tagidx} = fmartist_tags{i,1}{j};
wolffd@0 99         end
wolffd@0 100
wolffd@0 101         % ---
wolffd@0 102         % NOTE: per the original author the tag popularities come in an INT
wolffd@0 103         % type; they are converted to double before being divided by 100
wolffd@0 104         % and before being used in any other circumstances
wolffd@0 105         % ---
wolffd@0 106
wolffd@0 107         % save tag domination
wolffd@0 108         fmartist_annots(i,tagidx) = double(fmartist_tags{i,2}(j))./100;
wolffd@0 109     end
wolffd@0 110 end