Mercurial > hg > camir-aes2014
diff core/magnatagatune/makro_merge_last.fm_data.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
% makro_merge_last.fm_data
%
% Script: associates the artists of the clip collection with last.fm
% artists, retrieves the last.fm tags of every usable artist and collects
% them in a sparse artist-by-tag annotation matrix.
%
% Reads from the workspace:
%   clip_info_proper - cell array; column 4 is passed to unique() to get
%                      one entry per artist
%   annots_ids       - indexed in parallel with the rows of
%                      clip_info_proper (presumably the clip ids -
%                      TODO confirm against the calling script)
%
% Writes to the workspace (besides loop/temporary variables):
%   artists, idx            - unique artists and the mapping back to clips
%   clip_ids                - clip_ids{k} = annots_ids entries of artist k
%   fmartist                - result of fm_corresponding_artists, with the
%                             manual 'Mijo' correction applied
%   fmartists               - {clip_ids, fmartist columns...}
%   fmartist_names          - column names for fmartists
%   bad_artiidx, bad_artists- indices / 0-1 mask of artists without tags
%   fmartist_tags           - one row {tag names, tag scores} per artist
%   fmartist_annots         - sparse (artist x tag) matrix of scores / 100
%   fmartist_annots_names   - tag name of each column of fmartist_annots

% ---
% make sure we just search once for each artist
% ---
[artists, null, idx] = unique(clip_info_proper(:,4));

% ---
% this is where we search for our artists in last.fm
% ---
[fmartist, fmartist_names] = fm_corresponding_artists(artists);

%%
% ---
% add ids to the artist structure
% ---

% collect the clip ids of every artist. clip_ids is initialised explicitly
% so that a stale variable of a different size left over in the workspace
% cannot leak into the concatenation below.
clip_ids = cell(1, numel(artists));
for i = 1:numel(artists)
    clip_ids{i} = annots_ids(idx == i);
end
fmartists = cat(2, clip_ids', fmartist);
fmartist_names = {'clip_ids', fmartist_names{:}};

% ---
% NOTE: an earlier version executed "clear fmartist" here, which made every
% following reference to fmartist fail when the script is run from top to
% bottom. All code below indexes fmartist with the column layout returned
% by fm_corresponding_artists (column 1 is searched for 'Mijo' / 'Magna',
% column 2 is compared against '-1'), so the variable is kept.
% ---

% manual correction of the last.fm association for the artist 'Mijo'
[a, b] = fm_retrieve_artist('Mijo');
mijo_idx = strcellfind(fmartist(:,1), 'Mijo');
if ~isempty(mijo_idx) && all(mijo_idx > 0)
    fmartist(mijo_idx, 2:3) = {a{1}, b{1}};
else
    % strcellfind is compared against -1 as "not found" further below;
    % indexing with such a value would abort the whole script
    warning('makro_merge_last_fm_data:artistNotFound', ...
        'artist ''Mijo'' not found in fmartist, manual correction skipped');
end

% ---
% TODO: code manual sorting out of bad associations here, for better
% reproducibility
% ---

% artists marked with '-1' in column 2
bad_artiidx = find(strcmp('-1', fmartist(:,2)));

% ---
% we remove the magnatune compilation artist,
% as tags are not really descriptive for this
% ---
magna_idx = substrcellfind(fmartist(:,1), 'Magna', 1);

% find() on a column returns a column; force both index lists into rows so
% the concatenation works for any number of hits
bad_artiidx = [bad_artiidx(:)', magna_idx(:)'];

bad_artists = zeros(size(fmartist,1),1);
bad_artists(bad_artiidx) = 1;

% ---
% NOTE: as we have two categories of reasons for non-existing tags
% (exclusion above and failure), there are two different data entries for
% such: '-1' for "artist not found in last.fm database" and
% [] for excluded items.
% ---

% preallocate one row per artist, so that excluded artists at the end of
% the list still get their (empty) row and the row indices of
% fmartist_tags / fmartist_annots stay aligned with fmartist
fmartist_tags = cell(size(fmartist,1), 2);
for i = 1:size(fmartist,1)

    if ~bad_artists(i)
        fprintf('%d percent: %s\n',floor(i*100/size(fmartist,1)),fmartist{i,2});
        [a, b] = fm_retrieve_artist_tags(fmartist{i,2});
        fmartist_tags(i,:) = {a, b};
    end
end

%%
% ---
% now, we access the frequency of all tags, trying to establish a
% vocabulary suitable for defining similarity measurements
% ---
% ---
% collect all tags and their number of occurrences
% the tag array is allocated beforehand to save time
% (it grows automatically if more than 2000 distinct tags show up)
% ---

fmartist_annots = sparse(size(fmartist_tags,1),2000);
fmartist_annots_names = {};
for i = 1:size(fmartist_tags,1)

    % rows of excluded artists are empty; anything that is not a cell
    % array cannot be indexed with {j} below, so it is skipped as well
    if isempty(fmartist_tags{i,1}) || ~iscell(fmartist_tags{i,1})
        continue;
    end

    % ---
    % FIXME: obviously some tags get into the names table but don't get any
    % score associated.
    % NOTE(review): a score of 0 leaves no stored entry in a sparse matrix,
    % which would produce exactly this picture - confirm against the data
    % ---
    for j = 1:numel(fmartist_tags{i,1})
        if strcmp(fmartist_tags{i,1}{j},'-1')
            continue;
        end

        % find tag in new tag array
        tagidx = strcellfind(fmartist_annots_names, fmartist_tags{i,1}{j});
        if isempty(tagidx) || any(tagidx == -1)

            % unknown tag: create new tag field
            tagidx = numel(fmartist_annots_names)+1;
            fmartist_annots_names{tagidx} = fmartist_tags{i,1}{j};
        end

        % ---
        % NOTE: the tag popularities are apparently delivered as an
        % integer type; they are converted to double before scaling, as
        % integer division would round the result
        % ---

        % save tag domination
        fmartist_annots(i,tagidx) = double(fmartist_tags{i,2}(j))./100;
    end
end