Mercurial > hg > camir-aes2014
view core/magnatagatune/makro_merge_last.fm_data.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line source
% makro_merge_last.fm_data
%
% Script: looks up the artists of the clips in last.fm, retrieves the tags
% of every associated artist and collects them in a sparse
% artist-by-tag matrix.
%
% Expects in the workspace:
%   clip_info_proper - cell array; column 4 is used as the artist of a clip
%   annots_ids       - indexed with a logical mask over the rows of
%                      clip_info_proper
%   bad_artiidx      - (optional) row indices into fmartist that are to be
%                      excluded; treated as empty when it does not exist
%
% Creates / updates:
%   artists, idx, clip_ids, fmartist, fmartists, fmartist_names,
%   bad_artiidx, bad_artists, fmartist_tags, fmartist_annots,
%   fmartist_annots_names

% ---
% make sure we just search once for each artist
% (idx maps every clip row to its entry in artists)
% ---
% the second output is not needed; it is not named "null" so that the
% builtin function of that name is not shadowed in the workspace
[artists, unused_first, idx] = unique(clip_info_proper(:,4));
clear unused_first;

% ---
% this is where we search for our artists in last.fm
% ---
[fmartist, fmartist_names] = fm_corresponding_artists(artists);

%%
% ---
% add Id's to artist structure
% ---

% collect clip ids; start from a fresh cell array so that entries of an
% older clip_ids variable in the workspace cannot leak into the result
clip_ids = cell(1, numel(artists));
for i = 1:numel(artists)
    clip_ids{i} = annots_ids(idx == i);
end

fmartists = cat(2, clip_ids', fmartist);
fmartist_names = {'clip_ids', fmartist_names{:}};

% NOTE: fmartist must NOT be cleared here. Everything below indexes
% fmartist with its original column layout (column 1 is searched for the
% artist name, column 2 is passed to the tag retrieval).
% NOTE(review): fmartists is built before the 'Mijo' correction below and
% therefore does not contain it - confirm whether this is intended.

% ---
% overwrite columns 2:3 of the 'Mijo' row with the first results of an
% explicit query
% ---
mijo_row = strcellfind(fmartist(:,1), 'Mijo');
if ~isempty(mijo_row) && all(mijo_row > 0)
    [a, b] = fm_retrieve_artist('Mijo');
    fmartist(mijo_row, 2:3) = {a{1}, b{1}};
end

% ---
% TODO: code manual sorting out of bad associations here, for better
% reproducibility
% ---
% bad_artiidx = find(strcmp('-1', fmartist(:,2)));

% a manually prepared bad_artiidx is kept; otherwise we start empty
if ~exist('bad_artiidx', 'var')
    bad_artiidx = [];
end

% ---
% we remove the magnatune compilation artist,
% as tags are not really descriptive for this
% ---
% both parts are forced into row shape so the concatenation cannot fail
bad_artiidx = [bad_artiidx(:)', ...
    reshape(substrcellfind(fmartist(:,1), 'Magna', 1), 1, [])];

% non-positive values are never valid row indices; drop them in case a
% helper signals "not found" that way
bad_artiidx = bad_artiidx(bad_artiidx > 0);

bad_artists = zeros(size(fmartist,1), 1);
bad_artists(bad_artiidx) = 1;

% ---
% NOTE: as we have two categories of reasons for non-existing tags
% (exclusion above and failure), there are two different data entries for
% such: '-1' for "artist not found in last.fm database" and
% [] for excluded items.
% ---
% preallocated with one row per artist, so excluded artists at the end of
% the list still get their (empty) row and all tables stay aligned
fmartist_tags = cell(size(fmartist,1), 2);
for i = 1:size(fmartist,1)
    if ~bad_artists(i)
        fprintf('%d percent: %s\n',floor(i*100/size(fmartist,1)),fmartist{i,2});
        [a, b] = fm_retrieve_artist_tags(fmartist{i,2});
        fmartist_tags(i,:) = {a, b};
    end
end

%%
% ---
% now, we access the frequency of all tags, trying to establish a
% vocabulary suitable for defining similarity measurements
% ---

% ---
% collect all tags and number of occurrences
% the tag array is allocated beforehand to save time
% ---
fmartist_annots = sparse(size(fmartist_tags,1), 2000);
fmartist_annots_names = {};
for i = 1:size(fmartist_tags,1)

    % ---
    % FIXME: obviously some tags get into the names table but dont get any
    % score associated.
    % NOTE(review): a score of 0 adds the tag name below but leaves no
    % nonzero entry in the sparse matrix - possibly the cause; confirm.
    % ---
    % rows of excluded artists are empty, so this loop is skipped for them
    for j = 1:numel(fmartist_tags{i,1})

        % skip the '-1' marker
        if strcmp(fmartist_tags{i,1}{j}, '-1')
            continue;
        end

        % find tag in new tag array
        tagidx = strcellfind(fmartist_annots_names, fmartist_tags{i,1}{j});

        % unknown tag (nothing returned, or -1): create new tag field
        if isempty(tagidx) || any(tagidx == -1)
            tagidx = numel(fmartist_annots_names) + 1;
            fmartist_annots_names{tagidx} = fmartist_tags{i,1}{j};
        end

        % ---
        % NOTE: the tag popularities arrive in an INT structure. this has
        % to be converted to double before using it in any other
        % circumstances
        % ---
        % save tag domination
        fmartist_annots(i,tagidx) = double(fmartist_tags{i,2}(j))./100;
    end
end