% makro_merge_last.fm_data
%
% Script: associates the artists of a clip collection with last.fm artist
% entries, retrieves their tags and builds a sparse artist-by-tag matrix.
%
% Reads from the workspace:
%   clip_info_proper - cell array; column 4 is used as the artist key
%   annots_ids       - array indexed in parallel with the rows of
%                      clip_info_proper (assumed to hold clip ids - TODO confirm)
%
% Writes to the workspace:
%   artists, idx           - unique artist keys and the clip -> artist mapping
%   fmartist               - per-artist table returned by fm_corresponding_artists,
%                            with the manual 'Mijo' correction applied
%   fmartists              - fmartist with the clip-id cells prepended as column 1
%   fmartist_names         - column names for fmartists ('clip_ids' first)
%   bad_artiidx            - row indices of artists excluded from tag retrieval
%   bad_artists            - 0/1 column flagging the excluded rows of fmartist
%   fmartist_tags          - {tag names, tag scores} per artist, [] where excluded
%   fmartist_annots        - sparse artist-by-tag matrix of scores divided by 100
%   fmartist_annots_names  - tag name for every column of fmartist_annots

% ---
% make sure we just search once for each artist
% ---
[artists, null, idx] = unique(clip_info_proper(:,4));

% ---
% this is where we search for our artists in last.fm
% ---
[fmartist, fmartist_names] = fm_corresponding_artists(artists);

%%
% ---
% manual correction of a known association
%
% strcellfind is treated as returning -1 for "not found" (the same sentinel
% is tested in the tag loop below), so the row is only overwritten when the
% lookup actually succeeded.
% ---
mijo_idx = strcellfind(fmartist(:,1), 'Mijo');
if ~isempty(mijo_idx) && all(mijo_idx ~= -1)
    [a, b] = fm_retrieve_artist('Mijo');
    fmartist(mijo_idx,2:3) = {a{1}, b{1}};
end

% ---
% add Id's to artist structure
%
% NOTE: this used to be done before the manual correction above and was
% followed by "clear fmartist", although fmartist is needed by everything
% below. The merged table is now built from the corrected data and
% fmartist is kept alive.
% ---

% collect clip ids; preallocated so that no stale cells from a previous run
% in the same workspace survive
clip_ids = cell(1, numel(artists));
for i = 1:numel(artists)
    clip_ids{i} = annots_ids(idx == i);
end
fmartists = cat(2, clip_ids', fmartist);
fmartist_names = {'clip_ids', fmartist_names{:}};

% ---
% TODO: code manual sorting out of bad associations here, for better
% reproducibility
% ---

% artists that could not be found in last.fm are marked with '-1'
bad_artiidx = find(strcmp('-1', fmartist(:,2)));

% ---
% we remove the magnatune compilation artist,
% as tags are not really descriptive for this
% ---
magna_idx = substrcellfind(fmartist(:,1), 'Magna', 1);

% concatenate as columns: find() on a column input yields a column, so the
% former horizontal concatenation failed as soon as both parts were non-empty
% with different orientations
bad_artiidx = [bad_artiidx(:); magna_idx(:)];

% NOTE(review): presumably substrcellfind uses the same -1 "not found"
% sentinel as strcellfind - confirm. Non-positive values are not valid
% indices in any case, so they are dropped here.
bad_artiidx = bad_artiidx(bad_artiidx > 0);

bad_artists = zeros(size(fmartist,1),1);
bad_artists(bad_artiidx) = 1;


% ---
% NOTE: as we have two categories of reasons for non-existing tags
% (exclusion above and failure), there are two different data entries for
% such: '-1' for "artist not found in last.fm database" and
% [] for excluded items.
% ---

% one row per artist, preallocated: excluded artists keep [] and the table
% always has as many rows as fmartist, even if the last artists are excluded
fmartist_tags = cell(size(fmartist,1), 2);
for i = 1:size(fmartist,1)

    if ~bad_artists(i)
        fprintf('%d percent: %s\n',floor(i*100/size(fmartist,1)),fmartist{i,2});
        [a, b] = fm_retrieve_artist_tags(fmartist{i,2});
        fmartist_tags(i,:) = {a, b};
    end
end
%%
% ---
% now, we access the frequency of all tags, trying to establish a
% vocabulary suitable for defining similarity measurements
% ---
% ---
% collect all tags and number of occurrences
% the tag array is allocated beforehand to save time
% ---

fmartist_annots = sparse(size(fmartist_tags,1),2000);
fmartist_annots_names = {};
for i = 1:size(fmartist_tags,1)

    % ---
    % FIXME: obviously some tags get into the names table but dont get any
    % score associated.
    % NOTE(review): a score of 0 leaves no entry in a sparse matrix, which
    % would produce exactly this symptom - confirm against the data.
    % ---

    % excluded artists hold [], so this loop is simply skipped for them
    for j = 1:numel(fmartist_tags{i,1})
        if strcmp(fmartist_tags{i,1}{j},'-1')
            continue;
        end

        % find tag in new tag array; register it if it is not known yet
        tagidx = strcellfind(fmartist_annots_names, fmartist_tags{i,1}{j});
        if isempty(tagidx) || any(tagidx == -1)

            % create new tag field
            tagidx = numel(fmartist_annots_names)+1;
            fmartist_annots_names{tagidx} = fmartist_tags{i,1}{j};
        end

        % ---
        % NOTE: the tag popularities are delivered in an INT structure.
        % this has to be converted to double before
        % using it in any other circumstances
        % ---

        % save tag domination
        fmartist_annots(i,tagidx) = double(fmartist_tags{i,2}(j))./100;
    end
end