wolffd@0: % makro_merge_last.fm_data
wolffd@0: 
wolffd@0: % ---
wolffd@0: % make sure we search just once for each artist:
wolffd@0: %   artists - the unique entries of column 4 of clip_info_proper
wolffd@0: %   idx     - for every row of clip_info_proper, the position of its
wolffd@0: %             entry within 'artists'
wolffd@0: % The second output of unique() is not used. It is discarded with '~'
wolffd@0: % rather than stored in a variable called 'null', which would shadow
wolffd@0: % MATLAB's built-in null() function for the rest of the session.
wolffd@0: % ---
wolffd@0: [artists, ~, idx] = unique(clip_info_proper(:,4));
wolffd@0: 
wolffd@0: 
wolffd@0: % ---
wolffd@0: % this is where we search for our artists in last.fm
wolffd@0: % ---
wolffd@0: [fmartist, fmartist_names] = fm_corresponding_artists(artists);
wolffd@0: 
wolffd@0: %%
wolffd@0: % ---
wolffd@0: % add clip ids to the artist structure
wolffd@0: % ---
wolffd@0: 
wolffd@0: % collect the clip ids belonging to each unique artist.
wolffd@0: % clip_ids is (re)initialised here so that leftovers from an earlier run
wolffd@0: % in the same workspace cannot leak into the result or change its shape.
wolffd@0: clip_ids = cell(1, numel(artists));
wolffd@0: for i = 1:numel(artists)
wolffd@0:     clip_ids{i} = annots_ids(idx == i);
wolffd@0: end
wolffd@0: 
wolffd@0: % fmartists = [clip ids, columns of fmartist]; the column names are
wolffd@0: % extended accordingly
wolffd@0: fmartists = cat(2,clip_ids', fmartist);
wolffd@0: fmartist_names = {'clip_ids',fmartist_names{:}};
wolffd@0: 
wolffd@0: % ---
wolffd@0: % NOTE: fmartist is deliberately NOT cleared here. All following sections
wolffd@0: % still index fmartist (artist in column 1, last.fm data from column 2 on);
wolffd@0: % clearing it made the script abort at the next statement.
wolffd@0: % ---
wolffd@0: 
wolffd@0: % ---
wolffd@0: % manual correction of the last.fm association for the artist 'Mijo'
wolffd@0: % NOTE(review): fmartists was built above, before this correction, so it
wolffd@0: % does not contain it - confirm whether that is intended.
wolffd@0: % ---
wolffd@0: [a, b] = fm_retrieve_artist('Mijo');
wolffd@0: mijo_idx = strcellfind(fmartist(:,1),'Mijo');
wolffd@0: if ~isempty(mijo_idx) && all(mijo_idx > 0)
wolffd@0:     fmartist(mijo_idx,2:3) = {a{1}, b{1}};
wolffd@0: else
wolffd@0:     % strcellfind signals "not found" with a non-positive value (-1 is
wolffd@0:     % tested for elsewhere in this script); do not use it as a subscript
wolffd@0:     warning('artist ''Mijo'' not found in fmartist, manual correction skipped');
wolffd@0: end
wolffd@0: 
wolffd@0: % ---
wolffd@0: % TODO: code manual sorting out of bad associations here, for better
wolffd@0: % reproducibility
wolffd@0: % ---
wolffd@0: 
wolffd@0: % rows whose second column holds the string '-1'
wolffd@0: % (find on the N-by-1 strcmp result yields a column vector)
wolffd@0: bad_artiidx = find(strcmp('-1', fmartist(:,2)));
wolffd@0: 
wolffd@0: % ---
wolffd@0: % we remove the magnatune compilation artist,
wolffd@0: % as tags are not really descriptive for this
wolffd@0: % ---
wolffd@0: magna_idx = substrcellfind(fmartist(:,1), 'Magna', 1);
wolffd@0: 
wolffd@0: % Join both index lists as columns. Horizontal concatenation of the column
wolffd@0: % vector above with the substrcellfind result fails with a dimension error
wolffd@0: % as soon as more than one artist carries '-1'.
wolffd@0: bad_artiidx = [bad_artiidx(:); magna_idx(:)];
wolffd@0: 
wolffd@0: % keep valid subscripts only
wolffd@0: % NOTE(review): assumes substrcellfind returns numeric indices and, like
wolffd@0: % strcellfind, a non-positive value when nothing matches - confirm.
wolffd@0: bad_artiidx = bad_artiidx(bad_artiidx > 0);
wolffd@0: 
wolffd@0: % indicator vector with one entry per row of fmartist: 1 = skip this artist
wolffd@0: bad_artists = zeros(size(fmartist,1),1);
wolffd@0: bad_artists(bad_artiidx) = 1;
wolffd@0: 
wolffd@0: 
wolffd@0: % ---
wolffd@0: % NOTE: as we have two categories of reasons for non-existing tags
wolffd@0: % (exclusion above and failure), there are two different data entries for
wolffd@0: % them: '-1' for "artist not found in last.fm database" and
wolffd@0: %       [] for excluded items.
wolffd@0: % ---
wolffd@0: 
wolffd@0: % ---
wolffd@0: % fmartist_tags gets one row per row of fmartist, {a, b} as returned by
wolffd@0: % fm_retrieve_artist_tags. It is pre-allocated so that its row count always
wolffd@0: % equals the number of artists: when grown on demand it ended at the last
wolffd@0: % non-excluded artist and came out too short whenever the final artists
wolffd@0: % were excluded. Rows of excluded artists stay [].
wolffd@0: % ---
wolffd@0: n_fmartists = size(fmartist,1);
wolffd@0: fmartist_tags = cell(n_fmartists, 2);
wolffd@0: for i = 1:n_fmartists
wolffd@0:     
wolffd@0:     if ~bad_artists(i)
wolffd@0:         % progress output, then query using the entry in column 2
wolffd@0:         fprintf('%d percent: %s\n',floor(i*100/n_fmartists),fmartist{i,2});
wolffd@0:         [a, b] = fm_retrieve_artist_tags(fmartist{i,2});
wolffd@0:         fmartist_tags(i,:) = {a, b};
wolffd@0:     end
wolffd@0: end
wolffd@0: %%
wolffd@0: % ---
wolffd@0: % now we assess the frequency of all tags, trying to establish a
wolffd@0: % vocabulary suitable for defining similarity measurements
wolffd@0: % ---
wolffd@0: % ---
wolffd@0: % collect all tags and their scores:
wolffd@0: %   fmartist_annots_names - list of distinct tag names, in order of first
wolffd@0: %                           appearance
wolffd@0: %   fmartist_annots       - sparse matrix, one row per row of
wolffd@0: %                           fmartist_tags, one column per tag name
wolffd@0: % the matrix is allocated beforehand (2000 columns) to save time
wolffd@0: % ---
wolffd@0: 
wolffd@0: fmartist_annots_names = {};
wolffd@0: fmartist_annots = sparse(size(fmartist_tags,1),2000);
wolffd@0: for i = 1:size(fmartist_tags,1)
wolffd@0:     
wolffd@0:     % ---
wolffd@0:     % FIXME: obviously some tags get into the names table but don't get
wolffd@0:     % any score associated.
wolffd@0:     % ---
wolffd@0:     for j = 1:numel(fmartist_tags{i,1})
wolffd@0:         
wolffd@0:         % only process real tags; '-1' entries are skipped
wolffd@0:         if ~isempty(fmartist_tags{i,1}) && ~strcmp(fmartist_tags{i,1}{j},'-1')
wolffd@0:             
wolffd@0:             % look the tag up in the list of names collected so far
wolffd@0:             tagidx = strcellfind(fmartist_annots_names, fmartist_tags{i,1}{j});
wolffd@0:             if tagidx ~= -1
wolffd@0:                 % tag already known: its column is tagidx
wolffd@0:             else
wolffd@0:                 % unknown tag: append it as a new column
wolffd@0:                 tagidx = numel(fmartist_annots_names)+1;
wolffd@0:                 fmartist_annots_names{tagidx} = fmartist_tags{i,1}{j};
wolffd@0:             end
wolffd@0:             
wolffd@0:             % ---
wolffd@0:             % save tag domination
wolffd@0:             % NOTE: the score taken from fmartist_tags{i,2} is converted
wolffd@0:             %  to double and divided by 100 before it is stored
wolffd@0:             %  (the original note describes the source values as INT)
wolffd@0:             % ---
wolffd@0:             fmartist_annots(i,tagidx) = double(fmartist_tags{i,2}(j))./100;
wolffd@0:         end
wolffd@0:     end
wolffd@0: end