diff core/magnatagatune/makro_merge_last.fm_data.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/magnatagatune/makro_merge_last.fm_data.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,110 @@
+% makro_merge_last.fm_data
+% ---
+% make sure we search just once for each artist: artists holds the
+% unique entries of clip_info_proper(:,4), idx maps each row to its entry
+% NOTE(review): the unused output "null" shadows MATLAB's null() function
+% ---
+[artists,null, idx] = unique(clip_info_proper(:,4));
+
+% ---
+% this is where we search for our artists in last.fm
+% ---
+[fmartist, fmartist_names] = fm_corresponding_artists(artists);
+
+%%
+% ---
+% add Id's to artist structure: fmartists = [clip ids, fmartist columns]
+% ---
+clip_ids = cell(1, numel(artists)); % reset, stale entries would break cat
+for i = 1:numel(artists)
+    clip_ids{i} = annots_ids(idx == i); % annots_ids entries of artist i
+end
+fmartists = cat(2,clip_ids', fmartist);
+fmartist_names = {'clip_ids',fmartist_names{:}};
+
+% manual correction for 'Mijo'. fmartist must NOT be cleared before this:
+% it is still read here and below. NOTE(review): fmartists predates the fix
+[a, b] = fm_retrieve_artist('Mijo');
+mijo_idx = strcellfind(fmartist(:,1),'Mijo'); % -1 seems to mean "not found"
+if mijo_idx ~= -1, fmartist(mijo_idx,2:3) = {a{1}, b{1}}; end
+
+% ---
+% TODO: code manual sorting out of bad associations here, for better
+% reproducibility
+% ---
+% artists that were not found in last.fm are marked with '-1'
+bad_artiidx = find(strcmp('-1', fmartist(:,2)));
+
+% ---
+% we remove the magnatune compilation artist, as its tags are not really
+% descriptive. (:)' forces row vectors, so the concatenation also works
+% when find() returns a column vector with several entries
+% ---
+magna_idx = substrcellfind(fmartist(:,1), 'Magna', 1);
+bad_artiidx = [bad_artiidx(:)', magna_idx(:)'];
+bad_artists = zeros(size(fmartist,1),1);
+bad_artists(bad_artiidx) = 1;
+
+
+% ---
+% NOTE: as we have two categories of reasons for non-existing tags
+% (exclusion above and failure), there are two different data entries for
+% such: '-1' for "artist not found in last.fm database" and
+%       [] for excluded items.
+% ---
+% one row per artist, even if the last artists are excluded: {names, scores}
+fmartist_tags = cell(size(fmartist,1), 2);
+for i = 1:size(fmartist,1)
+    if bad_artists(i)
+        continue; % excluded artists keep their empty ([]) cells
+    end
+    fprintf('%d percent: %s\n',floor(i*100/size(fmartist,1)),fmartist{i,2});
+    [a, b] = fm_retrieve_artist_tags(fmartist{i,2});
+    fmartist_tags(i,:) = {a, b};
+end
+%%
+% ---
+% now, we assess the frequency of all tags, trying to establish a
+% vocabulary suitable for defining similarity measurements
+% ---
+% ---
+% collect all tags and their popularity for each artist
+% the score matrix is allocated beforehand to save time
+% (2000 columns are reserved for the tag vocabulary)
+% ---
+
+fmartist_annots_names = {};
+fmartist_annots = sparse(size(fmartist_tags,1),2000);
+for i = 1:size(fmartist_tags,1)
+
+    % ---
+    % FIXME: obviously some tags get into the names table but dont get any
+    % score associated.
+    % NOTE(review): a popularity of 0 stores no entry in the sparse
+    % matrix - this may be the reason, TODO confirm
+    % ---
+    for j = 1:numel(fmartist_tags{i,1})
+
+        % the placeholder '-1' is not a tag. an explicit emptiness test is
+        % not needed: this loop body never runs for an empty tag list
+        if strcmp(fmartist_tags{i,1}{j},'-1')
+            continue;
+        end
+
+        % look up the tag in the vocabulary collected so far
+        tagidx = strcellfind(fmartist_annots_names, fmartist_tags{i,1}{j});
+        if isempty(tagidx) || any(tagidx == -1)
+
+            % unknown tag: create a new tag field at the end
+            tagidx = numel(fmartist_annots_names)+1;
+            fmartist_annots_names{tagidx} = fmartist_tags{i,1}{j};
+        end
+
+        % ---
+        % NOTE: the tag popularities are delivered in an INT structure.
+        %  they are converted to double before they are scaled by 1/100
+        %  and saved as tag domination of artist i
+        % ---
+        fmartist_annots(i,tagidx) = double(fmartist_tags{i,2}(j))./100;
+    end
+end