view core/magnatagatune/makro_merge_last.fm_data.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
% makro_merge_last.fm_data

% ---
% make sure we just search once for each artist:
% reduce column 4 of clip_info_proper to its unique entries.
% idx maps every row of clip_info_proper to its entry in artists.
%
% The second output of unique is not needed. It used to be stored in a
% variable called "null", which shadowed MATLAB's built-in null() function
% for the rest of the session; it is now discarded explicitly.
% ---
[artists, unused_pos, idx] = unique(clip_info_proper(:,4));
clear unused_pos;


% ---
% this is where we search for our artists in last.fm
% NOTE(review): the layout of fmartist (one row per artist, column names
% given in fmartist_names) is assumed from its use further below - confirm
% against fm_corresponding_artists.
% ---
[fmartist, fmartist_names] = fm_corresponding_artists(artists);

%%
% ---
% add Id's to artist structure
% ---

% ---
% Manual correction of the last.fm association for the artist 'Mijo'.
% This has to happen BEFORE the merged table is built, so that both
% fmartist and fmartists carry the corrected entry.
% NOTE(review): strcellfind is assumed to return -1 when nothing matches
% (that is how its result is tested elsewhere in this script) - confirm.
% ---
mijo_idx = strcellfind(fmartist(:,1), 'Mijo');
if ~isempty(mijo_idx) && all(mijo_idx > 0)
    [a, b] = fm_retrieve_artist('Mijo');
    fmartist(mijo_idx,2:3) = {a{1}, b{1}};
end

% collect clip ids: one cell per unique artist, holding the annots_ids of
% all clips that belong to this artist. The cell array is (re)created here
% so that stale workspace content from an earlier run cannot leak in.
clip_ids = cell(1, numel(artists));
for i = 1:numel(artists)
    clip_ids{i} = annots_ids(idx == i);
end

% merged table: clip ids in the first column, followed by the fmartist
% columns; the column names are extended accordingly
fmartists = cat(2,clip_ids', fmartist);
fmartist_names = {'clip_ids',fmartist_names{:}};

% NOTE: fmartist is deliberately NOT cleared here, as the remainder of
% this script still reads it (columns 1 and 2).

% ---
% TODO: code manual sorting out of bad associations here, for better
% reproducibility
% ---

% 
% rows whose second column carries the marker string '-1'
bad_artiidx = find(strcmp('-1', fmartist(:,2)));
bad_artiidx = bad_artiidx(:);

% ---
% we remove the magnatune compilation artist,
% as tags are not really descriptive for this
%
% Both index lists are forced into column shape before they are joined;
% joining them side by side fails as soon as more than one artist carries
% the '-1' marker.
% NOTE(review): substrcellfind is assumed to return numeric row indices and
% a non-positive value (e.g. -1) when nothing matches - confirm. Such
% values are dropped as they are not valid subscripts.
% ---
magna_idx = substrcellfind(fmartist(:,1), 'Magna', 1);
magna_idx = magna_idx(:);
magna_idx = magna_idx(magna_idx > 0);
bad_artiidx = [bad_artiidx; magna_idx];

% indicator vector: 1 for every artist that is skipped during tag retrieval
bad_artists = zeros(size(fmartist,1),1);
bad_artists(bad_artiidx) = 1;


% ---
% NOTE: as we have two categories of reasons for non-existing tags
% (exclusion above and failure), there are two different data entries for
% such: '-1' for "artist not found in last.fm database" and
%       [] for excluded items.
% ---

% ---
% retrieve the tags for every artist that has not been excluded.
% fmartist_tags is allocated with one row per artist, so that its row
% indices always match those of fmartist - even if the last artists in the
% list are excluded. Rows of excluded artists stay empty ([]).
% ---
n_artists = size(fmartist,1);
fmartist_tags = cell(n_artists, 2);
for i = 1:n_artists

    if ~bad_artists(i)
        % progress output
        fprintf('%d percent: %s\n',floor(i*100/n_artists),fmartist{i,2});

        % both outputs are stored side by side in row i
        [a, b] = fm_retrieve_artist_tags(fmartist{i,2});
        fmartist_tags(i,:) = {a, b};
    end
end
%%
% ---
% now, we assess the frequency of all tags, trying to establish a
% vocabulary suitable for defining similarity measurements
% ---
% ---
% collect all tags and their scores:
%   fmartist_annots_names  list of all distinct tags seen so far
%   fmartist_annots        sparse matrix, one row per row of fmartist_tags,
%                          one column per entry of fmartist_annots_names
% the matrix is allocated beforehand (2000 columns) to save time
% ---

fmartist_annots = sparse(size(fmartist_tags,1),2000);
fmartist_annots_names = {};
for i = 1:size(fmartist_tags,1)

    % ---
    % FIXME: obviously some tags get into the names table but don't get any
    % score associated.
    % NOTE(review): a score of 0 would not be stored by the sparse matrix,
    % which might explain this - unconfirmed.
    % ---

    % rows without tags have numel == 0, the inner loop is skipped for them
    for j = 1:numel(fmartist_tags{i,1})

        % the marker '-1' is not a tag
        if strcmp(fmartist_tags{i,1}{j},'-1')
            continue;
        end

        % look the tag up in the vocabulary collected so far
        tagidx = strcellfind(fmartist_annots_names, fmartist_tags{i,1}{j});

        % unknown tag (lookup result empty or -1): append it to the
        % vocabulary and use the new position as its column
        if isempty(tagidx) || any(tagidx(:) == -1)
            tagidx = numel(fmartist_annots_names)+1;
            fmartist_annots_names{tagidx} = fmartist_tags{i,1}{j};
        end

        % ---
        % save tag domination.
        % NOTE: the score is converted with double() before it is divided
        %  by 100; the original author notes that the retrieved scores are
        %  stored as INT and have to be converted before any further use
        % ---
        fmartist_annots(i,tagidx) = double(fmartist_tags{i,2}(j))./100;
    end
end