view core/magnatagatune/makro_import_magnatagatune.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
% makro_import_magnatagatune
%
% Script: reads the three MagnaTagATune csv tables from the current
% directory and derives the tag-annotation and comparison matrices.
% All results are left in the calling workspace.

% raw tables as cell arrays, header row included
% (csv2cell / strcell2matrix are project helpers defined elsewhere)
annotations_final = csv2cell('annotations_final.csv','fromfile');
clip_info_final = csv2cell('clip_info_final.csv','fromfile');
comparisons_final = csv2cell('comparisons_final.csv','fromfile');

% tag annotations
% NOTE(review): the two offset vectors presumably trim leading / trailing
% rows and columns (header row, trailing file name column) before the
% numeric conversion - confirm against strcell2matrix
annots = strcell2matrix(annotations_final,[1 0],[0 1]);
% column headers of the annotation table
annots_descripts = annotations_final(1,:);
% first and last table column side by side, one row per table row
% (the header row is still included here)
annots_filenames = [annotations_final(:,1), annotations_final(:,end)];

% comparison measures
% NOTE(review): offsets presumably drop the last three columns - confirm
comp = strcell2matrix(comparisons_final,[0 0], [0 3]);
% column headers of the comparison table
comp_descripts = comparisons_final(1,:);

%%
% -----------------------------------------------------------
% ---
% part two: extracting and associating Genre
%
% first, pick the rows of clip_info_final that belong to clips which also
% appear in the annotation table
% ---

% ids taken from the first column of clip_info_final
% (strcell2matrix is a project helper; presumably it converts the id
% strings to numbers - confirm)
info_ids = strcell2matrix(clip_info_final(:,1)');

% ---
% CAUTION: the first entry stems from the table header and is dropped, so
% positions in info_ids are shifted by one against rows of clip_info_final
% ---
info_ids = info_ids(2:end);

% ids taken from the first column of annots_filenames, header dropped too
file_ids = strcell2matrix(annots_filenames(:,1)');
file_ids = file_ids(2:end);

% ids present in both tables; ib indexes into info_ids
[c, ia, ib] = intersect(file_ids, info_ids);

% keep the header row separately and copy the matching data rows
% (+1 undoes the header offset noted above)
clip_info_proper_names = clip_info_final(1,:);
clip_info_proper = clip_info_final(1 + ib,:);

% strip the stray first character from every entry, except in the first
% column, which is left as it is
clip_info_proper(:,2:end) = cellfun(@(entry) entry(2:end), ...
    clip_info_proper(:,2:end), 'UniformOutput', false);

% ---
% GENRE extraction
%
% Load the song table and, for every clip in clip_info_proper, look up the
% matching song row so that its columns rel_cols can be stored next to the
% clip id.
% (csv2cell, strcellfind and substrcellfind are project helpers defined
% elsewhere; the code below relies on them returning a value < 1, e.g. -1,
% when nothing is found.)
% ---
tmp = csv2cell('song_info.csv','fromfile');

%%
% bookkeeping for clips that could not be matched by url:
%   not_found      - rows of [clip_id, stage]; stage names the fallback
%                    that finally matched (1 = album, 2 = artist,
%                    3 = artist as substring) or 0 if none did
%   man_album_name - one row per distinct unmatched album:
%                    {album, tmp col 3, tmp col 1, tmp col 12}
not_found = [];
man_album_name = {};

rel_cols = [6,7];
% make header
clip_info_extra_names = [{'clip_id'}, tmp(1,rel_cols)];
clip_info_extra = {};

for i = 1:size(clip_info_proper,1)
    % ---
    % search by url (column 9 in both tables)
    % ---
    s = char(clip_info_proper{i,9});
    idx = strcellfind(tmp(:,9),s);
    
    % we'll have to loosen the search
    if idx < 1
        warning('! %s, album %s, artist %s!',clip_info_proper{i,1},...
            clip_info_proper{i,5},clip_info_proper{i,4});
        
        % make note; stage 1 = trying the album next.
        % str2double never evaluates the text and yields NaN instead of []
        % for unparsable ids, so the row always has two columns
        not_found = cat(1,not_found,[str2double(clip_info_proper{i,1}), 1]);
        
        % ---
        % ok, no problem, lets look for the album!
        % ---
        s = char(clip_info_proper{i,5});
        idx = strcellfind(tmp(:,3),s);
    
        if idx < 1
            
            not_found(end,2) = 2;
            % ---
            % search for artist
            % ---
            s = char(clip_info_proper{i,4});
            idx = strcellfind(tmp(:,1),s);
        end

        if idx < 1
            
            not_found(end,2) = 3;
            % ---
            % this is the last try to get hold of such artists
            % they may be noted as a trackname or description substring in a compilation
            % ---
            s = char(clip_info_proper{i,4});
            idx = substrcellfind(tmp(:,2),s);
        end
        
        if idx < 1 
            
            warning(' - %s, %s \n',clip_info_proper{i,1},clip_info_proper{i,4});
            % three empty fields, so that this row of man_album_name has
            % the same width as the rows written for matched albums;
            % otherwise the vertical concatenation below fails as soon as
            % matched and unmatched albums are mixed
            newinfo = {'', '', ''};
            
            % reset suspected success
            not_found(end,2) = 0;
        else
            
            warning(' + associated album %s, artist %s \n',tmp{idx,3},tmp{idx,1});
            
            % get relevant data
            newinfo = tmp(idx,[3 1 12]);
            
            % ---
            % save genre
            % ---
            clip_info_extra = cat(1,clip_info_extra,[clip_info_proper(i,1),tmp(idx,rel_cols)]);
        end
        
        % report new location/info of album, once per album name
        s = char(clip_info_proper{i,5});
        if isempty(man_album_name) || strcellfind(man_album_name(:,1),s) == -1
            man_album_name = cat(1, man_album_name, [clip_info_proper(i,5),newinfo]);
        end
        
    else
        % ---
        % save genre
        % ---
        clip_info_extra = cat(1,clip_info_extra,[clip_info_proper(i,1),tmp(idx,rel_cols)]);
    end    
end

% remove temporaries from the workspace
clear('newinfo','i','j','idx','s','ia','ib','rel_cols')