view core/magnatagatune/get_magnagenre_numeric.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
function [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof]  = get_magnagenre_numeric(clip_info_magnagenres)
% GET_MAGNAGENRE_NUMERIC  numeric genre representation of the clip database
%
% [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof] = ...
%     get_magnagenre_numeric(clip_info_magnagenres)
%
% Re-imports the text-based genre tags of the magnatune clips and tries to
% determine an underlying (hierarchical) structure.
%
% Input:
%   clip_info_magnagenres: cell array, one row per clip. Only column 3 is
%       read; each entry is split at ',' into a list of genre names.
%
% Output:
%   clip_magnagenres:   sparse (#clips x #genres) matrix, entry (i,x) is 1
%                       if clip i is tagged with genre x
%   magnagenres:        (#genres x 1) cell array of genre names, in order
%                       of first appearance
%   magnagenre_freqs:   (#genres x #positions) counts, entry (x,y) is the
%                       number of times genre x appeared at position y of
%                       a tag list. At least 4 columns wide.
%   magnagenre_childof: (#genres x #genres) matrix, entry (x,y) is the
%                       fraction of all occurrences of genre x in which it
%                       followed genre y, where y is the FIRST genre of
%                       the same tag list
%
% Requires the project helpers EXPLODE and STRCELLFIND.

data = clip_info_magnagenres(:,3);
num_clips = numel(data);

% ---
% genre and genre position-frequency list
% ---

% one entry per clip, so clips without any genre still get a (empty) row
magnagen_id = cell(num_clips, 1);

genres = {};

% initial (minimum) table widths; both tables grow on demand
max_genres = 50;
max_simul_genres = 4;
genre_freqs = [];
genre_childof = [];

% for each of the genre tags
for i = 1:num_clips
    
    % separate genres
    % NOTE(review): explode is assumed to return a cell array of strings
    tmp = explode(',', data{i});

    % find and save corresponding genre indices
    for j = 1:length(tmp)
        
        % NOTE(review): strcellfind is assumed to return a value < 1
        % if the genre is not in the list yet - confirm
        genidx = strcellfind(genres,tmp(j));
        
        % add genre to genre list if not existent
        if genidx < 1
            genidx = size(genres, 1) + 1;
            genres = cat(1, genres, tmp(j));
            
            % the tables may already have grown beyond their initial
            % width (more than max_simul_genres positions or more than
            % max_genres parents), so the new zero row has to match the
            % current width instead of the fixed constants
            genre_freqs(genidx,:) = ...
                zeros(1, max(max_simul_genres, size(genre_freqs, 2)));
            genre_childof(genidx,:) = ...
                zeros(1, max(max_genres, size(genre_childof, 2)));
        end

        % ---
        % here, we save the index to a new genre structure
        % ---
        if  j == 1
            magnagen_id{i} = genidx;
        else
            magnagen_id{i} = [magnagen_id{i}, genidx];
        end
        
        % ---
        % further genre statistics, perhaps it's a hierarchy
        % ---
             
        % save frequency by position
        genre_freqs(genidx, j) = genre_freqs(genidx, j) + 1;
        
        % save parent genre if applicable
        if j == 1
            
            % the first genre of a tag list is the parent of all
            % following genres in that list
            paridx = genidx;
        else
            
            % count this genre as child of the list's first genre
            genre_childof(genidx, paridx) = genre_childof(genidx, paridx) + 1;
        end
    end
end

% ---
% this should output quite generic data, to ease 
% comparison with other genre hierarchies.
% 
% thus, we set the parental relation relative to overall
% appearance of the child genre
% ---

% make the child/parent table exactly (#genres x #genres):
% parent indices never exceed the number of genres, so surplus
% columns are always zero and can be cut; missing columns are padded.
num_genres = size(genres, 1);
if size(genre_childof, 2) < num_genres
    genre_childof(:, num_genres) = 0;
end
genre_childof = genre_childof(:, 1:num_genres);

% make values relative to total occurrence of child
for i = 1:size(genre_childof,1)
    genre_childof(i, :) = genre_childof(i, :) ./ sum(genre_freqs(i,:));
end

% ---
% reformat genre attribute table as sparse matrix
% ---
clip_magnagenres = sparse(num_clips, num_genres);
for i = 1:num_clips
    clip_magnagenres(i,magnagen_id{i}) = 1;
end

magnagenres = genres;
magnagenre_freqs = genre_freqs;
magnagenre_childof = genre_childof;