function [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof] = get_magnagenre_numeric(clip_info_magnagenres)
% [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof] = ...
%     get_magnagenre_numeric(clip_info_magnagenres)
%
% Uses the clip_info magnagenres_final db to get a numerical genre
% representation of the database.
%
% Re-imports the text-based genre representation of magnatune and collects
% position / succession statistics that may reveal an underlying genre
% hierarchy.
%
% Input:
%   clip_info_magnagenres: cell array with one row per clip; column 3 holds
%       the comma-separated genre tag string of that clip.
%
% Output:
%   clip_magnagenres:   sparse (clips x genres) matrix; entry (c, g) is 1 if
%                       clip c carries genre g.
%   magnagenres:        column cell array of genre names, in order of first
%                       appearance.
%   magnagenre_freqs:   (genres x positions) matrix; entry (x, y) counts how
%                       often genre x appeared at tag position y. It has at
%                       least 4 columns and grows if a clip has more tags.
%   magnagenre_childof: (genres x genres) matrix; entry (x, y) is the number
%                       of times genre x followed first-position genre y,
%                       divided by the total number of occurrences of x.
%
% NOTE(review): genre names are compared exactly as returned by explode();
% presumably the tag strings contain no whitespace around the commas -
% verify against the database.

data = clip_info_magnagenres(:, 3);
num_clips = numel(data);

% minimum number of tag positions tracked in genre_freqs
max_simul_genres = 4;

% ---
% genre and genre position frequency list:
% ---

% one entry per clip, so clips without any tag still keep their row
magnagen_id = cell(1, num_clips);

genres = {};
genre_freqs = zeros(0, max_simul_genres);
genre_childof = zeros(0, 0);

% for each of the genre tag strings
for i = 1:num_clips

    % separate genres
    tmp = explode(',', data{i});

    % find and save corresponding genre indices
    for j = 1:length(tmp)
        % strcellfind is expected to return a value < 1 if the genre
        % is not in the list yet
        genidx = strcellfind(genres, tmp(j));

        % add genre to genre list if not existent
        if genidx < 1
            genidx = size(genres, 1) + 1;
            genres = cat(1, genres, tmp(j));

            % new zero row, as wide as the table currently is
            genre_freqs(genidx, :) = 0;

            % grow the parent table by one row and one column (zero
            % padded), so it stays square for any number of genres
            genre_childof(genidx, genidx) = 0;
        end

        % ---
        % here, we save the index to a new genre structure
        % ---
        magnagen_id{i} = [magnagen_id{i}, genidx];

        % ---
        % further genre statistics, perhaps it is a hierarchy
        % ---

        % make room for tag positions beyond the current table width
        if j > size(genre_freqs, 2)
            genre_freqs(genidx, j) = 0;
        end

        % save frequency by position
        genre_freqs(genidx, j) = genre_freqs(genidx, j) + 1;

        % save parent genre if applicable
        if j == 1

            % remember parent index: the first genre of this clip
            paridx = genidx;
        else

            % count index for this parent
            genre_childof(genidx, paridx) = genre_childof(genidx, paridx) + 1;
        end
    end
end

% ---
% this should output quite generic data, to ease
% comparison with other genre hierarchies.
%
% thus, we set the parental relation relative to overall
% appearance of the child genre
% ---

% make values relative to total occurrence of child; every listed genre
% occurred at least once, so the divisor is never zero
for i = 1:size(genre_childof, 1)
    genre_childof(i, :) = genre_childof(i, :) ./ sum(genre_freqs(i, :));
end

% ---
% reformat genre attribute table as sparse matrix
% ---
clip_magnagenres = sparse(num_clips, size(genres, 1));
for i = 1:num_clips
    % an empty index list (clip without tags) leaves the row all zero
    clip_magnagenres(i, magnagen_id{i}) = 1;
end

magnagenres = genres;
magnagenre_freqs = genre_freqs;
magnagenre_childof = genre_childof;