function [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof] = get_magnagenre_numeric(clip_info_magnagenres)
% [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof] = ...
%     get_magnagenre_numeric(clip_info_magnagenres)
%
% Uses the clip_info magnagenres_final db to get a numerical genre
% representation of the database.
%
% Re-imports the text-based genre representation of Magnatune and tries to
% determine an underlying structure (e.g. a hierarchy) from the order in
% which the genre tags are listed for each clip.
%
% Input:
%   clip_info_magnagenres: cell array with one row per clip; column 3 is
%       read as the comma-separated genre tag string of that clip.
%
% Output:
%   clip_magnagenres:   sparse (clips x genres) matrix, 1 where the clip
%                       carries the genre.
%   magnagenres:        column cell array of the distinct genre tags, in
%                       order of first appearance. Row k names genre index k.
%   magnagenre_freqs:   (genres x positions) counts: how often genre x was
%                       listed at position y of a clip's tag list. At least
%                       4 columns wide; wider if some clip has more tags.
%   magnagenre_childof: (genres x genres) matrix: fraction of all
%                       occurrences of genre x in which x was listed after
%                       the first-listed ("parent") genre y.
%
% NOTE(review): relies on the project helpers explode() and strcellfind().
% This code assumes explode() returns the tags as a cell array and that
% strcellfind() returns a value < 1 when the tag is not found - confirm
% against the helpers.

data = clip_info_magnagenres(:,3);
num_clips = length(data);

% minimum number of tag positions tracked in the frequency table; the
% table is widened on demand if a clip lists more tags than this
min_simul_genres = 4;

% one entry per clip, so that clips without any tag still get a row in
% the attribute matrix
magnagen_id = cell(1, num_clips);

genres = {};
genre_freqs = [];
genre_childof = [];

% ---
% genre list, genre position frequencies and parent statistics
% ---

% for each of the genre tag strings
for i = 1:num_clips

    % separate genres
    tmp = explode(',', data{i});

    % find and save corresponding genre indices
    for j = 1:length(tmp)
        genidx = strcellfind(genres, tmp(j));

        % add genre to genre list if not existent
        if genidx < 1
            genidx = size(genres, 1) + 1;
            genres = cat(1, genres, tmp(j));

            % Grow both tables by assigning their new corner element;
            % MATLAB zero-fills the new row (and column). This keeps
            % genre_childof square (genres x genres) without a fixed
            % upper bound on the number of genres.
            genre_freqs(genidx, max(size(genre_freqs, 2), min_simul_genres)) = 0;
            genre_childof(genidx, genidx) = 0;
        end

        % ---
        % here, we save the index to a new genre structure
        % ---
        magnagen_id{i} = [magnagen_id{i}, genidx];

        % ---
        % further genre statistics, perhaps it's a hierarchy
        % ---

        % widen the position table if this clip has more tags than any
        % clip seen so far (reading a missing column would be an error)
        if j > size(genre_freqs, 2)
            genre_freqs(genidx, j) = 0;
        end

        % save frequency by position
        genre_freqs(genidx, j) = genre_freqs(genidx, j) + 1;

        % save parent genre if applicable
        if j == 1

            % the first-listed genre acts as parent of all following ones
            paridx = genidx;
        else

            % count this occurrence for the parent
            genre_childof(genidx, paridx) = genre_childof(genidx, paridx) + 1;
        end
    end
end

% ---
% this should output quite generic data, to ease
% comparison with other genre hierarchies.
%
% thus, we set the parental relation relative to the overall
% appearance of the child genre
% ---

% total number of occurrences per genre; every listed genre has been
% counted at least once, so the division below never divides by zero
genre_totals = sum(genre_freqs, 2);
for i = 1:size(genre_childof, 1)
    genre_childof(i, :) = genre_childof(i, :) ./ genre_totals(i);
end

% ---
% reformat genre attribute table as sparse matrix
% ---
clip_magnagenres = sparse(num_clips, size(genres, 1));
for i = 1:num_clips
    clip_magnagenres(i, magnagen_id{i}) = 1;
end

magnagenres = genres;
magnagenre_freqs = genre_freqs;
magnagenre_childof = genre_childof;