diff core/magnatagatune/get_magnagenre_numeric.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/magnatagatune/get_magnagenre_numeric.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,123 @@
+function [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof]  = get_magnagenre_numeric(clip_info_magnagenres);
+% GET_MAGNAGENRE_NUMERIC  numeric genre representation of the clip database
+%
+% [clip_magnagenres, magnagenres, magnagenre_freqs, magnagenre_childof] = ...
+%       get_magnagenre_numeric(clip_info_magnagenres)
+%
+% Takes the text-based genre annotation in column 3 of
+% clip_info_magnagenres (one comma-separated genre string per clip),
+% assigns every distinct genre a numeric index in order of first
+% appearance, and collects statistics that may reveal an underlying
+% genre hierarchy.
+%
+% input:
+%   clip_info_magnagenres: cell array with one row per clip; only
+%       column 3 is read here
+%
+% output:
+%   clip_magnagenres:   sparse (clips x genres) matrix, entry (c,g) is 1
+%                       if genre g is listed for clip c
+%   magnagenres:        (genres x 1) cell array of the genre tags, row g
+%                       holds the tag with index g
+%   magnagenre_freqs:   entry (g,p) counts how often genre g appears at
+%                       position p of a clip's genre list; at least 4
+%                       columns wide, wider if a clip lists more genres
+%   magnagenre_childof: square (genres x genres) matrix; entry (g,h) is
+%                       the number of times genre g followed the first
+%                       genre h of a clip, divided by the total number
+%                       of occurrences of g
+%
+% The helpers explode and strcellfind are expected on the path.
+
+data = clip_info_magnagenres(:,3);
+num_clips = length(data);
+
+% narrowest position table returned, kept for compatibility with
+% earlier results; the table widens if a clip lists more genres
+min_simul_genres = 4;
+
+% genre tags by index, and per clip the genre indices in listed order
+genres = {};
+magnagen_id = cell(1, num_clips);
+
+% both tables grow with the genre list, so neither the number of
+% genres nor the number of genres per clip is limited
+genre_freqs = [];
+genre_childof = [];
+
+% for each of the genre tags
+for i = 1:num_clips
+
+    % separate genres
+    tmp = explode(',', data{i});
+
+    % find and save corresponding genre indices
+    for j = 1:length(tmp)
+        genidx = strcellfind(genres, tmp(j));
+
+        % add genre to genre list if not existent
+        if genidx < 1
+            genidx = size(genres, 1) + 1;
+            genres = cat(1, genres, tmp(j));
+
+            % assigning past the current bounds zero-pads: this adds
+            % an empty row to the position table ...
+            freq_cols = max(size(genre_freqs, 2), min_simul_genres);
+            genre_freqs(genidx, freq_cols) = 0;
+
+            % ... and one row plus one column to the parent table,
+            % which therefore stays square (genres x genres)
+            genre_childof(genidx, genidx) = 0;
+        end
+
+        % save the index to the numeric genre list of this clip
+        magnagen_id{i} = [magnagen_id{i}, genidx];
+
+        % ---
+        % further genre statistics, perhaps it's a hierarchy
+        % ---
+
+        % widen the position table for unusually long genre lists
+        if j > size(genre_freqs, 2)
+            genre_freqs(genidx, j) = 0;
+        end
+
+        % save frequency by position
+        genre_freqs(genidx, j) = genre_freqs(genidx, j) + 1;
+
+        if j == 1
+
+            % remember parent index: the first genre of the clip
+            paridx = genidx;
+        else
+
+            % count this genre as a child of the clip's first genre
+            genre_childof(genidx, paridx) = genre_childof(genidx, paridx) + 1;
+        end
+    end
+end
+
+% ---
+% this should output quite generic data, to ease
+% comparison with other genre hierarchies.
+%
+% thus, we set the parental relation relative to overall
+% appearance of the child genre
+% ---
+genre_totals = sum(genre_freqs, 2);
+for i = 1:size(genre_childof, 1)
+    genre_childof(i, :) = genre_childof(i, :) ./ genre_totals(i);
+end
+
+% ---
+% reformat genre attribute table as sparse matrix,
+% one row per clip even if a clip has no genre
+% ---
+clip_magnagenres = sparse(num_clips, length(genres));
+for i = 1:num_clips
+    clip_magnagenres(i, magnagen_id{i}) = 1;
+end
+
+magnagenres = genres;
+magnagenre_freqs = genre_freqs;
+magnagenre_childof = genre_childof;