diff Code/Classifiers/kmeans_MFCC_Singing.m @ 4:92ca03a8fa99 tip

Update to ICASSP 2013 benchmark
author Dawn Black
date Wed, 13 Feb 2013 11:02:39 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Code/Classifiers/kmeans_MFCC_Singing.m	Wed Feb 13 11:02:39 2013 +0000
@@ -0,0 +1,161 @@
+function [] = kmeans_MFCC_Singing( inputFileName, outputFileName )
+% KMEANS_MFCC_SINGING  Cluster per-sample MFCC statistics with k-means and log the results.
+%
+%   kmeans_MFCC_Singing( inputFileName, outputFileName ) reads one sample per
+%   line from inputFileName, clusters the numeric values with kmeans using the
+%   'cityblock' and 'sqEuclidean' distances, and hands each result to
+%   processKMeansResults and getConfusionMatrix together with the file ID of
+%   outputFileName (presumably they write their reports there - defined elsewhere).
+%
+%   Two groupings are evaluated:
+%     * emotion - 2 clusters, labels 'P' / 'N' taken from 'pos' / 'neg' in the line
+%     * gender  - 3 clusters, labels 'F' / 'M' / 'T' taken from 'fem' / 'male' /
+%                 'trans' in the line
+%
+%   Inputs:
+%     inputFileName  - text file; each line is a sample name, a space, then the
+%                      space-separated numeric values for that sample
+%     outputFileName - results file, opened in append mode
+%
+%   Original author's note on the data layout: 162 MFCC statistics per sample -
+%   6 metrics (mean, median, var, min, max, range) for each of the 13 frequency
+%   bands, the same 6 metrics for the 1st derivative of each band, then 6 metrics
+%   for the mean of all frequency bands.
+%
+%   Raises an error if either file cannot be opened, if a line has no numeric
+%   values or a different number of values from the first sample, or if the
+%   input holds no samples.
+
+% NOTE(review): hard-coded, machine-specific working directory. Relative file
+% names are resolved against it and it stays changed after the function returns;
+% kept as-is because callers may depend on it.
+cd 'C:\Users\dawn\Dropbox\TestResults'
+inputFileName   % deliberately unterminated: echoes the input name to the console
+
+DEBUG = 1;      % forwarded to processKMeansResults
+titleName = ''; % forwarded to processKMeansResults
+
+masterFileOutputID = fopen( outputFileName, 'a' );
+if( masterFileOutputID == -1 )
+    error( 'kmeans_MFCC_Singing:cannotOpenOutput', ...
+        'Cannot open output file: %s', outputFileName );
+end
+% close the results file however the function exits, including on error
+closeOutput = onCleanup( @() fclose( masterFileOutputID ) ); %#ok<NASGU>
+
+fprintf( masterFileOutputID, '\n RESULTS FILE NAME: %s\n', inputFileName);
+fprintf( masterFileOutputID, '\t' );
+
+% -------------------- get the data from the results file ---------------
+[ data, sampleEmotion, gender ] = readSamples( inputFileName );
+if( isempty( data ) )
+    error( 'kmeans_MFCC_Singing:noSamples', ...
+        'No samples found in input file: %s', inputFileName );
+end
+
+% ------------  apply the k-means classifier  ------------------------
+
+% emotion: we are only trying to identify positive and negative emotions
+fprintf( masterFileOutputID, '\n Emotion grouping \n');
+clusterAndReport( data, 2, sampleEmotion, masterFileOutputID, titleName, DEBUG );
+
+% gender: female, male and trans
+fprintf( masterFileOutputID, '\n Gender grouping \n');
+clusterAndReport( data, 3, gender, masterFileOutputID, titleName, DEBUG );
+
+fprintf( masterFileOutputID, '\n' );
+
+end
+
+
+function [ data, sampleEmotion, gender ] = readSamples( inputFileName )
+% READSAMPLES  Parse the input file into a data matrix and per-sample labels.
+%   data has one row per non-blank line; sampleEmotion and gender are char
+%   vectors with one label per row of data ('U' / '?' when no label is found).
+
+inputFileID = fopen( inputFileName );
+if( inputFileID == -1 )
+    error( 'kmeans_MFCC_Singing:cannotOpenInput', ...
+        'Cannot open input file: %s', inputFileName );
+end
+closeInput = onCleanup( @() fclose( inputFileID ) ); %#ok<NASGU>
+
+data = [];
+sampleEmotion = '';
+gender = '';
+fileCount = 0;
+while( ~feof( inputFileID ) )
+
+    thestr = fgetl( inputFileID );
+    % fgetl returns -1 at end-of-file; blank lines hold no sample
+    if( ~ischar( thestr ) || isempty( strtrim( thestr ) ) )
+        continue;
+    end
+    fileCount = fileCount + 1;
+
+    % determine whether we have a positive or negative sample
+    if( ~isempty( strfind( thestr, 'pos' ) ) )
+        sampleEmotion( fileCount ) = 'P';
+    elseif( ~isempty( strfind( thestr, 'neg' ) ) )
+        sampleEmotion( fileCount ) = 'N';
+    else
+        % unlabelled sample: flag it and wait for a key press, as before
+        sampleEmotion( fileCount ) = 'U';
+        disp('EEEK!');
+        pause;
+    end
+
+    % determine whether we have a male, female or trans sample.
+    % 'fem' must be tested before 'male' because 'female' contains 'male'.
+    if( ~isempty( strfind( thestr, 'fem' ) ) )
+        gender( fileCount ) = 'F';
+    elseif( ~isempty( strfind( thestr, 'male' ) ) )
+        gender( fileCount ) = 'M';
+    elseif( ~isempty( strfind( thestr, 'trans' ) ) )
+        gender( fileCount ) = 'T';
+    else
+        gender( fileCount ) = '?';
+        disp('EEEK!');
+        pause;
+    end
+
+    % chop off the file name: everything from the first space on is numeric
+    spaces = strfind( thestr, ' ' );
+    if( isempty( spaces ) )
+        error( 'kmeans_MFCC_Singing:badLine', ...
+            'Sample %d has no values after the file name: %s', fileCount, thestr );
+    end
+    vars = sscanf( thestr( spaces(1) : end ), '%f', inf );
+    if( isempty( vars ) || ( fileCount > 1 && numel( vars ) ~= size( data, 2 ) ) )
+        error( 'kmeans_MFCC_Singing:badLine', ...
+            'Sample %d has %d values, which does not match the data so far: %s', ...
+            fileCount, numel( vars ), thestr );
+    end
+    data( fileCount, : ) = vars.';
+
+end
+
+end
+
+
+function clusterAndReport( data, noOfClusters, labels, outputID, titleName, DEBUG )
+% CLUSTERANDREPORT  Run kmeans with each distance measure and report the result.
+%   For 'cityblock' then 'sqEuclidean': writes the distance name to outputID,
+%   clusters data into noOfClusters groups, passes the cluster indices and labels
+%   to processKMeansResults, and passes its outputs on to getConfusionMatrix.
+
+distanceNames = { 'cityblock', 'sqEuclidean' };
+for d = 1 : numel( distanceNames )
+    distanceName = distanceNames{ d };
+    fprintf( outputID, '%s \n', distanceName );
+
+    idx = kmeans( data, noOfClusters, 'Replicates', 100, ...
+        'start', 'sample', 'Distance', distanceName );
+
+    [ groupStats, groupNames ] = processKMeansResults( distanceName, idx, labels, outputID, titleName, DEBUG );
+    confusionMatrix = getConfusionMatrix( groupStats, groupNames, outputID ); %#ok<NASGU>
+end
+
+end
+
+