annotate Code/Classifiers/kmeans_MFCC_Singing.m @ 4:92ca03a8fa99 tip

Update to ICASSP 2013 benchmark
author Dawn Black
date Wed, 13 Feb 2013 11:02:39 +0000
parents
children
rev   line source
function [] = kmeans_MFCC_Singing( inputFileName, outputFileName )
%KMEANS_MFCC_SINGING  Cluster MFCC statistics with k-means and log the results.
%
%   kmeans_MFCC_Singing( inputFileName, outputFileName )
%
%   inputFileName  - text file with one sample per line: a file name followed
%                    by a space and then whitespace-separated numeric values.
%                    The emotion label is taken from the substrings 'pos' /
%                    'neg' and the gender label from 'fem' / 'male' / 'trans'
%                    found anywhere in the line.
%   outputFileName - results file; it is opened in append mode.
%
%   The samples are clustered four times (2 clusters for emotion and
%   3 clusters for gender, each with the 'cityblock' and 'sqEuclidean'
%   distances). Each clustering is handed to processKMeansResults and
%   getConfusionMatrix, which receive the results file identifier.
%
%   Raises an error if either file cannot be opened, if the input contains
%   no samples, or if a line carries a different number of values than the
%   first sample.

    DEBUG = 1;

    % Results are expected to live in this folder. Only change directory when
    % it exists so the function remains usable on other machines.
    resultsDir = 'C:\Users\dawn\Dropbox\TestResults';
    if exist( resultsDir, 'dir' ) == 7
        cd( resultsDir );
    else
        warning( 'kmeans_MFCC_Singing:noResultsDir', ...
                 'Results folder "%s" not found; staying in "%s".', resultsDir, pwd );
    end

    if DEBUG
        disp( inputFileName );
    end

    % output results file (appended to, so successive runs accumulate)
    masterFileOutputID = fopen( outputFileName, 'a' );
    if masterFileOutputID == -1
        error( 'kmeans_MFCC_Singing:cannotOpenOutput', ...
               'Cannot open results file "%s" for appending.', outputFileName );
    end
    % Close the results file however the function exits (normally or on error).
    closeMasterFile = onCleanup( @() fclose( masterFileOutputID ) ); %#ok<NASGU>

    fprintf( masterFileOutputID, '\n RESULTS FILE NAME: %s\n', inputFileName );

    % no title is built any more, but the helper functions still take one
    titleName = '';
    fprintf( masterFileOutputID, '\t' );

    % -------------------- get the data from the results file ---------------
    [ data, sampleEmotion, gender ] = readLabelledSamples( inputFileName );

    % Original author's note on the data layout: 162 MFCC statistics per sample,
    %   6 metrics (mean, median, var, min, max, range) for each of the 13 bands,
    %   the same 6 metrics for the 1st derivative of each of the 13 bands,
    %   then 6 metrics for the mean of all frequency bands.

    % ------------ apply the k-means classifier ------------------------

    % display results grouped by emotion:
    % we are only trying to identify positive and negative emotions
    noOfClusters = 2;
    fprintf( masterFileOutputID, '\n Emotion grouping \n' );
    clusterAndReport( data, noOfClusters, 'cityblock',   sampleEmotion, masterFileOutputID, titleName, DEBUG );
    clusterAndReport( data, noOfClusters, 'sqEuclidean', sampleEmotion, masterFileOutputID, titleName, DEBUG );

    % display results grouped by gender (female, male, trans)
    noOfClusters = 3;
    fprintf( masterFileOutputID, '\n Gender grouping \n' );
    clusterAndReport( data, noOfClusters, 'cityblock',   gender, masterFileOutputID, titleName, DEBUG );
    clusterAndReport( data, noOfClusters, 'sqEuclidean', gender, masterFileOutputID, titleName, DEBUG );

    fprintf( masterFileOutputID, '\n' );
    % masterFileOutputID is closed by closeMasterFile when the function returns

end

function [ data, sampleEmotion, gender ] = readLabelledSamples( inputFileName )
% Read every sample line of inputFileName.
%   data          - one row of numeric values per sample
%   sampleEmotion - char per sample: 'P', 'N' or 'U' (unrecognised)
%   gender        - char per sample: 'F', 'M', 'T' or '?' (unrecognised)

    inputFileID = fopen( inputFileName );
    if inputFileID == -1
        error( 'kmeans_MFCC_Singing:cannotOpenInput', ...
               'Cannot open input file "%s".', inputFileName );
    end
    closeInputFile = onCleanup( @() fclose( inputFileID ) ); %#ok<NASGU>

    data          = [];
    sampleEmotion = '';
    gender        = '';
    fileCount     = 0;   % number of samples accepted so far
    lineNumber    = 0;   % physical line in the file, used in messages

    while ~feof( inputFileID )

        thestr     = fgetl( inputFileID );
        lineNumber = lineNumber + 1;

        % fgetl returns -1 when only the end-of-file marker was left, and a
        % blank line carries no sample: skip both instead of failing on them.
        if ~ischar( thestr )
            continue;
        end
        thestr = strtrim( thestr );
        if isempty( thestr )
            continue;
        end

        % chop off the file name (everything before the first space) and
        % read the remaining numbers
        spaces = strfind( thestr, ' ' );
        if isempty( spaces )
            vars = [];
        else
            vars = sscanf( thestr( spaces(1) : end ), '%f', inf );
        end

        if isempty( vars )
            error( 'kmeans_MFCC_Singing:noValues', ...
                   'Line %d of "%s" has no numeric values after the file name.', ...
                   lineNumber, inputFileName );
        end
        if fileCount > 0 && numel( vars ) ~= size( data, 2 )
            error( 'kmeans_MFCC_Singing:inconsistentRow', ...
                   'Line %d of "%s" has %d values but earlier samples have %d.', ...
                   lineNumber, inputFileName, numel( vars ), size( data, 2 ) );
        end

        fileCount = fileCount + 1;
        data( fileCount, : ) = vars.'; %#ok<AGROW>

        % determine whether we have a positive or negative sample
        sampleEmotion( fileCount ) = keywordLabel( thestr, { 'pos', 'neg' }, 'PN', ...
                                                   'U', 'emotion', lineNumber ); %#ok<AGROW>

        % determine whether we have a male, female or trans sample.
        % 'fem' must be tested before 'male' because "female" contains "male".
        gender( fileCount ) = keywordLabel( thestr, { 'fem', 'male', 'trans' }, 'FMT', ...
                                            '?', 'gender', lineNumber ); %#ok<AGROW>
    end

    if fileCount == 0
        error( 'kmeans_MFCC_Singing:noSamples', ...
               'Input file "%s" contains no samples.', inputFileName );
    end

end

function label = keywordLabel( thestr, keywords, codes, unknownCode, description, lineNumber )
% Return codes(k) for the first keywords{k} that occurs in thestr.
% When none occurs, warn (rather than block on a key press, which stalls
% unattended runs) and return unknownCode so the sample is still kept.

    label = unknownCode;
    for k = 1 : numel( keywords )
        if ~isempty( strfind( thestr, keywords{k} ) ) %#ok<STREMP>
            label = codes( k );
            return;
        end
    end

    warning( 'kmeans_MFCC_Singing:unknownLabel', ...
             'Line %d: could not determine %s; labelled ''%s''.', ...
             lineNumber, description, unknownCode );

end

function clusterAndReport( data, noOfClusters, distanceName, labels, outputID, titleName, DEBUG )
% Run k-means once with the given distance, then write the distance name
% and the grouping / confusion-matrix reports to outputID.

    NO_OF_REPLICATES = 100;

    idx = kmeans( data, noOfClusters, 'Replicates', NO_OF_REPLICATES, ...
                  'start', 'sample', 'Distance', distanceName );

    fprintf( outputID, '%s \n', distanceName );
    [ groupStats, groupNames ] = processKMeansResults( distanceName, idx, labels, ...
                                                       outputID, titleName, DEBUG );
    % the returned matrix is not used here; getConfusionMatrix is given outputID
    confusionMatrix = getConfusionMatrix( groupStats, groupNames, outputID ); %#ok<NASGU>

end
Dawn@4 153
Dawn@4 154