Mercurial > hg > emotion-detection-top-level
changeset 2:5fd388fdd6ef
initial commit - this file allows the programmer to select which of the PRAAT generated metrics the user wishes to use for classification, and then applies the k-means classifier.
author | Dawn Black <dawn.black@eecs.qmul.ac.uk> |
---|---|
date | Mon, 10 Sep 2012 09:18:15 +0100 |
parents | a3d62264030c |
children | e1cfa7765647 |
files | Code/Classifiers/kmeans_PRAAT_Singing.m |
diffstat | 1 files changed, 573 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
function [] = kmeans_Singing( varargin )
% KMEANS_SINGING  Cluster PRAAT singing-voice statistics with k-means.
%
%   kmeans_Singing( 'lld1', 'lld2', ... ) reads the PRAAT statistics file
%   'singingPRAATStats.txt', keeps only the low-level descriptors (LLDs)
%   named in the arguments, clusters the samples with k-means and writes the
%   cluster statistics to text files.
%
%   NOTE: the changeset stores this code as kmeans_PRAAT_Singing.m; MATLAB
%   dispatches on the file name, so the function name is kept unchanged here
%   only to preserve the existing interface.
%
%   ACCEPTED LLD NAMES (case-insensitive; one optional leading '_' is
%   ignored so that both 'jitter_ddp' and '_jitter_ddp' work):
%
%     ---- PRAAT JITTER MEASUREMENTS ----
%       'jitter_ddp'  'jitter_local'  'jitter_ppq5'  'jitter_rap'
%     ---- PRAAT SHIMMER MEASUREMENTS ----
%       'shimmer_local'  'shimmer_dda'  'shimmer_apq3'  'shimmer_apq5'
%       'shimmer_apq11'
%     ---- PRAAT FORMANT MEASUREMENTS ----
%       'formant_Burg'  'formant_all'  'formant_robust'
%
%   Unrecognised names produce a warning and are otherwise ignored (they are
%   still used in the output file names, as before).
%
%   INPUT FILE LAYOUT (as parsed by this function)
%     The first 11 lines are treated as a header and skipped. Every other
%     non-blank line is expected to look like
%
%       <audio file name> <jitter/shimmer values>
%           maxNoOfFormants <nBF> <BURG formant values>
%           maxNoOfFormants <nAF> <ALL formant values>
%           maxNoOfFormants <nRF> <ROBUST formant values>
%
%     (all on one line, space separated). The jitter/shimmer values are, in
%     order: jitter ddp, jitter local, jitter ppq5, jitter rap,
%     shimmer local, shimmer dda, shimmer apq3, shimmer apq5, shimmer apq11.
%
%     Each formant section holds the number of formants n, then 24 values
%     per formant, then 12 summary values:
%
%       per formant (24): mean, variance, min, max of each of
%           frequency, frequency derivative, frequency 2nd derivative,
%           bandwidth, bandwidth derivative, bandwidth 2nd derivative
%       summary (12): mean, variance, min, max of the mean over all
%           formants of frequency, frequency derivative and
%           frequency 2nd derivative
%
%     The file name must contain 'pos' or 'neg' (emotion label) and
%     'fem', 'male' or 'trans' (gender label).
%
%   OUTPUT FILES (relative to the hard-coded working directory)
%     kmeans_Singing_All.txt                        master log, appended to
%     kmeans_Results_<names>.txt                    percentage per label/cluster group
%     individualResults/kmeans_Results_<names>.txt  label and cluster of every sample
%   where <names> is every argument followed by '_'.

% ---------------- validate and interpret the arguments -----------------
if( ~iscellstr( varargin ) )
    error( 'kmeans_Singing:badArgument', 'All LLD names must be strings.' );
end

% position in this list == position of the value after the file name
scalarNames = { 'jitter_ddp', 'jitter_local', 'jitter_ppq5', 'jitter_rap', ...
                'shimmer_local', 'shimmer_dda', 'shimmer_apq3', ...
                'shimmer_apq5', 'shimmer_apq11' };

noOfArguments     = length( varargin );
columnIndices     = [];
getBURGFormants   = false;
getAllFormants    = false;
getRobustFormants = false;

for i = 1 : noOfArguments
    key = lower( regexprep( varargin{i}, '^_', '' ) );
    scalarIdx = find( strcmp( key, scalarNames ), 1 );
    if( ~isempty( scalarIdx ) )
        columnIndices( end + 1 ) = scalarIdx; %#ok<AGROW>
    elseif( strcmp( key, 'formant_burg' ) )
        getBURGFormants = true;
    elseif( strcmp( key, 'formant_all' ) )
        getAllFormants = true;
    elseif( strcmp( key, 'formant_robust' ) )
        getRobustFormants = true;
    else
        warning( 'kmeans_Singing:unknownLLD', ...
                 'Unrecognised LLD name "%s" ignored.', varargin{i} );
    end
end

wantedFormantSections = [ getBURGFormants, getAllFormants, getRobustFormants ];
if( isempty( columnIndices ) && ~any( wantedFormantSections ) )
    error( 'kmeans_Singing:nothingSelected', ...
           'No recognised LLD names were given, so there is nothing to classify.' );
end

% ----------------------------- open files -------------------------------
% NOTE(review): hard-coded, machine-specific working directory.
cd 'C:\Users\dawn\Dropbox\TestResults'

% input results file name
inputFileName = 'singingPRAATStats.txt';

% output results file (appended to on every run)
masterFileOutputID = openOrFail( 'kmeans_Singing_All.txt', 'a' );
closeMaster = onCleanup( @() fclose( masterFileOutputID ) ); %#ok<NASGU>

fprintf( masterFileOutputID, '\n RESULTS FILE NAME: %s\n', inputFileName );

inputFileID = openOrFail( inputFileName, 'r' );
closeInput = onCleanup( @() fclose( inputFileID ) ); %#ok<NASGU>

titleName = '';
for i = 1 : noOfArguments
    titleName = [ titleName varargin{i} '_' ]; %#ok<AGROW>
    fprintf( masterFileOutputID, '%s_', varargin{i} );
end
fprintf( masterFileOutputID, '\t' );

outputFileName  = [ 'individualResults/kmeans_Results_' titleName '.txt' ];
resultsFileName = [ 'kmeans_Results_' titleName '.txt' ];

% fopen cannot create missing directories
if( ~exist( 'individualResults', 'dir' ) )
    mkdir( 'individualResults' );
end

fileOutputID = openOrFail( outputFileName, 'w' );
closeIndividual = onCleanup( @() fclose( fileOutputID ) ); %#ok<NASGU>

fileKMeansOutputID = openOrFail( resultsFileName, 'w' );
closeResults = onCleanup( @() fclose( fileKMeansOutputID ) ); %#ok<NASGU>

% -------------------- get the data from the results file ---------------
noOfHeaderLines = 11;
lineCount       = 0;    % 1-based number of the line just read
fileCount       = 0;    % number of samples read so far
data            = [];
sampleEmotion   = '';
gender          = '';

while( ~feof( inputFileID ) )

    thestr = fgetl( inputFileID );
    if( ~ischar( thestr ) )
        break;          % fgetl returns -1 when nothing is left to read
    end
    lineCount = lineCount + 1;

    % skip the file header and blank lines
    if( lineCount <= noOfHeaderLines || isempty( strtrim( thestr ) ) )
        continue;
    end

    fileCount = fileCount + 1;

    % determine whether we have a positive or negative sample
    sampleEmotion( fileCount ) = 'U';
    if( ~isempty( strfind( thestr, 'pos' ) ) )
        sampleEmotion( fileCount ) = 'P';
    elseif( ~isempty( strfind( thestr, 'neg' ) ) )
        sampleEmotion( fileCount ) = 'N';
    else
        disp('EEEK!');
        pause;
    end

    % determine whether we have a male, female or trans sample.
    % 'fem' must be tested before 'male' because 'female' contains 'male'.
    gender( fileCount ) = '?';
    if( ~isempty( strfind( thestr, 'fem' ) ) )
        gender( fileCount ) = 'F';
    elseif( ~isempty( strfind( thestr, 'male' ) ) )
        gender( fileCount ) = 'M';
    elseif( ~isempty( strfind( thestr, 'trans' ) ) )
        gender( fileCount ) = 'T';
    else
        disp('EEEK!');
        pause;
    end

    % chop off the file name, then split the rest at the three
    % 'maxNoOfFormants' labels that introduce the formant sections
    spaces = strfind( thestr, ' ' );
    if( isempty( spaces ) )
        error( 'kmeans_Singing:badLine', ...
               'Line %d of %s contains no values.', lineCount, inputFileName );
    end
    numberstr = thestr( spaces(1) : end );
    frmtpos   = strfind( numberstr, 'maxNoOfFormants' );
    if( length( frmtpos ) < 3 )
        error( 'kmeans_Singing:badLine', ...
               'Line %d of %s has %d formant sections, expected 3.', ...
               lineCount, inputFileName, length( frmtpos ) );
    end

    scalarStr       = numberstr( 1 : frmtpos(1)-1 );            % jitter and shimmer values
    formantSections = { numberstr( frmtpos(1) : frmtpos(2)-1 ), ... % BURG
                        numberstr( frmtpos(2) : frmtpos(3)-1 ), ... % ALL
                        numberstr( frmtpos(3) : end ) };            % ROBUST

    % extract the requested jitter and shimmer values
    vars = sscanf( scalarStr, '%f', inf );
    outputValues = reshape( vars( columnIndices ), 1, [] );

    % append the requested formant sections
    for s = find( wantedFormantSections )
        sectionStr = formantSections{ s };
        spaces = strfind( sectionStr, ' ' );    % skip the 'maxNoOfFormants' label
        if( isempty( spaces ) )
            error( 'kmeans_Singing:badLine', ...
                   'Line %d of %s has an empty formant section.', ...
                   lineCount, inputFileName );
        end
        vars = sscanf( sectionStr( spaces(1) : end ), '%f', inf );
        outputValues = stripOutFormantValues( vars, outputValues );
    end

    % sometimes the 'all' formants command gives us fewer (or more) formants
    % than usual, so rows can differ in length. Indexed assignment zero-pads
    % whichever of the new row / the existing matrix is shorter.
    data( fileCount, 1 : length( outputValues ) ) = outputValues;

end

if( fileCount == 0 )
    error( 'kmeans_Singing:noData', ...
           '%s contains no data lines after the %d-line header.', ...
           inputFileName, noOfHeaderLines );
end

% individual examination of the metrics does confirm that there is little
% difference in emotional content. However, singer identification is OK.

% ------------ apply the k-means classifier ------------------------
% Further distances accepted by kmeans (e.g. 'cosine', 'correlation') can be
% tried by adding them to this list.
distances = { 'cityblock', 'sqEuclidean' };

% results grouped by emotion: we are only trying to identify positive and
% negative emotions, hence two clusters
for d = 1 : length( distances )
    clusterAndReport( data, 2, distances{d}, sampleEmotion, ...
                      fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName );
end

% results grouped by gender: three clusters
for d = 1 : length( distances )
    clusterAndReport( data, 3, distances{d}, gender, ...
                      fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName );
end

% terminate the records; the onCleanup objects close the files on exit
fprintf( fileOutputID, '\n' );
fprintf( fileKMeansOutputID, '\n' );
fprintf( masterFileOutputID, '\n' );

end

%------------------------------------------------------------------

function fid = openOrFail( fileName, permission )
% Open a file and raise a descriptive error instead of returning -1.

    [ fid, message ] = fopen( fileName, permission );
    if( fid < 0 )
        error( 'kmeans_Singing:fileOpenFailed', ...
               'Cannot open "%s" with permission ''%s'': %s', ...
               fileName, permission, message );
    end

end

%------------------------------------------------------------------

function [] = clusterAndReport( data, noOfClusters, distanceName, labels, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName )
% Run one k-means clustering, report it, then wait for a key press.

    idx = kmeans( data, noOfClusters, 'Replicates', 100, ...
                  'start', 'sample', 'Distance', distanceName );

    processKMeansResults( distanceName, idx, labels, fileOutputID, ...
                          fileKMeansOutputID, masterFileOutputID, titleName );

    disp('press space');
    pause;

end

%------------------------------------------------------------------

function [ outputValues ] = stripOutFormantValues( vars, outputValues )
% Append the values of one formant section to outputValues.
%
%   vars         : numeric column read from one formant section; vars(1) is
%                  the number of formants, the rest are the measurements.
%   outputValues : row vector collected so far; returned unchanged (after
%                  'EEK!' is displayed and a key press) when the section
%                  does not have the expected size.

    valuesPerFormant = 24;  % measurements taken for each formant
    summaryValues    = 12;  % measurements of the mean of all formants
                            % (independent of the number of formants)

    sizeIsConsistent = false;
    if( ~isempty( vars ) )
        noOfFormantValues = length( vars ) - 1; % formant measurements only
        noOfFormants      = vars(1);
        sizeIsConsistent  = ...
            ( noOfFormants == ( noOfFormantValues - summaryValues ) / valuesPerFormant );
    end

    if( sizeIsConsistent )
        outputValues = [ outputValues vars( 2:end )' ];
    else
        disp('EEK!');
        pause;
    end

end

%-------------------------------------------------------------------

function [] = processKMeansResults( ID, idx, groupingCriteria, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName )
% Write and plot how the samples of each label are spread over the clusters.
%
%   ID                 : name of the distance measure, used as a tag
%   idx                : cluster index of every sample (from kmeans)
%   groupingCriteria   : char array with one label character per sample
%   fileOutputID       : receives one 'label <tab> cluster' line per sample
%   fileKMeansOutputID : receives the percentage of samples per group
%   masterFileOutputID : receives the same percentages
%   titleName          : prefix of the plot title
%
%   A "group" is a label/cluster pair such as 'P1' or 'N2'.

    fprintf( fileKMeansOutputID, '%s\t', ID );
    fprintf( masterFileOutputID, '%s\t', ID );

    if( length( idx ) ~= length( groupingCriteria ) )
        disp('EEEK!');
        pause;
    end

    noOfSamples = length( idx );
    groupLabels = cell( noOfSamples, 1 );
    for i = 1 : noOfSamples
        fprintf( fileOutputID, '%s \t %d \n', groupingCriteria(i), idx(i) );
        groupLabels{i} = [ groupingCriteria(i) num2str( idx(i) ) ];
    end

    % ------------- work out the confusion matrix -------------------------

    % unique returns the distinct labels in sorted order. A cell array is
    % used so that a single group, or cluster ids of different widths,
    % cannot upset the count.
    orderedGroups = unique( groupLabels );
    noOfGroups    = numel( orderedGroups );
    groupStats    = zeros( 1, noOfGroups );
    for i = 1 : noOfGroups
        groupStats(i) = ( sum( strcmp( groupLabels, orderedGroups{i} ) ) / noOfSamples ) * 100;
        fprintf( fileKMeansOutputID, '%s \t %f \t', orderedGroups{i}, groupStats(i) );
        fprintf( masterFileOutputID, '%s \t %f \t', orderedGroups{i}, groupStats(i) );
    end

    figure(1);
    bar( groupStats );
    set( gca, 'XTickLabel', orderedGroups );
    % 'none' stops the underscores in the LLD names being drawn as subscripts
    title( [ titleName ' ' ID ], 'Interpreter', 'none' );

end