changeset 2:5fd388fdd6ef

initial commit - this file allows the programmer to select which of the PRAAT generated metrics the user wishes to use for classification, and then applies the k-means classifier.
author Dawn Black <dawn.black@eecs.qmul.ac.uk>
date Mon, 10 Sep 2012 09:18:15 +0100
parents a3d62264030c
children e1cfa7765647
files Code/Classifiers/kmeans_PRAAT_Singing.m
diffstat 1 files changed, 573 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Code/Classifiers/kmeans_PRAAT_Singing.m	Mon Sep 10 09:18:15 2012 +0100
@@ -0,0 +1,573 @@
+function [] = kmeans_Singing( varargin )
+% KMEANS_SINGING  k-means clustering of PRAAT singing-voice statistics.
+%
+%   kmeans_Singing( 'name1', 'name2', ... ) reads the PRAAT statistics in
+%   'singingPRAATStats.txt', keeps only the low-level descriptors (LLDs)
+%   named by the arguments and clusters the samples with k-means:
+%     * 2 clusters, reported against the emotion label (P / N), and
+%     * 3 clusters, reported against the gender label (F / M / T),
+%   each with the 'cityblock' and then the 'sqEuclidean' distance. The
+%   labels are derived from the substrings 'pos' / 'neg' and 'fem' /
+%   'male' / 'trans' found in each data line.
+%
+%   Recognised names (matched case-insensitively; one leading underscore
+%   is tolerated, so '_jitter_ddp' and 'jitter_ddp' are equivalent):
+%     ---- PRAAT JITTER MEASUREMENTS ----
+%         'jitter_ddp'  'jitter_local'  'jitter_ppq5'  'jitter_rap'
+%     ---- PRAAT SHIMMER MEASUREMENTS ----
+%         'shimmer_local'  'shimmer_dda'  'shimmer_apq3'  'shimmer_apq5'
+%         'shimmer_apq11'
+%     ---- PRAAT FORMANT MEASUREMENTS (the whole section is used) ----
+%         'formant_Burg'  'formant_all'  'formant_robust'
+%   Jitter/shimmer values are used in the order the names are given; the
+%   selected formant sections always follow in the order BURG, ALL, ROBUST.
+%   Unrecognised names are ignored with a warning.
+%
+%   Files written (relative to the working directory set below):
+%     kmeans_Singing_All.txt  (appended)
+%         input file name, the selected names and the group percentages
+%         of every clustering run
+%     kmeans_Results_<name1>_<name2>_..._.txt
+%         group percentages of every clustering run of this call
+%     individualResults/kmeans_Results_<name1>_<name2>_..._.txt
+%         label and cluster index of every sample, for every run
+%
+%   The function is interactive: it waits for a key press after every
+%   clustering run (and after every 'EEEK!' diagnostic).
+%
+%   NOTE: the changeset stores this function in kmeans_PRAAT_Singing.m, so
+%   MATLAB will invoke it by that file name.
+%
+%   INPUT FILE LAYOUT (one sample per line; the first 11 lines are header)
+%
+%   COLUMN NUMBER               :   METRIC
+%     1                         : audio file name
+%     2 .. 5                    : jitter ddp, local, ppq5, rap
+%     6 .. 10                   : shimmer local, dda, apq3, apq5, apq11
+%
+%   ------------- BURG FORMANTS ---------------
+%     11                        : number of BURG formants listed = nBF
+%     12 + (k-1)*nMetrics + j   : metric j of BURG formant k (k = 1..nBF)
+%     12 + nBF*nMetrics + a     : aggregate metric a over all BURG formants
+%   ------------- ALL FORMANTS ---------------
+%     12 + nBF*nMetrics + 12    : number of ALL formants listed = nAF
+%     startOfALLMeasurements = 12 + nBF*nMetrics + 13
+%     startOfALLMeasurements + (k-1)*nMetrics + j : metric j of ALL formant k
+%     startOfALLMeasurements + nAF*nMetrics + a   : aggregate metric a
+%   ------------- ROBUST FORMANTS ---------------
+%     startOfALLMeasurements + nAF*nMetrics + 12  : number of ROBUST
+%                                                   formants listed = nRF
+%     startOfROBUSTMeasurements = startOfALLMeasurements + nAF*nMetrics + 13
+%     startOfROBUSTMeasurements + (k-1)*nMetrics + j : metric j of ROBUST formant k
+%     startOfROBUSTMeasurements + nRF*nMetrics + a   : aggregate metric a
+%
+%   THERE ARE CURRENTLY nMetrics = 24 MEASUREMENTS TAKEN FOR EACH FORMANT
+%   (the value 24 is hard-coded in stripOutFormantValues), j = 0..23:
+%     j =  0 ..  3  : mean, variance, min, max of the frequency
+%     j =  4 ..  7  : mean, variance, min, max of the frequency derivative
+%     j =  8 .. 11  : mean, variance, min, max of the frequency 2nd derivative
+%     j = 12 .. 15  : mean, variance, min, max of the bandwidth
+%     j = 16 .. 19  : mean, variance, min, max of the bandwidth derivative
+%     j = 20 .. 23  : mean, variance, min, max of the bandwidth 2nd derivative
+%   and 12 aggregate measurements for the mean of all formants, a = 0..11:
+%     a =  0 ..  3  : mean, variance, min, max of the frequency
+%     a =  4 ..  7  : mean, variance, min, max of the frequency derivative
+%     a =  8 .. 11  : mean, variance, min, max of the frequency 2nd derivative
+%
+%   In the text file every formant count is preceded by the label
+%   'maxNoOfFormants'; the parser below splits the line on that label.
+%   NOTE(review): the base offset 12 is derived from the first BURG formant
+%   occupying columns 12..35 - TODO confirm against the script that writes
+%   singingPRAATStats.txt.
+
+% NOTE: the working directory is hard-coded; every file name below is
+% relative to it.
+cd 'C:\Users\dawn\Dropbox\TestResults'
+
+% output results file name (shared by all calls, hence opened for append)
+masterFileOutputID = fopen( 'kmeans_Singing_All.txt', 'a' );
+if( masterFileOutputID < 0 )
+    error( 'kmeans_Singing:fileOpen', 'Could not open kmeans_Singing_All.txt for appending.' );
+end
+% input results file name
+inputFileName = 'singingPRAATStats.txt';
+
+fprintf( masterFileOutputID, '\n RESULTS FILE NAME: %s\n', inputFileName);
+inputFileID = fopen( inputFileName );
+if( inputFileID < 0 )
+    fclose( masterFileOutputID );
+    error( 'kmeans_Singing:fileOpen', 'Could not open input file %s.', inputFileName );
+end
+
+% ------------- work out which metrics have been requested --------------
+
+% Position in this list == position of the value among the jitter/shimmer
+% numbers of a line once the file name has been chopped off.
+scalarMetricNames = { 'jitter_ddp', 'jitter_local', 'jitter_ppq5', 'jitter_rap', ...
+    'shimmer_local', 'shimmer_dda', 'shimmer_apq3', 'shimmer_apq5', 'shimmer_apq11' };
+
+noOfArguments = length(varargin);
+columnIndices = [];
+
+getBURGFormants = 0;
+getAllFormants = 0;
+getRobustFormants = 0;
+
+for i = 1 : noOfArguments
+    metricName = varargin{i};
+    % the names used to be documented with a leading underscore; accept both
+    if( ischar( metricName ) && ~isempty( metricName ) && metricName(1) == '_' )
+        metricName = metricName( 2 : end );
+    end
+
+    scalarColumn = find( strcmpi( metricName, scalarMetricNames ) );
+    if( ~isempty( scalarColumn ) )
+        columnIndices = [ columnIndices scalarColumn ];
+    elseif( strcmpi( metricName, 'formant_Burg' ) )
+        getBURGFormants = 1;
+    elseif( strcmpi( metricName, 'formant_all' ) )
+        getAllFormants = 1;
+    elseif( strcmpi( metricName, 'formant_robust' ) )
+        getRobustFormants = 1;
+    else
+        warning( 'kmeans_Singing:unknownMetric', ...
+            'Ignoring unrecognised metric name (argument %d).', i );
+    end
+end
+
+if( isempty( columnIndices ) && ~( getBURGFormants || getAllFormants || getRobustFormants ) )
+    fclose( inputFileID );
+    fclose( masterFileOutputID );
+    error( 'kmeans_Singing:noMetrics', 'No recognised metric name was given; nothing to classify.' );
+end
+
+% ------------- build the title and the output file names ---------------
+
+titleName = '';
+for i = 1 : noOfArguments
+    titleName = [ titleName varargin{i} '_'];
+    fprintf( masterFileOutputID, '%s_', varargin{i} );
+end
+fprintf( masterFileOutputID, '\t' );
+
+outputFileName = [ 'individualResults/kmeans_Results_' titleName '.txt' ];
+resultsFileName = [ 'kmeans_Results_' titleName '.txt' ];
+
+% fopen cannot create the folder, so make sure it is there
+if( ~exist( 'individualResults', 'dir' ) )
+    mkdir( 'individualResults' );
+end
+
+fileOutputID = fopen( outputFileName, 'w' );
+fileKMeansOutputID = fopen( resultsFileName, 'w' );
+if( fileOutputID < 0 || fileKMeansOutputID < 0 )
+    openIDs = [ masterFileOutputID inputFileID fileOutputID fileKMeansOutputID ];
+    openIDs = openIDs( openIDs >= 0 );
+    for k = 1 : numel( openIDs )
+        fclose( openIDs(k) );
+    end
+    error( 'kmeans_Singing:fileOpen', 'Could not open %s and/or %s for writing.', ...
+        outputFileName, resultsFileName );
+end
+
+% -------------------- get the data from the results file ---------------
+noOfHeaderLines = 11;   % lines at the top of the file that hold no sample
+lineCount = 0;          % number of lines read so far
+fileCount = 0;          % number of samples (audio files) read so far
+data = [];              % one row per sample, one column per selected value
+sampleEmotion = '';     % one character per sample: P, N or U
+gender = '';            % one character per sample: F, M, T or ?
+wantedSections = [ getBURGFormants getAllFormants getRobustFormants ];
+
+while( ~(feof(inputFileID)) )
+
+    thestr = fgetl(inputFileID);
+    isHeaderLine = ( lineCount < noOfHeaderLines );
+    lineCount = lineCount + 1;
+
+    % skip the file header, and blank lines (e.g. at the end of the file)
+    if( isHeaderLine || ~ischar( thestr ) || isempty( strtrim( thestr ) ) )
+        continue;
+    end
+
+    fileCount = fileCount + 1;
+
+    % determine whether we have a positive or negative sample
+    sampleEmotion( fileCount ) = 'U';
+    if( ~(isempty(strfind(thestr,'pos'))))
+        % sample is positive
+        sampleEmotion( fileCount ) = 'P';
+    elseif( ~(isempty(strfind(thestr,'neg'))))
+        % sample is negative
+        sampleEmotion( fileCount ) = 'N';
+    else
+        disp('EEEK!');
+        pause;
+    end
+
+    % determine whether we have a male, female or trans sample;
+    % 'fem' has to be tested first because 'female' contains 'male'
+    gender( fileCount ) = '?';
+    if( ~(isempty(strfind(thestr,'fem'))))
+        % gender is female
+        gender( fileCount ) = 'F';
+    elseif( ~(isempty(strfind(thestr,'male'))))
+        % gender is male
+        gender( fileCount ) = 'M';
+    elseif( ~(isempty(strfind(thestr,'trans'))))
+        % gender is trans
+        gender( fileCount ) = 'T';
+    else
+        disp('EEEK!');
+        pause;
+    end
+
+    % split the line into the jitter/shimmer part and the three formant
+    % sections, each of which starts with the label 'maxNoOfFormants'
+    spaces = strfind( thestr, ' ' );
+    numberstr = thestr( spaces(1) : end ); % chop off the file name
+    frmtpos = strfind( numberstr, 'maxNoOfFormants');
+
+    sectionStarts = frmtpos( 1 : 3 );   % BURG, ALL, ROBUST
+    sectionEnds = [ frmtpos( 2 : 3 ) - 1, length( numberstr ) ];
+
+    % extract the shimmer and jitter values
+    vars = sscanf( numberstr( 1 : frmtpos(1)-1 ), '%f', inf );
+    outputValues = [];
+    outputValues = [ outputValues vars( columnIndices )'];
+
+    % append the requested formant sections
+    for s = find( wantedSections )
+        sectionStr = numberstr( sectionStarts(s) : sectionEnds(s) );
+        spaces = strfind( sectionStr, ' ' ); % remove the string 'maxNoOfFormants'
+        vars = sscanf( sectionStr( spaces(1) : end ), '%f', inf );
+        outputValues = stripOutFormantValues( vars, outputValues );
+    end
+
+    % sometimes the 'all' formants command gives us fewer (or more)
+    % formants than for the previous samples. If this is the case, then
+    % we will have to pad the shorter side with zeros for now.
+    [m n] = size( data );
+    noOfValues = length( outputValues );
+    if( n > noOfValues )
+        outputValues = [ outputValues zeros( 1, n - noOfValues ) ];
+    elseif( m > 0 && noOfValues > n )
+        data = [ data zeros( m, noOfValues - n ) ];
+    end
+
+    data( fileCount, : ) = outputValues;
+
+end
+fclose(inputFileID);
+
+if( fileCount == 0 )
+    fclose( fileOutputID );
+    fclose( fileKMeansOutputID );
+    fclose( masterFileOutputID );
+    error( 'kmeans_Singing:noData', 'No samples found in %s.', inputFileName );
+end
+
+%individual examination of the metrics does confirm that there is little
+%difference in emotional content. However, singer identification is OK.
+
+% ------------  apply the k-means classifier  ------------------------
+
+% Results are grouped by emotion first (we are only trying to identify
+% positive and negative emotions, hence 2 clusters) and then by gender
+% (3 clusters). The 'cosine' and 'correlation' distances are not run.
+groupingLabels = { sampleEmotion, gender };
+clusterCounts = [ 2 3 ];
+distanceNames = { 'cityblock', 'sqEuclidean' };
+
+for g = 1 : numel( groupingLabels )
+    for d = 1 : numel( distanceNames )
+        idx = kmeans( data, clusterCounts(g), 'Replicates',100,...
+            'start', 'sample', 'Distance', distanceNames{d} );
+
+        processKMeansResults( distanceNames{d}, idx, groupingLabels{g}, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName );
+
+        disp('press space');
+        pause;
+    end
+end
+
+fprintf( fileOutputID, '\n' );
+fclose( fileOutputID );
+fprintf( fileKMeansOutputID, '\n' );
+fclose( fileKMeansOutputID );
+fprintf( masterFileOutputID, '\n' );
+fclose( masterFileOutputID );
+
+end
+
+%------------------------------------------------------------------
+
+function [ outputValues ] = stripOutFormantValues( vars, outputValues )
+% Append the values of one formant section to the row vector outputValues.
+%
+%   vars          column of numbers parsed from one formant section:
+%                 vars(1) is the number of formants listed, the remaining
+%                 entries are the measurements.
+%   outputValues  row of values collected so far for the current sample;
+%                 returned with vars(2:end) appended, or unchanged when
+%                 the section fails the consistency check below.
+
+    metricsPerFormant = 24;  % measurements listed for each single formant
+    aggregateMetrics = 12;   % measurements for the mean of all formants,
+                             % present once whatever the number of formants
+
+    declaredFormants = vars(1);
+    valuesListed = length( vars ) - 1; % the formant arguments only
+
+    % the declared number of formants has to account for every value listed
+    if( declaredFormants == (valuesListed-aggregateMetrics)/metricsPerFormant )
+        outputValues = [ outputValues vars( 2:end )' ];
+        return;
+    end
+
+    % inconsistent section: flag it and wait for the user
+    disp('EEK!');
+    pause;
+
+end
+
+%-------------------------------------------------------------------
+
+function [] = processKMeansResults( ID, idx, groupingCriteria, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName )
+% Report how the known labels are distributed over the k-means clusters.
+%
+%   ID                  name of the distance measure of this run; written
+%                       in front of the results and used in the plot title
+%   idx                 cluster index of every sample, as returned by kmeans
+%   groupingCriteria    one label character per sample (e.g. 'P'/'N' or
+%                       'F'/'M'/'T'), in the same order as idx
+%   fileOutputID        file that receives one 'label <tab> cluster' line
+%                       per sample
+%   fileKMeansOutputID  file that receives the group percentages
+%   masterFileOutputID  file that receives the same group percentages
+%   titleName           text placed in front of ID in the plot title
+%
+%   Every sample is given the group name <label><cluster>, e.g. 'P1'. For
+%   each group that occurs, in sorted order, the name and the percentage
+%   of all samples that fall in it are written to both results files and
+%   shown as a bar chart in figure 1.
+
+    fprintf( fileKMeansOutputID, '%s\t', ID );
+    fprintf( masterFileOutputID, '%s\t', ID );
+
+    if( length( idx ) ~= length( groupingCriteria ) )
+        disp('EEEK!');
+        pause;
+    end
+
+    noOfSamples = length( idx );
+    if( noOfSamples == 0 )
+        return; % nothing to count or plot
+    end
+
+    % A cell array of names, rather than a char matrix, so that cluster
+    % indices with more than one digit do not break the concatenation.
+    groupNames = cell( noOfSamples, 1 );
+    for i = 1 : noOfSamples
+        fprintf( fileOutputID, '%s \t %d \n', groupingCriteria(i), idx(i) );
+        groupNames{i} = [ groupingCriteria(i) num2str( idx(i) ) ];
+    end
+
+    % ------------- work out the confusion matrix -------------------------
+
+    % unique returns the distinct names already sorted, plus for every
+    % sample the position of its name in that list; counting by position
+    % is exact and also works when only a single group occurs.
+    [ orderedGroups, firstOccurrence, groupOfSample ] = unique( groupNames );
+    noOfGroups = numel( orderedGroups );
+    groupCounts = accumarray( groupOfSample(:), 1, [ noOfGroups 1 ] );
+
+    groupStats = zeros( 1, noOfGroups );
+    for i = 1 : noOfGroups
+        groupStats(i) = ( groupCounts(i) / noOfSamples ) * 100;
+        fprintf( fileKMeansOutputID, '%s \t %f \t', orderedGroups{i}, groupStats(i) );
+        fprintf( masterFileOutputID, '%s \t %f \t', orderedGroups{i}, groupStats(i) );
+    end
+
+    figure(1);
+    bar( groupStats );
+    set( gca, 'XTickLabel', orderedGroups );
+    % the title contains underscores, which the default TeX interpreter
+    % would render as subscripts
+    title( [ titleName ' ' ID], 'Interpreter', 'none' );
+
+end
\ No newline at end of file