Mercurial > hg > emotion-detection-top-level

diff Code/Descriptors/Matlab/Common/voicingByClustering.m @ 4:92ca03a8fa99 tip
Update to ICASSP 2013 benchmark
author: Dawn Black
date: Wed, 13 Feb 2013 11:02:39 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Code/Descriptors/Matlab/Common/voicingByClustering.m	Wed Feb 13 11:02:39 2013 +0000
@@ -0,0 +1,54 @@
+function [ idx ] = voicingByClustering( x, fs, noOfFrames, frameLength )
+%VOICINGBYCLUSTERING Voiced/unvoiced frame labels via 2-cluster k-means.
+%   Attempts to classify frames as voiced or unvoiced by clustering the
+%   short term energy and spectral centroid of each frame.
+%   Useful only for speech frames.
+%
+%   Inputs:
+%     x           - signal passed to ShortTimeEnergy / SpectralCentroid
+%     fs          - sampling frequency, passed to SpectralCentroid
+%     noOfFrames  - unused; kept so existing callers keep working
+%     frameLength - analysis window length in samples; the frames do
+%                   not overlap (step == window)
+%
+%   Output:
+%     idx - one label per frame as returned by kmeans, renumbered so
+%           that 1 = voiced and 2 = unvoiced
+
+% Window length and step in samples. Pass frameLength straight through:
+% converting to seconds and back (frameLength/fs*fs) is not always exact
+% in floating point and could yield a non-integer window length.
+winSamples = frameLength;
+stepSamples = winSamples;
+
+% calculate the short term energy
+Eor = ShortTimeEnergy( x, winSamples, stepSamples );
+% calculate the spectral centroid
+Cor = SpectralCentroid( x, winSamples, stepSamples, fs );
+
+noOfClusters = 2; % voiced, unvoiced
+% one row per frame: [energy, centroid]; (:) forces column vectors
+data = [Eor(:) Cor(:)];
+
+% 100 random restarts seeded from the data, city-block (L1) distance;
+% the cluster centres are not needed, so only the labels are requested
+idx = kmeans( data, noOfClusters, 'Replicates', 100, ...
+    'start', 'sample', 'Distance', 'cityblock' );
+
+% we don't know which group will be classed as voiced or unvoiced.
+% assume that the number of voiced frames is more than unvoiced
+noOfAFrames = sum( idx == 1 );
+noOfBFrames = sum( idx == 2 );
+
+% now re-number the idx array so all voiced frames = 1 and unvoiced = 2.
+% Only needed when cluster 2 is the larger one (on a tie cluster 1 stays
+% voiced); the positions are captured before either label is rewritten.
+if( noOfAFrames < noOfBFrames )
+    voicedPos = ( idx == 2 );
+    unvoicedPos = ( idx == 1 );
+    idx( voicedPos ) = 1;
+    idx( unvoicedPos ) = 2;
+end
+
+end
+
author	Dawn Black
date	Wed, 13 Feb 2013 11:02:39 +0000
parents
children