Mercurial > hg > emotion-detection-top-level

function [ idx ] = voicingByClustering( x, fs, noOfFrames, frameLength )
%VOICINGBYCLUSTERING Voiced/unvoiced frame decision via k-means clustering.
%   idx = voicingByClustering( x, fs, noOfFrames, frameLength ) computes
%   the short-term energy and the spectral centroid of x, using a window
%   and a step of frameLength samples (i.e. no overlap between frames),
%   and clusters the resulting [energy, centroid] pairs into two groups
%   with k-means.
%
%   Inputs:
%     x           - signal handed to ShortTimeEnergy and SpectralCentroid.
%                   Only meaningful for speech.
%     fs          - sampling frequency of x, forwarded to SpectralCentroid.
%     noOfFrames  - not used by this function; kept so that existing
%                   callers do not need to change.
%     frameLength - frame length in samples (used as window AND step).
%
%   Output:
%     idx - cluster label per frame as returned by kmeans, renumbered so
%           that 1 = voiced and 2 = unvoiced.
%
%   Notes:
%     * k-means cannot tell us which cluster is "voiced". The larger
%       cluster is ASSUMED to be the voiced one. If both clusters have
%       the same size, the labels are left as kmeans assigned them.
%     * kmeans is started from randomly sampled observations, so the
%       result may differ between runs unless the caller fixes the
%       random number generator state.

% Window and step are passed in samples. frameLength is used directly
% instead of converting to seconds and back ((frameLength/fs)*fs), because
% that round trip is not guaranteed to return an exact integer in floating
% point arithmetic.
winSamples  = frameLength;
stepSamples = frameLength;

% Feature 1: short-term energy of each frame.
Eor = ShortTimeEnergy( x, winSamples, stepSamples );
% Feature 2: spectral centroid of each frame.
Cor = SpectralCentroid( x, winSamples, stepSamples, fs );

% One row per frame, one column per feature.
% NOTE(review): assumes both helpers return column vectors of equal
% length (one entry per frame) - confirm against their implementations.
data = [Eor, Cor];

noOfClusters = 2; % voiced, unvoiced

% Only the labels are needed; the cluster centres are not requested.
idx = kmeans( data, noOfClusters, 'Replicates', 100, ...
    'start', 'sample', 'Distance', 'cityblock' );

% Renumber so that voiced = 1 and unvoiced = 2, assuming that voiced
% frames outnumber unvoiced frames. A swap is only required when
% cluster 2 is the larger one; otherwise the labels are already correct.
inClusterA = ( idx == 1 );
inClusterB = ( idx == 2 );
if nnz( inClusterA ) < nnz( inClusterB )
    idx( inClusterB ) = 1;
    idx( inClusterA ) = 2;
end

end
author	Dawn Black
date	Wed, 13 Feb 2013 11:02:39 +0000
parents
children