comparison Code/Classifiers/kmeans_PRAAT_Singing.m @ 4:92ca03a8fa99 tip

Update to ICASSP 2013 benchmark
author Dawn Black
date Wed, 13 Feb 2013 11:02:39 +0000
parents 5fd388fdd6ef
children
comparison
equal deleted inserted replaced
3:e1cfa7765647 4:92ca03a8fa99
1 function [] = kmeans_Singing( varargin ) 1 function [] = kmeans_PRAAT_Singing( varargin )
2 2
3 cd 'C:\Users\dawn\Dropbox\TestResults' 3 cd 'C:\Users\dawn\Dropbox\TestResults'
4 4
5 DEBUG = 0;
5 % output results file name 6 % output results file name
6 masterFileOutputID = fopen( 'kmeans_Singing_All.txt', 'a' ); 7 masterFileOutputID = fopen( 'kmeans_Singing_PRAAT.txt', 'a' );
7 % input results file name 8 % input results file name
8 inputFileName = 'singingPRAATStats.txt'; 9 inputFileName = 'singingPRAATStats.txt';
9 10
10 % This function allows the user to stipulate which Singing voice LLD's they 11 % This function allows the user to stipulate which Singing voice LLD's they
11 % wish to forward to a k-means classifier and produces a file of 12 % wish to forward to a k-means classifier and produces a file of
271 272
272 noOfArguments = length(varargin); 273 noOfArguments = length(varargin);
273 columnIndices = []; 274 columnIndices = [];
274 275
275 getBURGFormants = 0; 276 getBURGFormants = 0;
276 getAllForamnts=0; 277 getAllFormants=0;
277 getRobustFormants=0; 278 getRobustFormants=0;
278 279
279 for i=1 : noOfArguments 280 for i=1 : noOfArguments
280 if( strcmp( varargin{i}, 'jitter_ddp' )) 281 if( strcmp( varargin{i}, 'jitter_ddp' ))
281 columnIndices = [columnIndices 1]; 282 columnIndices = [columnIndices 1];
296 elseif( strcmp( varargin{i}, 'shimmer_apq11' )) 297 elseif( strcmp( varargin{i}, 'shimmer_apq11' ))
297 columnIndices = [columnIndices 9]; 298 columnIndices = [columnIndices 9];
298 elseif( strcmp( varargin{i}, 'formant_Burg' )) 299 elseif( strcmp( varargin{i}, 'formant_Burg' ))
299 getBURGFormants = 1; 300 getBURGFormants = 1;
300 elseif( strcmp( varargin{i}, 'formant_all' )) 301 elseif( strcmp( varargin{i}, 'formant_all' ))
301 getAllForamnts=1; 302 getAllFormants=1;
302 elseif( strcmp( varargin{i}, 'formant_robust' )) 303 elseif( strcmp( varargin{i}, 'formant_robust' ))
303 getRobustFormants=1; 304 getRobustFormants=1;
304 end 305 end
305 end 306 end
306 307
307 308
308 outputFileName = 'individualResults/kmeans_Results_'; 309 % outputFileName = 'individualResults/kmeans_Results_';
309 resultsFileName = 'kmeans_Results_'; 310 % resultsFileName = 'kmeans_Results_';
310 titleName = ''; 311 titleName = '';
311 for i=1 : noOfArguments 312 for i=1 : noOfArguments
312 % outputFileName = [ outputFileName varargin{i} '_']; 313 % outputFileName = [ outputFileName varargin{i} '_'];
313 % resultsFileName = [ resultsFileName varargin{i} '_']; 314 % resultsFileName = [ resultsFileName varargin{i} '_'];
314 titleName = [ titleName varargin{i} '_']; 315 titleName = [ titleName varargin{i} '_'];
315 fprintf( masterFileOutputID, '%s_', varargin{i} ); 316 fprintf( masterFileOutputID, '%s_', varargin{i} );
316 end 317 end
317 318
318 % titleName = outputFileName; 319 % titleName = outputFileName;
319 outputFileName = [ outputFileName titleName ]; 320 % outputFileName = [ outputFileName titleName ];
320 resultsFileName = [ resultsFileName titleName ]; 321 % resultsFileName = [ resultsFileName titleName ];
321 322
322 fprintf( masterFileOutputID, '\t' ); 323 fprintf( masterFileOutputID, '\t' );
323 324
324 outputFileName = [ outputFileName '.txt']; 325 % outputFileName = [ outputFileName '.txt'];
325 resultsFileName = [ resultsFileName '.txt']; 326 % resultsFileName = [ resultsFileName '.txt'];
326 327
327 fileOutputID = fopen( outputFileName, 'w' ); 328 % fileOutputID = fopen( outputFileName, 'w' );
328 fileKMeansOutputID = fopen( resultsFileName, 'w' ); 329 % fileKMeansOutputID = fopen( resultsFileName, 'w' );
329 330
330 % -------------------- get the data from the results file --------------- 331 % -------------------- get the data from the results file ---------------
331 lineCount = 0; 332 lineCount = 0;
332 fileCount = 0; 333 fileCount = 0;
333 data = []; 334 data = [];
388 spaces = strfind( str2, ' ' ); % remove the string 'maxNoOfFormants' 389 spaces = strfind( str2, ' ' ); % remove the string 'maxNoOfFormants'
389 vars = sscanf( str2( spaces(1) : end ), '%f', inf ); 390 vars = sscanf( str2( spaces(1) : end ), '%f', inf );
390 outputValues = stripOutFormantValues( vars, outputValues ); 391 outputValues = stripOutFormantValues( vars, outputValues );
391 end 392 end
392 393
393 if( getAllForamnts ) 394 if( getAllFormants )
394 spaces = strfind( str3, ' ' ); % remove the string 'maxNoOfFormants' 395 spaces = strfind( str3, ' ' ); % remove the string 'maxNoOfFormants'
395 vars = sscanf( str3( spaces(1) : end ), '%f', inf ); 396 vars = sscanf( str3( spaces(1) : end ), '%f', inf );
396 outputValues = stripOutFormantValues( vars, outputValues ); 397 outputValues = stripOutFormantValues( vars, outputValues );
397 end 398 end
398 399
417 lineCount = lineCount + 1; 418 lineCount = lineCount + 1;
418 419
419 end 420 end
420 fclose(inputFileID); 421 fclose(inputFileID);
421 422
422 %individual examination of the metrics does confirm that there is little
423 %difference in emotional content. However, singer identification is OK.
424
425 % figure(2); subplot(211); hold off;
426 %
427 % for( i = 1 : length(data) )
428 % if( sampleEmotion(i) == 'N')
429 % plot( i, data(i), 'b.' );
430 % else
431 % plot( i, data(i), 'r.' );
432 % end
433 % hold on;
434 % end
435 %
436 % subplot(212); hold off;
437 %
438 % for( i = 1 : length(data) )
439 % if( gender(i) == 'M')
440 % plot( i, data(i), 'b.' );
441 % elseif( gender(i) == 'F')
442 % plot( i, data(i), 'r.' );
443 % else
444 % plot( i, data(i), 'g.' );
445 % end
446 % hold on;
447 % end
448
449 % ------------ apply the k-means classifier ------------------------ 423 % ------------ apply the k-means classifier ------------------------
450 424
451 noOfClusters = 2; % we are only trying to identify positive and negative emotions 425 noOfClusters = 2; % we are only trying to identify positive and negative emotions
452 426
453 427
454 [idx ctrs]=kmeans( data, noOfClusters, 'Replicates',100,... 428 [idx ctrs]=kmeans( data, noOfClusters, 'Replicates',100,...
455 'start', 'sample', 'Distance', 'cityblock'); 429 'start', 'sample', 'Distance', 'cityblock');
456 430
457 %display results grouped by emotion 431 %display results grouped by emotion
458 processKMeansResults( 'cityblock', idx, sampleEmotion, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName ); 432 fprintf( masterFileOutputID, '\n Emotion grouping \n');
459 433 fprintf( masterFileOutputID, 'cityblock \n');
460 disp('press space'); 434 [ groupStats, groupNames ] = processKMeansResults( 'cityblock', idx, sampleEmotion, masterFileOutputID, titleName, DEBUG );
461 pause; 435 [ confusionMatrix ] = getConfusionMatrix( groupStats, groupNames, masterFileOutputID );
462 436
437
438 fprintf( masterFileOutputID, 'sqEuclidean \n');
463 [idx ctrs]=kmeans( data, noOfClusters, 'Replicates',100,... 439 [idx ctrs]=kmeans( data, noOfClusters, 'Replicates',100,...
464 'start', 'sample', 'Distance', 'sqEuclidean'); 440 'start', 'sample', 'Distance', 'sqEuclidean');
465 441
466 processKMeansResults( 'sqEuclidean', idx, sampleEmotion, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName ); 442 [ groupStats, groupNames ] = processKMeansResults( 'sqEuclidean', idx, sampleEmotion, masterFileOutputID, titleName, DEBUG );
467 443 [ confusionMatrix ] = getConfusionMatrix( groupStats, groupNames, masterFileOutputID );
468 disp('press space'); 444
469 pause; 445 % ------------
470 446
471 447 % % display results grouped by gender
472 %display results grouped by gender 448 % fprintf( masterFileOutputID, '\n Gender grouping \n');
473 449 % noOfClusters = 3;
474 noOfClusters = 3; 450 %
475
476 [idx ctrs]=kmeans( data, noOfClusters, 'Replicates',100,...
477 'start', 'sample', 'Distance', 'cityblock');
478
479 processKMeansResults( 'cityblock', idx, gender, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName );
480
481 disp('press space');
482 pause;
483
484 [idx ctrs]=kmeans( data, noOfClusters, 'Replicates',100,...
485 'start', 'sample', 'Distance', 'sqEuclidean');
486
487 processKMeansResults( 'sqEuclidean', idx, gender, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName );
488
489 disp('press space');
490 pause;
491
492
493 % [idx ctrs]=kmeans( data, noOfClusters, 'Replicates',100,... 451 % [idx ctrs]=kmeans( data, noOfClusters, 'Replicates',100,...
494 % 'start', 'sample', 'Distance', 'cosine'); 452 % 'start', 'sample', 'Distance', 'cityblock');
495 % 453 %
496 % processKMeansResults( 'cosine', idx, sampleEmotion, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName); 454 % fprintf( masterFileOutputID, 'cityblock \n');
497 % 455 % [ groupStats, groupNames ] = processKMeansResults( 'cityblock', idx, gender, masterFileOutputID, titleName, DEBUG );
498 % disp('press space'); 456 % [ confusionMatrix ] = getConfusionMatrix( groupStats, groupNames, masterFileOutputID );
499 % pause; 457 %
500
501 % [idx ctrs]=kmeans( data, noOfClusters, 'Replicates',100,... 458 % [idx ctrs]=kmeans( data, noOfClusters, 'Replicates',100,...
502 % 'start', 'sample', 'Distance', 'correlation'); 459 % 'start', 'sample', 'Distance', 'sqEuclidean');
503 % 460 %
504 % processKMeansResults( 'correlation', idx, sampleEmotion, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName ); 461 % fprintf( masterFileOutputID, 'sqEuclidean \n');
505 % 462 % [ groupStats, groupNames ] = processKMeansResults( 'sqEuclidean', idx, gender, masterFileOutputID, titleName, DEBUG );
506 % disp('press space'); 463 % [ confusionMatrix ] = getConfusionMatrix( groupStats, groupNames, masterFileOutputID );
507 % pause; 464 %
508 465 %
509 fprintf( fileOutputID, '\n' ); 466
510 fclose( fileOutputID ); 467 %
511 fprintf( fileKMeansOutputID, '\n' ); 468 % % [idx ctrs]=kmeans( data, noOfClusters, 'Replicates',100,...
512 fclose( fileKMeansOutputID ); 469 % % 'start', 'sample', 'Distance', 'cosine');
470 % %
471 % % processKMeansResults( 'cosine', idx, sampleEmotion, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName);
472 % %
473 % % disp('press space');
474 % % pause;
475 %
476 % % [idx ctrs]=kmeans( data, noOfClusters, 'Replicates',100,...
477 % % 'start', 'sample', 'Distance', 'correlation');
478 % %
479 % % processKMeansResults( 'correlation', idx, sampleEmotion, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName );
480
513 fprintf( masterFileOutputID, '\n' ); 481 fprintf( masterFileOutputID, '\n' );
514 fclose( masterFileOutputID ); 482 fclose( masterFileOutputID );
515 483
516 end 484 end
517 485
531 end 499 end
532 500
533 end 501 end
534 502
535 %------------------------------------------------------------------- 503 %-------------------------------------------------------------------
536
537 function [] = processKMeansResults( ID, idx, groupingCriteria, fileOutputID, fileKMeansOutputID, masterFileOutputID, titleName )
538
539 fprintf( fileKMeansOutputID, '%s\t', ID );
540 fprintf( masterFileOutputID, '%s\t', ID );
541
542 if( length( idx ) ~= length( groupingCriteria ) )
543 disp('EEEK!');
544 pause;
545 end
546
547 groupIDs = '';
548 groupStr = '';
549 for( i = 1 : length( idx ))
550 fprintf( fileOutputID, '%s \t %d \n', groupingCriteria(i), idx(i) );
551 gID = [ groupingCriteria(i) num2str( idx(i) )];
552 groupIDs = [ groupIDs ; gID ];
553 groupStr = [ groupStr gID ];
554 end
555
556 % ------------- work out the confusion matrix -------------------------
557
558 groups = unique( groupIDs, 'rows' );
559 noOfGroups = length( groups );
560 orderedGroups = sort(cellstr(groups));
561 groupStats = [];
562 for( i = 1 : noOfGroups )
563 groupStats(i) = ((length( strfind( groupStr, char(orderedGroups(i)))))/length( idx ) ) * 100;
564 fprintf( fileKMeansOutputID, '%s \t %f \t', char(orderedGroups(i)), groupStats(i) );
565 fprintf( masterFileOutputID, '%s \t %f \t', char(orderedGroups(i)), groupStats(i) );
566 end
567
568 figure(1);
569 bar( groupStats );
570 set( gca, 'XTickLabel', orderedGroups );
571 title([ titleName ' ' ID]);
572
573 end