changeset 100:ae195c41c7bd

- Python results plotting (finally). - Proper results reporting script. - Test on ALL talkers. The results script then generates a summary based on all the various subsets. - Fixed chown users (hopefully sudos to be deleted entirely soon) - More...
author tomwalters
date Mon, 13 Sep 2010 18:34:23 +0000
parents be3bbd8b3fcd
children 9416e88d7c56
files experiments/scripts/HTK/install_htk.sh experiments/scripts/aimc/build_aimc.sh experiments/scripts/cnbh-syllables/results_plotting/gen_results.py experiments/scripts/cnbh-syllables/results_plotting/plot_all_results.m experiments/scripts/cnbh-syllables/results_plotting/spider_plot.py experiments/scripts/cnbh-syllables/run_training_and_testing/run_test_instance.sh experiments/scripts/cnbh-syllables/run_training_and_testing/train_test_sets/gen_spoke_points/spoke_pattern.txt experiments/scripts/cnbh-syllables/run_training_and_testing/train_test_sets/generate_train_test_lists.sh experiments/scripts/master.sh experiments/scripts/setup_aws_instance.sh scripts/aws_prepare.sh
diffstat 11 files changed, 289 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/experiments/scripts/HTK/install_htk.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/HTK/install_htk.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -3,7 +3,7 @@
 set -u
 if [ ! -e /mnt/experiments/htk/.htk_installed_success ]; then
 sudo mkdir -p /mnt/experiments/htk
-sudo chown ubuntu /mnt/experiments/htk
+sudo chown `whoami` /mnt/experiments/htk
 cd /mnt/experiments/htk
 wget --user $HTK_USERNAME --password $HTK_PASSWORD http://htk.eng.cam.ac.uk/ftp/software/HTK-3.4.1.tar.gz
 tar -xzf HTK-3.4.1.tar.gz
--- a/experiments/scripts/aimc/build_aimc.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/aimc/build_aimc.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -6,7 +6,7 @@
 AIMC_DIR=/mnt/experiments/aimc
 
 sudo mkdir -p $AIMC_DIR
-sudo chown ubuntu $AIMC_DIR
+sudo chown `whoami` $AIMC_DIR
 cd $AIMC_DIR
 svn checkout http://aimc.googlecode.com/svn/trunk/ aimc-read-only
 cd aimc-read-only
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experiments/scripts/cnbh-syllables/results_plotting/gen_results.py	Mon Sep 13 18:34:23 2010 +0000
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+gen_results.py
+
+Created by Thomas Walters on 2010-09-12.
+"""
+
+import sys
+import getopt
+import re
+
+
+help_message = '''
+Generate a file containing all the results for a run of the
+syllable recognition experiment. Expected input is a 'misclassifications'
+file of the type generated by run_test_instance.sh, along with the locations
+of files containing the training talkers and all the talkers that the system
+was tested on.
+
+Arguments:
+-i --input_file
+-t --train_talkers
+-s --test_talkers
+-o --output_filename
+-c --value_count
+-p --spoke_pattern
+'''
+
+class Usage(Exception):
+  def __init__(self, msg):
+    self.msg = msg
+
+
+def main(argv=None):
+  if argv is None:
+    argv = sys.argv
+  try:
+    try:
+      opts, args = getopt.getopt(argv[1:], "hi:t:s:o:c:p:v",
+                                 ["help", "input_file=", "train_talkers=",
+                                  "test_talkers=", "output_filename=",
+                                  "value_count=", "spoke_pattern="])
+    except getopt.error, msg:
+      raise Usage(msg)
+  
+    # defaults
+    input_file = "misclassified_syllables_iteration_15"
+    train_talkers = "training_talkers"
+    test_talkers = "testing_talkers"
+    output_filename = "results.txt"
+    total_value_count = 185
+    spoke_pattern_file = "spoke_pattern.txt"
+    
+    # option processing
+    for option, value in opts:
+      if option == "-v":
+        verbose = True
+      if option in ("-h", "--help"):
+        raise Usage(help_message)
+      if option in ("-i", "--input_file"):
+        input_file = value
+      if option in ("-t", "--train_talkers"):
+        train_talkers = value
+      if option in ("-s", "--test_talkers"):
+        test_talkers = value
+      if option in ("-c", "--value_count"):
+        total_value_count = int(value)
+      if option in ("-p", "--spoke_pattern_file"):
+        spoke_pattern_file = value
+  
+  except Usage, err:
+    print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
+    print >> sys.stderr, "\t for help use --help"
+    return 2
+    
+  results = dict()
+  f = open(input_file, 'r')
+  for line in f:
+    values = line.strip().split(' ')
+    results[values[1]]=100*(1-float(values[0])/total_value_count)
+ 	
+  f = open(test_talkers, 'r')
+  test_talkers_list = f.readlines()
+  f.close()
+
+  f = open(train_talkers, 'r')
+  train_talkers_list = f.readlines()
+  f.close()
+
+  spoke_pattern = []
+  f = open(spoke_pattern_file, 'r')
+  for line in f:
+	spoke_pattern.append(line.strip().split(' '))
+
+  all_talkers_list = []
+  all_talkers_list.extend(train_talkers_list)
+  all_talkers_list.extend(test_talkers_list)
+
+  # Here I make the rather rash assumption that the model was tested on all talkers
+  # this should be true if the training and testing was done using my scripts.
+  for t in all_talkers_list:
+	results.setdefault(t.strip(), 100.0)
+
+  total_score = 0.0
+  element_count = 0
+  for t in train_talkers_list:
+	total_score += results[t.strip()]
+	element_count += 1
+  score = total_score / element_count
+  print ("# Score on training talkers: %.1f%%" % score)
+
+  total_score = 0.0
+  element_count = 0
+  for t in all_talkers_list:
+	total_score += results[t.strip()]
+	element_count += 1
+  score = total_score / element_count
+  print ("# Score on all talkers: %.1f%%" % score)
+
+  total_score = 0.0
+  element_count = 0
+  for t in test_talkers_list:
+	total_score += results[t.strip()]
+	element_count += 1
+  score = total_score / element_count
+  print ("# Score on test talkers: %.1f%%" % score)
+
+  score = results[spoke_pattern[0][0]]
+  print ("# Score on central talker: %.1f" % score)
+
+  for s in xrange(1,9):
+	print ("# Results for spoke %d" % s)
+	for p in xrange(0, 7):
+	  score = results[spoke_pattern[s][p]]
+	  m = re.match('(.*)p(.*)s', spoke_pattern[s][p])
+	  gpr = m.group(1)
+	  vtl=m.group(2)
+	  print ("%s %s %s" % (gpr, vtl, score))
+
+if __name__ == "__main__":
+  sys.exit(main())
--- a/experiments/scripts/cnbh-syllables/results_plotting/plot_all_results.m	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/cnbh-syllables/results_plotting/plot_all_results.m	Mon Sep 13 18:34:23 2010 +0000
@@ -3,16 +3,21 @@
 % Nick Fyson and Martin Vestergaard.
 % Copyright 2009 University of Cambridge
 % Author: Tom Walters <tcw24@cam>
-function plot_all_results(exp_path)
+function plot_all_results(exp_path, iteration, plot_end_numbers)
+
+plot_numbers = true;
+if nargin < 3
+  plot_end_numbers = false;
+end
 
 % Load the results from the experimental directory
-load([exp_path 'misclassified.txt_iter15']); 
+misclassified = load([exp_path 'misclassified_syllables_iteration_' num2str(iteration)]); 
 
 % The total number of syllables in the CNBH syllable database 
 num_points = 185;
 target_VTL = 15;
 
-misclassified(:, 1) = 1 - misclassified(:, 1) / num_points; %#ok<NODEF>
+misclassified(:, 1) = 1 - misclassified(:, 1) / num_points;
 
 % The individual data points are plotted as spheres
 sphere_size_x = 1.2;
@@ -61,20 +66,30 @@
   y_pos_2 = spokes{i}(j, 2);
   z_pos_2 = results{i}(j);
   
-  text(x_pos + 0.3*(x_pos - x_pos_2), y_pos + 0.3*(y_pos - y_pos_2), z_pos + 0.3*(z_pos - z_pos_2) , [num2str(results{i}(j), 3) '%']);
+  j=1;
+  
+  if (~plot_numbers && plot_end_numbers)
+    text(x_pos + 0.3*(x_pos - x_pos_2), y_pos + 0.3*(y_pos - y_pos_2), z_pos + 0.3*(z_pos - z_pos_2) , [num2str(results{i}(j), 3) '%']);
+  end
   for j = 1:length(spokes{i})
-    [X Y Z] = sphere(10);
-    X = sphere_size_x.*X + spokes{i}(j,1);
-    Y = sphere_size_y.*Y + spokes{i}(j,2);
-    Z = sphere_size_z.*Z + results{i}(j);
-    % C = zeros(size(X));
-    plot3([spokes{i}(j, 1) spokes{i}(j, 1)], ...
-          [spokes{i}(j, 2),spokes{i}(j, 2)], [0 results{i}(j)], '-k.', ...
-          'LineWidth', 1, 'Color', [0.8 0.8 0.8]);
-    surf(X, Y, Z, ones(size(Z)) .* (results{i}(j)), 'LineStyle', 'none');
+	if (plot_numbers)
+	  text(spokes{i}(j,1), spokes{i}(j,2), results{i}(j), [num2str(results{i}(j), 3) '%']);
+	else
+      [X Y Z] = sphere(10);
+      X = sphere_size_x.*X + spokes{i}(j,1);
+      Y = sphere_size_y.*Y + spokes{i}(j,2);
+      Z = sphere_size_z.*Z + results{i}(j);
+      % C = zeros(size(X));
+      plot3([spokes{i}(j, 1) spokes{i}(j, 1)], ...
+            [spokes{i}(j, 2),spokes{i}(j, 2)], [0 results{i}(j)], '-k.', ...
+            'LineWidth', 1, 'Color', [0.8 0.8 0.8]);
+      surf(X, Y, Z, ones(size(Z)) .* (results{i}(j)), 'LineStyle', 'none');
+    end
   end
-  plot3(spokes{i}(:,1), spokes{i}(:,2), results{i}(:), '-', 'LineWidth', 2, ...
-       'Color', [0.2 0.2 0.2]);
+  if (~plot_numbers)
+    plot3(spokes{i}(:,1), spokes{i}(:,2), results{i}(:), '-', 'LineWidth', 2, ...
+         'Color', [0.2 0.2 0.2]);
+  end
 end
 % Plot a zero-sized sphere at zero to get the autoscaling of the colour bar
 % correct
@@ -99,8 +114,9 @@
 set(axes1, 'YDir', 'reverse');
 set(axes1, 'ZTick', [0 20 40 60 80 100]);
 hold('all');
-print('-depsc', 'results_plot.eps');
-!open results_plot.eps
+%print('-depsc', [exp_path 'results_plot_iteration_' num2str(iteration) '.eps']);
+%   saveas(gcf, [exp_path 'results_plot_iteration_' num2str(iteration) '.fig']);
+%!open results_plot.eps
 
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experiments/scripts/cnbh-syllables/results_plotting/spider_plot.py	Mon Sep 13 18:34:23 2010 +0000
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+"""
+spider_plot.py
+
+Created by Thomas Walters on 2010-09-12.
+Copyright 2010 Google. All rights reserved.
+"""
+
+import numpy as np
+import pylab as p
+import mpl_toolkits.mplot3d.axes3d as p3
+import matplotlib as mpl
+from matplotlib import cm
+import matplotlib.ticker as ticker 
+
+total_value_count=185
+
+central_vtl=15
+central_vtl_scaling=112.32
+
+# Read in a file with lines in the form
+# Pitch Scale Percentage
+xs=[]
+ys=[]
+zs=[]
+f = open('plottable_results.txt', 'r')
+for line in f:
+  if line[0] != "#":
+    values = line.strip().split(' ')
+    xs.append(central_vtl*central_vtl_scaling/float(values[1]))
+    ys.append(float(values[0]))
+    zs.append(float(values[2]))
+
+
+# Define a tiny sphere, centered on the origin, which
+# we'll shift to the desired position. 
+u=np.r_[0:2*np.pi:50j]
+v=np.r_[0:np.pi:50j]
+sx=0.01*np.outer(np.cos(u),np.sin(v))
+sy=0.01*np.outer(np.sin(u),np.sin(v))
+sz=2.5*np.outer(np.ones(np.size(u)),np.cos(v))
+
+fig=p.figure()
+ax = p3.Axes3D(fig, azim=-80, elev=60)
+
+colormap = cm.get_cmap('jet', 100)
+
+# Note: here I fake out the lack of proper logarithmic scales on 3D axes in
+# matplotlib by just plotting log values on a linear scale and renaming
+# the labels.
+# (This doesn't work: ax.w_yaxis.set_scale('log') ax.w_xaxis.set_scale('log'))
+
+# Plot the values seven at a time as dark lines.
+# These are the individual spokes of the spoke pattern.
+n=7
+for i in xrange(0,8):
+  ax.plot(np.log(xs[i*n:(i+1)*n]), np.log(ys[i*n:(i+1)*n]), zs[i*n:(i+1)*n], color=[0,0,0])
+
+for x,y,z in zip(xs,ys,zs):
+  ax.plot(np.log([x, x]), np.log([y, y]), [z, 0], color=[0.8,0.8,0.8])
+  ax.plot_surface(sx+np.log(x),sy+np.log(y),sz+z, color=colormap(int(z)), linewidth=0)
+
+ax.set_ylabel('GPR/Hz')
+ax.set_xlabel('VTL/cm')
+ax.set_zlabel('Percent correct')
+ax.set_ylim3d(np.log([131,225]))
+ax.set_xlim3d(np.log([9.9, 22.1]))
+ax.set_zlim3d([-1, 101])
+ax.w_zaxis.set_major_locator(ticker.FixedLocator([0, 20, 40, 60, 80, 100]))
+
+ax.w_xaxis.set_major_locator(ticker.FixedLocator(np.log([10,15,22])))
+ax.w_xaxis.set_ticklabels(['10', '15', '22'])
+ax.w_yaxis.set_major_locator(ticker.FixedLocator(np.log([132, 172, 224])))
+ax.w_yaxis.set_ticklabels(['132', '172', '224'])
+
+#for a in ax.w_xaxis.get_ticklines()+ax.w_xaxis.get_ticklabels(): 
+#    a.set_visible(False) 
+
+#for a in ax.w_yaxis.get_ticklines()+ax.w_yaxis.get_ticklabels(): 
+#    a.set_visible(False) 
+
+
+#p.show()
+p.savefig('results.png')
+
--- a/experiments/scripts/cnbh-syllables/run_training_and_testing/run_test_instance.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/cnbh-syllables/run_training_and_testing/run_test_instance.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -88,5 +88,6 @@
   HResults -e "???" ${SILENCE} -I $TEST_MLF $WORKING_DIRECTORY/$SYLLIST_COMPLETE $WORKING_DIRECTORY/$hmm_type/$RECOUT
   HResults -p -t -e "???" ${SILENCE} \
     -I $TEST_MLF $WORKING_DIRECTORY/$SYLLIST_COMPLETE $WORKING_DIRECTORY/$hmm_type/$RECOUT > $WORKING_DIRECTORY/$hmm_type/${RESULTS_FILE}_iteration_$iter
+  # Count the number of instances of each talker appearing in the list of errors.
   grep Aligned $WORKING_DIRECTORY/$hmm_type/${RESULTS_FILE}_iteration_$iter| sed -E "s/.*\/..\/([a-z]{2})([0-9]{2,3}\.[0-9])p([0-9]{2,3}\.[0-9])s.*/\2 \3/" | sort | uniq -c > $WORKING_DIRECTORY/$hmm_type/${MISCLASSIFIED}_iteration_$iter
 done
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experiments/scripts/cnbh-syllables/run_training_and_testing/train_test_sets/gen_spoke_points/spoke_pattern.txt	Mon Sep 13 18:34:23 2010 +0000
@@ -0,0 +1,9 @@
+171.7p112.3s
+137.0p104.3s 145.5p106.3s 153.0p108.1s 159.5p109.6s 164.7p110.8s 168.6p111.6s 170.9p112.2s
+151.6p83.9s 156.7p90.6s 161.1p96.8s 164.9p102.1s 167.8p106.5s 170.0p109.7s 171.3p111.7s
+180.4p80.1s 178.1p87.6s 176.1p94.5s 174.5p100.6s 173.3p105.6s 172.4p109.3s 171.9p111.5s
+208.6p93.2s 198.1p97.9s 189.6p102.1s 183.0p105.7s 178.0p108.5s 174.5p110.6s 172.4p111.9s
+215.2p121.0s 202.7p118.6s 192.7p116.7s 184.8p115.1s 179.0p113.9s 174.9p113.0s 172.5p112.5s
+194.5p150.4s 188.1p139.2s 183.0p130.4s 178.8p123.6s 175.7p118.5s 173.5p115.0s 172.1p113.0s
+163.4p157.6s 165.6p144.0s 167.4p133.5s 168.9p125.4s 170.1p119.5s 171.0p115.5s 171.5p113.1s
+141.3p135.4s 148.8p128.8s 155.5p123.5s 161.1p119.4s 165.7p116.2s 169.0p114.0s 171.0p112.8s
--- a/experiments/scripts/cnbh-syllables/run_training_and_testing/train_test_sets/generate_train_test_lists.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/cnbh-syllables/run_training_and_testing/train_test_sets/generate_train_test_lists.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -33,6 +33,11 @@
     $BASEDIR/train_on_extrema.sh $WORK/training_talkers $WORK/testing_talkers
 fi
 
+# In general, we want to do our testing on all the talkers (training talkers and
+# testing talkers) so the train and test talkers are combined here to make a single
+# testing set.
+cat $WORK/training_talkers $WORK/testing_talkers > $WORK/all_talkers
+
 # The vowels and consonants that make up the CNBH database
 VOWELS="a e i o u"
 CONSONANTS="b d f g h k l m n p r s t v w x y z"
@@ -83,7 +88,7 @@
     echo "$SILENCE" >&4
     echo "." >&4
   done
-  for speaker in $(cat $WORK/testing_talkers); do
+  for speaker in $(cat $WORK/all_talkers); do
     DEST_FILENAME=$FEATURES_DIR/$syllable/${syllable}${speaker} 
     echo "'\"${DEST_FILENAME}.lab\"'" >&6
     echo "$SILENCE" >&6
@@ -111,7 +116,7 @@
     DEST_FILENAME=$FEATURES_DIR/$syllable/${syllable}${speaker}
     echo "'${DEST_FILENAME}.${FEATURE_NAME}'" >&7
   done
-  for speaker in $(cat $WORK/testing_talkers); do
+  for speaker in $(cat $WORK/all_talkers); do
     DEST_FILENAME=$FEATURES_DIR/$syllable/${syllable}${speaker}
       echo "'${DEST_FILENAME}.${FEATURE_NAME}'" >&8
   done
@@ -120,4 +125,7 @@
 exec 8>&-
 
 rm $WORK/${SYLLIST}
-rm $WORK/training_talkers $WORK/testing_talkers
\ No newline at end of file
+# Note: don't delete 'all_talkers', 'training_talkers' or 'testing_talkers' because
+# they're used later by the plotting scripts.
+
+
--- a/experiments/scripts/master.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/master.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -33,7 +33,7 @@
 
 if [ ! -e $SYLLABLES_DATABASE_TAR ]; then
   sudo mkdir -p `dirname $SYLLABLES_DATABASE_TAR`
-  sudo chown ubuntu `dirname $SYLLABLES_DATABASE_TAR`
+  sudo chown `whoami` `dirname $SYLLABLES_DATABASE_TAR`
   wget -O $SYLLABLES_DATABASE_TAR $SYLLABLES_DATABASE_URL
 fi
 
@@ -117,7 +117,7 @@
 done 
 
 sudo mkdir -p $HMMS_ROOT
-sudo chown ubuntu $HMMS_ROOT
+sudo chown `whoami` $HMMS_ROOT
 
 # Now run a bunch of experiments.
 # For each of the feature types, we want to run HMMs with a bunch of
--- a/experiments/scripts/setup_aws_instance.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/setup_aws_instance.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -2,6 +2,7 @@
 # Run ami-2fc2e95b (32 bit) or ami-05c2e971 (64 bit) in eu-west zone 
 # ec2-run-instances  --user-data-file ec2_user_data.sh --key tom_eu_west --instance-type m1.small --instance-count 1 --region eu-west-1 --availability-zone eu-west-1b ami-2fc2e95b
 # ec2-run-instances --user-data-file ec2_user_data.sh --key tom_eu_west --instance-type c1.xlarge --instance-count 1 --region eu-west-1 --availability-zone eu-west-1b ami-05c2e971
+su ubuntu
 sudo apt-get -y update
 sudo apt-get -y install bc subversion scons pkg-config libsndfile1-dev build-essential libboost-dev python sox
 
@@ -9,9 +10,9 @@
 sudo apt-get -y install libc6-dev-i386
 
 sudo mkdir -p /mnt/aimc
-sudo chown ubuntu /mnt/aimc
+sudo chown `whoami` /mnt/aimc
 sudo mkdir -p /mnt/log
-sudo chown ubuntu /mnt/log
+sudo chown `whoami` /mnt/log
 cd /mnt/aimc
 svn checkout http://aimc.googlecode.com/svn/trunk/ aimc-read-only
 cd aimc-read-only/experiments/scripts/
--- a/scripts/aws_prepare.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/scripts/aws_prepare.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -11,7 +11,7 @@
 su ubuntu
 cd /mnt/
 sudo mkdir work
-sudo chown ubuntu work
+sudo chown `whoami` work
 cd work
 svn checkout http://aimc.googlecode.com/svn/trunk/ aimc-read-only
 cd aimc-read-only