# HG changeset patch
# User tomwalters
# Date 1284402863 0
# Node ID ae195c41c7bd48ce781414bda932fc2d891753ce
# Parent be3bbd8b3fcda9f106f5d80d5fe2b9ce0cf232de
- Python results plotting (finally).
- Proper results reporting script.
- Test on ALL talkers. The results script then generates a summary based on all the various subsets.
- Fixed chown users (hopefully sudos to be deleted entirely soon)
- More...

diff -r be3bbd8b3fcd -r ae195c41c7bd experiments/scripts/HTK/install_htk.sh
--- a/experiments/scripts/HTK/install_htk.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/HTK/install_htk.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -3,7 +3,7 @@
 set -u
 if [ ! -e /mnt/experiments/htk/.htk_installed_success ]; then
 sudo mkdir -p /mnt/experiments/htk
-sudo chown ubuntu /mnt/experiments/htk
+sudo chown `whoami` /mnt/experiments/htk
 cd /mnt/experiments/htk
 wget --user $HTK_USERNAME --password $HTK_PASSWORD http://htk.eng.cam.ac.uk/ftp/software/HTK-3.4.1.tar.gz
 tar -xzf HTK-3.4.1.tar.gz
diff -r be3bbd8b3fcd -r ae195c41c7bd experiments/scripts/aimc/build_aimc.sh
--- a/experiments/scripts/aimc/build_aimc.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/aimc/build_aimc.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -6,7 +6,7 @@
 AIMC_DIR=/mnt/experiments/aimc
 sudo mkdir -p $AIMC_DIR
-sudo chown ubuntu $AIMC_DIR
+sudo chown `whoami` $AIMC_DIR
 cd $AIMC_DIR
 svn checkout http://aimc.googlecode.com/svn/trunk/ aimc-read-only
 cd aimc-read-only
diff -r be3bbd8b3fcd -r ae195c41c7bd experiments/scripts/cnbh-syllables/results_plotting/gen_results.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experiments/scripts/cnbh-syllables/results_plotting/gen_results.py	Mon Sep 13 18:34:23 2010 +0000
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+gen_results.py
+
+Created by Thomas Walters on 2010-09-12.
+"""
+
+import sys
+import getopt
+import re
+
+
+help_message = '''
+Generate a file containing all the results for a run of the
+syllable recognition experiment. Expected input is a 'misclassifications'
+file of the type generated by run_test_instance.sh, along with the locations
+of files containing the training talkers and all the talkers that the system
+was tested on.
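+The output is a summary of percent-correct scores for the training, test and
+combined talker sets, the score for the central talker, and per-spoke results
+(GPR, VTL scale and percent correct) for each of the eight spokes.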
+
+Arguments:
+-i --input_file
+-t --train_talkers
+-s --test_talkers
+-o --output_filename
+-c --value_count
+-p --spoke_pattern
+'''
+
+class Usage(Exception):
+    def __init__(self, msg):
+        self.msg = msg
+
+
+def main(argv=None):
+    if argv is None:
+        argv = sys.argv
+    try:
+        try:
+            opts, args = getopt.getopt(argv[1:], "hi:t:s:o:c:p:v",
+                                       ["help", "input_file=", "train_talkers=",
+                                        "test_talkers=", "output_filename=",
+                                        "value_count=", "spoke_pattern="])
+        except getopt.error, msg:
+            raise Usage(msg)
+
+        # defaults
+        input_file = "misclassified_syllables_iteration_15"
+        train_talkers = "training_talkers"
+        test_talkers = "testing_talkers"
+        output_filename = "results.txt"
+        total_value_count = 185
+        spoke_pattern_file = "spoke_pattern.txt"
+
+        # option processing
+        for option, value in opts:
+            if option == "-v":
+                verbose = True
+            if option in ("-h", "--help"):
+                raise Usage(help_message)
+            if option in ("-i", "--input_file"):
+                input_file = value
+            if option in ("-t", "--train_talkers"):
+                train_talkers = value
+            if option in ("-s", "--test_talkers"):
+                test_talkers = value
+            if option in ("-c", "--value_count"):
+                total_value_count = int(value)
+            if option in ("-p", "--spoke_pattern"):
+                spoke_pattern_file = value
+
+    except Usage, err:
+        print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
+        print >> sys.stderr, "\t for help use --help"
+        return 2
+
+    results = dict()
+    f = open(input_file, 'r')
+    for line in f:
+        values = line.strip().split(' ')
+        results[values[1]]=100*(1-float(values[0])/total_value_count)
+
+    f = open(test_talkers, 'r')
+    test_talkers_list = f.readlines()
+    f.close()
+
+    f = open(train_talkers, 'r')
+    train_talkers_list = f.readlines()
+    f.close()
+
+    spoke_pattern = []
+    f = open(spoke_pattern_file, 'r')
+    for line in f:
+        spoke_pattern.append(line.strip().split(' '))
+
+    all_talkers_list = []
+    all_talkers_list.extend(train_talkers_list)
+    all_talkers_list.extend(test_talkers_list)
+
+    # Here I make the rather rash assumption that the model was tested on all talkers;
+    # this should be true if the training and testing was done using my scripts.
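+    # Talkers that never show up in the misclassifications file produced no
+    # recognition errors, so they default to a score of 100% correct here.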
+    for t in all_talkers_list:
+        results.setdefault(t.strip(), 100.0)
+
+    total_score = 0.0
+    element_count = 0
+    for t in train_talkers_list:
+        total_score += results[t.strip()]
+        element_count += 1
+    score = total_score / element_count
+    print ("# Score on training talkers: %.1f%%" % score)
+
+    total_score = 0.0
+    element_count = 0
+    for t in all_talkers_list:
+        total_score += results[t.strip()]
+        element_count += 1
+    score = total_score / element_count
+    print ("# Score on all talkers: %.1f%%" % score)
+
+    total_score = 0.0
+    element_count = 0
+    for t in test_talkers_list:
+        total_score += results[t.strip()]
+        element_count += 1
+    score = total_score / element_count
+    print ("# Score on test talkers: %.1f%%" % score)
+
+    score = results[spoke_pattern[0][0]]
+    print ("# Score on central talker: %.1f%%" % score)
+
+    for s in xrange(1,9):
+        print ("# Results for spoke %d" % s)
+        for p in xrange(0, 7):
+            score = results[spoke_pattern[s][p]]
+            m = re.match('(.*)p(.*)s', spoke_pattern[s][p])
+            gpr = m.group(1)
+            vtl = m.group(2)
+            print ("%s %s %s" % (gpr, vtl, score))
+
+if __name__ == "__main__":
+    sys.exit(main())
diff -r be3bbd8b3fcd -r ae195c41c7bd experiments/scripts/cnbh-syllables/results_plotting/plot_all_results.m
--- a/experiments/scripts/cnbh-syllables/results_plotting/plot_all_results.m	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/cnbh-syllables/results_plotting/plot_all_results.m	Mon Sep 13 18:34:23 2010 +0000
@@ -3,16 +3,21 @@
 % Nick Fyson and Martin Vestergaard.
 % Copyright 2009 University of Cambridge
 % Author: Tom Walters
-function plot_all_results(exp_path)
+function plot_all_results(exp_path, iteration, plot_end_numbers)
+
+plot_numbers = true;
+if nargin < 3
+  plot_end_numbers = false;
+end
 
 % Load the results from the experimental directory
-load([exp_path 'misclassified.txt_iter15']);
+misclassified = load([exp_path 'misclassified_syllables_iteration_' num2str(iteration)]);
 
 % The total number of syllables in the CNBH syllable database
 num_points = 185;
 target_VTL = 15;
 
-misclassified(:, 1) = 1 - misclassified(:, 1) / num_points; %#ok
+misclassified(:, 1) = 1 - misclassified(:, 1) / num_points;
 
 % The individual data points are plotted as spheres
 sphere_size_x = 1.2;
@@ -61,20 +66,30 @@
   y_pos_2 = spokes{i}(j, 2);
   z_pos_2 = results{i}(j);
-  text(x_pos + 0.3*(x_pos - x_pos_2), y_pos + 0.3*(y_pos - y_pos_2), z_pos + 0.3*(z_pos - z_pos_2) , [num2str(results{i}(j), 3) '%']);
+  j=1;
+
+  if (~plot_numbers && plot_end_numbers)
+    text(x_pos + 0.3*(x_pos - x_pos_2), y_pos + 0.3*(y_pos - y_pos_2), z_pos + 0.3*(z_pos - z_pos_2) , [num2str(results{i}(j), 3) '%']);
+  end
   for j = 1:length(spokes{i})
-    [X Y Z] = sphere(10);
-    X = sphere_size_x.*X + spokes{i}(j,1);
-    Y = sphere_size_y.*Y + spokes{i}(j,2);
-    Z = sphere_size_z.*Z + results{i}(j);
-    % C = zeros(size(X));
-    plot3([spokes{i}(j, 1) spokes{i}(j, 1)], ...
-        [spokes{i}(j, 2),spokes{i}(j, 2)], [0 results{i}(j)], '-k.', ...
-        'LineWidth', 1, 'Color', [0.8 0.8 0.8]);
-    surf(X, Y, Z, ones(size(Z)) .* (results{i}(j)), 'LineStyle', 'none');
+    if (plot_numbers)
+      text(spokes{i}(j,1), spokes{i}(j,2), results{i}(j), [num2str(results{i}(j), 3) '%']);
+    else
+      [X Y Z] = sphere(10);
+      X = sphere_size_x.*X + spokes{i}(j,1);
+      Y = sphere_size_y.*Y + spokes{i}(j,2);
+      Z = sphere_size_z.*Z + results{i}(j);
+      % C = zeros(size(X));
+      plot3([spokes{i}(j, 1) spokes{i}(j, 1)], ...
+          [spokes{i}(j, 2),spokes{i}(j, 2)], [0 results{i}(j)], '-k.', ...
+          'LineWidth', 1, 'Color', [0.8 0.8 0.8]);
+      surf(X, Y, Z, ones(size(Z)) .* (results{i}(j)), 'LineStyle', 'none');
+    end
   end
-  plot3(spokes{i}(:,1), spokes{i}(:,2), results{i}(:), '-', 'LineWidth', 2, ...
-      'Color', [0.2 0.2 0.2]);
+  if (~plot_numbers)
+    plot3(spokes{i}(:,1), spokes{i}(:,2), results{i}(:), '-', 'LineWidth', 2, ...
+        'Color', [0.2 0.2 0.2]);
+  end
 end
 
 % Plot a zero-sized sphere at zero to get the autoscaling of the colour bar
 % correct
@@ -99,8 +114,9 @@
 set(axes1, 'YDir', 'reverse');
 set(axes1, 'ZTick', [0 20 40 60 80 100]);
 hold('all');
-print('-depsc', 'results_plot.eps');
-!open results_plot.eps
+%print('-depsc', [exp_path 'results_plot_iteration_' num2str(iteration) '.eps']);
+% saveas(gcf, [exp_path 'results_plot_iteration_' num2str(iteration) '.fig']);
+%!open results_plot.eps
diff -r be3bbd8b3fcd -r ae195c41c7bd experiments/scripts/cnbh-syllables/results_plotting/spider_plot.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experiments/scripts/cnbh-syllables/results_plotting/spider_plot.py	Mon Sep 13 18:34:23 2010 +0000
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+"""
+spider_plot.py
+
+Created by Thomas Walters on 2010-09-12.
+Copyright 2010 Google. All rights reserved.
+"""
+
+import numpy as np
+import pylab as p
+import mpl_toolkits.mplot3d.axes3d as p3
+import matplotlib as mpl
+from matplotlib import cm
+import matplotlib.ticker as ticker
+
+total_value_count=185
+
+central_vtl=15
+central_vtl_scaling=112.32
+
+# Read in a file with lines in the form
+# Pitch Scale Percentage
+xs=[]
+ys=[]
+zs=[]
+f = open('plottable_results.txt', 'r')
+for line in f:
+    if line[0] != "#":
+        values = line.strip().split(' ')
+        xs.append(central_vtl*central_vtl_scaling/float(values[1]))
+        ys.append(float(values[0]))
+        zs.append(float(values[2]))
+
+
+# Define a tiny sphere, centered on the origin, which
+# we'll shift to the desired position.
+u=np.r_[0:2*np.pi:50j]
+v=np.r_[0:np.pi:50j]
+sx=0.01*np.outer(np.cos(u),np.sin(v))
+sy=0.01*np.outer(np.sin(u),np.sin(v))
+sz=2.5*np.outer(np.ones(np.size(u)),np.cos(v))
+
+fig=p.figure()
+ax = p3.Axes3D(fig, azim=-80, elev=60)
+
+colormap = cm.get_cmap('jet', 100)
+
+# Note: here I fake out the lack of proper logarithmic scales on 3D axes in
+# matplotlib by just plotting log values on a linear scale and renaming
+# the labels.
+# (This doesn't work: ax.w_yaxis.set_scale('log') ax.w_xaxis.set_scale('log'))
+
+# Plot the values seven at a time as dark lines.
+# These are the individual spokes of the spoke pattern.
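+# (The input is assumed to be ordered as eight spokes of seven points each,
+# in the same order as the per-spoke blocks written by gen_results.py.)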
+n=7
+for i in xrange(0,8):
+    ax.plot(np.log(xs[i*n:(i+1)*n]), np.log(ys[i*n:(i+1)*n]), zs[i*n:(i+1)*n], color=[0,0,0])
+
+for x,y,z in zip(xs,ys,zs):
+    ax.plot(np.log([x, x]), np.log([y, y]), [z, 0], color=[0.8,0.8,0.8])
+    ax.plot_surface(sx+np.log(x),sy+np.log(y),sz+z, color=colormap(int(z)), linewidth=0)
+
+ax.set_ylabel('GPR/Hz')
+ax.set_xlabel('VTL/cm')
+ax.set_zlabel('Percent correct')
+ax.set_ylim3d(np.log([131,225]))
+ax.set_xlim3d(np.log([9.9, 22.1]))
+ax.set_zlim3d([-1, 101])
+ax.w_zaxis.set_major_locator(ticker.FixedLocator([0, 20, 40, 60, 80, 100]))
+
+ax.w_xaxis.set_major_locator(ticker.FixedLocator(np.log([10,15,22])))
+ax.w_xaxis.set_ticklabels(['10', '15', '22'])
+ax.w_yaxis.set_major_locator(ticker.FixedLocator(np.log([132, 172, 224])))
+ax.w_yaxis.set_ticklabels(['132', '172', '224'])
+
+#for a in ax.w_xaxis.get_ticklines()+ax.w_xaxis.get_ticklabels():
+#    a.set_visible(False)
+
+#for a in ax.w_yaxis.get_ticklines()+ax.w_yaxis.get_ticklabels():
+#    a.set_visible(False)
+
+
+#p.show()
+p.savefig('results.png')
+
diff -r be3bbd8b3fcd -r ae195c41c7bd experiments/scripts/cnbh-syllables/run_training_and_testing/run_test_instance.sh
--- a/experiments/scripts/cnbh-syllables/run_training_and_testing/run_test_instance.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/cnbh-syllables/run_training_and_testing/run_test_instance.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -88,5 +88,6 @@
 HResults -e "???" ${SILENCE} -I $TEST_MLF $WORKING_DIRECTORY/$SYLLIST_COMPLETE $WORKING_DIRECTORY/$hmm_type/$RECOUT
 HResults -p -t -e "???" ${SILENCE} \
   -I $TEST_MLF $WORKING_DIRECTORY/$SYLLIST_COMPLETE $WORKING_DIRECTORY/$hmm_type/$RECOUT > $WORKING_DIRECTORY/$hmm_type/${RESULTS_FILE}_iteration_$iter
+  # Count the number of instances of each talker appearing in the list of errors.
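+  # The sed expression pulls the GPR and the VTL scale value out of each
+  # misrecognised filename, so each line of the output should have the form
+  # '<count> <GPR> <VTL scale>'.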
 grep Aligned $WORKING_DIRECTORY/$hmm_type/${RESULTS_FILE}_iteration_$iter| sed -E "s/.*\/..\/([a-z]{2})([0-9]{2,3}\.[0-9])p([0-9]{2,3}\.[0-9])s.*/\2 \3/" | sort | uniq -c > $WORKING_DIRECTORY/$hmm_type/${MISCLASSIFIED}_iteration_$iter
 done
diff -r be3bbd8b3fcd -r ae195c41c7bd experiments/scripts/cnbh-syllables/run_training_and_testing/train_test_sets/gen_spoke_points/spoke_pattern.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/experiments/scripts/cnbh-syllables/run_training_and_testing/train_test_sets/gen_spoke_points/spoke_pattern.txt	Mon Sep 13 18:34:23 2010 +0000
@@ -0,0 +1,9 @@
+171.7p112.3s
+137.0p104.3s 145.5p106.3s 153.0p108.1s 159.5p109.6s 164.7p110.8s 168.6p111.6s 170.9p112.2s
+151.6p83.9s 156.7p90.6s 161.1p96.8s 164.9p102.1s 167.8p106.5s 170.0p109.7s 171.3p111.7s
+180.4p80.1s 178.1p87.6s 176.1p94.5s 174.5p100.6s 173.3p105.6s 172.4p109.3s 171.9p111.5s
+208.6p93.2s 198.1p97.9s 189.6p102.1s 183.0p105.7s 178.0p108.5s 174.5p110.6s 172.4p111.9s
+215.2p121.0s 202.7p118.6s 192.7p116.7s 184.8p115.1s 179.0p113.9s 174.9p113.0s 172.5p112.5s
+194.5p150.4s 188.1p139.2s 183.0p130.4s 178.8p123.6s 175.7p118.5s 173.5p115.0s 172.1p113.0s
+163.4p157.6s 165.6p144.0s 167.4p133.5s 168.9p125.4s 170.1p119.5s 171.0p115.5s 171.5p113.1s
+141.3p135.4s 148.8p128.8s 155.5p123.5s 161.1p119.4s 165.7p116.2s 169.0p114.0s 171.0p112.8s
diff -r be3bbd8b3fcd -r ae195c41c7bd experiments/scripts/cnbh-syllables/run_training_and_testing/train_test_sets/generate_train_test_lists.sh
--- a/experiments/scripts/cnbh-syllables/run_training_and_testing/train_test_sets/generate_train_test_lists.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/cnbh-syllables/run_training_and_testing/train_test_sets/generate_train_test_lists.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -33,6 +33,11 @@
   $BASEDIR/train_on_extrema.sh $WORK/training_talkers $WORK/testing_talkers
 fi
 
+# In general, we want to do our testing on all the talkers (training talkers and
+# testing talkers) so the train and test talkers are combined here to make a single
+# testing set.
+cat $WORK/training_talkers $WORK/testing_talkers > $WORK/all_talkers
+
 # The vowels and consonants that make up the CNBH database
 VOWELS="a e i o u"
 CONSONANTS="b d f g h k l m n p r s t v w x y z"
@@ -83,7 +88,7 @@
     echo "$SILENCE" >&4
     echo "." >&4
   done
-  for speaker in $(cat $WORK/testing_talkers); do
+  for speaker in $(cat $WORK/all_talkers); do
    DEST_FILENAME=$FEATURES_DIR/$syllable/${syllable}${speaker}
     echo "'\"${DEST_FILENAME}.lab\"'" >&6
     echo "$SILENCE" >&6
@@ -111,7 +116,7 @@
     DEST_FILENAME=$FEATURES_DIR/$syllable/${syllable}${speaker}
     echo "'${DEST_FILENAME}.${FEATURE_NAME}'" >&7
   done
-  for speaker in $(cat $WORK/testing_talkers); do
+  for speaker in $(cat $WORK/all_talkers); do
    DEST_FILENAME=$FEATURES_DIR/$syllable/${syllable}${speaker}
     echo "'${DEST_FILENAME}.${FEATURE_NAME}'" >&8
   done
@@ -120,4 +125,7 @@
 exec 8>&-
 
 rm $WORK/${SYLLIST}
-rm $WORK/training_talkers $WORK/testing_talkers
\ No newline at end of file
+# Note: don't delete 'all_talkers', 'training_talkers' or 'testing_talkers' because
+# they're used later by the plotting scripts.
+
+
diff -r be3bbd8b3fcd -r ae195c41c7bd experiments/scripts/master.sh
--- a/experiments/scripts/master.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/master.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -33,7 +33,7 @@
 if [ ! -e $SYLLABLES_DATABASE_TAR ]; then
   sudo mkdir -p `dirname $SYLLABLES_DATABASE_TAR`
-  sudo chown ubuntu `dirname $SYLLABLES_DATABASE_TAR`
+  sudo chown `whoami` `dirname $SYLLABLES_DATABASE_TAR`
   wget -O $SYLLABLES_DATABASE_TAR $SYLLABLES_DATABASE_URL
 fi
@@ -117,7 +117,7 @@
 done
 
 sudo mkdir -p $HMMS_ROOT
-sudo chown ubuntu $HMMS_ROOT
+sudo chown `whoami` $HMMS_ROOT
 
 # Now run a bunch of experiments.
 # For each of the feature types, we want to run HMMs with a bunch of
diff -r be3bbd8b3fcd -r ae195c41c7bd experiments/scripts/setup_aws_instance.sh
--- a/experiments/scripts/setup_aws_instance.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/experiments/scripts/setup_aws_instance.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -2,6 +2,7 @@
 # Run ami-2fc2e95b (32 bit) or ami-05c2e971 (64 bit) in eu-west zone
 # ec2-run-instances --user-data-file ec2_user_data.sh --key tom_eu_west --instance-type m1.small --instance-count 1 --region eu-west-1 --availability-zone eu-west-1b ami-2fc2e95b
 # ec2-run-instances --user-data-file ec2_user_data.sh --key tom_eu_west --instance-type c1.xlarge --instance-count 1 --region eu-west-1 --availability-zone eu-west-1b ami-05c2e971
+su ubuntu
 
 sudo apt-get -y update
 sudo apt-get -y install bc subversion scons pkg-config libsndfile1-dev build-essential libboost-dev python sox
@@ -9,9 +10,9 @@
 sudo apt-get -y install libc6-dev-i386
 
 sudo mkdir -p /mnt/aimc
-sudo chown ubuntu /mnt/aimc
+sudo chown `whoami` /mnt/aimc
 sudo mkdir -p /mnt/log
-sudo chown ubuntu /mnt/log
+sudo chown `whoami` /mnt/log
 cd /mnt/aimc
 svn checkout http://aimc.googlecode.com/svn/trunk/ aimc-read-only
 cd aimc-read-only/experiments/scripts/
diff -r be3bbd8b3fcd -r ae195c41c7bd scripts/aws_prepare.sh
--- a/scripts/aws_prepare.sh	Tue Sep 07 01:14:22 2010 +0000
+++ b/scripts/aws_prepare.sh	Mon Sep 13 18:34:23 2010 +0000
@@ -11,7 +11,7 @@
 su ubuntu
 cd /mnt/
 sudo mkdir work
-sudo chown ubuntu work
+sudo chown `whoami` work
 cd work
 svn checkout http://aimc.googlecode.com/svn/trunk/ aimc-read-only
 cd aimc-read-only
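
# Usage sketch: with the defaults baked into the two new results scripts, a
# plot for a finished run would presumably be produced along these lines
# (file names below are just the script defaults, not guaranteed):
#
#   python gen_results.py -i misclassified_syllables_iteration_15 \
#       -t training_talkers -s testing_talkers -p spoke_pattern.txt \
#       > plottable_results.txt
#   python spider_plot.py   # reads plottable_results.txt, writes results.png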