#!/bin/bash
# Script to run a single HMM train/test cycle with the given parameters.
#
# The top-level `return` used below only succeeds when this file is sourced,
# so it is presumably meant to be sourced by a driver script that has already
# set the variables listed here. (Executing it with the variables exported is
# also handled: the early-exit path falls back to `exit`.)
#
# This script expects the following variables to be set:
#
# total_hmm_states - total number of HMM states (including the 2 non-emitting states)
# mixture_components - number of components in the output distribution for each emitting state
# input_vector_size - number of elements in the input vector (normally 39 for MFCCs, 12 for AIM)
# feature_code - HTK feature type code for the features being used (normally MFCC_0_D_A for MFCCs and USER_E_D_A for AIM features)
# FEATURE_SUFFIXES - List of suffixes appended to the feature filenames. For the MFCCs this is just "mfc" but for the AIM feature, there can be multiple features generated from each run of AIMCopy (not read directly in this file)
# WORKING_DIRECTORY - working directory
# HMMCONFIG - HTK configuration file name, relative to WORKING_DIRECTORY
# TRAIN_SCRIPT - file passed with -S to HCompV and HERest
# TEST_SCRIPT - file passed with -S to HVite
# TRAIN_MLF - master label file passed with -I to HERest
# TEST_MLF - master label file passed with -I to HResults
# TRAINING_ITERATIONS_LIST - whitespace-separated iteration numbers; iteration N reads hmmN and writes hmm(N+1)
# TESTING_ITERATIONS_LIST - whitespace-separated iteration numbers N whose hmmN models are tested
# SPOKE_PATTERN_FILE - file passed to gen_results.py as --spoke_pattern
#
# The following files are read from WORKING_DIRECTORY:
#   syllable_list_with_silence, dictionary, word_network,
#   training_talkers, testing_talkers
#
# The results-plotting scripts are invoked via ./cnbh-syllables/..., i.e.
# relative to the current working directory of the caller.

set -e
set -u
# NOTE: `set -o pipefail` is deliberately not enabled. The `grep Aligned`
# stage further down exits non-zero when nothing was misclassified, and that
# must not abort the run.

# Filenames generated here

# This must be named 'proto' to keep HCompV happy.
HMMPROTO=proto
HHED_SCRIPT=hhed_change_components_script
RECOUT=recognition_output
RESULTS_FILE=results
MISCLASSIFIED=misclassified_syllables

# Filenames used here
#TRAIN_SCRIPT=training_script
#TEST_SCRIPT=testing_script
SYLLIST_COMPLETE=syllable_list_with_silence
#TEST_MLF=testing_master_label_file

DICT=dictionary
WDNET=word_network

SILENCE=sil

THIS_DIR=$(dirname "$0")

hmm_type=${total_hmm_states}_states_${mixture_components}_mixture_components
echo "HMM type: ${hmm_type}..."

# Everything for this HMM configuration lives under its own directory.
hmm_dir=$WORKING_DIRECTORY/$hmm_type
hmm0_dir=$hmm_dir/hmm0
syllist_path=$WORKING_DIRECTORY/$SYLLIST_COMPLETE

# The marker file is written at the very end of a successful run, so its
# presence means the whole train/test cycle already completed.
if [[ -e "$hmm_dir/.hmm_success" ]]; then
  echo " already done"
  # `return` is only legal when sourced; fall back to `exit` when executed.
  return 0 2>/dev/null || exit 0
fi

mkdir -p "$hmm_dir"

echo "Creating HMM structure..."
"$THIS_DIR/gen_hmmproto.py" \
  --input_size "${input_vector_size}" \
  --total_hmm_states "${total_hmm_states}" \
  --feature_type "${feature_code}" \
  > "$hmm_dir/$HMMPROTO"

echo "Adding output mixture components..."
"$THIS_DIR/gen_hhed_script.py" \
  --num_means "${mixture_components}" \
  --total_hmm_states "${total_hmm_states}" \
  > "$hmm_dir/$HHED_SCRIPT"


echo "Training HMM..."
echo "Setting up prototype HMM..."
mkdir -p "$hmm0_dir"
HCompV -C "$WORKING_DIRECTORY/$HMMCONFIG" -f 0.01 -m -S "$TRAIN_SCRIPT" \
  -M "$hmm0_dir" "$hmm_dir/$HMMPROTO"

echo "Generating HMM definitions..."
# Now take the prototype file from hmm0, and create the other HMM definitions
# from it
grep -A 9999 "<BEGINHMM>" "$hmm0_dir/$HMMPROTO" > "$hmm0_dir/hmms"

# hmmdefs is built by appending below, so drop any copy left by an earlier,
# interrupted run first.
rm -f -- "$hmm0_dir/hmmdefs"

# Word splitting of the list file contents is intentional: one HMM definition
# is emitted per whitespace-separated entry.
for syllable in $(cat "$syllist_path"); do
  echo "~h $syllable" >> "$hmm0_dir/hmmdefs"
  cat "$hmm0_dir/hmms" >> "$hmm0_dir/hmmdefs"
done

# Global options macro; written without a trailing newline and followed
# directly by the vFloors file found in hmm0.
printf '%s' "~o<STREAMINFO> 1 ${input_vector_size}<VECSIZE> ${input_vector_size}<NULLD><${feature_code}><DIAGC>" \
  > "$hmm0_dir/macros"

cat "$hmm0_dir/vFloors" >> "$hmm0_dir/macros"

HHEd -H "$hmm0_dir/macros" -H "$hmm0_dir/hmmdefs" \
  "$hmm_dir/$HHED_SCRIPT" "$syllist_path"

# The iteration lists are intentionally unquoted so they split into words.
for iter in $TRAINING_ITERATIONS_LIST; do
  echo "Training iteration ${iter}..."
  nextiter=$((iter + 1))
  # An existing output directory is taken to mean this iteration was already
  # trained, which lets an interrupted run resume.
  # NOTE(review): a directory left behind by a failed HERest is also skipped.
  if [[ ! -d "$hmm_dir/hmm$nextiter" ]]; then
    mkdir "$hmm_dir/hmm$nextiter"
    HERest -C "$WORKING_DIRECTORY/$HMMCONFIG" -I "$TRAIN_MLF" \
      -t 250.0 150.0 1000.0 -S "$TRAIN_SCRIPT" \
      -H "$hmm_dir/hmm$iter/macros" -H "$hmm_dir/hmm$iter/hmmdefs" \
      -M "$hmm_dir/hmm$nextiter" "$syllist_path"
  fi
done

for iter in $TESTING_ITERATIONS_LIST; do
  echo "Testing iteration ${iter}..."
  results_path=$hmm_dir/${RESULTS_FILE}_iteration_$iter
  misclassified_path=$hmm_dir/${MISCLASSIFIED}_iteration_$iter

  # Recognition is skipped when a results file for this iteration exists.
  if [[ ! -f "$results_path" ]]; then
    HVite -H "$hmm_dir/hmm$iter/macros" -H "$hmm_dir/hmm$iter/hmmdefs" \
      -C "$WORKING_DIRECTORY/$HMMCONFIG" -S "$TEST_SCRIPT" -i "$hmm_dir/$RECOUT" \
      -w "$WORKING_DIRECTORY/$WDNET" -p 0.0 -s 5.0 \
      "$WORKING_DIRECTORY/$DICT" "$syllist_path"
    echo "Results from testing on iteration ${iter}..."
    # First call prints the summary to the console; the second writes the
    # detailed (-p -t) report to the per-iteration results file.
    HResults -e "???" "${SILENCE}" -I "$TEST_MLF" "$syllist_path" "$hmm_dir/$RECOUT"
    HResults -p -t -e "???" "${SILENCE}" \
      -I "$TEST_MLF" "$syllist_path" "$hmm_dir/$RECOUT" > "$results_path"
  fi

  # Count the number of instances of each talker appearing in the list of errors.
  grep Aligned "$results_path" \
    | sed -E 's/.*\/..\/([a-z]{2})([0-9]{2,3}\.[0-9])p([0-9]{2,3}\.[0-9])s.*/\2p\3s/' \
    | sort \
    | uniq -c > "$misclassified_path"

  python ./cnbh-syllables/results_plotting/gen_results.py \
    --input_file="$misclassified_path" \
    --train_talkers="$WORKING_DIRECTORY/training_talkers" \
    --test_talkers="$WORKING_DIRECTORY/testing_talkers" \
    --spoke_pattern="$SPOKE_PATTERN_FILE" \
    > "$hmm_dir/results_iteration_${iter}.txt"

  python ./cnbh-syllables/results_plotting/spider_plot.py \
    --input_file="$hmm_dir/results_iteration_${iter}.txt" \
    --output_file="$hmm_dir/results_iteration_${iter}.pdf"
done

# Mark this configuration as complete so later invocations skip it.
touch "$hmm_dir/.hmm_success"