annotate scripts/HTKAimfeat.sh @ 31:fa06bfacf004

-Typo in file path
author tomwalters
date Wed, 24 Feb 2010 15:18:00 +0000
parents 5b1586a234f1
children
rev   line source
#!/bin/bash
# using getopts
#
# Train and test an HTK monophone model using AIM
# features and the CNBH syllable database
#
# Copyright 2009-2010 University of Cambridge
# Author: Thomas Walters <tom@acousticscale.org>
# Based on the MATLAB scripts by Jess Monaghan and
# modelled on HTKTimit.sh from Cantab Research
#
# Usage: HTKAimfeat.sh [-f] [-i] [-m] args
#   -f  sets skip_features (skip the feature generation stage)
#   -i  sets skip_init     (skip the initialisation stage)
#   -m  sets matlab_plot   (plot the results in MATLAB at the end)

#######################################
# Parse the command-line flags.
# Globals:   skip_features, skip_init, matlab_plot (written; empty or 1)
#            OPTIND (written by getopts; used by the caller to shift)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   usage message on stderr for an unknown option
# Returns:   exits the script with status 2 on an unknown option
#######################################
parse_options() {
  local OPTION
  skip_features=
  skip_init=
  matlab_plot=
  while getopts 'fim' OPTION; do
    case "$OPTION" in
      f) skip_features=1 ;;
      i) skip_init=1 ;;
      m) matlab_plot=1 ;;
      # b) bflag=1
      #    bval="$OPTARG"
      #    ;;
      # getopts reports an unknown option as a literal '?'; the backslash
      # stops the pattern from acting as a match-any-character glob.
      \?)
        printf "Usage: %s: [-f] [-i] [-m] args\n" "$(basename "$0")" >&2
        exit 2
        ;;
    esac
  done
}

parse_options "$@"
# Drop the parsed flags so that $1 is the first non-option argument.
shift $((OPTIND - 1))
tomwalters@28 33
# ---------------------------------------------------------------------------
# Machines available for feature generation
# ---------------------------------------------------------------------------
USE_MULTIPLE_MACHINES=""
MACHINE_LIST="db-xserve2 db-xserve3 db-xserve5 db-xserve6 db-xserve7 db-xserve8"
# Number of whitespace-separated names in MACHINE_LIST (padding from wc removed)
MACHINE_COUNT=$(echo $MACHINE_LIST | wc -w | tr -d ' ')
# Cores per machine
MACHINE_CORES="4"

# ---------------------------------------------------------------------------
# Feature type: set to true / 1 for MFCC features, leave blank for AIM features
# ---------------------------------------------------------------------------
MFCC_FEATURES=""

# ---------------------------------------------------------------------------
# External locations
# ---------------------------------------------------------------------------
# Source directory for all the sound files
SOUND_SOURCE="/media/sounds-database/cnbh-sounds"
# Location of the AIMCopy binary if not in the path
AIMCOPY_PREFIX="../aimc-read-only/build/posix-release/"
# Location of HTK binaries if not in the path
HTK_PREFIX=""

# ---------------------------------------------------------------------------
# Names of various internal files and directories.
# Rename here if you don't like them for some reason.
# ---------------------------------------------------------------------------
SYLLIST="syls"
SYLLIST_COMPLETE="syllist"
GRAM="gram"
DICT="dict"
WDNET="wdnet"
TRAIN_SPEAKERS="train_speakers"
TEST_SPEAKERS="test_speakers"
WORK_PREFIX="work"
TRAIN_LIST="train.list"
TEST_LIST="test.list"
COMBINED_LIST="combined.list"
TRAIN_MLF="train.mlf"
TEST_MLF="test.mlf"
TRAIN_SCRIPT="train.scp"
TEST_SCRIPT="test.scp"
FEATURES_DIR="features"
AIMCOPY_CONFIG="aimcopy.cfg"
AIMCOPY_LOG_TRAIN="aimcopy_train.log"
AIMCOPY_LOG_TEST="aimcopy_test.log"
HCOPY_CONFIG="hcopy.cfg"
HMMCONFIG="hmmconfig"
HMMPROTO="proto"
RECOUT="recout.mlf"
RESULTS_FILE="results.txt"
MISCLASSIFIED="misclassified.txt"
HHED_SCRIPT="cmdscript"

# ---------------------------------------------------------------------------
# The vowels and consonants that make up the CNBH database
# ---------------------------------------------------------------------------
VOWELS="a e i o u"
CONSONANTS="b d f g h k l m n p r s t v w x y z"
SILENCE="sil"
tomwalters@28 87
# Working directory for this experiment: WORK_PREFIX followed by the first
# non-option argument with all whitespace removed (just WORK_PREFIX when no
# argument is given).
WORK="${WORK_PREFIX}${1//[[:space:]]/}"
# Everything below writes into $WORK, so stop here if it cannot be created.
mkdir -p -- "$WORK" || exit 1

# Make a copy of this script in the experimental directory
cp -p -- "$0" "$WORK"
tomwalters@28 93
#######################################
# Record one syllable in the temporary (unsorted) syllable list and dictionary.
# Globals:   WORK, SYLLIST, DICT (read)
# Arguments: $1 - syllable label
#######################################
add_syllable() {
  echo "$1" >> "$WORK/${SYLLIST}.tmp"
  echo "$1 [$1] $1" >> "$WORK/${DICT}.tmp"
}

#######################################
# Write the conversion list, the HTK script file and the master label file
# for one set of speakers. Each output file is created or truncated.
# Entries are ordered by syllable (outer loop) then by speaker (inner loop).
# Globals:   WORK, SYLLIST, SOUND_SOURCE, FEATURES_DIR, SILENCE (read)
# Arguments: $1 - file listing one speaker per line
#            $2 - conversion list to write ("source.wav dest.htk" per line)
#            $3 - script file to write (one quoted .htk path per line)
#            $4 - master label file to write
#######################################
write_set_files() {
  local speakers_file=$1 list_file=$2 script_file=$3 mlf_file=$4
  local syllable speaker source_filename dest_filename
  {
    echo '#!MLF!#' >&5
    while read -r syllable; do
      while read -r speaker; do
        source_filename=$SOUND_SOURCE/$syllable/${syllable}${speaker}.wav
        dest_filename=$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}
        echo "$source_filename ${dest_filename}.htk" >&3
        echo "'${dest_filename}.htk'" >&4
        # Label entry: file name, then silence / syllable / silence, then "."
        printf '%s\n' "'\"${dest_filename}.lab\"'" \
          "$SILENCE" "$syllable" "$SILENCE" "." >&5
      done < "$speakers_file"
    done < "$WORK/$SYLLIST"
  } 3> "$list_file" 4> "$script_file" 5> "$mlf_file"
}

if [ "$skip_init" ]; then
  echo "Skipping initialisation"
else
  # Make the sets of VC, CV, and vowel only labels, plus silence and use them to
  # generate the grammar, dictionary and list of syllables
  echo "Generating grammar, dictionary and syllable list..."
  # Start from empty temporary files: they are only appended to below, so a
  # leftover from an interrupted run would otherwise duplicate every entry.
  : > "$WORK/${SYLLIST}.tmp"
  : > "$WORK/${DICT}.tmp"
  echo -n '$word = ' > "$WORK/$GRAM"
  separator=""
  for vowel in $VOWELS; do
    add_syllable "$vowel$vowel"
    # Only the very first alternative in the grammar has no leading " | "
    echo -n "${separator}$vowel$vowel" >> "$WORK/$GRAM"
    separator=" | "
    for consonant in $CONSONANTS; do
      add_syllable "$vowel$consonant"
      echo -n " | $vowel$consonant" >> "$WORK/$GRAM"
      add_syllable "$consonant$vowel"
      echo -n " | $consonant$vowel" >> "$WORK/$GRAM"
    done
  done
  echo ';' >> "$WORK/$GRAM"

  # Sort the syllable list and the dictionary and delete the
  # temporary, unsorted version
  sort "$WORK/${SYLLIST}.tmp" > "$WORK/$SYLLIST"
  rm "$WORK/${SYLLIST}.tmp"
  sort "$WORK/${DICT}.tmp" > "$WORK/$DICT"
  rm "$WORK/${DICT}.tmp"

  # Add silence to the end of the various files just generated
  cp "$WORK/$SYLLIST" "$WORK/$SYLLIST_COMPLETE"
  echo "$SILENCE" >> "$WORK/$SYLLIST_COMPLETE"
  echo "end_$SILENCE [$SILENCE] $SILENCE" >> "$WORK/$DICT"
  echo "start_$SILENCE [$SILENCE] $SILENCE" >> "$WORK/$DICT"
  echo "( start_$SILENCE \$word end_$SILENCE )" >> "$WORK/$GRAM"

  # Use HParse to parse the grammar into a wordnet
  echo "Generating wordnet from grammar..."
  "${HTK_PREFIX}HParse" "$WORK/$GRAM" "$WORK/$WDNET"

  # Generate a list of filenames from the spoke pattern
  cat <<"EOF" > "$WORK/$TRAIN_SPEAKERS"
170.9p112.2s100.0t+000itd
171.0p112.8s100.0t+000itd
171.3p111.7s100.0t+000itd
171.5p113.1s100.0t+000itd
171.9p111.5s100.0t+000itd
172.1p113.0s100.0t+000itd
172.4p111.9s100.0t+000itd
172.5p112.5s100.0t+000itd
EOF

  cat <<"EOF" > "$WORK/$TEST_SPEAKERS"
137.0p104.3s100.0t+000itd
141.3p135.4s100.0t+000itd
145.5p106.3s100.0t+000itd
148.8p128.8s100.0t+000itd
151.6p83.9s100.0t+000itd
153.0p108.1s100.0t+000itd
155.5p123.5s100.0t+000itd
156.7p90.6s100.0t+000itd
159.5p109.6s100.0t+000itd
161.1p119.4s100.0t+000itd
161.1p96.8s100.0t+000itd
163.4p157.6s100.0t+000itd
164.7p110.8s100.0t+000itd
164.9p102.1s100.0t+000itd
165.6p144.0s100.0t+000itd
165.7p116.2s100.0t+000itd
167.4p133.5s100.0t+000itd
167.8p106.5s100.0t+000itd
168.6p111.6s100.0t+000itd
168.9p125.4s100.0t+000itd
169.0p114.0s100.0t+000itd
170.0p109.7s100.0t+000itd
170.1p119.5s100.0t+000itd
171.0p115.5s100.0t+000itd
171.7p112.3s100.0t+000itd
172.4p109.3s100.0t+000itd
173.3p105.6s100.0t+000itd
173.5p115.0s100.0t+000itd
174.5p100.6s100.0t+000itd
174.5p110.6s100.0t+000itd
174.9p113.0s100.0t+000itd
175.7p118.5s100.0t+000itd
176.1p94.5s100.0t+000itd
178.0p108.5s100.0t+000itd
178.1p87.6s100.0t+000itd
178.8p123.6s100.0t+000itd
179.0p113.9s100.0t+000itd
180.4p80.1s100.0t+000itd
183.0p105.7s100.0t+000itd
183.0p130.4s100.0t+000itd
184.8p115.1s100.0t+000itd
188.1p139.2s100.0t+000itd
189.6p102.1s100.0t+000itd
192.7p116.7s100.0t+000itd
194.5p150.4s100.0t+000itd
198.1p97.9s100.0t+000itd
202.7p118.6s100.0t+000itd
208.6p93.2s100.0t+000itd
215.2p121.0s100.0t+000itd
EOF

  # Construct the conversion scripts for AIMCopy (or HCopy) and
  # the master label files for the train and test sets
  echo "Generating train and test scripts and master label files..."
  write_set_files "$WORK/$TRAIN_SPEAKERS" "$WORK/$TRAIN_LIST" \
    "$WORK/$TRAIN_SCRIPT" "$WORK/$TRAIN_MLF"
  write_set_files "$WORK/$TEST_SPEAKERS" "$WORK/$TEST_LIST" \
    "$WORK/$TEST_SCRIPT" "$WORK/$TEST_MLF"
fi
tomwalters@28 264
# Feature generation stage: create the per-syllable output directories and
# convert every sound file listed in the train and test lists, using HCopy
# (MFCC features) or AIMCopy (AIM features).
if [ "$skip_features" ]; then
  echo "Skipping feature generation"
else
  # Make the necessary directories for the computed features.
  # -p keeps a re-run in an existing work directory from reporting an error
  # for every directory that is already there.
  echo "Making directory structure..."
  mkdir -p "$WORK/$FEATURES_DIR"
  while read -r syllable; do
    mkdir -p "$WORK/$FEATURES_DIR/$syllable"
  done < "$WORK/$SYLLIST"

  if [ "$MFCC_FEATURES" ]; then
    # Write the HCopy config file
    echo "Creating HCopy config file..."
    cat <<"EOF" > "$WORK/$HCOPY_CONFIG"
# Coding parameters
SOURCEFORMAT= WAV
TARGETKIND = MFCC_0_D_A
TARGETRATE = 100000.0
SAVECOMPRESSED = T
SAVEWITHCRC = T
WINDOWSIZE = 250000.0
USEHAMMING = T
PREEMCOEF = 0.97
NUMCHANS = 200
CEPLIFTER = 22
NUMCEPS = 12
ENORMALISE = F
EOF
    echo "Generating features for training..."
    "${HTK_PREFIX}HCopy" -T 1 -C "$WORK/$HCOPY_CONFIG" -S "$WORK/${TRAIN_LIST}"

    echo "Generating features for testing..."
    "${HTK_PREFIX}HCopy" -T 1 -C "$WORK/$HCOPY_CONFIG" -S "$WORK/${TEST_LIST}"
  else
    # Write the AIMCopy config file
    echo "Creating AIMCopy config file..."
    cat <<"EOF" > "$WORK/$AIMCOPY_CONFIG"
input.buffersize=480
gtfb.channel_count=200
gtfb.min_frequency=86.0
gtfb.max_frequency=16000.0
nap.do_lowpass=true
nap.lowpass_cutoff=100.0
slice.temporal=false
slice.all=true
slice.normalize=true
EOF
    if [ "$USE_MULTIPLE_MACHINES" ]; then
      echo "Splitting data files..."
      cat "$WORK/${TRAIN_LIST}" "$WORK/${TEST_LIST}" > "$WORK/${COMBINED_LIST}"
      total_cores=$((MACHINE_COUNT * MACHINE_CORES))
      echo "$total_cores cores available"
      total_files=$(wc -l < "$WORK/${COMBINED_LIST}" | tr -d ' ')
      echo "$total_files files to process"
      # +1 rounds up so the number of chunks never exceeds the number of cores
      files_per_core=$((total_files / total_cores + 1))
      echo "$files_per_core files per core"
      # Remove chunks left by an earlier run; otherwise the glob below would
      # pick them up alongside the fresh ones.
      rm -f "$WORK"/split_list*
      split -l "$files_per_core" "$WORK/${COMBINED_LIST}" "$WORK/split_list"
      splits=("$WORK"/split_list*)
      element=0
      echo "Spawning tasks..."
      for m in $MACHINE_LIST; do
        for ((c = 1; c <= MACHINE_CORES; c += 1)); do
          s=${splits[$element]}
          # The ssh invocation itself is commented out, so at present the
          # command for each core is only printed, not run.
          echo "ssh $m \"cd HTK-AIM;${AIMCOPY_PREFIX}AIMCopy -C $WORK/$AIMCOPY_CONFIG -S $s\" &"
          #ssh $m "cd HTK-AIM;${AIMCOPY_PREFIX}AIMCopy -C $WORK/$AIMCOPY_CONFIG -S $s" &
          element=$((element + 1))
        done
      done
      echo "Waiting for tasks to complete..."
      wait
    else
      echo "Generating features for training..."
      "${AIMCOPY_PREFIX}AIMCopy" -C "$WORK/$AIMCOPY_CONFIG" -S "$WORK/${TRAIN_LIST}" -D "$WORK/$AIMCOPY_LOG_TRAIN"

      echo "Generating features for testing..."
      "${AIMCOPY_PREFIX}AIMCopy" -C "$WORK/$AIMCOPY_CONFIG" -S "$WORK/${TEST_LIST}" -D "$WORK/$AIMCOPY_LOG_TEST"
    fi
  fi
fi
tomwalters@28 350
# Write the HTK configuration file used by the training and testing tools.
# Both feature types share the first two lines; AIM features (MFCC_FEATURES
# left blank) additionally declare the source and target parameter kinds.
{
  echo "# Coding parameters"
  echo "SOURCEFORMAT= HTK"
  if [ -z "$MFCC_FEATURES" ]; then
    echo "SOURCEKIND= USER_E"
    echo "TARGETKIND = USER_E_D_A"
  fi
} > "$WORK/$HMMCONFIG"
tomwalters@28 365
echo "Creating HMM structure..."

#######################################
# Print COUNT copies of VALUE on one line, separated by single spaces.
# Arguments: $1 - value, $2 - count (>= 1)
#######################################
proto_row() {
  local value=$1 count=$2 row=$1 i
  for ((i = 1; i < count; i++)); do
    row="$row $value"
  done
  echo "$row"
}

#######################################
# Print the <State> stanzas for states FIRST..LAST of a prototype with
# DIM-dimensional observations: all-zero means and all-1.0 variances.
# Arguments: $1 - dimension, $2 - first state index, $3 - last state index
#######################################
proto_states() {
  local dim=$1 first=$2 last=$3 state
  for ((state = first; state <= last; state++)); do
    echo "<State> $state"
    echo "<Mean>$dim"
    proto_row 0 "$dim"
    echo "<Variance> $dim"
    proto_row 1.0 "$dim"
  done
}

if [ -z "$MFCC_FEATURES" ]; then
  # AIM features: 12-dimensional vectors, 4 states (2 and 3 carry output
  # distributions)
  {
    printf '%s\n' \
      '~o<VECSIZE> 12<NULLD><USER_E_D_A>' \
      '~h "proto"' \
      '<BEGINHMM>' \
      '<NUMSTATES> 4'
    proto_states 12 2 3
    printf '%s\n' \
      '<TransP> 4' \
      '0.0 1.0 0.0 0.0' \
      '0.0 0.6 0.4 0.0' \
      '0.0 0.0 0.6 0.4' \
      '0.0 0.0 0.0 0.0' \
      '<EndHMM>'
  } > "$WORK/$HMMPROTO"
else
  # MFCC features: 39-dimensional vectors, 6 states (2 to 5 carry output
  # distributions)
  {
    printf '%s\n' \
      '~o<VECSIZE> 39<NULLD><MFCC_0_D_A>' \
      '~h "proto"' \
      '<BEGINHMM>' \
      '<NUMSTATES> 6'
    proto_states 39 2 5
    # This variant has an empty line between the last state and <TransP>
    echo
    printf '%s\n' \
      '<TransP> 6' \
      '0.0 1.0 0.0 0.0 0.0 0.0' \
      '0.0 0.6 0.4 0.0 0.0 0.0' \
      '0.0 0.0 0.6 0.4 0.0 0.0' \
      '0.0 0.0 0.0 0.6 0.4 0.0' \
      '0.0 0.0 0.0 0.0 0.6 0.4' \
      '0.0 0.0 0.0 0.0 0.0 0.0' \
      '<EndHMM>'
  } > "$WORK/$HMMPROTO"
fi
tomwalters@28 428
tomwalters@28 429
# Training stage: initialise a prototype HMM with HCompV, clone it for every
# entry in the complete syllable list, run the HHEd command script, then run
# ten passes of HERest (hmm0 -> hmm1 -> ... -> hmm10).
echo "Training HMM..."
echo "Setting up prototype HMM..."
# -p so that a re-run in an existing work directory does not report an error
mkdir -p "$WORK/hmm0"
"${HTK_PREFIX}HCompV" -C "$WORK/$HMMCONFIG" -f 0.01 -m -S "$WORK/$TRAIN_SCRIPT" -M "$WORK/hmm0" "$WORK/$HMMPROTO"

echo "Generating HMM definitions..."
# Now take the prototype file from hmm0, and create the other HMM definitions from it:
# keep everything from the <BEGINHMM> line to the end of the file.
sed -n '/<BEGINHMM>/,$p' "$WORK/hmm0/$HMMPROTO" > "$WORK/hmm0/hmms"
# hmmdefs is written through a single redirection so the file is rebuilt from
# scratch each time; appending to an existing file would duplicate every
# definition on a re-run.
while read -r syllable; do
  echo "~h $syllable"
  cat "$WORK/hmm0/hmms"
done < "$WORK/$SYLLIST_COMPLETE" > "$WORK/hmm0/hmmdefs"

# The macros file is the global options line followed by the variance floors
# (vFloors) that HCompV left in hmm0.
if [ "$MFCC_FEATURES" ]; then
  echo -n "~o<STREAMINFO> 1 39<VECSIZE> 39<NULLD><MFCC_0_D_A><DIAGC>" > "$WORK/hmm0/macros"
else
  echo -n "~o<STREAMINFO> 1 12<VECSIZE> 12<NULLD><USER_E_D_A><DIAGC>" > "$WORK/hmm0/macros"
fi
cat "$WORK/hmm0/vFloors" >> "$WORK/hmm0/macros"

echo "Adding output mixture components..."
# NOTE(review): the command script addresses states 2-5, but the AIM prototype
# written earlier in this script declares <NUMSTATES> 4 with only states 2 and
# 3 defined - confirm HHEd tolerates the state[4]/state[5] items in that case.
cat <<"EOF" > "$WORK/$HHED_SCRIPT"
MU 4 {*.state[2].mix} MU 4 {*.state[3].mix} MU 4 {*.state[4].mix} MU 4 {*.state[5].mix}
EOF

"${HTK_PREFIX}HHEd" -H "$WORK/hmm0/macros" -H "$WORK/hmm0/hmmdefs" "$WORK/$HHED_SCRIPT" "$WORK/$SYLLIST_COMPLETE"

for ((iter = 0; iter < 10; iter++)); do
  echo "Training iteration ${iter}..."
  nextiter=$((iter + 1))
  mkdir -p "$WORK/hmm$nextiter"
  "${HTK_PREFIX}HERest" -C "$WORK/$HMMCONFIG" -I "$WORK/$TRAIN_MLF" \
    -t 250.0 150.0 1000.0 -S "$WORK/$TRAIN_SCRIPT" \
    -H "$WORK/hmm$iter/macros" -H "$WORK/hmm$iter/hmmdefs" \
    -M "$WORK/hmm$nextiter" "$WORK/$SYLLIST_COMPLETE"
done
tomwalters@28 466
# Testing stage: recognise the test set with HVite, print HResults summaries,
# and write the full results plus a per-speaker misclassification count.
echo "Testing..."
# NOTE(review): the models tested are those in hmm9, while the training loop
# writes its last set to hmm10 - confirm that hmm9 is the intended one.
for iter in 9; do
  "${HTK_PREFIX}HVite" -H "$WORK/hmm$iter/macros" -H "$WORK/hmm$iter/hmmdefs" \
    -C "$WORK/$HMMCONFIG" -S "$WORK/$TEST_SCRIPT" -i "$WORK/$RECOUT" \
    -w "$WORK/$WDNET" -p 0.0 -s 5.0 "$WORK/$DICT" "$WORK/$SYLLIST_COMPLETE"
  echo "Results from testing on iteration ${iter}..."
  "${HTK_PREFIX}HResults" -e "???" "${SILENCE}" -I "$WORK/$TEST_MLF" "$WORK/$SYLLIST_COMPLETE" "$WORK/$RECOUT"
done

"${HTK_PREFIX}HResults" -p -t -e "???" "${SILENCE}" \
  -I "$WORK/$TEST_MLF" "$WORK/$SYLLIST_COMPLETE" "$WORK/$RECOUT" > "$WORK/$RESULTS_FILE"

# From every "Aligned" line of the results, pull out the two numeric fields of
# the file name (the numbers before "p" and before "s") and count how often
# each pair occurs.
grep Aligned "$WORK/$RESULTS_FILE" \
  | sed -E "s/.*\/..\/([a-z]{2})([0-9]{2,3}\.[0-9])p([0-9]{2,3}\.[0-9])s.*/\2 \3/" \
  | sort | uniq -c > "$WORK/$MISCLASSIFIED"

echo "Final results, errors, and confusion matrix in file $WORK/$RESULTS_FILE"
echo "Statistics on misclassification in file $WORK/$MISCLASSIFIED"
# Read the file on stdin so wc prints only the count (no file name), and strip
# the padding that some wc implementations add.
echo "$(wc -l < "$WORK/$SYLLIST" | tr -d ' ') sounds in total"
tomwalters@28 484
# Optionally plot the results in MATLAB (-m flag).
if [ "$matlab_plot" ]; then
  echo "Plotting results figure in MATLAB..."
  # Run inside a subshell: the caller's working directory is left untouched,
  # and MATLAB is not started at all if the matlab directory is missing.
  (
    cd matlab || exit 1
    /Applications/MATLAB_R2007b/bin/matlab -nojvm -nosplash -r "plot_results('../${WORK}/'); exit"
  )
fi