annotate trunk/scripts/HTKAimfeat.sh @ 706:f8e90b5d85fd tip

Delete CARFAC code from this repository. It has been moved to https://github.com/google/carfac Please email me with your github username to get access. I've also created a new mailing list to discuss CARFAC development: https://groups.google.com/forum/#!forum/carfac-dev
author ronw@google.com
date Thu, 18 Jul 2013 20:56:51 +0000
parents e4f704f67ca6
children
rev   line source
tomwalters@301 1 #!/bin/bash
tomwalters@301 2 # using getopts
tomwalters@301 3 #
tomwalters@301 4 # Train and test an HTK monophone model using AIM
tomwalters@301 5 # features and the CNBH syllable database
tomwalters@301 6 #
tomwalters@301 7 # Copyright 2009-2010 University of Cambridge
tomwalters@301 8 # Author: Thomas Walters <tom@acousticscale.org>
tomwalters@301 9 # Based on the MATLAB scripts by Jess Monaghan and
tomwalters@301 10 # modelled on HTKTimit.sh from Cantab Research
tomwalters@301 11
tomwalters@301 12 skip_features=
tomwalters@301 13 skip_init=
tomwalters@301 14 matlab_plot=
tomwalters@301 15 while getopts 'fim' OPTION # -f skip feature generation, -i skip initialisation, -m plot in MATLAB
tomwalters@301 16 do
tomwalters@301 17 case "$OPTION" in
tomwalters@301 18 f) skip_features=1
tomwalters@301 19 ;;
tomwalters@301 20 i) skip_init=1
tomwalters@301 21 ;;
tomwalters@301 22 m) matlab_plot=1
tomwalters@301 23 ;;
tomwalters@301 24 # b) bflag=1
tomwalters@301 25 # bval="$OPTARG"
tomwalters@301 26 # ;;
tomwalters@301 27 \?) printf "Usage: %s: [-f] [-i] [-m] args\n" "$(basename "$0")" >&2 # literal '?' is what getopts reports for an unknown flag
tomwalters@301 28 exit 2
tomwalters@301 29 ;;
tomwalters@301 30 esac
tomwalters@301 31 done
tomwalters@301 32 shift $((OPTIND - 1)) # drop the parsed flags so $1 is the first positional argument
tomwalters@301 33
tomwalters@301 34 # Machine list
tomwalters@301 35 USE_MULTIPLE_MACHINES=
tomwalters@301 36 MACHINE_LIST="db-xserve2 db-xserve3 db-xserve5 db-xserve6 db-xserve7 db-xserve8"
tomwalters@301 37 MACHINE_COUNT=`echo $MACHINE_LIST | wc -w | sed 's/ *//'`
tomwalters@301 38 # Cores per machine
tomwalters@301 39 MACHINE_CORES=4
tomwalters@301 40
tomwalters@301 41 # Set to true / 1 to enable MFCC features rather than AIM features
tomwalters@301 42 # (leave blank for AIM features)
tomwalters@301 43 MFCC_FEATURES=
tomwalters@301 44
tomwalters@301 45 # Source directory for all the sound files
tomwalters@304 46 SOUND_SOURCE="/media/sounds-database/cnbh-sounds"
tomwalters@301 47
tomwalters@301 48 # Location of the AIMCopy binary if not in the path
tomwalters@303 49 AIMCOPY_PREFIX="../aimc-read-only/build/posix-release/"
tomwalters@301 50
tomwalters@301 51 # Location of HTK binaries if not in the path
tomwalters@303 52 HTK_PREFIX=""
tomwalters@301 53
tomwalters@301 54 # Names of various internal files and directories.
tomwalters@301 55 # Rename here if you don't like them for some reason.
tomwalters@301 56 SYLLIST=syls
tomwalters@301 57 SYLLIST_COMPLETE=syllist
tomwalters@301 58 GRAM=gram
tomwalters@301 59 DICT=dict
tomwalters@301 60 WDNET=wdnet
tomwalters@301 61 TRAIN_SPEAKERS=train_speakers
tomwalters@301 62 TEST_SPEAKERS=test_speakers
tomwalters@301 63 WORK_PREFIX=work
tomwalters@301 64 TRAIN_LIST=train.list
tomwalters@301 65 TEST_LIST=test.list
tomwalters@301 66 COMBINED_LIST=combined.list
tomwalters@301 67 TRAIN_MLF=train.mlf
tomwalters@301 68 TEST_MLF=test.mlf
tomwalters@301 69 TRAIN_SCRIPT=train.scp
tomwalters@301 70 TEST_SCRIPT=test.scp
tomwalters@301 71 FEATURES_DIR=features
tomwalters@301 72 AIMCOPY_CONFIG=aimcopy.cfg
tomwalters@301 73 AIMCOPY_LOG_TRAIN=aimcopy_train.log
tomwalters@301 74 AIMCOPY_LOG_TEST=aimcopy_test.log
tomwalters@301 75 HCOPY_CONFIG=hcopy.cfg
tomwalters@301 76 HMMCONFIG=hmmconfig
tomwalters@301 77 HMMPROTO=proto
tomwalters@301 78 RECOUT=recout.mlf
tomwalters@301 79 RESULTS_FILE=results.txt
tomwalters@301 80 MISCLASSIFIED=misclassified.txt
tomwalters@301 81 HHED_SCRIPT=cmdscript
tomwalters@301 82
tomwalters@301 83 # The vowels and consonants that make up the CNBH database
tomwalters@301 84 VOWELS="a e i o u"
tomwalters@301 85 CONSONANTS="b d f g h k l m n p r s t v w x y z"
tomwalters@301 86 SILENCE="sil"
tomwalters@301 87
tomwalters@301 88 WORK=${WORK_PREFIX}`echo $1 | tr -d ' '`
tomwalters@301 89 mkdir -p $WORK
tomwalters@301 90
tomwalters@301 91 # Make a copy of this script in the experimental directory
tomwalters@301 92 cp -p $0 $WORK
tomwalters@301 93
tomwalters@301 94 if [ "$skip_init" ]
tomwalters@301 95 then
tomwalters@301 96 echo "Skipping initialisation"
tomwalters@301 97 else
tomwalters@301 98 # Make the sets of VC, CV, and vowel only labels, plus silence and use them to
tomwalters@301 99 # generate the grammar, dictionary and list of syllables
tomwalters@301 100 echo "Generating grammar, dictionary and syllable list..."
tomwalters@301 101 echo -n '$word = ' > $WORK/$GRAM
tomwalters@301 102 FIRST=true;
tomwalters@301 103 for v in $VOWELS; do
tomwalters@301 104 echo $v$v >> $WORK/$SYLLIST.tmp
tomwalters@301 105 echo "$v$v [$v$v] $v$v" >> $WORK/$DICT.tmp
tomwalters@301 106 if $FIRST; then
tomwalters@301 107 echo -n "$v$v" >> $WORK/$GRAM
tomwalters@301 108 FIRST=false
tomwalters@301 109 else
tomwalters@301 110 echo -n " | $v$v" >> $WORK/$GRAM
tomwalters@301 111 fi
tomwalters@301 112 for c in $CONSONANTS; do
tomwalters@301 113 echo $v$c >> $WORK/$SYLLIST.tmp
tomwalters@301 114 echo "$v$c [$v$c] $v$c" >> $WORK/$DICT.tmp
tomwalters@301 115 echo -n " | $v$c" >> $WORK/$GRAM
tomwalters@301 116 echo $c$v >> $WORK/$SYLLIST.tmp
tomwalters@301 117 echo "$c$v [$c$v] $c$v" >> $WORK/$DICT.tmp
tomwalters@301 118 echo -n " | $c$v" >> $WORK/$GRAM
tomwalters@301 119 done
tomwalters@301 120 done
tomwalters@301 121 echo ';' >> $WORK/$GRAM
tomwalters@301 122
tomwalters@301 123 # Sort the syllable list and the dictionary and delete the
tomwalters@301 124 # temporary, unsorted version
tomwalters@301 125 sort $WORK/$SYLLIST.tmp > $WORK/$SYLLIST
tomwalters@301 126 rm $WORK/$SYLLIST.tmp
tomwalters@301 127 sort $WORK/$DICT.tmp > $WORK/$DICT
tomwalters@301 128 rm $WORK/$DICT.tmp
tomwalters@301 129
tomwalters@301 130 # Add silence to the end of the various files just generated
tomwalters@301 131 cp $WORK/$SYLLIST $WORK/$SYLLIST_COMPLETE
tomwalters@301 132 echo $SILENCE >> $WORK/$SYLLIST_COMPLETE
tomwalters@301 133 echo "end_$SILENCE [$SILENCE] $SILENCE" >> $WORK/$DICT
tomwalters@301 134 echo "start_$SILENCE [$SILENCE] $SILENCE" >> $WORK/$DICT
tomwalters@301 135 echo "( start_$SILENCE \$word end_$SILENCE )" >> $WORK/$GRAM
tomwalters@301 136
tomwalters@301 137 # Use HParse to parse the grammar into a wordnet
tomwalters@301 138 echo "Generating wordnet from grammar..."
tomwalters@301 139 ${HTK_PREFIX}HParse $WORK/$GRAM $WORK/$WDNET
tomwalters@301 140
tomwalters@301 141 # Generate a list of filenames from the spoke pattern
tomwalters@301 142 cat <<"EOF" > $WORK/$TRAIN_SPEAKERS
tomwalters@301 143 170.9p112.2s100.0t+000itd
tomwalters@301 144 171.0p112.8s100.0t+000itd
tomwalters@301 145 171.3p111.7s100.0t+000itd
tomwalters@301 146 171.5p113.1s100.0t+000itd
tomwalters@301 147 171.9p111.5s100.0t+000itd
tomwalters@301 148 172.1p113.0s100.0t+000itd
tomwalters@301 149 172.4p111.9s100.0t+000itd
tomwalters@301 150 172.5p112.5s100.0t+000itd
tomwalters@301 151 EOF
tomwalters@301 152
tomwalters@301 153 cat <<"EOF" > $WORK/$TEST_SPEAKERS
tomwalters@301 154 137.0p104.3s100.0t+000itd
tomwalters@301 155 141.3p135.4s100.0t+000itd
tomwalters@301 156 145.5p106.3s100.0t+000itd
tomwalters@301 157 148.8p128.8s100.0t+000itd
tomwalters@301 158 151.6p83.9s100.0t+000itd
tomwalters@301 159 153.0p108.1s100.0t+000itd
tomwalters@301 160 155.5p123.5s100.0t+000itd
tomwalters@301 161 156.7p90.6s100.0t+000itd
tomwalters@301 162 159.5p109.6s100.0t+000itd
tomwalters@301 163 161.1p119.4s100.0t+000itd
tomwalters@301 164 161.1p96.8s100.0t+000itd
tomwalters@301 165 163.4p157.6s100.0t+000itd
tomwalters@301 166 164.7p110.8s100.0t+000itd
tomwalters@301 167 164.9p102.1s100.0t+000itd
tomwalters@301 168 165.6p144.0s100.0t+000itd
tomwalters@301 169 165.7p116.2s100.0t+000itd
tomwalters@301 170 167.4p133.5s100.0t+000itd
tomwalters@301 171 167.8p106.5s100.0t+000itd
tomwalters@301 172 168.6p111.6s100.0t+000itd
tomwalters@301 173 168.9p125.4s100.0t+000itd
tomwalters@301 174 169.0p114.0s100.0t+000itd
tomwalters@301 175 170.0p109.7s100.0t+000itd
tomwalters@301 176 170.1p119.5s100.0t+000itd
tomwalters@301 177 171.0p115.5s100.0t+000itd
tomwalters@301 178 171.7p112.3s100.0t+000itd
tomwalters@301 179 172.4p109.3s100.0t+000itd
tomwalters@301 180 173.3p105.6s100.0t+000itd
tomwalters@301 181 173.5p115.0s100.0t+000itd
tomwalters@301 182 174.5p100.6s100.0t+000itd
tomwalters@301 183 174.5p110.6s100.0t+000itd
tomwalters@301 184 174.9p113.0s100.0t+000itd
tomwalters@301 185 175.7p118.5s100.0t+000itd
tomwalters@301 186 176.1p94.5s100.0t+000itd
tomwalters@301 187 178.0p108.5s100.0t+000itd
tomwalters@301 188 178.1p87.6s100.0t+000itd
tomwalters@301 189 178.8p123.6s100.0t+000itd
tomwalters@301 190 179.0p113.9s100.0t+000itd
tomwalters@301 191 180.4p80.1s100.0t+000itd
tomwalters@301 192 183.0p105.7s100.0t+000itd
tomwalters@301 193 183.0p130.4s100.0t+000itd
tomwalters@301 194 184.8p115.1s100.0t+000itd
tomwalters@301 195 188.1p139.2s100.0t+000itd
tomwalters@301 196 189.6p102.1s100.0t+000itd
tomwalters@301 197 192.7p116.7s100.0t+000itd
tomwalters@301 198 194.5p150.4s100.0t+000itd
tomwalters@301 199 198.1p97.9s100.0t+000itd
tomwalters@301 200 202.7p118.6s100.0t+000itd
tomwalters@301 201 208.6p93.2s100.0t+000itd
tomwalters@301 202 215.2p121.0s100.0t+000itd
tomwalters@301 203 EOF
tomwalters@301 204
tomwalters@301 205 # Construct the conversion scripts for AIMCopy (or HCopy) and
tomwalters@301 206 # the master label files for the train and test sets
tomwalters@301 207
tomwalters@301 208 echo "Generating train and test scripts and master label files..."
tomwalters@301 209 exec 4> $WORK/$TRAIN_MLF
tomwalters@301 210 exec 6> $WORK/$TEST_MLF
tomwalters@301 211 echo '#!MLF!#' >&4
tomwalters@301 212 echo '#!MLF!#' >&6
tomwalters@301 213 if [ -e "$WORK/$TRAIN_LIST" ] # -e is the standard existence test; unary -a is a deprecated spelling
tomwalters@301 214 then
tomwalters@301 215 rm "$WORK/$TRAIN_LIST"
tomwalters@301 216 fi
tomwalters@301 217 if [ -e "$WORK/$TEST_LIST" ]
tomwalters@301 218 then
tomwalters@301 219 rm "$WORK/$TEST_LIST"
tomwalters@301 220 fi
tomwalters@301 221 if [ -e "$WORK/$TRAIN_SCRIPT" ]
tomwalters@301 222 then
tomwalters@301 223 rm "$WORK/$TRAIN_SCRIPT"
tomwalters@301 224 fi
tomwalters@301 225 if [ -e "$WORK/$TEST_SCRIPT" ]
tomwalters@301 226 then
tomwalters@301 227 rm "$WORK/$TEST_SCRIPT"
tomwalters@301 228 fi
tomwalters@301 229 exec 3> $WORK/$TRAIN_LIST
tomwalters@301 230 exec 5> $WORK/$TEST_LIST
tomwalters@301 231 exec 7> $WORK/$TRAIN_SCRIPT
tomwalters@301 232 exec 8> $WORK/$TEST_SCRIPT
tomwalters@301 233 for syllable in $(cat $WORK/$SYLLIST); do
tomwalters@301 234 for speaker in $(cat $WORK/$TRAIN_SPEAKERS); do
tomwalters@301 235 SOURCE_FILENAME=$SOUND_SOURCE/$syllable/${syllable}${speaker}.wav
tomwalters@301 236 DEST_FILENAME=$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}
tomwalters@301 237 echo "$SOURCE_FILENAME ${DEST_FILENAME}.htk" >&3
tomwalters@301 238 echo "'${DEST_FILENAME}.htk'" >&7
tomwalters@301 239 echo "'\"${DEST_FILENAME}.lab\"'" >&4
tomwalters@301 240 echo "$SILENCE" >&4
tomwalters@301 241 echo $syllable >&4
tomwalters@301 242 echo "$SILENCE" >&4
tomwalters@301 243 echo "." >&4
tomwalters@301 244 done
tomwalters@301 245 for speaker in $(cat $WORK/$TEST_SPEAKERS); do
tomwalters@301 246 SOURCE_FILENAME=$SOUND_SOURCE/$syllable/${syllable}${speaker}.wav
tomwalters@301 247 DEST_FILENAME=$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}
tomwalters@301 248 echo "$SOURCE_FILENAME ${DEST_FILENAME}.htk" >&5
tomwalters@301 249 echo "'${DEST_FILENAME}.htk'" >&8
tomwalters@301 250 echo "'\"${DEST_FILENAME}.lab\"'" >&6
tomwalters@301 251 echo "$SILENCE" >&6
tomwalters@301 252 echo $syllable >&6
tomwalters@301 253 echo "$SILENCE" >&6
tomwalters@301 254 echo "." >&6
tomwalters@301 255 done
tomwalters@301 256 done
tomwalters@301 257 exec 3>&-
tomwalters@301 258 exec 4>&-
tomwalters@301 259 exec 5>&-
tomwalters@301 260 exec 6>&-
tomwalters@301 261 exec 7>&-
tomwalters@301 262 exec 8>&-
tomwalters@301 263 fi
tomwalters@301 264
tomwalters@301 265 if [ "$skip_features" ]
tomwalters@301 266 then
tomwalters@301 267 echo "Skipping feature generation"
tomwalters@301 268 else
tomwalters@301 269 # Make the necessary directories for the computed features
tomwalters@301 270 echo "Making directory structure..."
tomwalters@301 271 mkdir -p "$WORK/$FEATURES_DIR" # -p: do not fail when the tree already exists from an earlier run
tomwalters@301 272 for syllable in $(cat $WORK/$SYLLIST); do
tomwalters@301 273 mkdir -p "$WORK/$FEATURES_DIR/$syllable" # one sub-directory per syllable, matching DEST_FILENAME in the list files
tomwalters@301 274 done
tomwalters@301 275
tomwalters@301 276 if [ "$MFCC_FEATURES" ]
tomwalters@301 277 then
tomwalters@301 278 # Write the HCopy config file
tomwalters@301 279 echo "Creating HCopy config file..."
tomwalters@301 280 cat <<"EOF" > $WORK/$HCOPY_CONFIG
tomwalters@301 281 # Coding parameters
tomwalters@301 282 SOURCEFORMAT= WAV
tomwalters@301 283 TARGETKIND = MFCC_0_D_A
tomwalters@301 284 TARGETRATE = 100000.0
tomwalters@301 285 SAVECOMPRESSED = T
tomwalters@301 286 SAVEWITHCRC = T
tomwalters@301 287 WINDOWSIZE = 250000.0
tomwalters@301 288 USEHAMMING = T
tomwalters@301 289 PREEMCOEF = 0.97
tomwalters@301 290 NUMCHANS = 200
tomwalters@301 291 CEPLIFTER = 22
tomwalters@301 292 NUMCEPS = 12
tomwalters@301 293 ENORMALISE = F
tomwalters@301 294 EOF
tomwalters@301 295 echo "Generating features for training..."
tomwalters@301 296 ${HTK_PREFIX}HCopy -T 1 -C $WORK/$HCOPY_CONFIG -S $WORK/${TRAIN_LIST}
tomwalters@301 297
tomwalters@301 298 echo "Generating features for testing..."
tomwalters@301 299 ${HTK_PREFIX}HCopy -T 1 -C $WORK/$HCOPY_CONFIG -S $WORK/${TEST_LIST}
tomwalters@301 300 else
tomwalters@301 301 # Write the AIMCopy config file
tomwalters@301 302 echo "Creating AIMCopy config file..."
tomwalters@301 303 cat <<"EOF" > $WORK/$AIMCOPY_CONFIG
tomwalters@301 304 input.buffersize=480
tomwalters@301 305 gtfb.channel_count=200
tomwalters@301 306 gtfb.min_frequency=86.0
tomwalters@301 307 gtfb.max_frequency=16000.0
tomwalters@301 308 nap.do_lowpass=true
tomwalters@301 309 nap.lowpass_cutoff=100.0
tomwalters@301 310 slice.temporal=false
tomwalters@301 311 slice.all=true
tomwalters@301 312 slice.normalize=true
tomwalters@301 313 EOF
tomwalters@301 314 if [ "$USE_MULTIPLE_MACHINES" ]
tomwalters@301 315 then
tomwalters@301 316 echo "Splitting data files..."
tomwalters@301 317 cat $WORK/${TRAIN_LIST} $WORK/${TEST_LIST} > $WORK/${COMBINED_LIST}
tomwalters@301 318 total_cores=$(($MACHINE_COUNT*$MACHINE_CORES))
tomwalters@301 319 echo -n $total_cores
tomwalters@301 320 echo " cores available"
tomwalters@301 321 total_files=`cat $WORK/${COMBINED_LIST} | wc -l | sed 's/ *//'`
tomwalters@301 322 echo -n $total_files
tomwalters@301 323 echo " files to process"
tomwalters@301 324 files_per_core=$(($total_files/$total_cores+1))
tomwalters@301 325 echo -n $files_per_core
tomwalters@301 326 echo " files per core"
tomwalters@301 327 split -l $files_per_core $WORK/${COMBINED_LIST} $WORK/split_list
tomwalters@301 328 splits=( "$WORK"/split_list* ) # glob the files written by split instead of parsing ls output
tomwalters@301 329 element=0
tomwalters@301 330 echo "Spawning tasks..."
tomwalters@301 331 for m in $MACHINE_LIST; do
tomwalters@301 332 for ((c=1;c<=$MACHINE_CORES;c+=1)); do
tomwalters@301 333 s=${splits[$element]}
tomwalters@301 334 echo "ssh $m \"cd HTK-AIM;${AIMCOPY_PREFIX}AIMCopy -C $WORK/$AIMCOPY_CONFIG -S $s\" &"
tomwalters@301 335 #ssh $m "cd HTK-AIM;${AIMCOPY_PREFIX}AIMCopy -C $WORK/$AIMCOPY_CONFIG -S $s" &
tomwalters@301 336 let element=element+1
tomwalters@301 337 done
tomwalters@301 338 done
tomwalters@301 339 echo "Waiting for tasks to complete..."
tomwalters@301 340 wait
tomwalters@301 341 else
tomwalters@301 342 echo "Generating features for training..."
tomwalters@301 343 ${AIMCOPY_PREFIX}AIMCopy -C $WORK/$AIMCOPY_CONFIG -S $WORK/${TRAIN_LIST} -D $WORK/$AIMCOPY_LOG_TRAIN
tomwalters@301 344
tomwalters@301 345 echo "Generating features for testing..."
tomwalters@301 346 ${AIMCOPY_PREFIX}AIMCopy -C $WORK/$AIMCOPY_CONFIG -S $WORK/${TEST_LIST} -D $WORK/$AIMCOPY_LOG_TEST
tomwalters@301 347 fi
tomwalters@301 348 fi
tomwalters@301 349 fi
tomwalters@301 350
tomwalters@301 351 if [ "$MFCC_FEATURES" ]
tomwalters@301 352 then
tomwalters@301 353 cat <<"EOF" > $WORK/$HMMCONFIG
tomwalters@301 354 # Coding parameters
tomwalters@301 355 SOURCEFORMAT= HTK
tomwalters@301 356 EOF
tomwalters@301 357 else
tomwalters@301 358 cat <<"EOF" > $WORK/$HMMCONFIG
tomwalters@301 359 # Coding parameters
tomwalters@301 360 SOURCEFORMAT= HTK
tomwalters@301 361 SOURCEKIND= USER_E
tomwalters@301 362 TARGETKIND = USER_E_D_A
tomwalters@301 363 EOF
tomwalters@301 364 fi
tomwalters@301 365
tomwalters@301 366 echo "Creating HMM structure..."
tomwalters@301 367 if [ "$MFCC_FEATURES" ]
tomwalters@301 368 then
tomwalters@301 369 cat <<"EOF" > $WORK/$HMMPROTO
tomwalters@301 370 ~o<VECSIZE> 39<NULLD><MFCC_0_D_A>
tomwalters@301 371 ~h "proto"
tomwalters@301 372 <BEGINHMM>
tomwalters@301 373 <NUMSTATES> 6
tomwalters@301 374 <State> 2
tomwalters@301 375 <Mean>39
tomwalters@301 376 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
tomwalters@301 377 <Variance> 39
tomwalters@301 378 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
tomwalters@301 379 <State> 3
tomwalters@301 380 <Mean>39
tomwalters@301 381 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
tomwalters@301 382 <Variance> 39
tomwalters@301 383 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
tomwalters@301 384 <State> 4
tomwalters@301 385 <Mean>39
tomwalters@301 386 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
tomwalters@301 387 <Variance> 39
tomwalters@301 388 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
tomwalters@301 389 <State> 5
tomwalters@301 390 <Mean>39
tomwalters@301 391 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
tomwalters@301 392 <Variance> 39
tomwalters@301 393 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
tomwalters@301 394
tomwalters@301 395 <TransP> 6
tomwalters@301 396 0.0 1.0 0.0 0.0 0.0 0.0
tomwalters@301 397 0.0 0.6 0.4 0.0 0.0 0.0
tomwalters@301 398 0.0 0.0 0.6 0.4 0.0 0.0
tomwalters@301 399 0.0 0.0 0.0 0.6 0.4 0.0
tomwalters@301 400 0.0 0.0 0.0 0.0 0.6 0.4
tomwalters@301 401 0.0 0.0 0.0 0.0 0.0 0.0
tomwalters@301 402 <EndHMM>
tomwalters@301 403 EOF
tomwalters@301 404 else
tomwalters@301 405 cat <<"EOF" > $WORK/$HMMPROTO
tomwalters@301 406 ~o<VECSIZE> 12<NULLD><USER_E_D_A>
tomwalters@301 407 ~h "proto"
tomwalters@301 408 <BEGINHMM>
tomwalters@301 409 <NUMSTATES> 4
tomwalters@301 410 <State> 2
tomwalters@301 411 <Mean>12
tomwalters@301 412 0 0 0 0 0 0 0 0 0 0 0 0
tomwalters@301 413 <Variance> 12
tomwalters@301 414 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
tomwalters@301 415 <State> 3
tomwalters@301 416 <Mean>12
tomwalters@301 417 0 0 0 0 0 0 0 0 0 0 0 0
tomwalters@301 418 <Variance> 12
tomwalters@301 419 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
tomwalters@301 420 <TransP> 4
tomwalters@301 421 0.0 1.0 0.0 0.0
tomwalters@301 422 0.0 0.6 0.4 0.0
tomwalters@301 423 0.0 0.0 0.6 0.4
tomwalters@301 424 0.0 0.0 0.0 0.0
tomwalters@301 425 <EndHMM>
tomwalters@301 426 EOF
tomwalters@301 427 fi
tomwalters@301 428
tomwalters@301 429
tomwalters@301 430 echo "Training HMM..."
tomwalters@301 431 echo "Setting up prototype HMM..."
tomwalters@301 432 mkdir -p $WORK/hmm0 # -p: tolerate hmm0 left over from an earlier run in the same work directory
tomwalters@301 433 ${HTK_PREFIX}HCompV -C $WORK/$HMMCONFIG -f 0.01 -m -S $WORK/$TRAIN_SCRIPT -M $WORK/hmm0 $WORK/$HMMPROTO
tomwalters@301 434
tomwalters@301 435 echo "Generating HMM definitions..."
tomwalters@301 436 # Now take the prototype file from hmm0, and create the other HMM definitions from it
tomwalters@301 437 grep -A 9999 "<BEGINHMM>" $WORK/hmm0/$HMMPROTO > $WORK/hmm0/hmms
tomwalters@301 438 for syllable in $(cat $WORK/$SYLLIST_COMPLETE); do
tomwalters@301 439 echo "~h $syllable"
tomwalters@301 440 cat $WORK/hmm0/hmms
tomwalters@301 441 done > $WORK/hmm0/hmmdefs # single truncating redirect: a re-run rebuilds hmmdefs instead of appending duplicate definitions
tomwalters@301 442 if [ "$MFCC_FEATURES" ]
tomwalters@301 443 then
tomwalters@301 444 echo -n "~o<STREAMINFO> 1 39<VECSIZE> 39<NULLD><MFCC_0_D_A><DIAGC>" > $WORK/hmm0/macros
tomwalters@301 445 else
tomwalters@301 446 echo -n "~o<STREAMINFO> 1 12<VECSIZE> 12<NULLD><USER_E_D_A><DIAGC>" > $WORK/hmm0/macros
tomwalters@301 447 fi
tomwalters@301 448 cat $WORK/hmm0/vFloors >> $WORK/hmm0/macros
tomwalters@301 449
tomwalters@301 450 echo "Adding output mixture components..."
tomwalters@301 451 cat <<"EOF" > $WORK/$HHED_SCRIPT
tomwalters@301 452 MU 4 {*.state[2].mix} MU 4 {*.state[3].mix} MU 4 {*.state[4].mix} MU 4 {*.state[5].mix}
tomwalters@301 453 EOF
tomwalters@301 454
tomwalters@301 455 ${HTK_PREFIX}HHEd -H $WORK/hmm0/macros -H $WORK/hmm0/hmmdefs $WORK/$HHED_SCRIPT $WORK/$SYLLIST_COMPLETE
tomwalters@301 456
tomwalters@301 457 for iter in 0 1 2 3 4 5 6 7 8 9; do
tomwalters@301 458 echo "Training iteration ${iter}..."
tomwalters@301 459 nextiter=$((iter + 1)) # models are read from hmm$iter and written to hmm$nextiter
tomwalters@301 460 mkdir -p $WORK/hmm$nextiter # -p: tolerate directories left over from an earlier run
tomwalters@301 461 ${HTK_PREFIX}HERest -C $WORK/$HMMCONFIG -I $WORK/$TRAIN_MLF \
tomwalters@301 462 -t 250.0 150.0 1000.0 -S $WORK/$TRAIN_SCRIPT \
tomwalters@301 463 -H $WORK/hmm$iter/macros -H $WORK/hmm$iter/hmmdefs \
tomwalters@301 464 -M $WORK/hmm$nextiter $WORK/$SYLLIST_COMPLETE
tomwalters@301 465 done
tomwalters@301 466
tomwalters@301 467 echo "Testing..."
tomwalters@301 468 for iter in 9; do
tomwalters@301 469 ${HTK_PREFIX}HVite -H $WORK/hmm$iter/macros -H $WORK/hmm$iter/hmmdefs \
tomwalters@301 470 -C $WORK/$HMMCONFIG -S $WORK/$TEST_SCRIPT -i $WORK/$RECOUT \
tomwalters@301 471 -w $WORK/$WDNET -p 0.0 -s 5.0 $WORK/$DICT $WORK/$SYLLIST_COMPLETE
tomwalters@301 472 echo "Results from testing on iteration ${iter}..."
tomwalters@301 473 ${HTK_PREFIX}HResults -e "???" ${SILENCE} -I $WORK/$TEST_MLF $WORK/$SYLLIST_COMPLETE $WORK/$RECOUT
tomwalters@301 474 done
tomwalters@301 475
tomwalters@301 476 ${HTK_PREFIX}HResults -p -t -e "???" ${SILENCE} \
tomwalters@301 477 -I $WORK/$TEST_MLF $WORK/$SYLLIST_COMPLETE $WORK/$RECOUT > $WORK/$RESULTS_FILE
tomwalters@301 478
tomwalters@301 479 grep Aligned $WORK/$RESULTS_FILE | sed -E "s/.*\/..\/([a-z]{2})([0-9]{2,3}\.[0-9])p([0-9]{2,3}\.[0-9])s.*/\2 \3/" | sort | uniq -c > $WORK/$MISCLASSIFIED
tomwalters@301 480
tomwalters@301 481 echo "Final results, errors, and confusion matrix in file $WORK/$RESULTS_FILE"
tomwalters@301 482 echo "Statistics on misclassification in file $WORK/$MISCLASSIFIED"
tomwalters@301 483 echo "$(wc -l < "$WORK/$SYLLIST") sounds in total" # read via stdin so wc prints only the count, not the file name
tomwalters@301 484
tomwalters@301 485 if [ "$matlab_plot" ] # set by the -m command-line flag
tomwalters@301 486 then
tomwalters@301 487 echo "Plotting results figure in MATLAB..."
tomwalters@301 488 cd matlab || exit 1 # without the matlab/ directory the relative '../$WORK/' path below would be wrong
tomwalters@301 489 /Applications/MATLAB_R2007b/bin/matlab -nojvm -nosplash -r "plot_results('../${WORK}/'); exit"
tomwalters@301 490 cd ..
tomwalters@301 491 fi