tomwalters@301
|
1 #!/bin/bash
|
tomwalters@301
|
2 # using getopts
|
tomwalters@301
|
3 #
|
tomwalters@301
|
4 # Train and test an HTK monophone model using AIM
|
tomwalters@301
|
5 # features and the CNBH syllable database
|
tomwalters@301
|
6 #
|
tomwalters@301
|
7 # Copyright 2009-2010 University of Cambridge
|
tomwalters@301
|
8 # Author: Thomas Walters <tom@acousticscale.org>
|
tomwalters@301
|
9 # Based on the MATLAB scripts by Jess Monaghan and
|
tomwalters@301
|
10 # modelled on HTKTimit.sh from Cantab Research
|
tomwalters@301
|
11
|
tomwalters@301
|
#######################################
# Parse the command-line flags of this script.
# Globals:
#   skip_features (written) - set to 1 by -f: skip feature generation
#   skip_init     (written) - set to 1 by -i: skip the initialisation stage
#   matlab_plot   (written) - set to 1 by -m: plot the results in MATLAB
#   OPTIND        (written) - index of the first non-option argument
# Arguments: the script's positional parameters ("$@")
# Outputs:   usage message on stderr for an unknown option
# Returns:   exits the script with status 2 on an unknown option
#######################################
parse_options() {
  local option
  skip_features=
  skip_init=
  matlab_plot=
  OPTIND=1
  while getopts 'fim' option; do
    case "$option" in
      f) skip_features=1 ;;
      i) skip_init=1 ;;
      m) matlab_plot=1 ;;
      # Template for an option that takes a value:
      # b) bflag=1
      #    bval="$OPTARG"
      #    ;;
      # getopts reports an unknown flag as a literal '?'; the backslash stops
      # it from being read as the match-any-character glob.
      \?)
        printf 'Usage: %s: [-f] [-i] [-m] args\n' "${0##*/}" >&2
        exit 2
        ;;
    esac
  done
}

parse_options "$@"
# Drop the parsed flags so that $1 is the first real argument.
shift $((OPTIND - 1))
|
tomwalters@301
|
33
|
tomwalters@301
|
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Machine list for distributed feature generation.
# Leave USE_MULTIPLE_MACHINES blank to run everything on the local machine.
USE_MULTIPLE_MACHINES=
MACHINE_LIST="db-xserve2 db-xserve3 db-xserve5 db-xserve6 db-xserve7 db-xserve8"
# Number of whitespace-separated names in MACHINE_LIST. Counting in a subshell
# avoids wc's platform-dependent padding; the expansion is deliberately
# unquoted so that it is split into words.
MACHINE_COUNT=$(set -- $MACHINE_LIST; echo $#)
# Cores per machine
MACHINE_CORES=4

# Set to true / 1 to enable MFCC features rather than AIM features
# (leave blank for AIM features)
MFCC_FEATURES=

# Source directory for all the sound files
SOUND_SOURCE="/media/sounds-database/cnbh-sounds"

# Location of the AIMCopy binary if not in the path
# (used as a literal prefix, so keep the trailing slash)
AIMCOPY_PREFIX="../aimc-read-only/build/posix-release/"

# Location of HTK binaries if not in the path
# (used as a literal prefix, so include a trailing slash when set)
HTK_PREFIX=""

# Names of various internal files and directories.
# Rename here if you don't like them for some reason.
SYLLIST=syls
SYLLIST_COMPLETE=syllist
GRAM=gram
DICT=dict
WDNET=wdnet
TRAIN_SPEAKERS=train_speakers
TEST_SPEAKERS=test_speakers
WORK_PREFIX=work
TRAIN_LIST=train.list
TEST_LIST=test.list
COMBINED_LIST=combined.list
TRAIN_MLF=train.mlf
TEST_MLF=test.mlf
TRAIN_SCRIPT=train.scp
TEST_SCRIPT=test.scp
FEATURES_DIR=features
AIMCOPY_CONFIG=aimcopy.cfg
AIMCOPY_LOG_TRAIN=aimcopy_train.log
AIMCOPY_LOG_TEST=aimcopy_test.log
HCOPY_CONFIG=hcopy.cfg
HMMCONFIG=hmmconfig
HMMPROTO=proto
RECOUT=recout.mlf
RESULTS_FILE=results.txt
MISCLASSIFIED=misclassified.txt
HHED_SCRIPT=cmdscript

# The vowels and consonants that make up the CNBH database
VOWELS="a e i o u"
CONSONANTS="b d f g h k l m n p r s t v w x y z"
SILENCE="sil"
|
tomwalters@301
|
87
|
tomwalters@301
|
# Working directory for this experiment: WORK_PREFIX followed by the first
# positional argument with all whitespace removed (an empty argument gives
# just WORK_PREFIX). Parameter expansion is used instead of an unquoted
# `echo $1`, which would also glob-expand the argument.
WORK=${WORK_PREFIX}${1//[[:space:]]/}
if ! mkdir -p -- "$WORK"; then
  # Every later step writes below $WORK, so there is no point in continuing.
  printf 'Cannot create working directory %s\n' "$WORK" >&2
  exit 1
fi

# Make a copy of this script in the experimental directory
cp -p -- "$0" "$WORK"
|
tomwalters@301
|
93
|
tomwalters@301
|
#######################################
# Write the conversion-list, HTK script and master-label-file entries for one
# syllable, for every speaker named in a speakers file.
# Globals:   SOUND_SOURCE, WORK, FEATURES_DIR, SILENCE (read)
# Arguments: $1 - syllable
#            $2 - file with one speaker identifier per line
#            $3 - open file descriptor of the conversion list
#            $4 - open file descriptor of the HTK script file
#            $5 - open file descriptor of the master label file
#######################################
emit_syllable_entries() {
  local syllable=$1 speakers_file=$2 list_fd=$3 script_fd=$4 mlf_fd=$5
  local speaker source_file dest_base
  while IFS= read -r speaker; do
    [ -n "$speaker" ] || continue
    source_file=$SOUND_SOURCE/$syllable/${syllable}${speaker}.wav
    dest_base=$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}
    echo "$source_file ${dest_base}.htk" >&"$list_fd"
    echo "'${dest_base}.htk'" >&"$script_fd"
    # Label entry: quoted label file name, then silence / syllable / silence,
    # terminated by a single dot.
    printf '%s\n' "'\"${dest_base}.lab\"'" \
      "$SILENCE" "$syllable" "$SILENCE" "." >&"$mlf_fd"
  done < "$speakers_file"
}

if [ "$skip_init" ]; then
  echo "Skipping initialisation"
else
  # Make the sets of VC, CV, and vowel only labels, plus silence and use them to
  # generate the grammar, dictionary and list of syllables
  echo "Generating grammar, dictionary and syllable list..."
  # The loops below only append, so start from empty temporary files in case
  # an interrupted earlier run left some behind.
  : > "$WORK/$SYLLIST.tmp"
  : > "$WORK/$DICT.tmp"
  printf '%s' '$word = ' > "$WORK/$GRAM"
  separator=""
  for v in $VOWELS; do
    # Vowel-only syllable; only the very first grammar entry has no " | ".
    echo "$v$v" >> "$WORK/$SYLLIST.tmp"
    echo "$v$v [$v$v] $v$v" >> "$WORK/$DICT.tmp"
    printf '%s%s' "$separator" "$v$v" >> "$WORK/$GRAM"
    separator=" | "
    for c in $CONSONANTS; do
      # Vowel-consonant, then consonant-vowel
      for syl in "$v$c" "$c$v"; do
        echo "$syl" >> "$WORK/$SYLLIST.tmp"
        echo "$syl [$syl] $syl" >> "$WORK/$DICT.tmp"
        printf '%s' " | $syl" >> "$WORK/$GRAM"
      done
    done
  done
  echo ';' >> "$WORK/$GRAM"

  # Sort the syllable list and the dictionary and delete the
  # temporary, unsorted version
  sort "$WORK/$SYLLIST.tmp" > "$WORK/$SYLLIST"
  rm -- "$WORK/$SYLLIST.tmp"
  sort "$WORK/$DICT.tmp" > "$WORK/$DICT"
  rm -- "$WORK/$DICT.tmp"

  # Add silence to the end of the various files just generated
  cp -- "$WORK/$SYLLIST" "$WORK/$SYLLIST_COMPLETE"
  echo "$SILENCE" >> "$WORK/$SYLLIST_COMPLETE"
  echo "end_$SILENCE [$SILENCE] $SILENCE" >> "$WORK/$DICT"
  echo "start_$SILENCE [$SILENCE] $SILENCE" >> "$WORK/$DICT"
  echo "( start_$SILENCE \$word end_$SILENCE )" >> "$WORK/$GRAM"

  # Use HParse to parse the grammar into a wordnet
  echo "Generating wordnet from grammar..."
  "${HTK_PREFIX}HParse" "$WORK/$GRAM" "$WORK/$WDNET"

  # Generate a list of filenames from the spoke pattern
  cat > "$WORK/$TRAIN_SPEAKERS" <<'EOF'
170.9p112.2s100.0t+000itd
171.0p112.8s100.0t+000itd
171.3p111.7s100.0t+000itd
171.5p113.1s100.0t+000itd
171.9p111.5s100.0t+000itd
172.1p113.0s100.0t+000itd
172.4p111.9s100.0t+000itd
172.5p112.5s100.0t+000itd
EOF

  cat > "$WORK/$TEST_SPEAKERS" <<'EOF'
137.0p104.3s100.0t+000itd
141.3p135.4s100.0t+000itd
145.5p106.3s100.0t+000itd
148.8p128.8s100.0t+000itd
151.6p83.9s100.0t+000itd
153.0p108.1s100.0t+000itd
155.5p123.5s100.0t+000itd
156.7p90.6s100.0t+000itd
159.5p109.6s100.0t+000itd
161.1p119.4s100.0t+000itd
161.1p96.8s100.0t+000itd
163.4p157.6s100.0t+000itd
164.7p110.8s100.0t+000itd
164.9p102.1s100.0t+000itd
165.6p144.0s100.0t+000itd
165.7p116.2s100.0t+000itd
167.4p133.5s100.0t+000itd
167.8p106.5s100.0t+000itd
168.6p111.6s100.0t+000itd
168.9p125.4s100.0t+000itd
169.0p114.0s100.0t+000itd
170.0p109.7s100.0t+000itd
170.1p119.5s100.0t+000itd
171.0p115.5s100.0t+000itd
171.7p112.3s100.0t+000itd
172.4p109.3s100.0t+000itd
173.3p105.6s100.0t+000itd
173.5p115.0s100.0t+000itd
174.5p100.6s100.0t+000itd
174.5p110.6s100.0t+000itd
174.9p113.0s100.0t+000itd
175.7p118.5s100.0t+000itd
176.1p94.5s100.0t+000itd
178.0p108.5s100.0t+000itd
178.1p87.6s100.0t+000itd
178.8p123.6s100.0t+000itd
179.0p113.9s100.0t+000itd
180.4p80.1s100.0t+000itd
183.0p105.7s100.0t+000itd
183.0p130.4s100.0t+000itd
184.8p115.1s100.0t+000itd
188.1p139.2s100.0t+000itd
189.6p102.1s100.0t+000itd
192.7p116.7s100.0t+000itd
194.5p150.4s100.0t+000itd
198.1p97.9s100.0t+000itd
202.7p118.6s100.0t+000itd
208.6p93.2s100.0t+000itd
215.2p121.0s100.0t+000itd
EOF

  # Construct the conversion scripts for AIMCopy (or HCopy) and
  # the master label files for the train and test sets
  echo "Generating train and test scripts and master label files..."
  # Descriptors: 3/5 conversion lists, 4/6 master label files, 7/8 HTK script
  # files (train/test respectively). Opening with '>' truncates an existing
  # file, so stale outputs need no separate removal.
  exec 3> "$WORK/$TRAIN_LIST" 4> "$WORK/$TRAIN_MLF" 7> "$WORK/$TRAIN_SCRIPT"
  exec 5> "$WORK/$TEST_LIST" 6> "$WORK/$TEST_MLF" 8> "$WORK/$TEST_SCRIPT"
  echo '#!MLF!#' >&4
  echo '#!MLF!#' >&6
  while IFS= read -r syllable; do
    [ -n "$syllable" ] || continue
    emit_syllable_entries "$syllable" "$WORK/$TRAIN_SPEAKERS" 3 7 4
    emit_syllable_entries "$syllable" "$WORK/$TEST_SPEAKERS" 5 8 6
  done < "$WORK/$SYLLIST"
  exec 3>&- 4>&- 5>&- 6>&- 7>&- 8>&-
fi
|
tomwalters@301
|
264
|
tomwalters@301
|
# Feature generation: compute MFCC features with HCopy or AIM features with
# AIMCopy for every file in the train and test conversion lists.
if [ "$skip_features" ]; then
  echo "Skipping feature generation"
else
  # Make the necessary directories for the computed features.
  # -p keeps a re-run in an existing work directory from failing here.
  echo "Making directory structure..."
  mkdir -p -- "$WORK/$FEATURES_DIR"
  while IFS= read -r syllable; do
    [ -n "$syllable" ] || continue
    mkdir -p -- "$WORK/$FEATURES_DIR/$syllable"
  done < "$WORK/$SYLLIST"

  if [ "$MFCC_FEATURES" ]; then
    # Write the HCopy config file
    echo "Creating HCopy config file..."
    cat > "$WORK/$HCOPY_CONFIG" <<'EOF'
# Coding parameters
SOURCEFORMAT= WAV
TARGETKIND = MFCC_0_D_A
TARGETRATE = 100000.0
SAVECOMPRESSED = T
SAVEWITHCRC = T
WINDOWSIZE = 250000.0
USEHAMMING = T
PREEMCOEF = 0.97
NUMCHANS = 200
CEPLIFTER = 22
NUMCEPS = 12
ENORMALISE = F
EOF
    echo "Generating features for training..."
    "${HTK_PREFIX}HCopy" -T 1 -C "$WORK/$HCOPY_CONFIG" -S "$WORK/$TRAIN_LIST"

    echo "Generating features for testing..."
    "${HTK_PREFIX}HCopy" -T 1 -C "$WORK/$HCOPY_CONFIG" -S "$WORK/$TEST_LIST"
  else
    # Write the AIMCopy config file
    echo "Creating AIMCopy config file..."
    cat > "$WORK/$AIMCOPY_CONFIG" <<'EOF'
input.buffersize=480
gtfb.channel_count=200
gtfb.min_frequency=86.0
gtfb.max_frequency=16000.0
nap.do_lowpass=true
nap.lowpass_cutoff=100.0
slice.temporal=false
slice.all=true
slice.normalize=true
EOF
    if [ "$USE_MULTIPLE_MACHINES" ]; then
      echo "Splitting data files..."
      cat "$WORK/$TRAIN_LIST" "$WORK/$TEST_LIST" > "$WORK/$COMBINED_LIST"
      total_cores=$((MACHINE_COUNT * MACHINE_CORES))
      echo "$total_cores cores available"
      # Reading from stdin keeps the file name out of wc's output and the
      # arithmetic expansion strips any padding.
      total_files=$(($(wc -l < "$WORK/$COMBINED_LIST")))
      echo "$total_files files to process"
      # +1 rounds up, so there are never more chunks than cores
      files_per_core=$((total_files / total_cores + 1))
      echo "$files_per_core files per core"
      # Remove chunks left over from an earlier run; they would otherwise be
      # picked up by the glob below.
      rm -f -- "$WORK"/split_list*
      split -l "$files_per_core" "$WORK/$COMBINED_LIST" "$WORK/split_list"
      splits=("$WORK"/split_list*)
      # An unmatched glob stays literal; treat that as "no chunks".
      [ -e "${splits[0]}" ] || splits=()
      element=0
      echo "Spawning tasks..."
      for m in $MACHINE_LIST; do
        for ((c = 1; c <= MACHINE_CORES; c += 1)); do
          # There can be fewer chunks than cores; stop once all are assigned.
          [ "$element" -lt "${#splits[@]}" ] || break 2
          s=${splits[$element]}
          # NOTE(review): the remote command is only printed; the real ssh
          # call below is commented out, so no features are generated on this
          # path and the `wait` has nothing to wait for. Confirm whether the
          # dry run is intentional before enabling USE_MULTIPLE_MACHINES.
          echo "ssh $m \"cd HTK-AIM;${AIMCOPY_PREFIX}AIMCopy -C $WORK/$AIMCOPY_CONFIG -S $s\" &"
          #ssh $m "cd HTK-AIM;${AIMCOPY_PREFIX}AIMCopy -C $WORK/$AIMCOPY_CONFIG -S $s" &
          element=$((element + 1))
        done
      done
      echo "Waiting for tasks to complete..."
      wait
    else
      echo "Generating features for training..."
      "${AIMCOPY_PREFIX}AIMCopy" -C "$WORK/$AIMCOPY_CONFIG" \
        -S "$WORK/$TRAIN_LIST" -D "$WORK/$AIMCOPY_LOG_TRAIN"

      echo "Generating features for testing..."
      "${AIMCOPY_PREFIX}AIMCopy" -C "$WORK/$AIMCOPY_CONFIG" \
        -S "$WORK/$TEST_LIST" -D "$WORK/$AIMCOPY_LOG_TEST"
    fi
  fi
fi
|
tomwalters@301
|
350
|
tomwalters@301
|
# HTK configuration shared by the training and recognition tools. The MFCC
# variant only declares the source format; the AIM variant additionally
# declares the source and target parameter kinds.
if [[ -n "$MFCC_FEATURES" ]]; then
  cat > "$WORK/$HMMCONFIG" <<'EOF'
# Coding parameters
SOURCEFORMAT= HTK
EOF
else
  cat > "$WORK/$HMMCONFIG" <<'EOF'
# Coding parameters
SOURCEFORMAT= HTK
SOURCEKIND= USER_E
TARGETKIND = USER_E_D_A
EOF
fi

echo "Creating HMM structure..."
# Prototype HMM with zero means and unit variances.
#   MFCC: 39-dimensional vectors, 6 states (states 2-5 carry distributions)
#   AIM:  12-dimensional vectors, 4 states (states 2-3 carry distributions)
if [[ -n "$MFCC_FEATURES" ]]; then
  cat > "$WORK/$HMMPROTO" <<'EOF'
~o<VECSIZE> 39<NULLD><MFCC_0_D_A>
~h "proto"
<BEGINHMM>
<NUMSTATES> 6
<State> 2
<Mean>39
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<State> 3
<Mean>39
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<State> 4
<Mean>39
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<State> 5
<Mean>39
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
<Variance> 39
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0

<TransP> 6
0.0 1.0 0.0 0.0 0.0 0.0
0.0 0.6 0.4 0.0 0.0 0.0
0.0 0.0 0.6 0.4 0.0 0.0
0.0 0.0 0.0 0.6 0.4 0.0
0.0 0.0 0.0 0.0 0.6 0.4
0.0 0.0 0.0 0.0 0.0 0.0
<EndHMM>
EOF
else
  cat > "$WORK/$HMMPROTO" <<'EOF'
~o<VECSIZE> 12<NULLD><USER_E_D_A>
~h "proto"
<BEGINHMM>
<NUMSTATES> 4
<State> 2
<Mean>12
0 0 0 0 0 0 0 0 0 0 0 0
<Variance> 12
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<State> 3
<Mean>12
0 0 0 0 0 0 0 0 0 0 0 0
<Variance> 12
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
<TransP> 4
0.0 1.0 0.0 0.0
0.0 0.6 0.4 0.0
0.0 0.0 0.6 0.4
0.0 0.0 0.0 0.0
<EndHMM>
EOF
fi
|
tomwalters@301
|
428
|
tomwalters@301
|
429
|
tomwalters@301
|
# Training: initialise a prototype HMM with HCompV, clone it for every
# syllable (plus silence), run the HHEd edit script, then run ten HERest
# re-estimation passes (hmm0 -> hmm1 -> ... -> hmm10).
echo "Training HMM..."
echo "Setting up prototype HMM..."
# -p keeps a re-run in an existing work directory from failing here
mkdir -p -- "$WORK/hmm0"
"${HTK_PREFIX}HCompV" -C "$WORK/$HMMCONFIG" -f 0.01 -m \
  -S "$WORK/$TRAIN_SCRIPT" -M "$WORK/hmm0" "$WORK/$HMMPROTO"

echo "Generating HMM definitions..."
# Now take the prototype file from hmm0, and create the other HMM definitions
# from it: everything from the <BEGINHMM> line to the end of the file is the
# body shared by all models.
sed -n '/<BEGINHMM>/,$p' "$WORK/hmm0/$HMMPROTO" > "$WORK/hmm0/hmms"
# The whole loop writes through a single truncating redirection, so re-running
# in an existing work directory (e.g. with -f -i) rebuilds hmmdefs instead of
# appending a second copy of every definition.
while IFS= read -r syllable; do
  [ -n "$syllable" ] || continue
  echo "~h $syllable"
  cat "$WORK/hmm0/hmms"
done < "$WORK/$SYLLIST_COMPLETE" > "$WORK/hmm0/hmmdefs"

# The macros file is the global options line immediately followed (with no
# newline in between) by the vFloors file from hmm0.
if [ "$MFCC_FEATURES" ]; then
  printf '%s' "~o<STREAMINFO> 1 39<VECSIZE> 39<NULLD><MFCC_0_D_A><DIAGC>" \
    > "$WORK/hmm0/macros"
else
  printf '%s' "~o<STREAMINFO> 1 12<VECSIZE> 12<NULLD><USER_E_D_A><DIAGC>" \
    > "$WORK/hmm0/macros"
fi
cat "$WORK/hmm0/vFloors" >> "$WORK/hmm0/macros"

echo "Adding output mixture components..."
# NOTE(review): the edit script names states 2-5, but the AIM prototype
# declares only 4 states (distributions on states 2 and 3). Confirm how HHEd
# treats the state[4]/state[5] items when MFCC_FEATURES is blank.
cat > "$WORK/$HHED_SCRIPT" <<'EOF'
MU 4 {*.state[2].mix} MU 4 {*.state[3].mix} MU 4 {*.state[4].mix} MU 4 {*.state[5].mix}
EOF

"${HTK_PREFIX}HHEd" -H "$WORK/hmm0/macros" -H "$WORK/hmm0/hmmdefs" \
  "$WORK/$HHED_SCRIPT" "$WORK/$SYLLIST_COMPLETE"

for iter in {0..9}; do
  echo "Training iteration ${iter}..."
  nextiter=$((iter + 1))
  mkdir -p -- "$WORK/hmm$nextiter"
  "${HTK_PREFIX}HERest" -C "$WORK/$HMMCONFIG" -I "$WORK/$TRAIN_MLF" \
    -t 250.0 150.0 1000.0 -S "$WORK/$TRAIN_SCRIPT" \
    -H "$WORK/hmm$iter/macros" -H "$WORK/hmm$iter/hmmdefs" \
    -M "$WORK/hmm$nextiter" "$WORK/$SYLLIST_COMPLETE"
done
|
tomwalters@301
|
466
|
tomwalters@301
|
# Testing: recognise the test set with HVite, score it with HResults, and
# summarise which speakers were misrecognised.
echo "Testing..."
# NOTE(review): training writes models hmm1..hmm10, but only hmm9 is evaluated
# here, so the output of the last re-estimation pass is never tested. Confirm
# whether this is intended.
for iter in 9; do
  "${HTK_PREFIX}HVite" -H "$WORK/hmm$iter/macros" -H "$WORK/hmm$iter/hmmdefs" \
    -C "$WORK/$HMMCONFIG" -S "$WORK/$TEST_SCRIPT" -i "$WORK/$RECOUT" \
    -w "$WORK/$WDNET" -p 0.0 -s 5.0 "$WORK/$DICT" "$WORK/$SYLLIST_COMPLETE"
  echo "Results from testing on iteration ${iter}..."
  "${HTK_PREFIX}HResults" -e "???" "${SILENCE}" -I "$WORK/$TEST_MLF" \
    "$WORK/$SYLLIST_COMPLETE" "$WORK/$RECOUT"
done

# Same scoring again with -p -t, this time saved to the results file
"${HTK_PREFIX}HResults" -p -t -e "???" "${SILENCE}" \
  -I "$WORK/$TEST_MLF" "$WORK/$SYLLIST_COMPLETE" "$WORK/$RECOUT" \
  > "$WORK/$RESULTS_FILE"

# From every "Aligned" line keep the two numbers that follow the two-letter
# syllable in the file name (the digits before 'p' and before 's'), then count
# how often each pair occurs.
grep Aligned "$WORK/$RESULTS_FILE" \
  | sed -E 's/.*\/..\/([a-z]{2})([0-9]{2,3}\.[0-9])p([0-9]{2,3}\.[0-9])s.*/\2 \3/' \
  | sort \
  | uniq -c > "$WORK/$MISCLASSIFIED"

echo "Final results, errors, and confusion matrix in file $WORK/$RESULTS_FILE"
echo "Statstics on misclassification in file $WORK/$MISCLASSIFIED"
# Reading from stdin keeps the file name out of wc's output, so the message
# contains only the count.
echo "$(($(wc -l < "$WORK/$SYLLIST"))) sounds in total"
|
tomwalters@301
|
484
|
tomwalters@301
|
# Optional (-m): plot the results with the MATLAB script in ./matlab.
if [ "$matlab_plot" ]; then
  echo "Plotting results figure in MATLAB..."
  # Run in a subshell so the script's own working directory never changes,
  # and skip MATLAB entirely when ./matlab cannot be entered; previously a
  # failed cd was followed by `cd ..`, leaving the script one level too high.
  (
    cd matlab || {
      echo "Cannot enter ./matlab; skipping plot" >&2
      exit 1
    }
    /Applications/MATLAB_R2007b/bin/matlab -nojvm -nosplash -r "plot_results('../${WORK}/'); exit"
  )
fi