#!/bin/bash
#
# Run the syllables database 'spider pattern' through AIM-C
#
# Usage: <this script> [noise_level]
#   The optional first argument is appended (with spaces removed) to
#   WORK_PREFIX to name the working directory, and is also written to the
#   AIMCopy config file as noise.level_db.
#
# Copyright 2009-2010 University of Cambridge
# Author: Thomas Walters <tom@acousticscale.org>

# Source directory for all the sound files
#SOUND_SOURCE="/Users/Tom/Documents/Work/PhD/HTk-AIM/Sounds/"
#SOUND_SOURCE="/media/sounds-database/cnbh-sounds/"
SOUND_SOURCE="/mnt/sounds/snr_+0dB/"

# Location of the AIMCopy binary if not in the path
# (used as a literal prefix in front of the command name, so keep the
# trailing slash)
AIMCOPY_PREFIX="../aimc-read-only/build/posix-release/"

# Number of feature-extraction jobs to run in parallel; the file list is
# divided into this many chunks.
MACHINE_CORES=8

# Set to true / 1 to enable MFCC features rather than AIM features
# (leave blank for AIM features)
MFCC_FEATURES=

# Names of various internal files and directories.
# Rename here if you don't like them for some reason.
SYLLIST="syls"
TRAIN_SPEAKERS="train_speakers"
TEST_SPEAKERS="test_speakers"
WORK_PREFIX="snr"
TRAIN_LIST="train.list"
TEST_LIST="test.list"
COMBINED_LIST="combined.list"
FEATURES_DIR="features"
AIMCOPY_CONFIG="aimcopy.cfg"
HCOPY_CONFIG="hcopy.cfg"
AIMCOPY_LOG_TRAIN="aimcopy_train.log"
AIMCOPY_LOG_TEST="aimcopy_test.log"
# Literal prefix placed in front of the HCopy command name
# (empty: find HCopy via the path)
HTK_PREFIX=""


# The vowels and consonants that make up the CNBH database
# (space-separated; iterated over word by word)
VOWELS="a e i o u"
CONSONANTS="b d f g h k l m n p r s t v w x y z"
SILENCE="sil"

# Working directory: WORK_PREFIX followed by the first argument with all
# whitespace removed (just WORK_PREFIX when no argument is given).
# Parameter expansion is used so the argument is never word-split,
# glob-expanded or mistaken for an echo option.
WORK="${WORK_PREFIX}${1//[[:space:]]/}"
mkdir -p -- "$WORK"

# Make a copy of this script in the experimental directory
cp -p -- "$0" "$WORK"


echo "Generating syllable list..."

# Emit, one per line, each doubled vowel plus every vowel-consonant and
# consonant-vowel pair, and sort the result straight into the syllable list.
# A single pipeline (instead of appending to a scratch file) means a re-run
# in an existing work directory cannot pick up entries from an earlier run.
# VOWELS and CONSONANTS are deliberately unquoted: they are space-separated
# lists that must be split into words.
for v in $VOWELS; do
  printf '%s\n' "$v$v"
  for c in $CONSONANTS; do
    printf '%s\n' "$v$c" "$c$v"
  done
done | sort > "$WORK/$SYLLIST"


# Generate a list of filenames from the spoke pattern.
# These entries are the speaker suffixes used for the training list; the
# quoted heredoc delimiter keeps the text literal (no expansion).
cat <<'EOF' > "$WORK/$TRAIN_SPEAKERS"
170.9p112.2s100.0t+000itd
171.0p112.8s100.0t+000itd
171.3p111.7s100.0t+000itd
171.5p113.1s100.0t+000itd
171.9p111.5s100.0t+000itd
172.1p113.0s100.0t+000itd
172.4p111.9s100.0t+000itd
172.5p112.5s100.0t+000itd
EOF

# Speaker suffixes used for the test list; the quoted heredoc delimiter
# keeps the text literal (no expansion).
cat <<'EOF' > "$WORK/$TEST_SPEAKERS"
137.0p104.3s100.0t+000itd
141.3p135.4s100.0t+000itd
145.5p106.3s100.0t+000itd
148.8p128.8s100.0t+000itd
151.6p83.9s100.0t+000itd
153.0p108.1s100.0t+000itd
155.5p123.5s100.0t+000itd
156.7p90.6s100.0t+000itd
159.5p109.6s100.0t+000itd
161.1p119.4s100.0t+000itd
161.1p96.8s100.0t+000itd
163.4p157.6s100.0t+000itd
164.7p110.8s100.0t+000itd
164.9p102.1s100.0t+000itd
165.6p144.0s100.0t+000itd
165.7p116.2s100.0t+000itd
167.4p133.5s100.0t+000itd
167.8p106.5s100.0t+000itd
168.6p111.6s100.0t+000itd
168.9p125.4s100.0t+000itd
169.0p114.0s100.0t+000itd
170.0p109.7s100.0t+000itd
170.1p119.5s100.0t+000itd
171.0p115.5s100.0t+000itd
171.7p112.3s100.0t+000itd
172.4p109.3s100.0t+000itd
173.3p105.6s100.0t+000itd
173.5p115.0s100.0t+000itd
174.5p100.6s100.0t+000itd
174.5p110.6s100.0t+000itd
174.9p113.0s100.0t+000itd
175.7p118.5s100.0t+000itd
176.1p94.5s100.0t+000itd
178.0p108.5s100.0t+000itd
178.1p87.6s100.0t+000itd
178.8p123.6s100.0t+000itd
179.0p113.9s100.0t+000itd
180.4p80.1s100.0t+000itd
183.0p105.7s100.0t+000itd
183.0p130.4s100.0t+000itd
184.8p115.1s100.0t+000itd
188.1p139.2s100.0t+000itd
189.6p102.1s100.0t+000itd
192.7p116.7s100.0t+000itd
194.5p150.4s100.0t+000itd
198.1p97.9s100.0t+000itd
202.7p118.6s100.0t+000itd
208.6p93.2s100.0t+000itd
215.2p121.0s100.0t+000itd
EOF

# Construct the conversion scripts for AIMCopy (or HCopy): one
# "<source .wav> <destination .htk>" line per syllable/speaker pair,
# written to separate lists for the train and test speakers.

echo "Generating train and test scripts..."

# Opening with '>' truncates any list left over from an earlier run, so no
# separate existence check and rm is needed beforehand.
exec 3> "$WORK/$TRAIN_LIST"
exec 5> "$WORK/$TEST_LIST"
# '|| [ -n ... ]' keeps a final line that lacks a trailing newline;
# blank lines are skipped.
while IFS= read -r syllable || [ -n "$syllable" ]; do
  [ -n "$syllable" ] || continue
  while IFS= read -r speaker || [ -n "$speaker" ]; do
    [ -n "$speaker" ] || continue
    SOURCE_FILENAME=$SOUND_SOURCE/$syllable/${syllable}${speaker}.wav
    DEST_FILENAME=$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}
    echo "$SOURCE_FILENAME ${DEST_FILENAME}.htk" >&3
  done < "$WORK/$TRAIN_SPEAKERS"
  while IFS= read -r speaker || [ -n "$speaker" ]; do
    [ -n "$speaker" ] || continue
    SOURCE_FILENAME=$SOUND_SOURCE/$syllable/${syllable}${speaker}.wav
    DEST_FILENAME=$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}
    echo "$SOURCE_FILENAME ${DEST_FILENAME}.htk" >&5
  done < "$WORK/$TEST_SPEAKERS"
done < "$WORK/$SYLLIST"
# Close both list files.
exec 3>&-
exec 5>&-

# Make the necessary directories for the computed features:
# one sub-directory per syllable under the features directory.
# -p makes this safe to repeat in a work directory that already exists.
echo "Making directory structure..."
mkdir -p -- "$WORK/$FEATURES_DIR"
while IFS= read -r syllable || [ -n "$syllable" ]; do
  [ -n "$syllable" ] || continue
  mkdir -p -- "$WORK/$FEATURES_DIR/$syllable"
done < "$WORK/$SYLLIST"

# Write the AIMCopy config file: a fixed set of options followed by the
# noise level taken from the first script argument.
# NOTE(review): with no argument this writes an empty noise.level_db value;
# confirm AIMCopy accepts that.
echo "Creating AIMCopy config file..."
{
  cat <<'EOF'
input.buffersize=480
sai.frame_period_ms=10.0
slice.normalize=true
ssi.weight_by_cutoff=false
ssi.weight_by_scaling=true
ssi.log_cycles_axis=true
EOF
  printf 'noise.level_db=%s\n' "$1"
} > "$WORK/$AIMCOPY_CONFIG"

# Write the HCopy config file (passed to HCopy with -C when MFCC features
# are enabled). The quoted heredoc delimiter keeps the text literal.
echo "Creating HCopy config file..."
cat <<'EOF' > "$WORK/$HCOPY_CONFIG"
# Coding parameters
SOURCEFORMAT= WAV
TARGETKIND = MFCC_0_D_A
TARGETRATE = 100000.0
SAVECOMPRESSED = T
SAVEWITHCRC = T
WINDOWSIZE = 250000.0
USEHAMMING = T
PREEMCOEF = 0.97
NUMCHANS = 200
CEPLIFTER = 22
NUMCEPS = 12
ENORMALISE = F
EOF

# Concatenate the train and test lists and cut the result into chunk files
# named "$WORK/split_list*", aiming for one chunk per core.
echo "Splitting data files..."
cat -- "$WORK/$TRAIN_LIST" "$WORK/$TEST_LIST" > "$WORK/$COMBINED_LIST"

total_cores=$((MACHINE_CORES))
# Guard against a zero/unset core count, which would divide by zero below.
(( total_cores > 0 )) || total_cores=1
echo "$total_cores cores available"
# Reading from stdin makes wc print only the count; the arithmetic
# expansion strips any padding wc puts around it.
total_files=$(( $(wc -l < "$WORK/$COMBINED_LIST") ))
echo "$total_files files to process"
# Ceiling division: the smallest chunk size that fits everything into at
# most total_cores chunks (an unconditional +1 wastes cores whenever the
# count divides evenly).
files_per_core=$(( (total_files + total_cores - 1) / total_cores ))
# split rejects a line count of 0 (empty list).
(( files_per_core > 0 )) || files_per_core=1
echo "$files_per_core files per core"
# Drop chunks left behind by an earlier run so they are not picked up again.
rm -f -- "$WORK"/split_list*
split -l "$files_per_core" "$WORK/$COMBINED_LIST" "$WORK/split_list"
# Collect the chunk files (glob rather than parsing ls output); the -e test
# drops the unexpanded pattern when nothing matches.
splits=()
for s in "$WORK"/split_list*; do
  [ -e "$s" ] && splits+=("$s")
done

echo "Spawning tasks..."
pids=()
# Start one background job per chunk, at most MACHINE_CORES of them. Bounding
# by the number of chunks avoids launching a tool with an empty -S argument
# when there are fewer chunks than cores.
for ((c = 0; c < MACHINE_CORES && c < ${#splits[@]}; c++)); do
  s=${splits[c]}
  if [ "$MFCC_FEATURES" ]; then
    "${HTK_PREFIX}HCopy" -T 1 -C "$WORK/$HCOPY_CONFIG" -S "$s" &
  else
    "${AIMCOPY_PREFIX}AIMCopy" -C "$WORK/$AIMCOPY_CONFIG" -S "$s" &
  fi
  pids+=("$!")
done

echo "Waiting for tasks to complete..."
# Wait on each job individually so a non-zero exit status can be reported;
# a bare 'wait' would discard them.
failed=0
for pid in "${pids[@]}"; do
  wait "$pid" || failed=$((failed + 1))
done
if (( failed > 0 )); then
  echo "Warning: $failed task(s) exited with an error" >&2
fi
echo "Done!"