#!/bin/bash
#
# Generate MFCC features for the CNBH syllable database with HTK's HCopy,
# using one HCopy configuration per talker so that each talker gets its own
# WARPFREQ (frequency warping) value.
#
# Usage: <this script> FEATURES_DIR SOUND_SOURCE
#   FEATURES_DIR  Directory that receives the file lists, the HCopy
#                 configuration files and the generated .htk feature files.
#   SOUND_SOURCE  Root of the sound database. Input files are expected at
#                 SOUND_SOURCE/<syllable>/<syllable><talker>.wav
#
# Optional environment variables:
#   HTK_PREFIX    Prefix prepended to "HCopy" (e.g. "/opt/htk/bin/").
#                 Defaults to empty, i.e. HCopy is looked up in PATH.
#   HCOPY_CONFIG  Base name of the HCopy configuration file written to
#                 FEATURES_DIR. Defaults to "hcopy_config".
#
# Files written to FEATURES_DIR:
#   feature_generation_script           all "<source> <destination>" pairs
#   feature_generation_script_<talker>  the pairs belonging to one talker
#   <HCOPY_CONFIG>                      shared coding parameters
#   <HCOPY_CONFIG>_<talker>             shared parameters plus WARPFREQ
#   <syllable>/                         one output directory per syllable
#   .features_script_success            marker written after a clean run
#
# The script does nothing if FEATURES_DIR/.features_script_success exists.

set -e
set -u
set -o pipefail

if [ "$#" -lt 2 ]; then
  echo "Usage: $0 FEATURES_DIR SOUND_SOURCE" >&2
  exit 1
fi

FEATURES_DIR=$1
SOUND_SOURCE=$2

# Both may be supplied by the caller's environment; default them so that
# 'set -u' does not abort the script when they are absent.
HTK_PREFIX=${HTK_PREFIX:-}
HCOPY_CONFIG=${HCOPY_CONFIG:-hcopy_config}
HCOPY="${HTK_PREFIX}HCopy"

FULL_LIST=feature_generation_script
SUCCESS_MARKER="$FEATURES_DIR/.features_script_success"

# The vowels and consonants that make up the CNBH database
VOWELS=(a e i o u)
CONSONANTS=(b d f g h k l m n p r s t v w x y z)

# Talker identifiers. The number between 'p' and the trailing 's' is used
# below to derive the per-talker warp factor.
TALKER_LIST=(
  170.9p112.2s
  171.0p112.8s
  171.3p111.7s
  171.5p113.1s
  171.9p111.5s
  172.1p113.0s
  172.4p111.9s
  172.5p112.5s
  171.7p112.3s
  137.0p104.3s
  141.3p135.4s
  145.5p106.3s
  148.8p128.8s
  151.6p83.9s
  153.0p108.1s
  155.5p123.5s
  156.7p90.6s
  159.5p109.6s
  161.1p119.4s
  161.1p96.8s
  163.4p157.6s
  164.7p110.8s
  164.9p102.1s
  165.6p144.0s
  165.7p116.2s
  167.4p133.5s
  167.8p106.5s
  168.6p111.6s
  168.9p125.4s
  169.0p114.0s
  170.0p109.7s
  170.1p119.5s
  171.0p115.5s
  172.4p109.3s
  173.3p105.6s
  173.5p115.0s
  174.5p100.6s
  174.5p110.6s
  174.9p113.0s
  175.7p118.5s
  176.1p94.5s
  178.0p108.5s
  178.1p87.6s
  178.8p123.6s
  179.0p113.9s
  180.4p80.1s
  183.0p105.7s
  183.0p130.4s
  184.8p115.1s
  188.1p139.2s
  189.6p102.1s
  192.7p116.7s
  194.5p150.4s
  198.1p97.9s
  202.7p118.6s
  208.6p93.2s
  215.2p121.0s
)

# Print every vowel-consonant and consonant-vowel syllable, one per line.
print_syllables() {
  local v c
  for v in "${VOWELS[@]}"; do
    for c in "${CONSONANTS[@]}"; do
      echo "$v$c"
      echo "$c$v"
    done
  done
}

# Print the "<source wav> <destination htk>" line for one file.
#   $1 - syllable, $2 - talker
print_list_entry() {
  echo "$SOUND_SOURCE/$1/$1$2.wav $FEATURES_DIR/$1/$1$2.htk"
}

# Write the coding parameters shared by all talkers to the file named by $1.
write_hcopy_config() {
  cat <<'EOF' > "$1"
# Coding parameters
SOURCEFORMAT= WAV
TARGETKIND = MFCC_0_D_A
TARGETRATE = 100000.0
SAVECOMPRESSED = T
SAVEWITHCRC = T
WINDOWSIZE = 250000.0
USEHAMMING = T
PREEMCOEF = 0.97
NUMCHANS = 200
CEPLIFTER = 22
NUMCEPS = 12
ENORMALISE = F
# Parameters a bit like Welling and Ney (2002)
# Can't do zero, it seems.
WARPLCUTOFF = 10
# Upper frequency is the Nyquist freq. (24000Hz)
# so choose the break freq. close to that
WARPUCUTOFF = 23000
EOF
}

main() {
  local sorted_syllables syllable talker
  local full_list talker_config scale warpfactor pid failed
  local -a syllables=() pids=()

  # Nothing to do if a previous run already completed successfully.
  if [ -e "$SUCCESS_MARKER" ]; then
    return 0
  fi

  if ! command -v "$HCOPY" > /dev/null 2>&1; then
    echo "Please build HTK and make the binaries available in the path" >&2
    exit 1
  fi
  if ! command -v bc > /dev/null 2>&1; then
    echo "bc is required to compute the warp factors" >&2
    exit 1
  fi

  mkdir -p "$FEATURES_DIR"

  # Build the sorted syllable list in memory.
  sorted_syllables=$(print_syllables | sort)
  while IFS= read -r syllable; do
    syllables+=("$syllable")
  done <<< "$sorted_syllables"

  echo "Generating script..."
  full_list="$FEATURES_DIR/$FULL_LIST"
  for syllable in "${syllables[@]}"; do
    for talker in "${TALKER_LIST[@]}"; do
      print_list_entry "$syllable" "$talker"
    done
  done > "$full_list"

  # HCopy is run once per talker (each needs its own config), so it also
  # needs a per-talker file list.
  for talker in "${TALKER_LIST[@]}"; do
    for syllable in "${syllables[@]}"; do
      print_list_entry "$syllable" "$talker"
    done > "${full_list}_$talker"
  done

  # Make the necessary directories for the computed features
  echo "Making directory structure..."
  for syllable in "${syllables[@]}"; do
    mkdir -p "$FEATURES_DIR/$syllable"
  done

  echo "Creating HCopy config file..."
  write_hcopy_config "$FEATURES_DIR/$HCOPY_CONFIG"

  for talker in "${TALKER_LIST[@]}"; do
    talker_config="$FEATURES_DIR/${HCOPY_CONFIG}_$talker"
    cp "$FEATURES_DIR/$HCOPY_CONFIG" "$talker_config"

    # Extract the number between 'p' and the trailing 's',
    # e.g. "170.9p112.2s" -> "112.2".
    scale=${talker##*p}
    scale=${scale%%s*}

    # bc omits the leading zero for values below one (".8912"); add it back
    # only in that case so values above one are not written as "01.1918".
    warpfactor=$(echo "scale=4; 100.0/$scale" | bc)
    case "$warpfactor" in
      .*) warpfactor="0$warpfactor" ;;
    esac
    echo "WARPFREQ = $warpfactor" >> "$talker_config"

    "$HCOPY" -T 1 -C "$talker_config" -S "${full_list}_$talker" &
    pids+=("$!")
  done

  echo "Waiting for tasks to complete..."
  # A bare 'wait' always returns 0; wait on each job so failures are seen
  # and the success marker is not written after a partial run.
  failed=0
  for pid in "${pids[@]}"; do
    if ! wait "$pid"; then
      failed=1
    fi
  done
  if [ "$failed" -ne 0 ]; then
    echo "One or more HCopy tasks failed" >&2
    exit 1
  fi
  echo "Done!"

  touch "$SUCCESS_MARKER"
}

main