annotate scripts/gen_features.sh @ 627:7c82250c8dd2

Just update AGC param generation.
author dicklyon@google.com
date Thu, 23 May 2013 04:28:43 +0000
parents ce361fbd55fd
children
rev   line source
tomwalters@33 1 #!/bin/bash
tomwalters@33 2 #
tomwalters@33 3 # Run the syllables database 'spider pattern' through AIM-C
tomwalters@33 4 #
tomwalters@33 5 # Copyright 2009-2010 University of Cambridge
tomwalters@33 6 # Author: Thomas Walters <tom@acousticscale.org>
tomwalters@33 7
tomwalters@33 8 # Source directory for all the sound files
tomwalters@39 9 #SOUND_SOURCE="/Users/Tom/Documents/Work/PhD/HTk-AIM/Sounds/"
tomwalters@41 10 #SOUND_SOURCE="/media/sounds-database/cnbh-sounds/"
tomwalters@41 11 SOUND_SOURCE="/mnt/sounds/snr_+0dB/"
tomwalters@33 12
tomwalters@33 13 # Directory prefix of the AIMCopy binary if not in the path (keep the trailing slash)
tomwalters@39 14 AIMCOPY_PREFIX="../aimc-read-only/build/posix-release/"
tomwalters@33 15
tomwalters@41 16 MACHINE_CORES=8
tomwalters@33 17
tomwalters@43 18 # Set to any non-empty value (e.g. 1) to use MFCC features rather than AIM
tomwalters@43 19 # features; even "0" or "false" enables MFCC, so leave blank for AIM features
tomwalters@43 20 MFCC_FEATURES=
tomwalters@43 21
tomwalters@33 22 # Names of various internal files and directories.
tomwalters@33 23 # Rename here if you don't like them for some reason.
tomwalters@33 24 SYLLIST=syls
tomwalters@33 25 TRAIN_SPEAKERS=train_speakers
tomwalters@33 26 TEST_SPEAKERS=test_speakers
tomwalters@33 27 WORK_PREFIX=snr
tomwalters@33 28 TRAIN_LIST=train.list
tomwalters@33 29 TEST_LIST=test.list
tomwalters@33 30 COMBINED_LIST=combined.list
tomwalters@33 31 FEATURES_DIR=features
tomwalters@33 32 AIMCOPY_CONFIG=aimcopy.cfg
tomwalters@43 33 HCOPY_CONFIG=hcopy.cfg
tomwalters@33 34 AIMCOPY_LOG_TRAIN=aimcopy_train.log
tomwalters@33 35 AIMCOPY_LOG_TEST=aimcopy_test.log
tomwalters@43 36 HTK_PREFIX=""
tomwalters@33 37
tomwalters@33 38
tomwalters@33 39 # The vowels and consonants that make up the CNBH database
tomwalters@33 40 VOWELS="a e i o u"
tomwalters@33 41 CONSONANTS="b d f g h k l m n p r s t v w x y z"
tomwalters@33 42 SILENCE="sil"
tomwalters@33 43
tomwalters@33 44 WORK=${WORK_PREFIX}`echo $1 | tr -d ' '`
tomwalters@33 45 mkdir -p $WORK
tomwalters@33 46
tomwalters@33 47 # Make a copy of this script in the experimental directory
tomwalters@33 48 cp -p $0 $WORK
tomwalters@33 49
tomwalters@33 50
tomwalters@33 51 echo "Generating syllable list..."
tomwalters@33 52
tomwalters@33 53 for v in $VOWELS; do
tomwalters@35 54 echo $v$v >> $WORK/$SYLLIST.tmp
tomwalters@33 55 for c in $CONSONANTS; do
tomwalters@35 56 echo $v$c >> $WORK/$SYLLIST.tmp
tomwalters@34 57 echo $c$v >> $WORK/$SYLLIST.tmp
tomwalters@33 58 done
tomwalters@33 59 done
tomwalters@33 60
tomwalters@33 61 # Sort the syllable list into its final file; the
tomwalters@33 62 # temporary, unsorted version is left in place
tomwalters@33 63 sort $WORK/$SYLLIST.tmp > $WORK/$SYLLIST
tomwalters@33 64
tomwalters@33 65
tomwalters@33 66 # Write the train and test speaker lists (filename suffixes) from the spoke pattern
tomwalters@33 67 cat <<"EOF" > $WORK/$TRAIN_SPEAKERS
tomwalters@33 68 170.9p112.2s100.0t+000itd
tomwalters@33 69 171.0p112.8s100.0t+000itd
tomwalters@33 70 171.3p111.7s100.0t+000itd
tomwalters@33 71 171.5p113.1s100.0t+000itd
tomwalters@33 72 171.9p111.5s100.0t+000itd
tomwalters@33 73 172.1p113.0s100.0t+000itd
tomwalters@33 74 172.4p111.9s100.0t+000itd
tomwalters@33 75 172.5p112.5s100.0t+000itd
tomwalters@33 76 EOF
tomwalters@33 77
tomwalters@33 78 cat <<"EOF" > $WORK/$TEST_SPEAKERS
tomwalters@33 79 137.0p104.3s100.0t+000itd
tomwalters@33 80 141.3p135.4s100.0t+000itd
tomwalters@33 81 145.5p106.3s100.0t+000itd
tomwalters@33 82 148.8p128.8s100.0t+000itd
tomwalters@33 83 151.6p83.9s100.0t+000itd
tomwalters@33 84 153.0p108.1s100.0t+000itd
tomwalters@33 85 155.5p123.5s100.0t+000itd
tomwalters@33 86 156.7p90.6s100.0t+000itd
tomwalters@33 87 159.5p109.6s100.0t+000itd
tomwalters@33 88 161.1p119.4s100.0t+000itd
tomwalters@33 89 161.1p96.8s100.0t+000itd
tomwalters@33 90 163.4p157.6s100.0t+000itd
tomwalters@33 91 164.7p110.8s100.0t+000itd
tomwalters@33 92 164.9p102.1s100.0t+000itd
tomwalters@33 93 165.6p144.0s100.0t+000itd
tomwalters@33 94 165.7p116.2s100.0t+000itd
tomwalters@33 95 167.4p133.5s100.0t+000itd
tomwalters@33 96 167.8p106.5s100.0t+000itd
tomwalters@33 97 168.6p111.6s100.0t+000itd
tomwalters@33 98 168.9p125.4s100.0t+000itd
tomwalters@33 99 169.0p114.0s100.0t+000itd
tomwalters@33 100 170.0p109.7s100.0t+000itd
tomwalters@33 101 170.1p119.5s100.0t+000itd
tomwalters@33 102 171.0p115.5s100.0t+000itd
tomwalters@33 103 171.7p112.3s100.0t+000itd
tomwalters@33 104 172.4p109.3s100.0t+000itd
tomwalters@33 105 173.3p105.6s100.0t+000itd
tomwalters@33 106 173.5p115.0s100.0t+000itd
tomwalters@33 107 174.5p100.6s100.0t+000itd
tomwalters@33 108 174.5p110.6s100.0t+000itd
tomwalters@33 109 174.9p113.0s100.0t+000itd
tomwalters@33 110 175.7p118.5s100.0t+000itd
tomwalters@33 111 176.1p94.5s100.0t+000itd
tomwalters@33 112 178.0p108.5s100.0t+000itd
tomwalters@33 113 178.1p87.6s100.0t+000itd
tomwalters@33 114 178.8p123.6s100.0t+000itd
tomwalters@33 115 179.0p113.9s100.0t+000itd
tomwalters@33 116 180.4p80.1s100.0t+000itd
tomwalters@33 117 183.0p105.7s100.0t+000itd
tomwalters@33 118 183.0p130.4s100.0t+000itd
tomwalters@33 119 184.8p115.1s100.0t+000itd
tomwalters@33 120 188.1p139.2s100.0t+000itd
tomwalters@33 121 189.6p102.1s100.0t+000itd
tomwalters@33 122 192.7p116.7s100.0t+000itd
tomwalters@33 123 194.5p150.4s100.0t+000itd
tomwalters@33 124 198.1p97.9s100.0t+000itd
tomwalters@33 125 202.7p118.6s100.0t+000itd
tomwalters@33 126 208.6p93.2s100.0t+000itd
tomwalters@33 127 215.2p121.0s100.0t+000itd
tomwalters@33 128 EOF
tomwalters@33 129
tomwalters@33 130 # Construct the conversion scripts for AIMCopy (or HCopy):
tomwalters@33 131 # "source destination" file lists for the train and test sets
tomwalters@33 132
tomwalters@33 133 echo "Generating train and test scripts..."
tomwalters@33 134 if [ -a $WORK/$TRAIN_LIST ]
tomwalters@33 135 then
tomwalters@33 136 rm $WORK/$TRAIN_LIST
tomwalters@33 137 fi
tomwalters@33 138 if [ -a $WORK/$TEST_LIST ]
tomwalters@33 139 then
tomwalters@33 140 rm $WORK/$TEST_LIST
tomwalters@33 141 fi
tomwalters@33 142
tomwalters@33 143 exec 3> $WORK/$TRAIN_LIST
tomwalters@33 144 exec 5> $WORK/$TEST_LIST
tomwalters@33 145 for syllable in $(cat $WORK/$SYLLIST); do
tomwalters@33 146 for speaker in $(cat $WORK/$TRAIN_SPEAKERS); do
tomwalters@33 147 SOURCE_FILENAME=$SOUND_SOURCE/$syllable/${syllable}${speaker}.wav
tomwalters@33 148 DEST_FILENAME=$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}
tomwalters@33 149 echo "$SOURCE_FILENAME ${DEST_FILENAME}.htk" >&3
tomwalters@33 150 done
tomwalters@33 151 for speaker in $(cat $WORK/$TEST_SPEAKERS); do
tomwalters@33 152 SOURCE_FILENAME=$SOUND_SOURCE/$syllable/${syllable}${speaker}.wav
tomwalters@33 153 DEST_FILENAME=$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}
tomwalters@33 154 echo "$SOURCE_FILENAME ${DEST_FILENAME}.htk" >&5
tomwalters@33 155 done
tomwalters@33 156 done
tomwalters@33 157 exec 3>&-
tomwalters@33 158 exec 5>&-
tomwalters@33 159
tomwalters@33 160 # Make the necessary directories for the computed features
tomwalters@33 161 echo "Making directory structure..."
tomwalters@33 162 mkdir $WORK/$FEATURES_DIR
tomwalters@33 163 for syllable in $(cat $WORK/$SYLLIST); do
tomwalters@33 164 mkdir $WORK/$FEATURES_DIR/$syllable
tomwalters@33 165 done
tomwalters@33 166
tomwalters@33 167 # Write the AIMCopy config file
tomwalters@33 168 echo "Creating AIMCopy config file..."
tomwalters@33 169 cat <<"EOF" > $WORK/$AIMCOPY_CONFIG
tomwalters@33 170 input.buffersize=480
tomwalters@33 171 sai.frame_period_ms=10.0
tomwalters@33 172 slice.normalize=true
tomwalters@33 173 ssi.weight_by_cutoff=false
tomwalters@33 174 ssi.weight_by_scaling=true
tomwalters@33 175 ssi.log_cycles_axis=true
tomwalters@33 176 EOF
tomwalters@33 177 echo "noise.level_db=$1" >> $WORK/$AIMCOPY_CONFIG
tomwalters@43 178
tomwalters@43 179 echo "Creating HCopy config file..."
tomwalters@43 180 cat <<"EOF" > $WORK/$HCOPY_CONFIG
tomwalters@43 181 # Coding parameters
tomwalters@43 182 SOURCEFORMAT= WAV
tomwalters@43 183 TARGETKIND = MFCC_0_D_A
tomwalters@43 184 TARGETRATE = 100000.0
tomwalters@43 185 SAVECOMPRESSED = T
tomwalters@43 186 SAVEWITHCRC = T
tomwalters@43 187 WINDOWSIZE = 250000.0
tomwalters@43 188 USEHAMMING = T
tomwalters@43 189 PREEMCOEF = 0.97
tomwalters@43 190 NUMCHANS = 200
tomwalters@43 191 CEPLIFTER = 22
tomwalters@43 192 NUMCEPS = 12
tomwalters@43 193 ENORMALISE = F
tomwalters@43 194 EOF
tomwalters@33 195
tomwalters@33 196 echo "Splitting data files..."
tomwalters@33 197 cat $WORK/${TRAIN_LIST} $WORK/${TEST_LIST} > $WORK/${COMBINED_LIST}
tomwalters@33 198
tomwalters@33 199 total_cores=$(($MACHINE_CORES))
tomwalters@33 200 echo -n $total_cores
tomwalters@33 201 echo " cores available"
tomwalters@33 202 total_files=`cat $WORK/${COMBINED_LIST} | wc -l | sed 's/ *//'`
tomwalters@33 203 echo -n $total_files
tomwalters@33 204 echo " files to process"
tomwalters@33 205 files_per_core=$(($total_files/$total_cores+1))
tomwalters@33 206 echo -n $files_per_core
tomwalters@33 207 echo " files per core"
tomwalters@33 208 split -l $files_per_core $WORK/${COMBINED_LIST} $WORK/split_list
tomwalters@33 209 splits=( $(ls $WORK/split_list*))
tomwalters@33 210 element=0
tomwalters@33 211 echo "Spawning tasks..."
tomwalters@33 212 for ((c=1;c<=$MACHINE_CORES;c+=1)); do
tomwalters@33 213 s=${splits[$element]}
tomwalters@43 214 if [ "$MFCC_FEATURES" ]
tomwalters@43 215 then
tomwalters@43 216 ${HTK_PREFIX}HCopy -T 1 -C $WORK/$HCOPY_CONFIG -S $s &
tomwalters@43 217 else
tomwalters@43 218 ${AIMCOPY_PREFIX}AIMCopy -C $WORK/$AIMCOPY_CONFIG -S $s &
tomwalters@43 219 fi
tomwalters@33 220 let element=element+1
tomwalters@33 221 done
tomwalters@33 222
tomwalters@33 223 echo "Waiting for tasks to complete..."
tomwalters@33 224 wait
tomwalters@33 225 echo "Done!"