annotate scripts/gen_features.sh @ 610:01986636257a

Second check-in of Alex Brandmeyer's C++ implementation of CARFAC. Addressed style issues and completed implementation of remaining functions. Still needs proper testing of the output stages against the MATLAB version, and runtime functions need improvements in efficiency.
author alexbrandmeyer
date Thu, 16 May 2013 17:33:23 +0000
parents ce361fbd55fd
children
rev   line source
#!/bin/bash
#
# Run the syllables database 'spider pattern' through AIM-C
#
# Usage: gen_features.sh [noise_level_db]
#   The optional first argument is appended (whitespace removed) to
#   WORK_PREFIX to name the working directory, and is written to the AIMCopy
#   config file as noise.level_db.
#
# The settings marked "override" below may be supplied through the
# environment; when they are not set, the historical defaults are used.
#
# Copyright 2009-2010 University of Cambridge
# Author: Thomas Walters <tom@acousticscale.org>

# Source directory for all the sound files (override).
# Previously used locations:
#   /Users/Tom/Documents/Work/PhD/HTk-AIM/Sounds/
#   /media/sounds-database/cnbh-sounds/
SOUND_SOURCE="${SOUND_SOURCE:-/mnt/sounds/snr_+0dB/}"

# Location of the AIMCopy binary if not in the path (override).
# Export it as an empty string to look AIMCopy up on the PATH instead.
AIMCOPY_PREFIX="${AIMCOPY_PREFIX-../aimc-read-only/build/posix-release/}"

# Number of cores used to size the per-task file lists (override).
MACHINE_CORES="${MACHINE_CORES:-8}"

# Set to true / 1 to enable MFCC features rather than AIM features
# (leave blank for AIM features). Any non-empty value enables MFCCs (override).
MFCC_FEATURES="${MFCC_FEATURES:-}"

# Prefix prepended to the HCopy command name; empty means use the PATH
# (override).
HTK_PREFIX="${HTK_PREFIX:-}"

# Names of various internal files and directories.
# Rename here if you don't like them for some reason.
SYLLIST=syls
TRAIN_SPEAKERS=train_speakers
TEST_SPEAKERS=test_speakers
WORK_PREFIX=snr
TRAIN_LIST=train.list
TEST_LIST=test.list
COMBINED_LIST=combined.list
FEATURES_DIR=features
AIMCOPY_CONFIG=aimcopy.cfg
HCOPY_CONFIG=hcopy.cfg
# NOTE(review): the two log names below are not referenced anywhere else in
# the visible script; they are kept so anything sourcing them keeps working.
AIMCOPY_LOG_TRAIN=aimcopy_train.log
AIMCOPY_LOG_TEST=aimcopy_test.log

# The vowels and consonants that make up the CNBH database
VOWELS="a e i o u"
CONSONANTS="b d f g h k l m n p r s t v w x y z"
SILENCE="sil"

# Working directory: WORK_PREFIX followed by the first argument with all
# whitespace removed (an absent argument contributes nothing).
work_suffix="${1-}"
WORK="${WORK_PREFIX}${work_suffix//[[:space:]]/}"
if ! mkdir -p -- "$WORK"; then
  echo "Cannot create working directory '$WORK'" >&2
  exit 1
fi

# Make a copy of this script in the experimental directory
cp -p -- "$0" "$WORK"


echo "Generating syllable list..."

# Earlier versions appended to "$SYLLIST.tmp" and never removed it, so a
# second run in the same directory duplicated every syllable. Remove any such
# leftover and stream the syllables straight into sort instead.
rm -f -- "$WORK/$SYLLIST.tmp"

# For every vowel emit the doubled vowel, then each vowel-consonant and
# consonant-vowel pair; the sorted result is the syllable list.
{
  for v in $VOWELS; do
    printf '%s\n' "$v$v"
    for c in $CONSONANTS; do
      printf '%s\n' "$v$c"
      printf '%s\n' "$c$v"
    done
  done
} | sort > "$WORK/$SYLLIST"


# Write the speaker identifiers of the spoke pattern, one per line. Each
# identifier is later appended to a syllable name to form a sound filename.

# Speakers used for the training set.
cat <<'EOF' > "$WORK/$TRAIN_SPEAKERS"
170.9p112.2s100.0t+000itd
171.0p112.8s100.0t+000itd
171.3p111.7s100.0t+000itd
171.5p113.1s100.0t+000itd
171.9p111.5s100.0t+000itd
172.1p113.0s100.0t+000itd
172.4p111.9s100.0t+000itd
172.5p112.5s100.0t+000itd
EOF

# Speakers used for the test set.
cat <<'EOF' > "$WORK/$TEST_SPEAKERS"
137.0p104.3s100.0t+000itd
141.3p135.4s100.0t+000itd
145.5p106.3s100.0t+000itd
148.8p128.8s100.0t+000itd
151.6p83.9s100.0t+000itd
153.0p108.1s100.0t+000itd
155.5p123.5s100.0t+000itd
156.7p90.6s100.0t+000itd
159.5p109.6s100.0t+000itd
161.1p119.4s100.0t+000itd
161.1p96.8s100.0t+000itd
163.4p157.6s100.0t+000itd
164.7p110.8s100.0t+000itd
164.9p102.1s100.0t+000itd
165.6p144.0s100.0t+000itd
165.7p116.2s100.0t+000itd
167.4p133.5s100.0t+000itd
167.8p106.5s100.0t+000itd
168.6p111.6s100.0t+000itd
168.9p125.4s100.0t+000itd
169.0p114.0s100.0t+000itd
170.0p109.7s100.0t+000itd
170.1p119.5s100.0t+000itd
171.0p115.5s100.0t+000itd
171.7p112.3s100.0t+000itd
172.4p109.3s100.0t+000itd
173.3p105.6s100.0t+000itd
173.5p115.0s100.0t+000itd
174.5p100.6s100.0t+000itd
174.5p110.6s100.0t+000itd
174.9p113.0s100.0t+000itd
175.7p118.5s100.0t+000itd
176.1p94.5s100.0t+000itd
178.0p108.5s100.0t+000itd
178.1p87.6s100.0t+000itd
178.8p123.6s100.0t+000itd
179.0p113.9s100.0t+000itd
180.4p80.1s100.0t+000itd
183.0p105.7s100.0t+000itd
183.0p130.4s100.0t+000itd
184.8p115.1s100.0t+000itd
188.1p139.2s100.0t+000itd
189.6p102.1s100.0t+000itd
192.7p116.7s100.0t+000itd
194.5p150.4s100.0t+000itd
198.1p97.9s100.0t+000itd
202.7p118.6s100.0t+000itd
208.6p93.2s100.0t+000itd
215.2p121.0s100.0t+000itd
EOF

# Construct the conversion scripts for AIMCopy (or HCopy): one line per
# (syllable, speaker) pair of the form "<source>.wav <destination>.htk".
# Training speakers go to TRAIN_LIST (fd 3), test speakers to TEST_LIST (fd 5).

echo "Generating train and test scripts..."

# The redirections on the group truncate both list files, so no explicit
# removal of old lists is needed, and both descriptors are closed again when
# the group finishes.
if ! {
  while IFS= read -r syllable; do
    [[ -n "$syllable" ]] || continue
    while IFS= read -r speaker; do
      [[ -n "$speaker" ]] || continue
      printf '%s %s.htk\n' \
        "$SOUND_SOURCE/$syllable/${syllable}${speaker}.wav" \
        "$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}" >&3
    done < "$WORK/$TRAIN_SPEAKERS"
    while IFS= read -r speaker; do
      [[ -n "$speaker" ]] || continue
      printf '%s %s.htk\n' \
        "$SOUND_SOURCE/$syllable/${syllable}${speaker}.wav" \
        "$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}" >&5
    done < "$WORK/$TEST_SPEAKERS"
  done < "$WORK/$SYLLIST"
} 3> "$WORK/$TRAIN_LIST" 5> "$WORK/$TEST_LIST"; then
  echo "Failed to generate the train/test lists in '$WORK'" >&2
  exit 1
fi

# Make the necessary directories for the computed features: one
# subdirectory per syllable under $WORK/$FEATURES_DIR.
# -p keeps a re-run in an existing working directory from failing on
# directories that are already there.
echo "Making directory structure..."
mkdir -p -- "$WORK/$FEATURES_DIR" || exit 1
while IFS= read -r syllable; do
  [[ -n "$syllable" ]] || continue
  mkdir -p -- "$WORK/$FEATURES_DIR/$syllable" || exit 1
done < "$WORK/$SYLLIST"

# Write the AIMCopy config file: a fixed set of module parameters followed
# by noise.level_db, taken verbatim from the script's first argument
# (the value is left empty when no argument was given).
echo "Creating AIMCopy config file..."
{
  cat <<'EOF'
input.buffersize=480
sai.frame_period_ms=10.0
slice.normalize=true
ssi.weight_by_cutoff=false
ssi.weight_by_scaling=true
ssi.log_cycles_axis=true
EOF
  printf 'noise.level_db=%s\n' "${1-}"
} > "$WORK/$AIMCOPY_CONFIG"

# Write the HCopy config file. It is always written; it is only passed to
# HCopy when MFCC_FEATURES is non-empty. The quoted heredoc delimiter keeps
# the content literal.
echo "Creating HCopy config file..."
cat <<'EOF' > "$WORK/$HCOPY_CONFIG"
# Coding parameters
SOURCEFORMAT= WAV
TARGETKIND = MFCC_0_D_A
TARGETRATE = 100000.0
SAVECOMPRESSED = T
SAVEWITHCRC = T
WINDOWSIZE = 250000.0
USEHAMMING = T
PREEMCOEF = 0.97
NUMCHANS = 200
CEPLIFTER = 22
NUMCEPS = 12
ENORMALISE = F
EOF

# Split the combined train+test list into at most MACHINE_CORES pieces and
# run one AIMCopy (or HCopy, when MFCC_FEATURES is non-empty) task per piece
# in the background, then wait for all of them.
echo "Splitting data files..."
cat "$WORK/$TRAIN_LIST" "$WORK/$TEST_LIST" > "$WORK/$COMBINED_LIST" || exit 1

positive_int_re='^[1-9][0-9]*$'
if ! [[ "$MACHINE_CORES" =~ $positive_int_re ]]; then
  echo "MACHINE_CORES must be a positive integer (got '$MACHINE_CORES')" >&2
  exit 1
fi
total_cores=$MACHINE_CORES
printf '%s cores available\n' "$total_cores"

# The arithmetic expansion strips the padding some wc implementations emit.
total_files=$(( $(wc -l < "$WORK/$COMBINED_LIST") ))
printf '%s files to process\n' "$total_files"
if (( total_files == 0 )); then
  echo "No files to process in '$WORK/$COMBINED_LIST'" >&2
  exit 1
fi

# Ceiling division, so the list is cut into no more than total_cores pieces
# and the pieces are as even as possible.
files_per_core=$(( (total_files + total_cores - 1) / total_cores ))
printf '%s files per core\n' "$files_per_core"

# Drop pieces left behind by a previous run so they are not processed again.
rm -f -- "$WORK"/split_list*
split -l "$files_per_core" "$WORK/$COMBINED_LIST" "$WORK/split_list" || exit 1
splits=( "$WORK"/split_list* )
if [[ ! -e "${splits[0]}" ]]; then
  echo "split produced no files in '$WORK'" >&2
  exit 1
fi

echo "Spawning tasks..."
# Launch one task per piece that actually exists (there may be fewer pieces
# than cores), remembering each PID so its exit status can be checked.
pids=()
for s in "${splits[@]}"; do
  if [[ -n "$MFCC_FEATURES" ]]; then
    "${HTK_PREFIX}HCopy" -T 1 -C "$WORK/$HCOPY_CONFIG" -S "$s" &
  else
    "${AIMCOPY_PREFIX}AIMCopy" -C "$WORK/$AIMCOPY_CONFIG" -S "$s" &
  fi
  pids+=( "$!" )
done

echo "Waiting for tasks to complete..."
failed=0
for pid in "${pids[@]}"; do
  wait "$pid" || failed=$(( failed + 1 ))
done
if (( failed > 0 )); then
  echo "$failed of ${#pids[@]} feature extraction tasks failed" >&2
  exit 1
fi
echo "Done!"