#!/bin/bash
#
# Run the syllables database 'spider pattern' through AIM-C
#
# Usage: <this script> [NOISE_LEVEL_DB]
#
#   NOISE_LEVEL_DB  Optional. Appended (whitespace removed) to WORK_PREFIX to
#                   name the work directory, and written verbatim to the
#                   AIMCopy config as noise.level_db.
#
# All output (syllable list, speaker lists, file lists, configs, features) is
# written below the work directory, which is created in the current directory.
# The script returns non-zero if any feature-extraction worker fails.
#
# Copyright 2009-2010 University of Cambridge
# Author: Thomas Walters

# Catch typos in variable names instead of silently expanding to "".
set -u

# Source directory for all the sound files
#SOUND_SOURCE="/Users/Tom/Documents/Work/PhD/HTk-AIM/Sounds/"
#SOUND_SOURCE="/media/sounds-database/cnbh-sounds/"
SOUND_SOURCE="/mnt/sounds/snr_+0dB/"

# Location of the AIMCopy binary if not in the path
AIMCOPY_PREFIX="../aimc-read-only/build/posix-release/"

# Number of worker processes to split the file list across
MACHINE_CORES=8

# Set to true / 1 to enable MFCC features rather than AIM features
# (leave blank for AIM features)
MFCC_FEATURES=

# Names of various internal files and directories.
# Rename here if you don't like them for some reason.
SYLLIST=syls
TRAIN_SPEAKERS=train_speakers
TEST_SPEAKERS=test_speakers
WORK_PREFIX=snr
TRAIN_LIST=train.list
TEST_LIST=test.list
COMBINED_LIST=combined.list
FEATURES_DIR=features
AIMCOPY_CONFIG=aimcopy.cfg
HCOPY_CONFIG=hcopy.cfg
# The two log names and SILENCE below are not referenced by this script; they
# are kept so that anything relying on these settings keeps working.
AIMCOPY_LOG_TRAIN=aimcopy_train.log
AIMCOPY_LOG_TEST=aimcopy_test.log
HTK_PREFIX=""

# The vowels and consonants that make up the CNBH database
VOWELS="a e i o u"
CONSONANTS="b d f g h k l m n p r s t v w x y z"
SILENCE="sil"

# Work directory; set by main() from WORK_PREFIX and the first argument.
WORK=

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Write the sorted syllable list (VV, VC and CV for every vowel/consonant
# pair) to $WORK/$SYLLIST.
write_syllable_list() {
  local v c
  # VOWELS / CONSONANTS are space-separated lists: word splitting is intended.
  # The loop is piped straight into sort, so no temporary file is left behind
  # and a re-run cannot append duplicates. LC_ALL=C keeps the order
  # independent of the user's locale.
  for v in $VOWELS; do
    printf '%s\n' "$v$v"
    for c in $CONSONANTS; do
      printf '%s\n' "$v$c" "$c$v"
    done
  done | LC_ALL=C sort > "$WORK/$SYLLIST" \
    || die "cannot write syllable list: $WORK/$SYLLIST"
}

# Write the train and test speaker names of the spoke pattern to
# $WORK/$TRAIN_SPEAKERS and $WORK/$TEST_SPEAKERS.
write_speaker_lists() {
  cat <<'EOF' > "$WORK/$TRAIN_SPEAKERS" || die "cannot write $WORK/$TRAIN_SPEAKERS"
170.9p112.2s100.0t+000itd
171.0p112.8s100.0t+000itd
171.3p111.7s100.0t+000itd
171.5p113.1s100.0t+000itd
171.9p111.5s100.0t+000itd
172.1p113.0s100.0t+000itd
172.4p111.9s100.0t+000itd
172.5p112.5s100.0t+000itd
EOF

  cat <<'EOF' > "$WORK/$TEST_SPEAKERS" || die "cannot write $WORK/$TEST_SPEAKERS"
137.0p104.3s100.0t+000itd
141.3p135.4s100.0t+000itd
145.5p106.3s100.0t+000itd
148.8p128.8s100.0t+000itd
151.6p83.9s100.0t+000itd
153.0p108.1s100.0t+000itd
155.5p123.5s100.0t+000itd
156.7p90.6s100.0t+000itd
159.5p109.6s100.0t+000itd
161.1p119.4s100.0t+000itd
161.1p96.8s100.0t+000itd
163.4p157.6s100.0t+000itd
164.7p110.8s100.0t+000itd
164.9p102.1s100.0t+000itd
165.6p144.0s100.0t+000itd
165.7p116.2s100.0t+000itd
167.4p133.5s100.0t+000itd
167.8p106.5s100.0t+000itd
168.6p111.6s100.0t+000itd
168.9p125.4s100.0t+000itd
169.0p114.0s100.0t+000itd
170.0p109.7s100.0t+000itd
170.1p119.5s100.0t+000itd
171.0p115.5s100.0t+000itd
171.7p112.3s100.0t+000itd
172.4p109.3s100.0t+000itd
173.3p105.6s100.0t+000itd
173.5p115.0s100.0t+000itd
174.5p100.6s100.0t+000itd
174.5p110.6s100.0t+000itd
174.9p113.0s100.0t+000itd
175.7p118.5s100.0t+000itd
176.1p94.5s100.0t+000itd
178.0p108.5s100.0t+000itd
178.1p87.6s100.0t+000itd
178.8p123.6s100.0t+000itd
179.0p113.9s100.0t+000itd
180.4p80.1s100.0t+000itd
183.0p105.7s100.0t+000itd
183.0p130.4s100.0t+000itd
184.8p115.1s100.0t+000itd
188.1p139.2s100.0t+000itd
189.6p102.1s100.0t+000itd
192.7p116.7s100.0t+000itd
194.5p150.4s100.0t+000itd
198.1p97.9s100.0t+000itd
202.7p118.6s100.0t+000itd
208.6p93.2s100.0t+000itd
215.2p121.0s100.0t+000itd
EOF
}

# Print one "<source wav> <destination htk>" line per speaker.
# Arguments: $1 - syllable, $2 - file with one speaker name per line
list_entries() {
  local syllable=$1 speakers_file=$2 speaker stem
  while IFS= read -r speaker; do
    stem=$syllable/$syllable$speaker
    # Strip a trailing slash from SOUND_SOURCE so paths do not contain "//".
    printf '%s %s\n' \
      "${SOUND_SOURCE%/}/$stem.wav" \
      "$WORK/$FEATURES_DIR/$stem.htk"
  done < "$speakers_file"
}

# Construct the conversion scripts for AIMCopy (or HCopy): every syllable
# crossed with every train speaker goes to $WORK/$TRAIN_LIST, and with every
# test speaker to $WORK/$TEST_LIST.
write_file_lists() {
  local syllable
  rm -f -- "$WORK/$TRAIN_LIST" "$WORK/$TEST_LIST"
  # fd 3 = train list, fd 5 = test list; both are opened once for the loop.
  while IFS= read -r syllable; do
    list_entries "$syllable" "$WORK/$TRAIN_SPEAKERS" >&3
    list_entries "$syllable" "$WORK/$TEST_SPEAKERS" >&5
  done < "$WORK/$SYLLIST" 3> "$WORK/$TRAIN_LIST" 5> "$WORK/$TEST_LIST" \
    || die "cannot write train/test lists in $WORK"
}

# Make one features sub-directory per syllable below $WORK/$FEATURES_DIR.
make_feature_dirs() {
  local syllable
  local -a dirs
  dirs=("$WORK/$FEATURES_DIR")
  while IFS= read -r syllable; do
    dirs+=("$WORK/$FEATURES_DIR/$syllable")
  done < "$WORK/$SYLLIST"
  # -p: succeed when the directories already exist (re-run in the same
  # work directory); a single call avoids one process per syllable.
  mkdir -p -- "${dirs[@]}" || die "cannot create feature directories"
}

# Write the AIMCopy and HCopy config files into the work directory.
# Arguments: $1 - noise level, written as noise.level_db (may be empty)
write_configs() {
  local noise_level=$1

  echo "Creating AIMCopy config file..."
  cat <<'EOF' > "$WORK/$AIMCOPY_CONFIG" || die "cannot write $WORK/$AIMCOPY_CONFIG"
input.buffersize=480
sai.frame_period_ms=10.0
slice.normalize=true
ssi.weight_by_cutoff=false
ssi.weight_by_scaling=true
ssi.log_cycles_axis=true
EOF
  printf 'noise.level_db=%s\n' "$noise_level" >> "$WORK/$AIMCOPY_CONFIG"

  echo "Creating HCopy config file..."
  cat <<'EOF' > "$WORK/$HCOPY_CONFIG" || die "cannot write $WORK/$HCOPY_CONFIG"
# Coding parameters
SOURCEFORMAT= WAV
TARGETKIND = MFCC_0_D_A
TARGETRATE = 100000.0
SAVECOMPRESSED = T
SAVEWITHCRC = T
WINDOWSIZE = 250000.0
USEHAMMING = T
PREEMCOEF = 0.97
NUMCHANS = 200
CEPLIFTER = 22
NUMCEPS = 12
ENORMALISE = F
EOF
}

# Split the combined file list into at most MACHINE_CORES chunks and run one
# AIMCopy (or HCopy, when MFCC_FEATURES is non-empty) process per chunk.
# Returns: 0 when every worker exited successfully, 1 otherwise.
run_feature_extraction() {
  local total_cores total_files files_per_core chunk pid
  local spawned=0 failed=0
  local -a pids

  echo "Splitting data files..."
  cat -- "$WORK/$TRAIN_LIST" "$WORK/$TEST_LIST" > "$WORK/$COMBINED_LIST" \
    || die "cannot write $WORK/$COMBINED_LIST"

  total_cores=$((MACHINE_CORES))
  (( total_cores >= 1 )) || die "MACHINE_CORES must be at least 1"
  echo "$total_cores cores available"

  # Arithmetic expansion strips the padding some wc implementations emit.
  total_files=$(( $(wc -l < "$WORK/$COMBINED_LIST") ))
  echo "$total_files files to process"
  (( total_files > 0 )) || die "no files to process"

  # Ceiling division: never more than total_cores chunks, none of them empty.
  files_per_core=$(( (total_files + total_cores - 1) / total_cores ))
  echo "$files_per_core files per core"

  # Chunks left over from an earlier run must not be processed again.
  rm -f -- "$WORK"/split_list*
  split -l "$files_per_core" "$WORK/$COMBINED_LIST" "$WORK/split_list" \
    || die "cannot split $WORK/$COMBINED_LIST"

  echo "Spawning tasks..."
  # Iterate over the chunks that actually exist rather than counting cores,
  # so a worker is never started without a script file.
  for chunk in "$WORK"/split_list*; do
    [[ -e "$chunk" ]] || continue
    if [[ -n "$MFCC_FEATURES" ]]; then
      "${HTK_PREFIX}HCopy" -T 1 -C "$WORK/$HCOPY_CONFIG" -S "$chunk" &
    else
      "${AIMCOPY_PREFIX}AIMCopy" -C "$WORK/$AIMCOPY_CONFIG" -S "$chunk" &
    fi
    pids[spawned]=$!
    spawned=$((spawned + 1))
  done
  (( spawned > 0 )) || die "no tasks were started"

  echo "Waiting for tasks to complete..."
  # Wait on each PID individually so that a failing worker is noticed.
  for pid in "${pids[@]}"; do
    wait "$pid" || failed=$((failed + 1))
  done

  if (( failed > 0 )); then
    printf '%d of %d tasks failed\n' "$failed" "$spawned" >&2
    return 1
  fi
  return 0
}

# Entry point.
# Arguments: $1 - optional noise level in dB (see usage at top of file)
main() {
  local noise_level=${1-}

  # Whitespace is removed from the argument before it is used in a path.
  WORK=${WORK_PREFIX}${noise_level//[[:space:]]/}
  mkdir -p -- "$WORK" || die "cannot create work directory: $WORK"

  # Make a copy of this script in the experimental directory
  cp -p -- "$0" "$WORK" \
    || printf 'warning: could not copy %s to %s\n' "$0" "$WORK" >&2

  echo "Generating syllable list..."
  write_syllable_list

  # Generate a list of filenames from the spoke pattern
  write_speaker_lists

  echo "Generating train and test scripts..."
  write_file_lists

  echo "Making directory structure..."
  make_feature_dirs

  write_configs "$noise_level"

  run_feature_extraction || return 1
  echo "Done!"
}

main "$@"