#!/bin/bash
#
# Run the syllables database 'spider pattern' through AIM-C
#
# Usage: <this script> <level>
#   <level> (the first argument) is appended, with whitespace removed, to
#   WORK_PREFIX to name the working directory. The same argument is later
#   written to the AIMCopy config as noise.level_db.
#
# Copyright 2009-2010 University of Cambridge
# Author: Thomas Walters

# Source directory for all the sound files
#SOUND_SOURCE="/Users/Tom/Documents/Work/PhD/HTk-AIM/Sounds/"
#SOUND_SOURCE="/media/sounds-database/cnbh-sounds/"
SOUND_SOURCE="/mnt/sounds/snr_+0dB/"

# Location of the AIMCopy binary if not in the path
AIMCOPY_PREFIX="../aimc-read-only/build/posix-release/"

# Number of feature-extraction processes to run in parallel
MACHINE_CORES=8

# Set to true / 1 to enable MFCC features rather than AIM features
# (leave blank for AIM features)
MFCC_FEATURES=

# Names of various internal files and directories.
# Rename here if you don't like them for some reason.
SYLLIST=syls
TRAIN_SPEAKERS=train_speakers
TEST_SPEAKERS=test_speakers
WORK_PREFIX=snr
TRAIN_LIST=train.list
TEST_LIST=test.list
COMBINED_LIST=combined.list
FEATURES_DIR=features
AIMCOPY_CONFIG=aimcopy.cfg
HCOPY_CONFIG=hcopy.cfg
AIMCOPY_LOG_TRAIN=aimcopy_train.log
AIMCOPY_LOG_TEST=aimcopy_test.log
HTK_PREFIX=""

# The vowels and consonants that make up the CNBH database
VOWELS="a e i o u"
CONSONANTS="b d f g h k l m n p r s t v w x y z"
SILENCE="sil"

# Working directory: prefix + first argument with all whitespace stripped.
# Done with parameter expansion so the argument is never word-split or
# glob-expanded on its way into the directory name.
WORK="${WORK_PREFIX}${1//[[:space:]]/}"
mkdir -p -- "$WORK"

# Make a copy of this script in the experimental directory
cp -p -- "$0" "$WORK"

echo "Generating syllable list..."

# Write the sorted syllable list to the file named by $1.
# For every vowel V in $VOWELS the list contains "VV", and for every
# consonant C in $CONSONANTS it contains both "VC" and "CV".
# The syllables are piped straight into sort, so no temporary file is left
# behind and re-running the script cannot append duplicates to an old list.
write_syllable_list() {
  local out_file=$1
  local v c
  # $VOWELS / $CONSONANTS are deliberately unquoted: they are
  # space-separated lists that must be word-split.
  for v in $VOWELS; do
    printf '%s\n' "$v$v"
    for c in $CONSONANTS; do
      printf '%s\n' "$v$c" "$c$v"
    done
  done | sort > "$out_file"
}

write_syllable_list "$WORK/$SYLLIST"

# Generate a list of filenames from the spoke pattern
cat <<"EOF" > "$WORK/$TRAIN_SPEAKERS"
170.9p112.2s100.0t+000itd
171.0p112.8s100.0t+000itd
171.3p111.7s100.0t+000itd
171.5p113.1s100.0t+000itd
171.9p111.5s100.0t+000itd
172.1p113.0s100.0t+000itd
172.4p111.9s100.0t+000itd
172.5p112.5s100.0t+000itd
EOF

cat <<"EOF" > "$WORK/$TEST_SPEAKERS"
137.0p104.3s100.0t+000itd
141.3p135.4s100.0t+000itd
145.5p106.3s100.0t+000itd
148.8p128.8s100.0t+000itd
151.6p83.9s100.0t+000itd
153.0p108.1s100.0t+000itd
155.5p123.5s100.0t+000itd
156.7p90.6s100.0t+000itd
159.5p109.6s100.0t+000itd
161.1p119.4s100.0t+000itd
161.1p96.8s100.0t+000itd
163.4p157.6s100.0t+000itd
164.7p110.8s100.0t+000itd
164.9p102.1s100.0t+000itd
165.6p144.0s100.0t+000itd
165.7p116.2s100.0t+000itd
167.4p133.5s100.0t+000itd
167.8p106.5s100.0t+000itd
168.6p111.6s100.0t+000itd
168.9p125.4s100.0t+000itd
169.0p114.0s100.0t+000itd
170.0p109.7s100.0t+000itd
170.1p119.5s100.0t+000itd
171.0p115.5s100.0t+000itd
171.7p112.3s100.0t+000itd
172.4p109.3s100.0t+000itd
173.3p105.6s100.0t+000itd
173.5p115.0s100.0t+000itd
174.5p100.6s100.0t+000itd
174.5p110.6s100.0t+000itd
174.9p113.0s100.0t+000itd
175.7p118.5s100.0t+000itd
176.1p94.5s100.0t+000itd
178.0p108.5s100.0t+000itd
178.1p87.6s100.0t+000itd
178.8p123.6s100.0t+000itd
179.0p113.9s100.0t+000itd
180.4p80.1s100.0t+000itd
183.0p105.7s100.0t+000itd
183.0p130.4s100.0t+000itd
184.8p115.1s100.0t+000itd
188.1p139.2s100.0t+000itd
189.6p102.1s100.0t+000itd
192.7p116.7s100.0t+000itd
194.5p150.4s100.0t+000itd
198.1p97.9s100.0t+000itd
202.7p118.6s100.0t+000itd
208.6p93.2s100.0t+000itd
215.2p121.0s100.0t+000itd
EOF

# Construct the conversion scripts for AIMCopy (or HCopy) and
# the master label files for the train and test sets

echo "Generating train and test scripts..."
# Print one "<source wav> <destination htk>" line for every
# syllable/speaker pair, syllable-major (all speakers of the first
# syllable, then all speakers of the second, ...).
# Arguments: $1 - file containing one syllable per line
#            $2 - file containing one speaker identifier per line
# Globals:   SOUND_SOURCE, WORK, FEATURES_DIR (read)
# Outputs:   the list on stdout
write_file_list() {
  local syllable_file=$1
  local speakers_file=$2
  # Drop a trailing slash so the joined paths do not contain "//".
  local source_root=${SOUND_SOURCE%/}
  local syllable speaker
  local -a speakers=()

  # Read the speaker list once instead of once per syllable.
  while read -r speaker; do
    [[ -n "$speaker" ]] && speakers+=("$speaker")
  done < "$speakers_file"

  while read -r syllable; do
    [[ -n "$syllable" ]] || continue
    for speaker in "${speakers[@]}"; do
      printf '%s %s.htk\n' \
        "$source_root/$syllable/${syllable}${speaker}.wav" \
        "$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}"
    done
  done < "$syllable_file"
}

# Create $WORK/$FEATURES_DIR and one sub-directory per syllable.
# Arguments: $1 - file containing one syllable per line
# Globals:   WORK, FEATURES_DIR (read)
# Returns:   non-zero if a directory cannot be created.
# Uses mkdir -p so re-running in an existing work directory succeeds.
make_feature_dirs() {
  local syllable_file=$1
  local syllable
  mkdir -p -- "$WORK/$FEATURES_DIR" || return 1
  while read -r syllable; do
    [[ -n "$syllable" ]] || continue
    mkdir -p -- "$WORK/$FEATURES_DIR/$syllable" || return 1
  done < "$syllable_file"
}

# The redirections truncate any list left over from a previous run,
# so no explicit removal is needed.
write_file_list "$WORK/$SYLLIST" "$WORK/$TRAIN_SPEAKERS" > "$WORK/$TRAIN_LIST"
write_file_list "$WORK/$SYLLIST" "$WORK/$TEST_SPEAKERS" > "$WORK/$TEST_LIST"

# Make the necessary directories for the computed features
echo "Making directory structure..."
make_feature_dirs "$WORK/$SYLLIST"

# Write the AIMCopy config file
echo "Creating AIMCopy config file..."
cat <<"EOF" > "$WORK/$AIMCOPY_CONFIG"
input.buffersize=480
sai.frame_period_ms=10.0
slice.normalize=true
ssi.weight_by_cutoff=false
ssi.weight_by_scaling=true
ssi.log_cycles_axis=true
EOF
# The script's first argument is written to the config as noise.level_db.
echo "noise.level_db=$1" >> "$WORK/$AIMCOPY_CONFIG"

echo "Creating HCopy config file..."
cat <<"EOF" > "$WORK/$HCOPY_CONFIG"
# Coding parameters
SOURCEFORMAT= WAV
TARGETKIND = MFCC_0_D_A
TARGETRATE = 100000.0
SAVECOMPRESSED = T
SAVEWITHCRC = T
WINDOWSIZE = 250000.0
USEHAMMING = T
PREEMCOEF = 0.97
NUMCHANS = 200
CEPLIFTER = 22
NUMCEPS = 12
ENORMALISE = F
EOF

# PIDs of the background extraction jobs, filled by spawn_extraction_jobs.
JOB_PIDS=()

# Concatenate the train and test lists, split the result into chunks and
# start one background extraction job per chunk (HCopy when MFCC_FEATURES
# is non-empty, AIMCopy otherwise).
# Globals:   WORK, TRAIN_LIST, TEST_LIST, COMBINED_LIST, MACHINE_CORES,
#            MFCC_FEATURES, HTK_PREFIX, AIMCOPY_PREFIX, HCOPY_CONFIG,
#            AIMCOPY_CONFIG (read); JOB_PIDS (appended to)
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   non-zero if MACHINE_CORES is invalid or the lists cannot be
#            combined or split.
spawn_extraction_jobs() {
  local combined total_files files_per_core chunk

  # Guard the division below against an empty, zero or non-numeric value.
  if ! [[ "$MACHINE_CORES" =~ ^[1-9][0-9]*$ ]]; then
    echo "MACHINE_CORES must be a positive integer (got '$MACHINE_CORES')" >&2
    return 1
  fi

  echo "Splitting data files..."
  combined="$WORK/$COMBINED_LIST"
  cat "$WORK/$TRAIN_LIST" "$WORK/$TEST_LIST" > "$combined" || return 1

  echo "$MACHINE_CORES cores available"
  total_files=$(wc -l < "$combined")
  # Some wc implementations pad the count with blanks.
  total_files=${total_files//[[:space:]]/}
  echo "$total_files files to process"
  files_per_core=$(( total_files / MACHINE_CORES + 1 ))
  echo "$files_per_core files per core"

  # Remove chunks left by an earlier run so they are not processed again.
  rm -f -- "${WORK:?}"/split_list*
  split -l "$files_per_core" "$combined" "$WORK/split_list" || return 1

  echo "Spawning tasks..."
  # Iterate over the chunks that actually exist. There can be fewer chunks
  # than cores, and a job must never be started with an empty -S argument.
  for chunk in "$WORK"/split_list*; do
    [[ -e "$chunk" ]] || continue   # glob matched nothing (empty list)
    if [ "$MFCC_FEATURES" ]; then
      "${HTK_PREFIX}HCopy" -T 1 -C "$WORK/$HCOPY_CONFIG" -S "$chunk" &
    else
      "${AIMCOPY_PREFIX}AIMCopy" -C "$WORK/$AIMCOPY_CONFIG" -S "$chunk" &
    fi
    JOB_PIDS+=("$!")
  done
}

spawn_extraction_jobs

echo "Waiting for tasks to complete..."
# Wait for each job individually so that failures are reported rather
# than silently ignored.
failed_jobs=0
for pid in "${JOB_PIDS[@]}"; do
  wait "$pid" || failed_jobs=$(( failed_jobs + 1 ))
done
if (( failed_jobs > 0 )); then
  echo "Warning: $failed_jobs feature extraction task(s) failed" >&2
fi
echo "Done!"