annotate trunk/scripts/gen_features.sh @ 314:f8aa25bd6085

-New experimental system with pre-noised signals.
author tomwalters
date Thu, 04 Mar 2010 11:01:39 +0000
parents 66d5c4d2d90c
children 66a23c0545b6
rev   line source
#!/bin/bash
#
# Run the syllables database 'spider pattern' through AIM-C
#
# Usage: gen_features.sh [NOISE_LEVEL_DB]
#   The optional first argument is appended (with whitespace removed) to
#   WORK_PREFIX to name the working directory.
#
# Copyright 2009-2010 University of Cambridge
# Author: Thomas Walters <tom@acousticscale.org>

# Source directory for all the sound files
#SOUND_SOURCE="/Users/Tom/Documents/Work/PhD/HTk-AIM/Sounds/"
#SOUND_SOURCE="/media/sounds-database/cnbh-sounds/"
SOUND_SOURCE="/mnt/sounds/snr_+0dB/"

# Location of the AIMCopy binary if not in the path
AIMCOPY_PREFIX="../aimc-read-only/build/posix-release/"

# Upper bound on the number of AIMCopy processes run in parallel
MACHINE_CORES=8

# Names of various internal files and directories.
# Rename here if you don't like them for some reason.
SYLLIST=syls
TRAIN_SPEAKERS=train_speakers
TEST_SPEAKERS=test_speakers
WORK_PREFIX=snr
TRAIN_LIST=train.list
TEST_LIST=test.list
COMBINED_LIST=combined.list
FEATURES_DIR=features
AIMCOPY_CONFIG=aimcopy.cfg
AIMCOPY_LOG_TRAIN=aimcopy_train.log
AIMCOPY_LOG_TEST=aimcopy_test.log


# The vowels and consonants that make up the CNBH database
VOWELS="a e i o u"
CONSONANTS="b d f g h k l m n p r s t v w x y z"
SILENCE="sil"

# Name the working directory after the first argument. The whitespace is
# stripped with a parameter expansion so the argument is never word-split,
# glob-expanded or mistaken for an option of 'echo'.
noise_level=${1:-}
WORK=${WORK_PREFIX}${noise_level//[[:space:]]/}
if ! mkdir -p -- "$WORK"; then
  echo "Cannot create working directory '$WORK'" >&2
  exit 1
fi

# Make a copy of this script in the experimental directory
cp -p -- "$0" "$WORK" || echo "Warning: could not copy $0 into $WORK" >&2
tomwalters@306 43
tomwalters@306 44
echo "Generating syllable list..."

# Remove any unsorted temporary list left behind by an earlier run.
rm -f -- "$WORK/$SYLLIST.tmp"

# Each vowel is paired with itself (e.g. "aa") and with every consonant in
# both orders (vowel-consonant and consonant-vowel). The output is piped
# straight into sort, so the sorted list is rebuilt from scratch on every
# run and no temporary file is needed.
# $VOWELS and $CONSONANTS are deliberately unquoted: they are
# space-separated lists that must be split into words.
for v in $VOWELS; do
  echo "$v$v"
  for c in $CONSONANTS; do
    echo "$v$c"
    echo "$c$v"
  done
done | sort > "$WORK/$SYLLIST"
tomwalters@306 58
tomwalters@306 59
# Write the list of training speakers from the spoke pattern, one
# identifier per line. The quoted 'EOF' delimiter keeps the text literal.
cat <<'EOF' > "$WORK/$TRAIN_SPEAKERS"
170.9p112.2s100.0t+000itd
171.0p112.8s100.0t+000itd
171.3p111.7s100.0t+000itd
171.5p113.1s100.0t+000itd
171.9p111.5s100.0t+000itd
172.1p113.0s100.0t+000itd
172.4p111.9s100.0t+000itd
172.5p112.5s100.0t+000itd
EOF
tomwalters@306 71
# Write the list of test speakers, one identifier per line.
# The quoted 'EOF' delimiter keeps the text literal.
cat <<'EOF' > "$WORK/$TEST_SPEAKERS"
137.0p104.3s100.0t+000itd
141.3p135.4s100.0t+000itd
145.5p106.3s100.0t+000itd
148.8p128.8s100.0t+000itd
151.6p83.9s100.0t+000itd
153.0p108.1s100.0t+000itd
155.5p123.5s100.0t+000itd
156.7p90.6s100.0t+000itd
159.5p109.6s100.0t+000itd
161.1p119.4s100.0t+000itd
161.1p96.8s100.0t+000itd
163.4p157.6s100.0t+000itd
164.7p110.8s100.0t+000itd
164.9p102.1s100.0t+000itd
165.6p144.0s100.0t+000itd
165.7p116.2s100.0t+000itd
167.4p133.5s100.0t+000itd
167.8p106.5s100.0t+000itd
168.6p111.6s100.0t+000itd
168.9p125.4s100.0t+000itd
169.0p114.0s100.0t+000itd
170.0p109.7s100.0t+000itd
170.1p119.5s100.0t+000itd
171.0p115.5s100.0t+000itd
171.7p112.3s100.0t+000itd
172.4p109.3s100.0t+000itd
173.3p105.6s100.0t+000itd
173.5p115.0s100.0t+000itd
174.5p100.6s100.0t+000itd
174.5p110.6s100.0t+000itd
174.9p113.0s100.0t+000itd
175.7p118.5s100.0t+000itd
176.1p94.5s100.0t+000itd
178.0p108.5s100.0t+000itd
178.1p87.6s100.0t+000itd
178.8p123.6s100.0t+000itd
179.0p113.9s100.0t+000itd
180.4p80.1s100.0t+000itd
183.0p105.7s100.0t+000itd
183.0p130.4s100.0t+000itd
184.8p115.1s100.0t+000itd
188.1p139.2s100.0t+000itd
189.6p102.1s100.0t+000itd
192.7p116.7s100.0t+000itd
194.5p150.4s100.0t+000itd
198.1p97.9s100.0t+000itd
202.7p118.6s100.0t+000itd
208.6p93.2s100.0t+000itd
215.2p121.0s100.0t+000itd
EOF
tomwalters@306 123
# Construct the conversion scripts for AIMCopy (or HCopy) for the train
# and test sets. Each line has the form
#   <source .wav file> <destination .htk file>
# with one line per (syllable, speaker) pair, ordered by syllable and then
# by speaker.

echo "Generating train and test scripts..."

#######################################
# Print the conversion-script lines for one set of speakers.
# Globals:   SOUND_SOURCE, WORK, SYLLIST, FEATURES_DIR (read)
# Arguments: $1 - file listing one speaker identifier per line
# Outputs:   one "<wav> <htk>" line per syllable/speaker pair on stdout
#######################################
write_copy_list() {
  local speakers_file=$1
  local syllable speaker
  while IFS= read -r syllable; do
    [[ -n $syllable ]] || continue
    while IFS= read -r speaker; do
      [[ -n $speaker ]] || continue
      # ${SOUND_SOURCE%/} drops a trailing slash so paths contain no '//'.
      printf '%s/%s/%s%s.wav %s/%s/%s%s.htk\n' \
        "${SOUND_SOURCE%/}" "$syllable" "$syllable" "$speaker" \
        "$WORK/$FEATURES_DIR" "$syllable" "$syllable" "$speaker"
    done < "$speakers_file"
  done < "$WORK/$SYLLIST"
}

# '>' truncates, so lists left over from an earlier run are replaced.
write_copy_list "$WORK/$TRAIN_SPEAKERS" > "$WORK/$TRAIN_LIST"
write_copy_list "$WORK/$TEST_SPEAKERS" > "$WORK/$TEST_LIST"
tomwalters@306 153
# Make the necessary directories for the computed features: one
# sub-directory per syllable under the features directory.
echo "Making directory structure..."
# -p makes this step safe to repeat when the directories already exist.
mkdir -p -- "$WORK/$FEATURES_DIR"
while IFS= read -r syllable; do
  [[ -n $syllable ]] || continue
  mkdir -p -- "$WORK/$FEATURES_DIR/$syllable"
done < "$WORK/$SYLLIST"
tomwalters@306 160
# Write the AIMCopy config file: a fixed set of module parameters followed
# by the noise level taken from the script's first argument.
echo "Creating AIMCopy config file..."
{
  # The quoted 'EOF' delimiter keeps the settings literal (no expansion).
  cat <<'EOF'
input.buffersize=480
sai.frame_period_ms=10.0
slice.normalize=true
ssi.weight_by_cutoff=false
ssi.weight_by_scaling=true
ssi.log_cycles_axis=true
EOF
  printf 'noise.level_db=%s\n' "${1:-}"
} > "$WORK/$AIMCOPY_CONFIG"
tomwalters@306 172
# Split the combined train+test list into at most MACHINE_CORES chunks and
# run one background AIMCopy process per chunk.
echo "Splitting data files..."
cat "$WORK/$TRAIN_LIST" "$WORK/$TEST_LIST" > "$WORK/$COMBINED_LIST"

total_cores=$((MACHINE_CORES))
echo "$total_cores cores available"
total_files=$(wc -l < "$WORK/$COMBINED_LIST")
# Arithmetic expansion strips the padding some wc implementations emit.
total_files=$((total_files))
echo "$total_files files to process"
if ((total_files == 0)); then
  echo "No files to process" >&2
  exit 1
fi

# Ceiling division: never more than total_cores chunks, and every core is
# used when the file count divides evenly.
files_per_core=$(((total_files + total_cores - 1) / total_cores))
echo "$files_per_core files per core"

# Drop chunks from an earlier run so they are not processed again.
rm -f -- "${WORK:?}"/split_list*
split -l "$files_per_core" "$WORK/$COMBINED_LIST" "$WORK/split_list"
splits=("$WORK"/split_list*)

echo "Spawning tasks..."
pids=()
# Iterate over the chunks that actually exist; split may produce fewer
# than MACHINE_CORES of them.
for s in "${splits[@]}"; do
  "${AIMCOPY_PREFIX}AIMCopy" -C "$WORK/$AIMCOPY_CONFIG" -S "$s" &
  pids+=("$!")
done

echo "Waiting for tasks to complete..."
failed=0
# Wait on each PID in turn: a bare 'wait' discards the jobs' exit statuses.
for pid in "${pids[@]}"; do
  wait "$pid" || failed=$((failed + 1))
done
if ((failed > 0)); then
  echo "$failed AIMCopy task(s) failed" >&2
  exit 1
fi
echo "Done!"