diff trunk/scripts/gen_features.sh @ 306:42d154648b93

-Modified AIMCopy for slices experiment -Added gen_features script to just generate features for a given SNR
author tomwalters
date Thu, 25 Feb 2010 23:08:08 +0000
parents
children a9e7737cad19
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trunk/scripts/gen_features.sh	Thu Feb 25 23:08:08 2010 +0000
@@ -0,0 +1,195 @@
+#!/bin/bash
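+#
+# Run the syllables database 'spider pattern' through AIM-C to generate
+# features at a single noise level.
+#
+# Usage: gen_features.sh <noise_level_db>
+#   The argument is written to the AIMCopy config file as noise.level_db and,
+#   with any spaces removed, is appended to the work directory name.
+#
+# Copyright 2009-2010 University of Cambridge
+# Author: Thomas Walters <tom@acousticscale.org>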
+
+# Source directory for all the sound files
+SOUND_SOURCE="/Users/Tom/Documents/Work/PhD/HTK-AIM/Sounds/"
+
+# Location of the AIMCopy binary if not in the path
+AIMCOPY_PREFIX="../build/darwin-release/"
+
+MACHINE_CORES=2
+
+# Names of various internal files and directories. 
+# Rename here if you don't like them for some reason.
+SYLLIST=syls
+TRAIN_SPEAKERS=train_speakers
+TEST_SPEAKERS=test_speakers
+WORK_PREFIX=snr
+TRAIN_LIST=train.list
+TEST_LIST=test.list
+COMBINED_LIST=combined.list
+FEATURES_DIR=features
+AIMCOPY_CONFIG=aimcopy.cfg
+AIMCOPY_LOG_TRAIN=aimcopy_train.log
+AIMCOPY_LOG_TEST=aimcopy_test.log
+
+
+# The vowels and consonants that make up the CNBH database
+VOWELS="a e i o u"
+CONSONANTS="b d f g h k l m n p r s t v w x y z"
+SILENCE="sil"
+
+# Work directory for this run: the prefix followed by the first command-line
+# argument with all spaces removed. Parameter expansion is used instead of
+# echo | tr so the argument is never subject to word splitting or globbing.
+WORK="${WORK_PREFIX}${1// /}"
+mkdir -p "$WORK"
+
+# Make a copy of this script in the experimental directory. The quotes keep
+# this working when the script is invoked through a path containing spaces.
+cp -p "$0" "$WORK"
+
+
+echo "Generating syllable list..."
+
+# For every vowel emit the doubled vowel followed by the vowel paired with
+# each consonant, and write the sorted result to the syllable list.
+# The loop output is piped straight into sort rather than appended to a
+# temporary file: an appended temporary file that is never removed would
+# accumulate duplicate entries each time the same work directory is reused.
+# VOWELS and CONSONANTS are deliberately unquoted so they split into words.
+for v in $VOWELS; do
+  echo "$v$v"
+  for c in $CONSONANTS; do
+    echo "$v$c"
+  done
+done | sort > "$WORK/$SYLLIST"
+
+
+# Generate a list of filenames from the spoke pattern
+cat <<"EOF" > $WORK/$TRAIN_SPEAKERS
+170.9p112.2s100.0t+000itd
+171.0p112.8s100.0t+000itd	
+171.3p111.7s100.0t+000itd	
+171.5p113.1s100.0t+000itd	
+171.9p111.5s100.0t+000itd	
+172.1p113.0s100.0t+000itd	
+172.4p111.9s100.0t+000itd	
+172.5p112.5s100.0t+000itd
+EOF
+
+cat <<"EOF" > $WORK/$TEST_SPEAKERS
+137.0p104.3s100.0t+000itd	
+141.3p135.4s100.0t+000itd	
+145.5p106.3s100.0t+000itd	
+148.8p128.8s100.0t+000itd	
+151.6p83.9s100.0t+000itd	
+153.0p108.1s100.0t+000itd	
+155.5p123.5s100.0t+000itd	
+156.7p90.6s100.0t+000itd	
+159.5p109.6s100.0t+000itd	
+161.1p119.4s100.0t+000itd	
+161.1p96.8s100.0t+000itd	
+163.4p157.6s100.0t+000itd	
+164.7p110.8s100.0t+000itd	
+164.9p102.1s100.0t+000itd	
+165.6p144.0s100.0t+000itd	
+165.7p116.2s100.0t+000itd	
+167.4p133.5s100.0t+000itd	
+167.8p106.5s100.0t+000itd	
+168.6p111.6s100.0t+000itd	
+168.9p125.4s100.0t+000itd	
+169.0p114.0s100.0t+000itd	
+170.0p109.7s100.0t+000itd	
+170.1p119.5s100.0t+000itd	
+171.0p115.5s100.0t+000itd	
+171.7p112.3s100.0t+000itd	
+172.4p109.3s100.0t+000itd	
+173.3p105.6s100.0t+000itd	
+173.5p115.0s100.0t+000itd	
+174.5p100.6s100.0t+000itd	
+174.5p110.6s100.0t+000itd	
+174.9p113.0s100.0t+000itd	
+175.7p118.5s100.0t+000itd	
+176.1p94.5s100.0t+000itd	
+178.0p108.5s100.0t+000itd	
+178.1p87.6s100.0t+000itd	
+178.8p123.6s100.0t+000itd	
+179.0p113.9s100.0t+000itd	
+180.4p80.1s100.0t+000itd	
+183.0p105.7s100.0t+000itd	
+183.0p130.4s100.0t+000itd	
+184.8p115.1s100.0t+000itd	
+188.1p139.2s100.0t+000itd	
+189.6p102.1s100.0t+000itd	
+192.7p116.7s100.0t+000itd	
+194.5p150.4s100.0t+000itd	
+198.1p97.9s100.0t+000itd	
+202.7p118.6s100.0t+000itd	
+208.6p93.2s100.0t+000itd	
+215.2p121.0s100.0t+000itd
+EOF
+
+# Construct the conversion scripts for AIMCopy (or HCopy) for the train and
+# test sets. Every output line has the form
+#   <source .wav file>  <destination .htk file>
+# with one line per (syllable, speaker) pair.
+
+echo "Generating train and test scripts..."
+
+# Opening a list with '>' truncates any copy left over from an earlier run,
+# so no separate existence test and removal is needed. File descriptors 3
+# (train) and 5 (test) stay open for the whole loop so that the lists are
+# not reopened for every line written.
+exec 3> "$WORK/$TRAIN_LIST"
+exec 5> "$WORK/$TEST_LIST"
+# 'read' strips leading and trailing whitespace, which removes the trailing
+# tabs present on some speaker entries. The '|| [ -n ... ]' part keeps a
+# final line that lacks a newline, and blank lines are skipped.
+while read -r syllable || [ -n "$syllable" ]; do
+  [ -n "$syllable" ] || continue
+  while read -r speaker || [ -n "$speaker" ]; do
+    [ -n "$speaker" ] || continue
+    SOURCE_FILENAME=$SOUND_SOURCE/$syllable/${syllable}${speaker}.wav
+    DEST_FILENAME=$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}
+    echo "$SOURCE_FILENAME  ${DEST_FILENAME}.htk" >&3
+  done < "$WORK/$TRAIN_SPEAKERS"
+  while read -r speaker || [ -n "$speaker" ]; do
+    [ -n "$speaker" ] || continue
+    SOURCE_FILENAME=$SOUND_SOURCE/$syllable/${syllable}${speaker}.wav
+    DEST_FILENAME=$WORK/$FEATURES_DIR/$syllable/${syllable}${speaker}
+    echo "$SOURCE_FILENAME  ${DEST_FILENAME}.htk" >&5
+  done < "$WORK/$TEST_SPEAKERS"
+done < "$WORK/$SYLLIST"
+exec 3>&-
+exec 5>&-
+
+# Make the necessary directories for the computed features
+echo "Making directory structure..."
+mkdir $WORK/$FEATURES_DIR
+for syllable in $(cat $WORK/$SYLLIST); do
+  mkdir $WORK/$FEATURES_DIR/$syllable
+done
+
+# Write the AIMCopy config file
+echo "Creating AIMCopy config file..."
+cat <<"EOF" > $WORK/$AIMCOPY_CONFIG
+input.buffersize=480
+sai.frame_period_ms=10.0
+slice.normalize=true
+ssi.pitch_cutoff=true
+ssi.weight_by_cutoff=false
+ssi.weight_by_scaling=true
+ssi.log_cycles_axis=true
+EOF
+echo "noise.level_db=$1" >> $WORK/$AIMCOPY_CONFIG
+ 
+echo "Splitting data files..."
+cat $WORK/${TRAIN_LIST} $WORK/${TEST_LIST} > $WORK/${COMBINED_LIST}
+
+total_cores=$(($MACHINE_CORES))
+echo -n $total_cores
+echo " cores available"
+total_files=`cat $WORK/${COMBINED_LIST} | wc -l | sed 's/ *//'`
+echo -n $total_files
+echo " files to process"
+files_per_core=$(($total_files/$total_cores+1))
+echo -n $files_per_core
+echo " files per core"
+split -l $files_per_core $WORK/${COMBINED_LIST} $WORK/split_list
+splits=( $(ls $WORK/split_list*))
+element=0
+echo "Spawning tasks..."
+for ((c=1;c<=$MACHINE_CORES;c+=1)); do
+  s=${splits[$element]}
+  ${AIMCOPY_PREFIX}AIMCopy -C $WORK/$AIMCOPY_CONFIG -S $s &
+  let element=element+1
+done
+
+echo "Waiting for tasks to complete..."
+wait
+echo "Done!"