changeset 365:2f4530363f7a

- Added as-yet-unfinished support for a proper configuration file format - Added a couple of Python scripts to generate HMM configuration files - Variable name changes and other cosmetic things - Added the option for the noise generation to do pink noise (untested)
author tomwalters
date Thu, 12 Aug 2010 11:28:11 +0000
parents 5a12172dcb73
children 43bada33b705
files trunk/experiments/scripts/cnbh-syllables/feature_generation/nap_profile_features.aimcopycfg trunk/experiments/scripts/cnbh-syllables/run_training_and_testing/gen_hhed_script.py trunk/experiments/scripts/cnbh-syllables/run_training_and_testing/gen_hmmproto.py trunk/src/Modules/Features/ModuleGaussians.cc trunk/src/Modules/Features/ModuleGaussians.h trunk/src/Modules/SNR/ModuleNoise.cc trunk/src/Modules/SNR/ModuleNoise.h trunk/src/Support/ModuleTree.cc trunk/src/Support/ModuleTree.h
diffstat 9 files changed, 333 insertions(+), 43 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trunk/experiments/scripts/cnbh-syllables/feature_generation/nap_profile_features.aimcopycfg	Thu Aug 12 11:28:11 2010 +0000
@@ -0,0 +1,32 @@
+module1.name = FileInput
+module1.id = file_input
+module1.parameters = <<<ENDPARAMS
+input.buffersize = 512
+ENDPARAMS
+module1.child1 = GammatoneFilterbank
+
+module2.name = GammatoneFilterbank
+module2.id = gt
+module2.child1 = StandardNAP
+module2.child2 = SmoothNAP
+
+module3.name = SmoothNAP
+module3.id = nap
+module3.parameters = <<<ENDPARAMS
+nap.lowpass_cutoff = 100.0
+ENDPARAMS
+module3.child1 = SmoothNAPProfile
+
+module4.name = SmoothNAPProfile
+module4.id = slice
+module4.parameters = <<<ENDPARAMS
+slice.all = true
+ENDPARAMS
+module4.child1 = SmoothNAPFeatures
+
+module5.name = SmoothNAPFeatures
+module5.id = gaussians
+module5.child1 = SmoothNAPOutput
+
+module6.name = SmoothNAPOutput
+module6.id = htk_out
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trunk/experiments/scripts/cnbh-syllables/run_training_and_testing/gen_hhed_script.py	Thu Aug 12 11:28:11 2010 +0000
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+gen_hhed_script.py
+
+Created by Thomas Walters on 2010-07-08.
+"""
+
+import sys
+import getopt
+
+
+help_message = '''
+Generate an HTK HHed script to change the number of means in the output
+distribution to num_means for the emitting states of an HMM with
+total_hmm_states states
+'''
+
+
+class Usage(Exception):
+  def __init__(self, msg):
+    self.msg = msg
+
+
+def main(argv=None):
+  if argv is None:
+    argv = sys.argv
+  try:
+    try:
+      opts, args = getopt.getopt(argv[1:], "hn:s:v", ["help", "num_means=", "total_hmm_states="])
+    except getopt.error, msg:
+      raise Usage(msg)
+  
+    # defaults
+    num_means = 3
+    total_hmm_states = 6
+    
+    # option processing
+    for option, value in opts:
+      if option == "-v":
+        verbose = True
+      if option in ("-h", "--help"):
+        raise Usage(help_message)
+      if option in ("-n", "--num_means"):
+        num_means = int(value)
+      if option in ("-s", "--total_hmm_states"):
+        total_hmm_states = int(value)
+  
+  except Usage, err:
+    print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
+    print >> sys.stderr, "\t for help use --help"
+    return 2
+
+  out_string = ""
+  for state in xrange(2, total_hmm_states):
+    out_string += ("MU " + str(num_means) + " {*.state[" + str(state) + "].mix} ")
+  print out_string
+
+if __name__ == "__main__":
+  sys.exit(main())
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trunk/experiments/scripts/cnbh-syllables/run_training_and_testing/gen_hmmproto.py	Thu Aug 12 11:28:11 2010 +0000
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+gen_hmmproto.py
+
+Created by Thomas Walters on 2010-07-08.
+"""
+
+import sys
+import getopt
+
+
+help_message = '''
+Generate an HTK HMM prototype with an input_size dimensional input and
+total_hmm_states total HMM states (including start and end state)
+The feature type string can be specified in feature_type
+'''
+
+
+class Usage(Exception):
+  def __init__(self, msg):
+    self.msg = msg
+
+
+def main(argv=None):
+  if argv is None:
+    argv = sys.argv
+  try:
+    try:
+      opts, args = getopt.getopt(argv[1:], "hi:s:t:v", ["help", "input_size=", "total_hmm_states=", "feature_type="])
+    except getopt.error, msg:
+      raise Usage(msg)
+  
+    # defaults
+    input_size = 39
+    total_hmm_states = 6
+    feature_type = "MFCC_0_D_A"
+    
+    # option processing
+    for option, value in opts:
+      if option == "-v":
+        verbose = True
+      if option in ("-h", "--help"):
+        raise Usage(help_message)
+      if option in ("-i", "--input_size"):
+        input_size = int(value)
+      if option in ("-s", "--total_hmm_states"):
+        total_hmm_states = int(value)
+      if option in ("-t", "--feature_type"):
+        feature_type = value
+  
+  except Usage, err:
+    print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
+    print >> sys.stderr, "\t for help use --help"
+    return 2
+    
+  print "~o<VECSIZE> " + str(input_size) + "<NULLD>" + "<" + feature_type + ">"
+  print "~h \"proto\""
+  print "<BEGINHMM>"
+  print "<NUMSTATES> " + str(total_hmm_states)
+  for state in xrange(2, total_hmm_states):
+    print "<State> " + str(state)
+    print "<Mean>" + str(input_size)
+    print "0 " * input_size
+    print "<Variance> " + str(input_size)
+    print "1.0 " * input_size
+  print
+  print "<TransP> " + str(total_hmm_states)
+  print "0.0 1.0 " + "0.0 " * (total_hmm_states - 2)
+  for state in xrange(1, total_hmm_states - 1):
+    print ("0.0 " * state) + "0.6 0.4 " + "0.0 " * (total_hmm_states -2 - state)
+  print "0.0 " * total_hmm_states
+  print "<EndHMM>"
+
+if __name__ == "__main__":
+  sys.exit(main())
--- a/trunk/src/Modules/Features/ModuleGaussians.cc	Thu Aug 12 09:15:49 2010 +0000
+++ b/trunk/src/Modules/Features/ModuleGaussians.cc	Thu Aug 12 11:28:11 2010 +0000
@@ -38,17 +38,17 @@
   module_type_ = "features";
   module_version_ = "$Id$";
 
-  m_iParamNComp = parameters_->DefaultInt("features.gaussians.ncomp", 4);
-  m_fParamVar = parameters_->DefaultFloat("features.gaussians.var", 115.0);
-  m_fParamPosteriorExp =
-    parameters_->DefaultFloat("features.gaussians.posterior_exp", 6.0);
-  m_iParamMaxIt = parameters_->DefaultInt("features.gaussians.maxit", 250);
+  m_iParamNComp = parameters_->DefaultInt("gaussians.ncomp", 4);
+  m_fParamVar = parameters_->DefaultFloat("gaussians.var", 115.0);
+  m_fParamPosteriorExp = parameters_->DefaultFloat("gaussians.posterior_exp",
+                                                   6.0);
+  m_iParamMaxIt = parameters_->DefaultInt("gaussians.maxit", 250);
 
   // The parameters system doesn't support tiny numbers well, to define this
   // variable as a string, then convert it to a float afterwards
-  parameters_->DefaultString("features.gaussians.priors_converged", "1e-7");
-  m_fParamPriorsConverged =
-    parameters_->GetFloat("features.gaussians.priors_converged");
+  parameters_->DefaultString("gaussians.priors_converged", "1e-7");
+  priors_converged_ = parameters_->GetFloat("gaussians.priors_converged");
+  output_positions_ = parameters_->DefaultBool("gaussians.positions", false);
 }
 
 ModuleGaussians::~ModuleGaussians() {
@@ -60,8 +60,9 @@
 
   // Assuming the number of channels is greater than twice the number of
   // Gaussian components, this is ok
+  output_component_count_ = 1; // Energy component
   if (input.channel_count() >= 2 * m_iParamNComp) {
-    output_.Initialize(m_iParamNComp, 1, input.sample_rate());
+    output_component_count_ += (m_iParamNComp - 1);
   } else {
     LOG_ERROR(_T("Too few channels in filterbank to produce sensible "
                  "Gaussian features. Either increase the number of filterbank"
@@ -69,6 +70,12 @@
     return false;
   }
 
+  if (output_positions_) {
+    output_component_count_ += m_iParamNComp;
+  }
+
+  output_.Initialize(output_component_count_, 1, input.sample_rate());
+
   m_iNumChannels = input.channel_count();
   m_pSpectralProfile.resize(m_iNumChannels, 0.0f);
 
@@ -90,16 +97,12 @@
     return;
   }
   // Calculate spectral profile
-  for (int iChannel = 0;
-       iChannel < input.channel_count();
-       ++iChannel) {
-    m_pSpectralProfile[iChannel] = 0.0f;
-    for (int iSample = 0;
-         iSample < input.buffer_length();
-         ++iSample) {
-      m_pSpectralProfile[iChannel] += input[iChannel][iSample];
+  for (int ch = 0; ch < input.channel_count(); ++ch) {
+    m_pSpectralProfile[ch] = 0.0f;
+    for (int i = 0; i < input.buffer_length(); ++i) {
+      m_pSpectralProfile[ch] += input[ch][i];
     }
-    m_pSpectralProfile[iChannel] /= static_cast<float>(input.buffer_length());
+    m_pSpectralProfile[ch] /= static_cast<float>(input.buffer_length());
   }
 
   float spectral_profile_sum = 0.0f;
@@ -107,36 +110,35 @@
     spectral_profile_sum += m_pSpectralProfile[i];
   }
 
+  // Set the last component of the feature vector to be the log energy
   float logsum = log(spectral_profile_sum);
   if (!isinf(logsum)) {
-    output_.set_sample(m_iParamNComp - 1, 0, logsum);
+    output_.set_sample(output_component_count_ - 1, 0, logsum);
   } else {
-    output_.set_sample(m_iParamNComp - 1, 0, -1000.0);
+    output_.set_sample(output_component_count_ - 1, 0, -1000.0);
   }
 
-  for (int iChannel = 0;
-       iChannel < input.channel_count();
-       ++iChannel) {
-    m_pSpectralProfile[iChannel] = pow(m_pSpectralProfile[iChannel], 0.8f);
+  for (int ch = 0; ch < input.channel_count(); ++ch) {
+    m_pSpectralProfile[ch] = pow(m_pSpectralProfile[ch], 0.8);
   }
 
   RubberGMMCore(2, true);
 
-  float fMean1 = m_pMu[0];
-  float fMean2 = m_pMu[1];
+  float mean1 = m_pMu[0];
+  float mean2 = m_pMu[1];
   // LOG_INFO(_T("Orig. mean 0 = %f"), m_pMu[0]);
   // LOG_INFO(_T("Orig. mean 1 = %f"), m_pMu[1]);
   // LOG_INFO(_T("Orig. prob 0 = %f"), m_pA[0]);
   // LOG_INFO(_T("Orig. prob 1 = %f"), m_pA[1]);
 
-  float fA1 = 0.05 * m_pA[0];
-  float fA2 = 1.0 - 0.25 * m_pA[1];
+  float a1 = 0.05 * m_pA[0];
+  float a2 = 1.0 - 0.25 * m_pA[1];
 
   // LOG_INFO(_T("fA1 = %f"), fA1);
   // LOG_INFO(_T("fA2 = %f"), fA2);
 
-  float fGradient = (fMean2 - fMean1) / (fA2 - fA1);
-  float fIntercept = fMean2 - fGradient * fA2;
+  float gradient = (mean2 - mean1) / (a2 - a1);
+  float intercept = mean2 - gradient * a2;
 
   // LOG_INFO(_T("fGradient = %f"), fGradient);
   // LOG_INFO(_T("fIntercept = %f"), fIntercept);
@@ -144,7 +146,7 @@
   for (int i = 0; i < m_iParamNComp; ++i) {
     m_pMu[i] = (static_cast<float>(i)
                 / (static_cast<float>(m_iParamNComp) - 1.0f))
-                * fGradient + fIntercept;
+                * gradient + intercept;
                 // LOG_INFO(_T("mean %d = %f"), i, m_pMu[i]);
   }
 
@@ -154,6 +156,7 @@
 
   RubberGMMCore(m_iParamNComp, false);
 
+  // Amplitudes first
   for (int i = 0; i < m_iParamNComp - 1; ++i) {
     if (!isnan(m_pA[i])) {
       output_.set_sample(i, 0, m_pA[i]);
@@ -162,6 +165,19 @@
     }
   }
 
+  // Then means if required. Amplitudes fill slots 0..m_iParamNComp-2, so the
+  // means go in the next m_iParamNComp slots; the last slot is the log energy.
+  if (output_positions_) {
+    for (int c = 0; c < m_iParamNComp; ++c) {
+      // Guard on the mean being written (index c), not on the output slot.
+      if (!isnan(m_pMu[c])) {
+        output_.set_sample(m_iParamNComp - 1 + c, 0, m_pMu[c]);
+      } else {
+        output_.set_sample(m_iParamNComp - 1 + c, 0, 0.0f);
+      }
+    }
+  }
+
   PushOutput();
 }
 
@@ -169,12 +185,12 @@
   int iSizeX = m_iNumChannels;
 
   // Normalise the spectral profile
-  float fSpectralProfileTotal = 0.0f;
+  float SpectralProfileTotal = 0.0f;
   for (int iCount = 0; iCount < iSizeX; iCount++) {
-    fSpectralProfileTotal += m_pSpectralProfile[iCount];
+    SpectralProfileTotal += m_pSpectralProfile[iCount];
   }
   for (int iCount = 0; iCount < iSizeX; iCount++) {
-    m_pSpectralProfile[iCount] /= fSpectralProfileTotal;
+    m_pSpectralProfile[iCount] /= SpectralProfileTotal;
   }
 
   if (bDoInit) {
@@ -200,12 +216,12 @@
       pP_mod_X[i] = 0.0f;
     }
 
-    for (int i = 0; i < iNComponents; i++) {
+    for (int c = 0; c < iNComponents; c++) {
       for (int iCount = 0; iCount < iSizeX; iCount++) {
         pP_mod_X[iCount] += 1.0f / sqrt(2.0f * M_PI * m_fParamVar)
                             * exp((-0.5f)
-                            * pow(static_cast<float>(iCount+1) - m_pMu[i], 2)
-                            / m_fParamVar) * m_pA[i];
+                            * pow(static_cast<float>(iCount+1) - m_pMu[c], 2)
+                            / m_fParamVar) * m_pA[c];
       }
     }
 
@@ -251,7 +267,7 @@
     }
     fPrdist /= iNComponents;
 
-    if (fPrdist < m_fParamPriorsConverged) {
+    if (fPrdist < priors_converged_) {
       // LOG_INFO("Converged!");
       break;
     }
--- a/trunk/src/Modules/Features/ModuleGaussians.h	Thu Aug 12 09:15:49 2010 +0000
+++ b/trunk/src/Modules/Features/ModuleGaussians.h	Thu Aug 12 11:28:11 2010 +0000
@@ -75,7 +75,15 @@
 
   /*! \brief convergence criterion
    */
-  float m_fParamPriorsConverged;
+  float priors_converged_;
+
+  /*! \brief Output component positions as well as amplitudes
+   */
+  bool output_positions_;
+
+  /*! \brief Total number of values in the output
+   */
+  int output_component_count_;
 
   /*! \brief The amplitudes of the components (priors)
    */
--- a/trunk/src/Modules/SNR/ModuleNoise.cc	Thu Aug 12 09:15:49 2010 +0000
+++ b/trunk/src/Modules/SNR/ModuleNoise.cc	Thu Aug 12 11:28:11 2010 +0000
@@ -37,6 +37,7 @@
   module_type_ = "snr";
   module_version_ = "$Id$";
 
+  pink_ = parameters_->DefaultBool("noise.pink", true);
   // Noise level relative to unit-variance Gaussian noise (ie. 0dB will give a
   // noise with an RMS level of 1.0)
   float snr_db = parameters_->DefaultFloat("noise.level_db", 0.0f);
@@ -54,11 +55,14 @@
   channel_count_ = input.channel_count();
 
   output_.Initialize(input);
+  ResetInternal();
   return true;
 }
 
 void ModuleNoise::ResetInternal() {
-
+  s0_ = 0.0f;
+  s1_ = 0.0f;
+  s2_ = 0.0f;
 }
 
 void ModuleNoise::Process(const SignalBank &input) {
@@ -81,7 +85,20 @@
   for (int c = 0; c < input.channel_count(); ++c) {
     for (int i = 0; i < input.buffer_length(); ++i) {
       float s = input[c][i];
-      s += (multiplier_ * gaussian_variate_());
+      float n =  gaussian_variate_();
+      if (pink_) {
+        // Pink noise filter coefficients from 
+        // ccrma.stanford.edu/~jos/sasp/Example_Synthesis_1_F_Noise.html
+        // Smith, Julius O. Spectral Audio Signal Processing, October 2008
+        // Draft, http://ccrma.stanford.edu/~jos/sasp/, online book, 
+        // accessed 2010-02-27.
+        float f = 0.049922035 * n + s0_;
+        s0_ = -0.095993537 * n - (-2.494956002 * f) + s1_;
+        s1_ = 0.050612699 * n - (2.017265875 * f) + s2_;
+        s2_ = -0.004408786 * n - (-0.522189400 * f);
+        n = f;
+      }
+      s += multiplier_ * n;
       output_.set_sample(c, i, s);
     }
   }
--- a/trunk/src/Modules/SNR/ModuleNoise.h	Thu Aug 12 09:15:49 2010 +0000
+++ b/trunk/src/Modules/SNR/ModuleNoise.h	Thu Aug 12 11:28:11 2010 +0000
@@ -53,10 +53,18 @@
   int buffer_length_;
   int channel_count_;
 
+  // True to generate pink noise, otherwise white noise
+  bool pink_;
+
+  // Filter state variables
+  float s0_;
+  float s1_;
+  float s2_;
+
   float multiplier_;
 
-  // Random number generator which yeilds Gaussian-distributed values by
-  // The generator is a Mersenne twister
+  // Mersenne twister random number generator fed into a transform which 
+  // yields Gaussian-distributed values
   boost::variate_generator<boost::mt19937,
                            boost::normal_distribution<float> >
                            gaussian_variate_;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trunk/src/Support/ModuleTree.cc	Thu Aug 12 11:28:11 2010 +0000
@@ -0,0 +1,30 @@
+// Copyright 2010, Thomas Walters
+//
+// AIM-C: A C++ implementation of the Auditory Image Model
+// http://www.acousticscale.org/AIMC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*! \file
+ *  \brief Parse a configuration file to generate a tree of modules.
+ */
+
+/*! \author: Thomas Walters <tom@acousticscale.org>
+ *  \date 2010/08/08
+ *  \version \$Id: $
+ */
+
+namespace aimc {
+  ParseConfigFile(string )
+  
+}  // namespace aimc
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trunk/src/Support/ModuleTree.h	Thu Aug 12 11:28:11 2010 +0000
@@ -0,0 +1,43 @@
+// Copyright 2010, Thomas Walters
+//
+// AIM-C: A C++ implementation of the Auditory Image Model
+// http://www.acousticscale.org/AIMC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*! \file
+ *  \brief Parse a configuration file to generate a tree of modules.
+ */
+
+/*! \author: Thomas Walters <tom@acousticscale.org>
+ *  \date 2010/08/08
+ *  \version \$Id: $
+ */
+
+#include <string>
+
+namespace aimc {
+using std::string;
+class ModuleTree {
+ public:
+  bool ParseConfigFile(const string &filename);
+  bool ParseConfigText(const string &config_text);
+  void set_output_filename_prefix(const string &prefix);
+  string output_filename_prefix() {
+    return output_filename_prefix_;
+  }
+ private:
+  string output_filename_prefix_;
+  DISALLOW_COPY_AND_ASSIGN(ModuleTree);
+};
+}  // namespace aimc
\ No newline at end of file