changeset 16:2a5354042241

-Updated the Slaney IIR gammatone to use a cascase of four second-order filters as per the implementtion in Slaney's auditory toolbox. This is more numerically stable at high sample rates and low centre frequencies.
author tomwalters
date Sat, 20 Feb 2010 17:56:40 +0000
parents b4cafba48e9d
children f4e712d41321
files src/Main/aimc.cc src/Modules/BMM/ModuleGammatone.cc src/Modules/BMM/ModuleGammatone.h src/Modules/BMM/ModuleGammatone_test.py swig/setup.py
diffstat 5 files changed, 184 insertions(+), 109 deletions(-) [+]
line wrap: on
line diff
--- a/src/Main/aimc.cc	Fri Feb 19 15:19:27 2010 +0000
+++ b/src/Main/aimc.cc	Sat Feb 20 17:56:40 2010 +0000
@@ -26,21 +26,21 @@
 #include "Modules/NAP/ModuleHCL.h"
 #include "Modules/Strobes/ModuleParabola.h"
 #include "Modules/SAI/ModuleSAI.h"
-// #include "Modules/SSI/ModuleSSI.h"
-// #include "Modules/Profile/ModuleProfile.h"
+#include "Modules/SSI/ModuleSSI.h"
+#include "Modules/Profile/ModuleSlice.h"
 #include "Modules/Features/ModuleGaussians.h"
 #include "Modules/Output/FileOutputHTK.h"
 
 int main(int argc, char* argv[]) {
   aimc::Parameters params;
   aimc::ModuleFileInput input(&params);
-  // aimc::ModuleGammatone bmm(&params);
-  aimc::ModulePZFC bmm(&params);
+  aimc::ModuleGammatone bmm(&params);
+  //aimc::ModulePZFC bmm(&params);
   aimc::ModuleHCL nap(&params);
   aimc::ModuleParabola strobes(&params);
   aimc::ModuleSAI sai(&params);
-  // aimc::ModuleSSI ssi(&params);
-  // aimc::ModuleProfile profile(&params);
+  aimc::ModuleSSI ssi(&params);
+  aimc::ModuleSlice profile(&params);
   aimc::ModuleGaussians features(&params);
   aimc::FileOutputHTK output(&params);
 
@@ -51,9 +51,9 @@
   bmm.AddTarget(&nap);
   nap.AddTarget(&strobes);
   strobes.AddTarget(&sai);
-  sai.AddTarget(&features);
-  // ssi.AddTarget(&profile);
-  // profile.AddTarget(&features);
+  sai.AddTarget(&ssi);
+  ssi.AddTarget(&profile);
+  profile.AddTarget(&features);
   features.AddTarget(&output);
 
   output.OpenFile("test_output.htk", params.GetFloat("sai.frame_period_ms"));
--- a/src/Modules/BMM/ModuleGammatone.cc	Fri Feb 19 15:19:27 2010 +0000
+++ b/src/Modules/BMM/ModuleGammatone.cc	Sat Feb 20 17:56:40 2010 +0000
@@ -48,9 +48,15 @@
 }
 
 void ModuleGammatone::ResetInternal() {
-  state_.resize(num_channels_);
+  state_1_.resize(num_channels_);
+  state_2_.resize(num_channels_);
+  state_3_.resize(num_channels_);
+  state_4_.resize(num_channels_);
   for (int i = 0; i < num_channels_; ++i) {
-    state_[i].resize(9, 0.0f);
+    state_1_[i].resize(3, 0.0f);
+    state_2_[i].resize(3, 0.0f);
+    state_3_[i].resize(3, 0.0f);
+    state_4_[i].resize(3, 0.0f);
   }
 }
 
@@ -63,82 +69,104 @@
   centre_frequencies_.resize(num_channels_);
   float erb_current = erb_min;
 
+  output_.Initialize(num_channels_,
+                     input.buffer_length(),
+                     input.sample_rate());
+
   for (int i = 0; i < num_channels_; ++i) {
     centre_frequencies_[i] = ERBTools::ERB2Freq(erb_current);
     erb_current += delta_erb;
+    output_.set_centre_frequency(i, centre_frequencies_[i]);
   }
 
-  forward_.resize(num_channels_);
-  back_.resize(num_channels_);
-  state_.resize(num_channels_);
+  a_.resize(num_channels_);
+  b1_.resize(num_channels_);
+  b2_.resize(num_channels_);
+  b3_.resize(num_channels_);
+  b4_.resize(num_channels_);
+  state_1_.resize(num_channels_);
+  state_2_.resize(num_channels_);
+  state_3_.resize(num_channels_);
+  state_4_.resize(num_channels_);
 
   for (int ch = 0; ch < num_channels_; ++ch) {
-    float cf = centre_frequencies_[ch];
-    float erb = ERBTools::Freq2ERBw(cf);
+    double cf = centre_frequencies_[ch];
+    double erb = ERBTools::Freq2ERBw(cf);
 
     // Sample interval
-    float dt = 1.0f / input.sample_rate();
+    double dt = 1.0f / input.sample_rate();
 
     // Bandwidth parameter
-    float b = 1.019f * 2.0f * M_PI * erb;
+    double b = 1.019f * 2.0f * M_PI * erb;
 
-    // All of the following expressions are derived in Apple TR #35, "An
+    // The following expressions are derived in Apple TR #35, "An
     // Efficient Implementation of the Patterson-Holdsworth Cochlear
-    // Filter Bank".
+    // Filter Bank" and used in Malcolm Slaney's auditory toolbox, where he
+    // defines this alternaltive four stage cascade of second-order filters.
 
     // Calculate the gain:
-    float cpt = cf * M_PI * dt;
-    complex<float> exponent(0.0f, 2.0f * cpt);
-    complex<float> ec = exp(2.0f * exponent);
-    complex<float> two_cf_pi_t(2.0f * cpt, 0.0f);
-    complex<float> two_pow(pow(2.0f, (3.0f / 2.0f)), 0.0f);
-    complex<float> p = -2.0f * ec * dt
-                       + 2.0f * exp(-(b * dt) + exponent) * dt;
-    complex<float> b_dt(b * dt, 0.0f);
+    double cpt = cf * M_PI * dt;
+    complex<double> exponent(0.0, 2.0 * cpt);
+    complex<double> ec = exp(2.0 * exponent);
+    complex<double> two_cf_pi_t(2.0 * cpt, 0.0);
+    complex<double> two_pow(pow(2.0, (3.0 / 2.0)), 0.0);
+    complex<double> p = -2.0 * ec * dt
+                       + 2.0 * exp(-(b * dt) + exponent) * dt;
+    complex<double> b_dt(b * dt, 0.0);
 
-    float gain = abs(
-      (p * (cos(two_cf_pi_t) - sqrt(3.0f - two_pow) * sin(two_cf_pi_t)))
-      * (p * (cos(two_cf_pi_t) + sqrt(3.0f - two_pow) * sin(two_cf_pi_t)))
-      * (p * (cos(two_cf_pi_t) - sqrt(3.0f + two_pow) * sin(two_cf_pi_t)))
-      * (p * (cos(two_cf_pi_t) + sqrt(3.0f + two_pow) * sin(two_cf_pi_t)))
-      / pow(-2.0f / exp(2.0f * b_dt) - 2.0f * ec + 2.0f * (1.0f + ec)
-            / exp(b_dt), 4.0f));
+    double gain = abs(
+      (p * (cos(two_cf_pi_t) - sqrt(3.0 - two_pow) * sin(two_cf_pi_t)))
+      * (p * (cos(two_cf_pi_t) + sqrt(3.0 - two_pow) * sin(two_cf_pi_t)))
+      * (p * (cos(two_cf_pi_t) - sqrt(3.0 + two_pow) * sin(two_cf_pi_t)))
+      * (p * (cos(two_cf_pi_t) + sqrt(3.0 + two_pow) * sin(two_cf_pi_t)))
+      / pow(-2.0 / exp(2.0 * b_dt) - 2.0 * ec + 2.0 * (1.0 + ec)
+            / exp(b_dt), 4.0));
 
     // The filter coefficients themselves:
-    const int coeff_count = 9;
-    forward_[ch].resize(coeff_count, 0.0f);
-    back_[ch].resize(coeff_count, 0.0f);
-    state_[ch].resize(coeff_count, 0.0f);
+    const int coeff_count = 3;
+    a_[ch].resize(coeff_count, 0.0f);
+    b1_[ch].resize(coeff_count, 0.0f);
+    b2_[ch].resize(coeff_count, 0.0f);
+    b3_[ch].resize(coeff_count, 0.0f);
+    b4_[ch].resize(coeff_count, 0.0f);
+    state_1_[ch].resize(coeff_count, 0.0f);
+    state_2_[ch].resize(coeff_count, 0.0f);
+    state_3_[ch].resize(coeff_count, 0.0f);
+    state_4_[ch].resize(coeff_count, 0.0f);
 
-    forward_[ch][0] = pow(dt, 4.0f) / gain;
-    forward_[ch][1] = (-4.0f * pow(dt, 4.0f) * cos(2.0f * cpt)
-                      / exp(b * dt) / gain);
-    forward_[ch][2] = (6.0f * pow(dt, 4.0f) * cos(4.0f * cpt)
-                      / exp(2.0f * b * dt) / gain);
-    forward_[ch][3] = (-4.0f * pow(dt, 4.0f) * cos(6.0f * cpt)
-                      / exp(3.0f * b * dt) / gain);
-    forward_[ch][4] = (pow(dt, 4.0f) * cos(8.0f * cpt)
-                      / exp(4.0f * b * dt) / gain);
-    // Note: the remainder of the forward vector is zero-padded
+    double B0 = dt;
+    double B2 = 0.0f;
 
-    back_[ch][0] = 1.0f;
-    back_[ch][1] = -8.0f * cos(2.0f * cpt) / exp(b * dt);
-    back_[ch][2] = (4.0f * (4.0f + 3.0f * cos(4.0f * cpt))
-                    / exp(2.0f * b * dt));
-    back_[ch][3] = (-8.0f * (6.0f * cos(2.0f * cpt) + cos(6.0f * cpt))
-                    / exp(3.0f * b * dt));
-    back_[ch][4] = (2.0f * (18.0f + 16.0f * cos(4.0f * cpt) + cos(8.0f * cpt))
-                    / exp(4.0f * b * dt));
-    back_[ch][5] = (-8.0f * (6.0f * cos(2.0f * cpt) + cos(6.0f * cpt))
-                    / exp(5.0f * b * dt));
-    back_[ch][6] = (4.0f * (4.0f + 3.0f * cos(4.0f * cpt))
-                    / exp(6.0f * b * dt));
-    back_[ch][7] = -8.0f * cos(2.0f * cpt) / exp(7.0f * b * dt);
-    back_[ch][8] = exp(-8.0f * b * dt);
+    double B11 = -(2.0f * dt * cos(2.0f * cf * M_PI * dt) / exp(b * dt)
+                   + 2.0f * sqrt(3 + pow(2.0f, 1.5f)) * dt
+                       * sin(2.0f * cf * M_PI * dt) / exp(b * dt)) / 2.0f;
+    double B12 = -(2.0f * dt * cos(2.0f * cf * M_PI * dt) / exp(b * dt)
+                   - 2.0f * sqrt(3 + pow(2.0f, 1.5f)) * dt
+                       * sin(2.0f * cf * M_PI * dt) / exp(b * dt)) / 2.0f;
+    double B13 = -(2.0f * dt * cos(2.0f * cf * M_PI * dt) / exp(b * dt)
+                   + 2.0f * sqrt(3 - pow(2.0f, 1.5f)) * dt
+                       * sin(2.0f * cf * M_PI * dt) / exp(b * dt)) / 2.0f;
+    double B14 = -(2.0f * dt * cos(2.0f * cf * M_PI * dt) / exp(b * dt)
+                   - 2.0f * sqrt(3 - pow(2.0f, 1.5f)) * dt
+                       * sin(2.0f * cf * M_PI * dt) / exp(b * dt)) / 2.0f;;
+
+    a_[ch][0] = 1.0f;
+    a_[ch][1] = -2.0f * cos(2.0f * cf * M_PI * dt) / exp(b * dt);
+    a_[ch][2] = exp(-2.0f * b * dt);
+    b1_[ch][0] = B0 / gain;
+    b1_[ch][1] = B11 / gain;
+    b1_[ch][2] = B2 / gain;
+    b2_[ch][0] = B0;
+    b2_[ch][1] = B12;
+    b2_[ch][2] = B2;
+    b3_[ch][0] = B0;
+    b3_[ch][1] = B13;
+    b3_[ch][2] = B2;
+    b4_[ch][0] = B0;
+    b4_[ch][1] = B14;
+    b4_[ch][2] = B2;
+
   }
-  output_.Initialize(num_channels_,
-                     input.buffer_length(),
-                     input.sample_rate());
   return true;
 }
 
@@ -146,18 +174,52 @@
   output_.set_start_time(input.start_time());
   int audio_channel = 0;
 
-  vector<vector<float> >::iterator b = forward_.begin();
-  vector<vector<float> >::iterator a = back_.begin();
-  vector<vector<float> >::iterator s = state_.begin();
+  vector<vector<double> >::iterator b1 = b1_.begin();
+  vector<vector<double> >::iterator b2 = b2_.begin();
+  vector<vector<double> >::iterator b3 = b3_.begin();
+  vector<vector<double> >::iterator b4 = b4_.begin();
+  vector<vector<double> >::iterator a = a_.begin();
+  vector<vector<double> >::iterator s1 = state_1_.begin();
+  vector<vector<double> >::iterator s2 = state_2_.begin();
+  vector<vector<double> >::iterator s3 = state_3_.begin();
+  vector<vector<double> >::iterator s4 = state_4_.begin();
 
-  for (int ch = 0; ch < num_channels_; ++ch, ++a, ++b, ++s) {
+  // Temporary storage between filter stages
+  vector<double> out(input.buffer_length());
+  for (int ch = 0; ch < num_channels_;
+       ++ch, ++b1, ++b2, ++b3, ++b4, ++a, ++s1, ++s2, ++s3, ++s4) {
     for (int i = 0; i < input.buffer_length(); ++i) {
       // Direct-form-II IIR filter
-      float in = input.sample(audio_channel, i);
-      float out = (*b)[0] * in + (*s)[0];
-      for (unsigned int stage = 1; stage < s->size(); ++stage)
-        (*s)[stage - 1] = (*b)[stage] * in - (*a)[stage] * out + (*s)[stage];
-      output_.set_sample(ch, i, out);
+      double in = input.sample(audio_channel, i);
+      out[i] = (*b1)[0] * in + (*s1)[0];
+      for (unsigned int stage = 1; stage < s1->size(); ++stage)
+        (*s1)[stage - 1] = (*b1)[stage] * in
+                           - (*a)[stage] * out[i] + (*s1)[stage];
+    }
+    for (int i = 0; i < input.buffer_length(); ++i) {
+      // Direct-form-II IIR filter
+      double in = out[i];
+      out[i] = (*b2)[0] * in + (*s2)[0];
+      for (unsigned int stage = 1; stage < s2->size(); ++stage)
+        (*s2)[stage - 1] = (*b2)[stage] * in
+                           - (*a)[stage] * out[i] + (*s2)[stage];
+    }
+    for (int i = 0; i < input.buffer_length(); ++i) {
+      // Direct-form-II IIR filter
+      double in = out[i];
+      out[i] = (*b3)[0] * in + (*s3)[0];
+      for (unsigned int stage = 1; stage < s3->size(); ++stage)
+        (*s3)[stage - 1] = (*b3)[stage] * in
+                           - (*a)[stage] * out[i] + (*s3)[stage];
+    }
+    for (int i = 0; i < input.buffer_length(); ++i) {
+      // Direct-form-II IIR filter
+      double in = out[i];
+      out[i] = (*b4)[0] * in + (*s4)[0];
+      for (unsigned int stage = 1; stage < s4->size(); ++stage)
+        (*s4)[stage - 1] = (*b4)[stage] * in
+                           - (*a)[stage] * out[i] + (*s4)[stage];
+      output_.set_sample(ch, i, out[i]);
     }
   }
   PushOutput();
--- a/src/Modules/BMM/ModuleGammatone.h	Fri Feb 19 15:19:27 2010 +0000
+++ b/src/Modules/BMM/ModuleGammatone.h	Sat Feb 20 17:56:40 2010 +0000
@@ -22,6 +22,14 @@
  *  \author Thomas Walters <tom@acousticscale.org>
  *  \date created 2009/11/13
  *  \version \$Id$
+ *
+ * This is the version of the IIR gammatone used in Slaney's Auditory toolbox.
+ * The original verison as described in Apple Tech. Report #35 has a problem
+ * with the high-order coefficients at low centre frequencies and high sample
+ * rates. Since it is important that AIM-C can deal with these cases (for
+ * example for the Gaussian features), I've reiplemeted Slaney's alternative
+ * version which uses a cascade of four second-order filters in place of the 
+ * eighth-order filter.
  */
 #ifndef _AIMC_MODULES_BMM_GAMMATONE_H_
 #define _AIMC_MODULES_BMM_GAMMATONE_H_
@@ -45,13 +53,23 @@
  private:
   virtual bool InitializeInternal(const SignalBank& input);
   virtual void ResetInternal();
-  vector<vector<float> > forward_;
-  vector<vector<float> > back_;
-  vector<vector<float> > state_;
-  vector<float> centre_frequencies_;
+
+  // Filter coefficients
+  vector<vector<double> > b1_;
+  vector<vector<double> > b2_;
+  vector<vector<double> > b3_;
+  vector<vector<double> > b4_;
+  vector<vector<double> > a_;
+
+  vector<vector<double> > state_1_;
+  vector<vector<double> > state_2_;
+  vector<vector<double> > state_3_;
+  vector<vector<double> > state_4_;
+
+  vector<double> centre_frequencies_;
   int num_channels_;
-  float max_frequency_;
-  float min_frequency_;
+  double max_frequency_;
+  double min_frequency_;
 };
 }  // namespace aimc
 #endif  // _AIMC_MODULES_BMM_GAMMATONE_H_
--- a/src/Modules/BMM/ModuleGammatone_test.py	Fri Feb 19 15:19:27 2010 +0000
+++ b/src/Modules/BMM/ModuleGammatone_test.py	Sat Feb 20 17:56:40 2010 +0000
@@ -26,6 +26,8 @@
 
 import aimc
 from scipy import io
+import wave
+import scipy
 
 def main():
   data_file = "src/Modules/BMM/testdata/gammatone.mat"
@@ -40,41 +42,34 @@
   centre_frequencies = data["centre_frequencies"]
   expected_output = data["expected_output"]
   
-  (channel_count, buffer_length, frame_count) = expected_output.shape
+  (channel_count, frame_count) = expected_output.shape
+  buffer_length = 20000;
   
   input_sig = aimc.SignalBank()
-  input_sig.Initialize(1, buffer_length, 44100)
+  input_sig.Initialize(1, buffer_length, 48000)
   parameters = aimc.Parameters()
+  parameters.Load("src/Modules/BMM/testdata/gammatone.cfg")
   mod_gt = aimc.ModuleGammatone(parameters)
   mod_gt.Initialize(input_sig)
   
   correct_count = 0;
   incorrect_count  = 0;
-  for p in range(0, profile_count):
-    profile = given_profiles[p]
-    features = matlab_features[p]
-    for i in range(0, channel_count):
-      profile_sig.set_sample(i, 0, profile[i])
-    mod_gauss.Process(profile_sig)
-    out_sig = mod_gauss.GetOutputBank()
-    error = False;
-    for j in range(0, out_sig.channel_count()):
-      if (abs(out_sig.sample(j, 0) - features[j]) > epsilon):
-        error = True;
-        incorrect_count += 1;
-      else:
-        correct_count += 1;
-    if error:
-      print("Mismatch at profile %d" % (p))
-      print("AIM-C values: %f %f %f %f" % (out_sig.sample(0, 0), out_sig.sample(1, 0), out_sig.sample(2, 0), out_sig.sample(3, 0)))
-      print("MATLAB values: %f %f %f %f" % (features[0], features[1], features[2], features[3]))
-      print("")
-  percent_correct = 100 * correct_count / (correct_count + incorrect_count)
-  print("Total correct: %f percent" % (percent_correct))
-  if percent_correct == 100:
-    print("=== TEST PASSED ===")
-  else:
-    print("=== TEST FAILED! ===")
+  
+  out = scipy.zeros((channel_count, buffer_length))
+  
+  cfs = scipy.zeros((channel_count))
+
+  for i in range(0, buffer_length):
+    input_sig.set_sample(0, i, input_wave[i][0])
+  mod_gt.Process(input_sig)
+  out_sig = mod_gt.GetOutputBank()
+  for ch in range(0, out_sig.channel_count()):
+    cfs[ch] = out_sig.centre_frequency(ch);
+    for i in range(0, buffer_length):  
+      out[ch, i] = out_sig.sample(ch, i)
+  
+  outmat = dict(filterbank_out=out, centre_frequencies_out=cfs)
+  io.savemat("src/Modules/BMM/testdata/out_v2.mat", outmat)
 
   pass
 
--- a/swig/setup.py	Fri Feb 19 15:19:27 2010 +0000
+++ b/swig/setup.py	Sat Feb 20 17:56:40 2010 +0000
@@ -30,7 +30,7 @@
                                    '../src/Support/SignalBank.cc', 
                                    '../src/Support/Module.cc',
                                    '../src/Modules/Features/ModuleGaussians.cc',
-                                   '../src/Modules/BMM/ModuleGammatone.cc', 
+                                   '../src/Modules/BMM/ModuleGammatone.cc',
                                    '../src/Modules/BMM/ModulePZFC.cc',
                                    '../src/Modules/NAP/ModuleHCL.cc',
                                    '../src/Modules/Strobes/ModuleParabola.cc',