Mercurial > hg > aimc

--- a/src/Main/aimc.cc	Fri Feb 19 15:19:27 2010 +0000
+++ b/src/Main/aimc.cc	Sat Feb 20 17:56:40 2010 +0000
@@ -26,21 +26,21 @@
 #include "Modules/NAP/ModuleHCL.h"
 #include "Modules/Strobes/ModuleParabola.h"
 #include "Modules/SAI/ModuleSAI.h"
-// #include "Modules/SSI/ModuleSSI.h"
-// #include "Modules/Profile/ModuleProfile.h"
+#include "Modules/SSI/ModuleSSI.h"
+#include "Modules/Profile/ModuleSlice.h"
 #include "Modules/Features/ModuleGaussians.h"
 #include "Modules/Output/FileOutputHTK.h"

 int main(int argc, char* argv[]) {
   aimc::Parameters params;
   aimc::ModuleFileInput input(&params);
-  // aimc::ModuleGammatone bmm(&params);
-  aimc::ModulePZFC bmm(&params);
+  aimc::ModuleGammatone bmm(&params);
+  //aimc::ModulePZFC bmm(&params);
   aimc::ModuleHCL nap(&params);
   aimc::ModuleParabola strobes(&params);
   aimc::ModuleSAI sai(&params);
-  // aimc::ModuleSSI ssi(&params);
-  // aimc::ModuleProfile profile(&params);
+  aimc::ModuleSSI ssi(&params);
+  aimc::ModuleSlice profile(&params);
   aimc::ModuleGaussians features(&params);
   aimc::FileOutputHTK output(&params);

@@ -51,9 +51,9 @@
   bmm.AddTarget(&nap);
   nap.AddTarget(&strobes);
   strobes.AddTarget(&sai);
-  sai.AddTarget(&features);
-  // ssi.AddTarget(&profile);
-  // profile.AddTarget(&features);
+  sai.AddTarget(&ssi);
+  ssi.AddTarget(&profile);
+  profile.AddTarget(&features);
   features.AddTarget(&output);

   output.OpenFile("test_output.htk", params.GetFloat("sai.frame_period_ms"));
--- a/src/Modules/BMM/ModuleGammatone.cc	Fri Feb 19 15:19:27 2010 +0000
+++ b/src/Modules/BMM/ModuleGammatone.cc	Sat Feb 20 17:56:40 2010 +0000
@@ -48,9 +48,15 @@
 }

 void ModuleGammatone::ResetInternal() {
-  state_.resize(num_channels_);
+  state_1_.resize(num_channels_);
+  state_2_.resize(num_channels_);
+  state_3_.resize(num_channels_);
+  state_4_.resize(num_channels_);
   for (int i = 0; i < num_channels_; ++i) {
-    state_[i].resize(9, 0.0f);
+    state_1_[i].resize(3, 0.0f);
+    state_2_[i].resize(3, 0.0f);
+    state_3_[i].resize(3, 0.0f);
+    state_4_[i].resize(3, 0.0f);
   }
 }

@@ -63,82 +69,104 @@
   centre_frequencies_.resize(num_channels_);
   float erb_current = erb_min;

+  output_.Initialize(num_channels_,
+                     input.buffer_length(),
+                     input.sample_rate());
+
   for (int i = 0; i < num_channels_; ++i) {
     centre_frequencies_[i] = ERBTools::ERB2Freq(erb_current);
     erb_current += delta_erb;
+    output_.set_centre_frequency(i, centre_frequencies_[i]);
   }

-  forward_.resize(num_channels_);
-  back_.resize(num_channels_);
-  state_.resize(num_channels_);
+  a_.resize(num_channels_);
+  b1_.resize(num_channels_);
+  b2_.resize(num_channels_);
+  b3_.resize(num_channels_);
+  b4_.resize(num_channels_);
+  state_1_.resize(num_channels_);
+  state_2_.resize(num_channels_);
+  state_3_.resize(num_channels_);
+  state_4_.resize(num_channels_);

   for (int ch = 0; ch < num_channels_; ++ch) {
-    float cf = centre_frequencies_[ch];
-    float erb = ERBTools::Freq2ERBw(cf);
+    double cf = centre_frequencies_[ch];
+    double erb = ERBTools::Freq2ERBw(cf);

     // Sample interval
-    float dt = 1.0f / input.sample_rate();
+    double dt = 1.0f / input.sample_rate();

     // Bandwidth parameter
-    float b = 1.019f * 2.0f * M_PI * erb;
+    double b = 1.019f * 2.0f * M_PI * erb;

-    // All of the following expressions are derived in Apple TR #35, "An
+    // The following expressions are derived in Apple TR #35, "An
     // Efficient Implementation of the Patterson-Holdsworth Cochlear
-    // Filter Bank".
+    // Filter Bank" and used in Malcolm Slaney's auditory toolbox, where he
+    // defines this alternaltive four stage cascade of second-order filters.

     // Calculate the gain:
-    float cpt = cf * M_PI * dt;
-    complex<float> exponent(0.0f, 2.0f * cpt);
-    complex<float> ec = exp(2.0f * exponent);
-    complex<float> two_cf_pi_t(2.0f * cpt, 0.0f);
-    complex<float> two_pow(pow(2.0f, (3.0f / 2.0f)), 0.0f);
-    complex<float> p = -2.0f * ec * dt
-                       + 2.0f * exp(-(b * dt) + exponent) * dt;
-    complex<float> b_dt(b * dt, 0.0f);
+    double cpt = cf * M_PI * dt;
+    complex<double> exponent(0.0, 2.0 * cpt);
+    complex<double> ec = exp(2.0 * exponent);
+    complex<double> two_cf_pi_t(2.0 * cpt, 0.0);
+    complex<double> two_pow(pow(2.0, (3.0 / 2.0)), 0.0);
+    complex<double> p = -2.0 * ec * dt
+                       + 2.0 * exp(-(b * dt) + exponent) * dt;
+    complex<double> b_dt(b * dt, 0.0);

-    float gain = abs(
-      (p * (cos(two_cf_pi_t) - sqrt(3.0f - two_pow) * sin(two_cf_pi_t)))
-      * (p * (cos(two_cf_pi_t) + sqrt(3.0f - two_pow) * sin(two_cf_pi_t)))
-      * (p * (cos(two_cf_pi_t) - sqrt(3.0f + two_pow) * sin(two_cf_pi_t)))
-      * (p * (cos(two_cf_pi_t) + sqrt(3.0f + two_pow) * sin(two_cf_pi_t)))
-      / pow(-2.0f / exp(2.0f * b_dt) - 2.0f * ec + 2.0f * (1.0f + ec)
-            / exp(b_dt), 4.0f));
+    double gain = abs(
+      (p * (cos(two_cf_pi_t) - sqrt(3.0 - two_pow) * sin(two_cf_pi_t)))
+      * (p * (cos(two_cf_pi_t) + sqrt(3.0 - two_pow) * sin(two_cf_pi_t)))
+      * (p * (cos(two_cf_pi_t) - sqrt(3.0 + two_pow) * sin(two_cf_pi_t)))
+      * (p * (cos(two_cf_pi_t) + sqrt(3.0 + two_pow) * sin(two_cf_pi_t)))
+      / pow(-2.0 / exp(2.0 * b_dt) - 2.0 * ec + 2.0 * (1.0 + ec)
+            / exp(b_dt), 4.0));

     // The filter coefficients themselves:
-    const int coeff_count = 9;
-    forward_[ch].resize(coeff_count, 0.0f);
-    back_[ch].resize(coeff_count, 0.0f);
-    state_[ch].resize(coeff_count, 0.0f);
+    const int coeff_count = 3;
+    a_[ch].resize(coeff_count, 0.0f);
+    b1_[ch].resize(coeff_count, 0.0f);
+    b2_[ch].resize(coeff_count, 0.0f);
+    b3_[ch].resize(coeff_count, 0.0f);
+    b4_[ch].resize(coeff_count, 0.0f);
+    state_1_[ch].resize(coeff_count, 0.0f);
+    state_2_[ch].resize(coeff_count, 0.0f);
+    state_3_[ch].resize(coeff_count, 0.0f);
+    state_4_[ch].resize(coeff_count, 0.0f);

-    forward_[ch][0] = pow(dt, 4.0f) / gain;
-    forward_[ch][1] = (-4.0f * pow(dt, 4.0f) * cos(2.0f * cpt)
-                      / exp(b * dt) / gain);
-    forward_[ch][2] = (6.0f * pow(dt, 4.0f) * cos(4.0f * cpt)
-                      / exp(2.0f * b * dt) / gain);
-    forward_[ch][3] = (-4.0f * pow(dt, 4.0f) * cos(6.0f * cpt)
-                      / exp(3.0f * b * dt) / gain);
-    forward_[ch][4] = (pow(dt, 4.0f) * cos(8.0f * cpt)
-                      / exp(4.0f * b * dt) / gain);
-    // Note: the remainder of the forward vector is zero-padded
+    double B0 = dt;
+    double B2 = 0.0f;

-    back_[ch][0] = 1.0f;
-    back_[ch][1] = -8.0f * cos(2.0f * cpt) / exp(b * dt);
-    back_[ch][2] = (4.0f * (4.0f + 3.0f * cos(4.0f * cpt))
-                    / exp(2.0f * b * dt));
-    back_[ch][3] = (-8.0f * (6.0f * cos(2.0f * cpt) + cos(6.0f * cpt))
-                    / exp(3.0f * b * dt));
-    back_[ch][4] = (2.0f * (18.0f + 16.0f * cos(4.0f * cpt) + cos(8.0f * cpt))
-                    / exp(4.0f * b * dt));
-    back_[ch][5] = (-8.0f * (6.0f * cos(2.0f * cpt) + cos(6.0f * cpt))
-                    / exp(5.0f * b * dt));
-    back_[ch][6] = (4.0f * (4.0f + 3.0f * cos(4.0f * cpt))
-                    / exp(6.0f * b * dt));
-    back_[ch][7] = -8.0f * cos(2.0f * cpt) / exp(7.0f * b * dt);
-    back_[ch][8] = exp(-8.0f * b * dt);
+    double B11 = -(2.0f * dt * cos(2.0f * cf * M_PI * dt) / exp(b * dt)
+                   + 2.0f * sqrt(3 + pow(2.0f, 1.5f)) * dt
+                       * sin(2.0f * cf * M_PI * dt) / exp(b * dt)) / 2.0f;
+    double B12 = -(2.0f * dt * cos(2.0f * cf * M_PI * dt) / exp(b * dt)
+                   - 2.0f * sqrt(3 + pow(2.0f, 1.5f)) * dt
+                       * sin(2.0f * cf * M_PI * dt) / exp(b * dt)) / 2.0f;
+    double B13 = -(2.0f * dt * cos(2.0f * cf * M_PI * dt) / exp(b * dt)
+                   + 2.0f * sqrt(3 - pow(2.0f, 1.5f)) * dt
+                       * sin(2.0f * cf * M_PI * dt) / exp(b * dt)) / 2.0f;
+    double B14 = -(2.0f * dt * cos(2.0f * cf * M_PI * dt) / exp(b * dt)
+                   - 2.0f * sqrt(3 - pow(2.0f, 1.5f)) * dt
+                       * sin(2.0f * cf * M_PI * dt) / exp(b * dt)) / 2.0f;;
+
+    a_[ch][0] = 1.0f;
+    a_[ch][1] = -2.0f * cos(2.0f * cf * M_PI * dt) / exp(b * dt);
+    a_[ch][2] = exp(-2.0f * b * dt);
+    b1_[ch][0] = B0 / gain;
+    b1_[ch][1] = B11 / gain;
+    b1_[ch][2] = B2 / gain;
+    b2_[ch][0] = B0;
+    b2_[ch][1] = B12;
+    b2_[ch][2] = B2;
+    b3_[ch][0] = B0;
+    b3_[ch][1] = B13;
+    b3_[ch][2] = B2;
+    b4_[ch][0] = B0;
+    b4_[ch][1] = B14;
+    b4_[ch][2] = B2;
+
   }
-  output_.Initialize(num_channels_,
-                     input.buffer_length(),
-                     input.sample_rate());
   return true;
 }

@@ -146,18 +174,52 @@
   output_.set_start_time(input.start_time());
   int audio_channel = 0;

-  vector<vector<float> >::iterator b = forward_.begin();
-  vector<vector<float> >::iterator a = back_.begin();
-  vector<vector<float> >::iterator s = state_.begin();
+  vector<vector<double> >::iterator b1 = b1_.begin();
+  vector<vector<double> >::iterator b2 = b2_.begin();
+  vector<vector<double> >::iterator b3 = b3_.begin();
+  vector<vector<double> >::iterator b4 = b4_.begin();
+  vector<vector<double> >::iterator a = a_.begin();
+  vector<vector<double> >::iterator s1 = state_1_.begin();
+  vector<vector<double> >::iterator s2 = state_2_.begin();
+  vector<vector<double> >::iterator s3 = state_3_.begin();
+  vector<vector<double> >::iterator s4 = state_4_.begin();

-  for (int ch = 0; ch < num_channels_; ++ch, ++a, ++b, ++s) {
+  // Temporary storage between filter stages
+  vector<double> out(input.buffer_length());
+  for (int ch = 0; ch < num_channels_;
+       ++ch, ++b1, ++b2, ++b3, ++b4, ++a, ++s1, ++s2, ++s3, ++s4) {
     for (int i = 0; i < input.buffer_length(); ++i) {
       // Direct-form-II IIR filter
-      float in = input.sample(audio_channel, i);
-      float out = (*b)[0] * in + (*s)[0];
-      for (unsigned int stage = 1; stage < s->size(); ++stage)
-        (*s)[stage - 1] = (*b)[stage] * in - (*a)[stage] * out + (*s)[stage];
-      output_.set_sample(ch, i, out);
+      double in = input.sample(audio_channel, i);
+      out[i] = (*b1)[0] * in + (*s1)[0];
+      for (unsigned int stage = 1; stage < s1->size(); ++stage)
+        (*s1)[stage - 1] = (*b1)[stage] * in
+                           - (*a)[stage] * out[i] + (*s1)[stage];
+    }
+    for (int i = 0; i < input.buffer_length(); ++i) {
+      // Direct-form-II IIR filter
+      double in = out[i];
+      out[i] = (*b2)[0] * in + (*s2)[0];
+      for (unsigned int stage = 1; stage < s2->size(); ++stage)
+        (*s2)[stage - 1] = (*b2)[stage] * in
+                           - (*a)[stage] * out[i] + (*s2)[stage];
+    }
+    for (int i = 0; i < input.buffer_length(); ++i) {
+      // Direct-form-II IIR filter
+      double in = out[i];
+      out[i] = (*b3)[0] * in + (*s3)[0];
+      for (unsigned int stage = 1; stage < s3->size(); ++stage)
+        (*s3)[stage - 1] = (*b3)[stage] * in
+                           - (*a)[stage] * out[i] + (*s3)[stage];
+    }
+    for (int i = 0; i < input.buffer_length(); ++i) {
+      // Direct-form-II IIR filter
+      double in = out[i];
+      out[i] = (*b4)[0] * in + (*s4)[0];
+      for (unsigned int stage = 1; stage < s4->size(); ++stage)
+        (*s4)[stage - 1] = (*b4)[stage] * in
+                           - (*a)[stage] * out[i] + (*s4)[stage];
+      output_.set_sample(ch, i, out[i]);
     }
   }
   PushOutput();
--- a/src/Modules/BMM/ModuleGammatone.h	Fri Feb 19 15:19:27 2010 +0000
+++ b/src/Modules/BMM/ModuleGammatone.h	Sat Feb 20 17:56:40 2010 +0000
@@ -22,6 +22,14 @@
  *  \author Thomas Walters <tom@acousticscale.org>
  *  \date created 2009/11/13
  *  \version \$Id$
+ *
+ * This is the version of the IIR gammatone used in Slaney's Auditory toolbox.
+ * The original verison as described in Apple Tech. Report #35 has a problem
+ * with the high-order coefficients at low centre frequencies and high sample
+ * rates. Since it is important that AIM-C can deal with these cases (for
+ * example for the Gaussian features), I've reiplemeted Slaney's alternative
+ * version which uses a cascade of four second-order filters in place of the
+ * eighth-order filter.
  */
 #ifndef _AIMC_MODULES_BMM_GAMMATONE_H_
 #define _AIMC_MODULES_BMM_GAMMATONE_H_
@@ -45,13 +53,23 @@
  private:
   virtual bool InitializeInternal(const SignalBank& input);
   virtual void ResetInternal();
-  vector<vector<float> > forward_;
-  vector<vector<float> > back_;
-  vector<vector<float> > state_;
-  vector<float> centre_frequencies_;
+
+  // Filter coefficients
+  vector<vector<double> > b1_;
+  vector<vector<double> > b2_;
+  vector<vector<double> > b3_;
+  vector<vector<double> > b4_;
+  vector<vector<double> > a_;
+
+  vector<vector<double> > state_1_;
+  vector<vector<double> > state_2_;
+  vector<vector<double> > state_3_;
+  vector<vector<double> > state_4_;
+
+  vector<double> centre_frequencies_;
   int num_channels_;
-  float max_frequency_;
-  float min_frequency_;
+  double max_frequency_;
+  double min_frequency_;
 };
 }  // namespace aimc
 #endif  // _AIMC_MODULES_BMM_GAMMATONE_H_
--- a/src/Modules/BMM/ModuleGammatone_test.py	Fri Feb 19 15:19:27 2010 +0000
+++ b/src/Modules/BMM/ModuleGammatone_test.py	Sat Feb 20 17:56:40 2010 +0000
@@ -26,6 +26,8 @@

 import aimc
 from scipy import io
+import wave
+import scipy

 def main():
   data_file = "src/Modules/BMM/testdata/gammatone.mat"
@@ -40,41 +42,34 @@
   centre_frequencies = data["centre_frequencies"]
   expected_output = data["expected_output"]

-  (channel_count, buffer_length, frame_count) = expected_output.shape
+  (channel_count, frame_count) = expected_output.shape
+  buffer_length = 20000;

   input_sig = aimc.SignalBank()
-  input_sig.Initialize(1, buffer_length, 44100)
+  input_sig.Initialize(1, buffer_length, 48000)
   parameters = aimc.Parameters()
+  parameters.Load("src/Modules/BMM/testdata/gammatone.cfg")
   mod_gt = aimc.ModuleGammatone(parameters)
   mod_gt.Initialize(input_sig)

   correct_count = 0;
   incorrect_count  = 0;
-  for p in range(0, profile_count):
-    profile = given_profiles[p]
-    features = matlab_features[p]
-    for i in range(0, channel_count):
-      profile_sig.set_sample(i, 0, profile[i])
-    mod_gauss.Process(profile_sig)
-    out_sig = mod_gauss.GetOutputBank()
-    error = False;
-    for j in range(0, out_sig.channel_count()):
-      if (abs(out_sig.sample(j, 0) - features[j]) > epsilon):
-        error = True;
-        incorrect_count += 1;
-      else:
-        correct_count += 1;
-    if error:
-      print("Mismatch at profile %d" % (p))
-      print("AIM-C values: %f %f %f %f" % (out_sig.sample(0, 0), out_sig.sample(1, 0), out_sig.sample(2, 0), out_sig.sample(3, 0)))
-      print("MATLAB values: %f %f %f %f" % (features[0], features[1], features[2], features[3]))
-      print("")
-  percent_correct = 100 * correct_count / (correct_count + incorrect_count)
-  print("Total correct: %f percent" % (percent_correct))
-  if percent_correct == 100:
-    print("=== TEST PASSED ===")
-  else:
-    print("=== TEST FAILED! ===")
+
+  out = scipy.zeros((channel_count, buffer_length))
+
+  cfs = scipy.zeros((channel_count))
+
+  for i in range(0, buffer_length):
+    input_sig.set_sample(0, i, input_wave[i][0])
+  mod_gt.Process(input_sig)
+  out_sig = mod_gt.GetOutputBank()
+  for ch in range(0, out_sig.channel_count()):
+    cfs[ch] = out_sig.centre_frequency(ch);
+    for i in range(0, buffer_length):
+      out[ch, i] = out_sig.sample(ch, i)
+
+  outmat = dict(filterbank_out=out, centre_frequencies_out=cfs)
+  io.savemat("src/Modules/BMM/testdata/out_v2.mat", outmat)

   pass
--- a/swig/setup.py	Fri Feb 19 15:19:27 2010 +0000
+++ b/swig/setup.py	Sat Feb 20 17:56:40 2010 +0000
@@ -30,7 +30,7 @@
                                    '../src/Support/SignalBank.cc',
                                    '../src/Support/Module.cc',
                                    '../src/Modules/Features/ModuleGaussians.cc',
-                                   '../src/Modules/BMM/ModuleGammatone.cc',
+                                   '../src/Modules/BMM/ModuleGammatone.cc',
                                    '../src/Modules/BMM/ModulePZFC.cc',
                                    '../src/Modules/NAP/ModuleHCL.cc',
                                    '../src/Modules/Strobes/ModuleParabola.cc',