view src/Modules/SSI/ModuleSSI.cc @ 232:af531fc3f280

- Massive refactoring to make module tree stuff work. In theory we now support configuration files again. The graphics stuff is untested as yet.
author tomwalters
date Mon, 18 Oct 2010 04:42:28 +0000
parents 73c6d61440ad
children af02b6addf7a
line wrap: on
line source
// Copyright 2010, Thomas Walters
//
// AIM-C: A C++ implementation of the Auditory Image Model
// http://www.acousticscale.org/AIMC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*!
 * \author Thomas Walters <tom@acousticscale.org>
 * \date created 2010/02/19
 * \version \$Id$
 */

#include <cmath>

#include "Modules/SSI/ModuleSSI.h"

namespace aimc {
#ifdef _MSC_VER
// MSVC doesn't define log2()
float log2(float n) {
  // Change of base: log2(n) = ln(n) / ln(2).
  const double ln_two = log(2.0);
  return log(n) / ln_two;
}
#endif

/*!
 * \brief Set the module's identifying strings and read every "ssi.*"
 *        parameter, registering the default shown here for each one.
 * \param params Parameter store handed on to the Module base class.
 */
ModuleSSI::ModuleSSI(Parameters *params) : Module(params) {
  // Identification strings for this module.
  module_description_ = "Size-shape image (aka the 'sscAI')";
  module_identifier_ = "ssi";
  module_type_ = "ssi";
  module_version_ = "$Id$";

  // When true, the SSI is cut off at the end of the first cycle.
  do_pitch_cutoff_ = parameters_->DefaultBool("ssi.pitch_cutoff", false);

  // When true, a channel that was truncated by the pitch cutoff has its
  // values boosted, so that the energy in the SSI spectral profile stays
  // the same.
  weight_by_cutoff_ = parameters_->DefaultBool("ssi.weight_by_cutoff", false);

  // When true, a channel whose scaled end runs off the edge of the computed
  // SSI has its values boosted, again to keep the overall energy of the
  // spectral profile the same.
  weight_by_scaling_ =
      parameters_->DefaultBool("ssi.weight_by_scaling", false);

  // Offset (in ms) from the zero-lag line of the SAI at which the search for
  // a maximum in the SAI's temporal profile begins.
  pitch_search_start_ms_ =
      parameters_->DefaultFloat("ssi.pitch_search_start_ms", 2.0f);

  // Full width of the SSI, expressed in cycles.
  ssi_width_cycles_ = parameters_->DefaultFloat("ssi.width_cycles", 10.0f);

  // When true, the cycles axis is logarithmic (indexed by gamma instead of
  // by cycles).
  log_cycles_axis_ = parameters_->DefaultBool("ssi.log_cycles_axis", true);

  // Centre frequency of the channel that exactly fills the complete width of
  // the SSI buffer.
  pivot_cf_ = parameters_->DefaultFloat("ssi.pivot_cf", 1000.0f);

  // Enables the smooth offset used when the pitch cutoff is active.
  do_smooth_offset_ = parameters_->DefaultBool("ssi.do_smooth_offset", false);

  // Number of cycles, centred on the pitch line, over which the SSI is taken
  // to zero when doing the pitch cutoff.
  smooth_offset_cycles_ =
      parameters_->DefaultFloat("ssi.smooth_offset_cycles", 3.0f);
}

// The destructor has no work of its own to do; its body is empty.
ModuleSSI::~ModuleSSI() {}

bool ModuleSSI::InitializeInternal(const SignalBank &input) {
  // Copy the parameters of the input signal bank into internal variables, so
  // that they can be checked later.
  sample_rate_ = input.sample_rate();
  buffer_length_ = input.buffer_length();
  channel_count_ = input.channel_count();

  ssi_width_samples_ = sample_rate_ * ssi_width_cycles_ / pivot_cf_;
  if (ssi_width_samples_ > buffer_length_) {
    ssi_width_samples_ = buffer_length_;
    float cycles = ssi_width_samples_ * pivot_cf_ / sample_rate_;
    LOG_INFO(_T("Requested SSI width of %f cycles is too long for the "
                "input buffer length of %d samples. The SSI will be "
                "truncated at %d samples wide. This corresponds to a width "
                "of %f cycles."), ssi_width_cycles_, buffer_length_,
                ssi_width_samples_, cycles);
    ssi_width_cycles_ = cycles;
  }
  output_.Initialize(channel_count_, ssi_width_samples_, sample_rate_);
  return true;
}

// Reset hook: this implementation performs no work.
void ModuleSSI::ResetInternal() {}

/*!
 * \brief Find the lag index of the largest peak in the SAI's temporal
 *        profile.
 *
 * The temporal profile at a given lag is the sum of that sample over all
 * channels. Only lags at or beyond pitch_search_start_ms_ are searched.
 *
 * \param input SAI frame to analyse. It is read for buffer_length_ samples
 *              in each of channel_count_ channels.
 * \return Index of the first lag holding the largest strictly positive
 *         profile value, or 0 if no profile value in the search range is
 *         greater than zero.
 */
int ModuleSSI::ExtractPitchIndex(const SignalBank &input) const {
  // First lag (in samples) to consider. A negative search start time would
  // otherwise produce a negative index into the input, so clamp it to zero.
  int start_sample = static_cast<int>(
      floor(pitch_search_start_ms_ * sample_rate_ / 1000.0f));
  if (start_sample < 0) {
    start_sample = 0;
  }

  // The profile is only ever inspected inside the search range, so each
  // value is computed on the fly there instead of building the whole
  // profile in a temporary buffer first. The per-lag sum is accumulated in
  // channel order.
  int max_idx = 0;
  float max_val = 0.0f;
  for (int i = start_sample; i < buffer_length_; ++i) {
    float profile_val = 0.0f;
    for (int ch = 0; ch < channel_count_; ++ch) {
      profile_val += input.sample(ch, i);
    }
    // Strict comparison: the earliest of several equal maxima is kept.
    if (profile_val > max_val) {
      max_idx = i;
      max_val = profile_val;
    }
  }
  return max_idx;
}

// Maps an output column index onto its position h (in cycles) along the
// SSI's horizontal axis. With a logarithmic axis, h = 2^gamma where gamma
// runs linearly from gamma_min towards gamma_max across the output width;
// otherwise h runs linearly from 0 towards width_cycles.
static float SsiAxisValue(int index, int width_samples, float width_cycles,
                          bool log_axis, float gamma_min, float gamma_max) {
  if (log_axis) {
    float gamma = gamma_min + (gamma_max - gamma_min)
                               * static_cast<float>(index)
                               / static_cast<float>(width_samples);
    return pow(2.0f, gamma);
  }
  return static_cast<float>(index) * width_cycles
      / static_cast<float>(width_samples);
}

/*!
 * \brief Convert one SAI frame into a size-shape image frame and push it to
 *        the module's output.
 *
 * Every output column corresponds to a position h, measured in cycles of
 * the channel's centre frequency. For each channel, the input is read at
 * h * (sample_rate / centre_frequency) samples using linear interpolation
 * between the two neighbouring input samples. Optional weighting is applied
 * for the pitch cutoff, the smooth offset around the pitch line and the
 * per-channel scaling. Positions that fall outside the input buffer, or
 * beyond the cutoff when no smooth offset is in use, yield zero.
 *
 * \param input SAI frame. Its buffer length and channel count must match
 *              those seen at initialization, otherwise an error is logged
 *              and nothing is output.
 */
void ModuleSSI::Process(const SignalBank &input) {
  // Check to see if the module has been initialized. If not, processing
  // should not continue.
  if (!initialized_) {
    LOG_ERROR(_T("Module %s not initialized."), module_identifier_.c_str());
    return;
  }

  // Check that the input this time is the same as the input passed to
  // Initialize()
  if (buffer_length_ != input.buffer_length()
      || channel_count_ != input.channel_count()) {
    LOG_ERROR(_T("Mismatch between input to Initialize() and input to "
                 "Process() in module %s."), module_identifier_.c_str());
    return;
  }

  output_.set_start_time(input.start_time());

  // Without the pitch cutoff, the whole input buffer is usable.
  int pitch_index = buffer_length_ - 1;
  if (do_pitch_cutoff_) {
    pitch_index = ExtractPitchIndex(input);
  }

  const float gamma_min = -1.0f;
  const float gamma_max = log2(ssi_width_cycles_);

  // The cutoff position and its compensating weight depend only on the
  // pitch index, so they are the same for every channel.
  float channel_weight = 1.0f;
  int cutoff_index = buffer_length_ - 1;
  if (do_pitch_cutoff_ && pitch_index < cutoff_index) {
    // ExtractPitchIndex() returns 0 when it finds no positive peak; skip the
    // weighting in that case rather than dividing by zero.
    if (weight_by_cutoff_ && pitch_index > 0) {
      channel_weight = static_cast<float>(buffer_length_)
                / static_cast<float>(pitch_index);
    }
    cutoff_index = pitch_index;
  }

  // tanh(3) is about 0.995. Seems reasonable.
  const float smooth_pitch_constant = smooth_offset_cycles_ * 3.0f;
  // Position of the pitch line on the h axis; it is also independent of the
  // channel.
  float pitch_h = 0.0f;
  if (do_smooth_offset_) {
    pitch_h = SsiAxisValue(pitch_index, ssi_width_samples_, ssi_width_cycles_,
                           log_cycles_axis_, gamma_min, gamma_max);
  }

  for (int ch = 0; ch < channel_count_; ++ch) {
    const float centre_frequency = input.centre_frequency(ch);
    // Number of input samples spanned by one cycle at this channel's centre
    // frequency.
    float cycle_samples = sample_rate_ / centre_frequency;

    // Copy the buffer from input to output, addressing by h-value
    for (int i = 0; i < ssi_width_samples_; ++i) {
      const float h = SsiAxisValue(i, ssi_width_samples_, ssi_width_cycles_,
                                   log_cycles_axis_, gamma_min, gamma_max);

      // The index into the input array is a floating-point number, which is
      // split into a whole part and a fractional part. The whole part and
      // fractional part are found, and are used to linearly interpolate
      // between input samples to yield an output sample.
      double whole_part;
      float frac_part = modf(h * cycle_samples, &whole_part);
      int sample = floor(whole_part);

      float weight = channel_weight;

      if (do_smooth_offset_ && do_pitch_cutoff_) {
        // Smoothing around the pitch cutoff line.
        weight *= (1.0f + tanh(smooth_pitch_constant * (pitch_h - h))) / 2.0f;
      }

      if (weight_by_scaling_ && centre_frequency > pivot_cf_) {
        weight *= (centre_frequency / pivot_cf_);
      }

      // Interpolation reads input samples `sample` and `sample + 1`, so both
      // must lie inside the input buffer. Channels below the pivot frequency
      // map to positions beyond the buffer end, and with the smooth offset
      // enabled the hard cutoff test alone does not keep the read in range.
      const bool in_bounds = sample >= 0 && sample < buffer_length_ - 1;

      float val = 0.0f;
      if (in_bounds && (sample < cutoff_index || do_smooth_offset_)) {
        float curr_sample = input.sample(ch, sample);
        float next_sample = input.sample(ch, sample + 1);
        val = weight * (curr_sample
                        + frac_part * (next_sample - curr_sample));
      }
      output_.set_sample(ch, i, val);
    }
  }
  PushOutput();
}
}  // namespace aimc