tomwalters@284: // Copyright 2010, Thomas Walters tomwalters@284: // tomwalters@284: // AIM-C: A C++ implementation of the Auditory Image Model tomwalters@284: // http://www.acousticscale.org/AIMC tomwalters@284: // tomwalters@318: // Licensed under the Apache License, Version 2.0 (the "License"); tomwalters@318: // you may not use this file except in compliance with the License. tomwalters@318: // You may obtain a copy of the License at tomwalters@284: // tomwalters@318: // http://www.apache.org/licenses/LICENSE-2.0 tomwalters@284: // tomwalters@318: // Unless required by applicable law or agreed to in writing, software tomwalters@318: // distributed under the License is distributed on an "AS IS" BASIS, tomwalters@318: // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. tomwalters@318: // See the License for the specific language governing permissions and tomwalters@318: // limitations under the License. tomwalters@284: tomwalters@284: /*! tomwalters@284: * \author Thomas Walters tomwalters@284: * \date created 2010/02/19 tomwalters@284: * \version \$Id$ tomwalters@284: */ tomwalters@284: tomwalters@287: #include tomwalters@287: tomwalters@284: #include "Modules/SSI/ModuleSSI.h" tomwalters@284: tomwalters@284: namespace aimc { tomwalters@330: #ifdef _MSC_VER tomwalters@330: // MSVC doesn't define log2() tomwalters@330: float log2(float n) { tomwalters@330: return log(n) / log(2.0); tomwalters@330: } tomwalters@330: #endif tomwalters@330: tomwalters@284: ModuleSSI::ModuleSSI(Parameters *params) : Module(params) { tomwalters@284: module_description_ = "Size-shape image (aka the 'sscAI')"; tomwalters@284: module_identifier_ = "ssi"; tomwalters@284: module_type_ = "ssi"; tomwalters@284: module_version_ = "$Id$"; tomwalters@284: tomwalters@305: // Cut off the SSI at the end of the first cycle tomwalters@305: do_pitch_cutoff_ = parameters_->DefaultBool("ssi.pitch_cutoff", false); tomwalters@305: tomwalters@305: // Weight the values in each channel more strongly if the channel was tomwalters@305: // truncated due to the pitch cutoff. This ensures that the same amount of tomwalters@305: // energy remains in the SSI spectral profile tomwalters@305: weight_by_cutoff_ = parameters_->DefaultBool("ssi.weight_by_cutoff", false); tomwalters@305: tomwalters@305: // Weight the values in each channel more strongly if the channel was tomwalters@305: // scaled such that the end goes off the edge of the computed SSI. tomwalters@305: // Again, this ensures that the overall energy of the spectral profile tomwalters@305: // remains the same. tomwalters@305: weight_by_scaling_ = parameters_->DefaultBool("ssi.weight_by_scaling", tomwalters@305: false); tomwalters@305: tomwalters@305: // Time from the zero-lag line of the SAI from which to start searching tomwalters@305: // for a maximum in the input SAI's temporal profile. tomwalters@305: pitch_search_start_ms_ = parameters_->DefaultFloat( tomwalters@411: "ssi.pitch_search_start_ms", 2.0f); tomwalters@305: tomwalters@305: // Total width in cycles of the whole SSI tomwalters@305: ssi_width_cycles_ = parameters_->DefaultFloat("ssi.width_cycles", 10.0f); tomwalters@305: tomwalters@305: // Set to true to make the cycles axis logarithmic (ie indexing by gamma tomwalters@305: // rather than by cycles) tomwalters@305: log_cycles_axis_ = parameters_->DefaultBool("ssi.log_cycles_axis", true); tomwalters@305: tomwalters@305: // The centre frequency of the channel which will just fill the complete tomwalters@305: // width of the SSI buffer tomwalters@305: pivot_cf_ = parameters_->DefaultFloat("ssi.pivot_cf", 1000.0f); tomwalters@397: tomwalters@397: // Whether or not to do smooth offset when the pitch cutoff is active. tomwalters@397: do_smooth_offset_ = parameters_->DefaultBool("ssi.do_smooth_offset", false); tomwalters@397: tomwalters@397: // The number of cycles, centered on the pitch line, over which the SSI is taken tomwalters@397: // to zero when doing the pitch cutoff. tomwalters@397: smooth_offset_cycles_ = parameters_->DefaultFloat("ssi.smooth_offset_cycles", 3.0f); tomwalters@284: } tomwalters@284: tomwalters@284: ModuleSSI::~ModuleSSI() { tomwalters@284: } tomwalters@284: tomwalters@284: bool ModuleSSI::InitializeInternal(const SignalBank &input) { tomwalters@284: // Copy the parameters of the input signal bank into internal variables, so tomwalters@284: // that they can be checked later. tomwalters@284: sample_rate_ = input.sample_rate(); tomwalters@284: buffer_length_ = input.buffer_length(); tomwalters@284: channel_count_ = input.channel_count(); tomwalters@284: tomwalters@305: ssi_width_samples_ = sample_rate_ * ssi_width_cycles_ / pivot_cf_; tomwalters@287: if (ssi_width_samples_ > buffer_length_) { tomwalters@287: ssi_width_samples_ = buffer_length_; tomwalters@305: float cycles = ssi_width_samples_ * pivot_cf_ / sample_rate_; tomwalters@287: LOG_INFO(_T("Requested SSI width of %f cycles is too long for the " tomwalters@287: "input buffer length of %d samples. The SSI will be " tomwalters@287: "truncated at %d samples wide. This corresponds to a width " tomwalters@287: "of %f cycles."), ssi_width_cycles_, buffer_length_, tomwalters@287: ssi_width_samples_, cycles); tomwalters@287: ssi_width_cycles_ = cycles; tomwalters@287: } tomwalters@411: for (int i = 0; i < input.channel_count(); ++i) { tomwalters@411: output_.set_centre_frequency(i, input.centre_frequency(i)); tomwalters@411: } tomwalters@411: tom@420: h_.resize(ssi_width_samples_, 0.0); tom@420: float gamma_min = -1.0f; tom@420: float gamma_max = log2(ssi_width_cycles_); tom@420: for (int i = 0; i < ssi_width_samples_; ++i) { tom@420: if (log_cycles_axis_) { tom@420: float gamma = gamma_min + (gamma_max - gamma_min) tom@420: * static_cast(i) tom@420: / static_cast(ssi_width_samples_); tom@420: h_[i]= pow(2.0f, gamma); tom@420: } else { tom@420: h_[i] = static_cast(i) * ssi_width_cycles_ tom@420: / static_cast(ssi_width_samples_); tom@420: } tom@420: } tom@420: tomwalters@287: output_.Initialize(channel_count_, ssi_width_samples_, sample_rate_); tomwalters@284: return true; tomwalters@284: } tomwalters@284: tomwalters@284: void ModuleSSI::ResetInternal() { tomwalters@284: } tomwalters@284: tomwalters@305: int ModuleSSI::ExtractPitchIndex(const SignalBank &input) const { tomwalters@305: // Generate temporal profile of the SAI tomwalters@305: vector sai_temporal_profile(buffer_length_, 0.0f); tomwalters@305: for (int i = 0; i < buffer_length_; ++i) { tomwalters@305: float val = 0.0f; tomwalters@305: for (int ch = 0; ch < channel_count_; ++ch) { tomwalters@305: val += input.sample(ch, i); tomwalters@305: } tomwalters@305: sai_temporal_profile[i] = val; tomwalters@305: } tomwalters@305: tomwalters@305: // Find pitch value tomwalters@305: int start_sample = floor(pitch_search_start_ms_ * sample_rate_ / 1000.0f); tomwalters@305: int max_idx = 0; tomwalters@305: float max_val = 0.0f; tomwalters@305: for (int i = start_sample; i < buffer_length_; ++i) { tomwalters@305: if (sai_temporal_profile[i] > max_val) { tomwalters@305: max_idx = i; tomwalters@305: max_val = sai_temporal_profile[i]; tomwalters@305: } tomwalters@305: } tomwalters@305: return max_idx; tomwalters@305: } tomwalters@305: tomwalters@284: void ModuleSSI::Process(const SignalBank &input) { tomwalters@284: // Check to see if the module has been initialized. If not, processing tomwalters@284: // should not continue. tomwalters@284: if (!initialized_) { tomwalters@285: LOG_ERROR(_T("Module %s not initialized."), module_identifier_.c_str()); tomwalters@284: return; tomwalters@284: } tomwalters@284: tomwalters@284: // Check that ths input this time is the same as the input passed to tomwalters@284: // Initialize() tomwalters@284: if (buffer_length_ != input.buffer_length() tomwalters@284: || channel_count_ != input.channel_count()) { tomwalters@284: LOG_ERROR(_T("Mismatch between input to Initialize() and input to " tomwalters@285: "Process() in module %s."), module_identifier_.c_str()); tomwalters@284: return; tomwalters@284: } tomwalters@284: tomwalters@287: output_.set_start_time(input.start_time()); tomwalters@284: tomwalters@305: int pitch_index = buffer_length_ - 1; tomwalters@305: if (do_pitch_cutoff_) { tomwalters@305: pitch_index = ExtractPitchIndex(input); tomwalters@305: } tomwalters@305: tomwalters@287: for (int ch = 0; ch < channel_count_; ++ch) { tomwalters@305: float centre_frequency = input.centre_frequency(ch); tomwalters@411: float cycle_samples = sample_rate_ / centre_frequency; tomwalters@397: tomwalters@397: float channel_weight = 1.0f; tomwalters@397: int cutoff_index = buffer_length_ - 1; tomwalters@397: if (do_pitch_cutoff_) { tomwalters@397: if (pitch_index < cutoff_index) { tomwalters@397: if (weight_by_cutoff_) { tomwalters@397: channel_weight = static_cast(buffer_length_) tomwalters@411: / static_cast(pitch_index); tomwalters@397: } tomwalters@397: cutoff_index = pitch_index; tomwalters@397: } tomwalters@397: } tomwalters@397: tomwalters@397: // tanh(3) is about 0.995. Seems reasonable. tomwalters@411: float smooth_pitch_constant = 3.0f / smooth_offset_cycles_; tomwalters@397: float pitch_h = 0.0f; tomwalters@397: if (do_smooth_offset_) { tomwalters@411: pitch_h = static_cast(pitch_index) / cycle_samples; tomwalters@397: } tomwalters@397: tomwalters@411: // Copy the buffer from input to output, addressing by h-value. tomwalters@287: for (int i = 0; i < ssi_width_samples_; ++i) { tom@420: tomwalters@287: // The index into the input array is a floating-point number, which is tomwalters@287: // split into a whole part and a fractional part. The whole part and tomwalters@287: // fractional part are found, and are used to linearly interpolate tomwalters@287: // between input samples to yield an output sample. tomwalters@287: double whole_part; tom@420: float frac_part = modf(h_[i] * cycle_samples, &whole_part); tomwalters@305: int sample = floor(whole_part); tomwalters@305: tomwalters@397: float weight = channel_weight; tomwalters@397: tomwalters@397: if (do_smooth_offset_ && do_pitch_cutoff_) { tomwalters@397: // Smoothing around the pitch cutoff line. tom@420: float pitch_weight = (1.0f + tanh((pitch_h - h_[i]) tomwalters@411: * smooth_pitch_constant)) / 2.0f; tomwalters@411: weight *= pitch_weight; tomwalters@411: //LOG_INFO("Channel %d, Sample %d. Pitch weight: %f", ch, i, pitch_weight); tomwalters@305: } tomwalters@305: tomwalters@305: if (weight_by_scaling_) { tomwalters@305: if (centre_frequency > pivot_cf_) { tomwalters@305: weight *= (centre_frequency / pivot_cf_); tomwalters@305: } tomwalters@305: } tomwalters@287: tomwalters@287: float val; tomwalters@397: if (sample < cutoff_index || do_smooth_offset_) { tomwalters@287: float curr_sample = input.sample(ch, sample); tomwalters@287: float next_sample = input.sample(ch, sample + 1); tomwalters@305: val = weight * (curr_sample tomwalters@305: + frac_part * (next_sample - curr_sample)); tomwalters@287: } else { tomwalters@309: val = 0.0f; tomwalters@287: } tomwalters@287: output_.set_sample(ch, i, val); tomwalters@287: } tomwalters@287: } tomwalters@284: PushOutput(); tomwalters@284: } tomwalters@284: } // namespace aimc tomwalters@284: