annotate trunk/src/Modules/SSI/ModuleSSI.cc @ 706:f8e90b5d85fd tip

Delete CARFAC code from this repository. It has been moved to https://github.com/google/carfac Please email me with your github username to get access. I've also created a new mailing list to discuss CARFAC development: https://groups.google.com/forum/#!forum/carfac-dev
author ronw@google.com
date Thu, 18 Jul 2013 20:56:51 +0000
parents 733a11a65f3d
children
rev   line source
tomwalters@284 1 // Copyright 2010, Thomas Walters
tomwalters@284 2 //
tomwalters@284 3 // AIM-C: A C++ implementation of the Auditory Image Model
tomwalters@284 4 // http://www.acousticscale.org/AIMC
tomwalters@284 5 //
tomwalters@318 6 // Licensed under the Apache License, Version 2.0 (the "License");
tomwalters@318 7 // you may not use this file except in compliance with the License.
tomwalters@318 8 // You may obtain a copy of the License at
tomwalters@284 9 //
tomwalters@318 10 // http://www.apache.org/licenses/LICENSE-2.0
tomwalters@284 11 //
tomwalters@318 12 // Unless required by applicable law or agreed to in writing, software
tomwalters@318 13 // distributed under the License is distributed on an "AS IS" BASIS,
tomwalters@318 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
tomwalters@318 15 // See the License for the specific language governing permissions and
tomwalters@318 16 // limitations under the License.
tomwalters@284 17
tomwalters@284 18 /*!
tomwalters@284 19 * \author Thomas Walters <tom@acousticscale.org>
tomwalters@284 20 * \date created 2010/02/19
tomwalters@284 21 * \version \$Id$
tomwalters@284 22 */
tomwalters@284 23
tomwalters@287 24 #include <cmath>
tomwalters@287 25
tomwalters@284 26 #include "Modules/SSI/ModuleSSI.h"
tomwalters@284 27
tomwalters@284 28 namespace aimc {
tomwalters@330 29 #ifdef _MSC_VER
tomwalters@330 30 // MSVC doesn't define log2()
// Base-2 logarithm fallback built on the natural logarithm, using the
// change-of-base identity log2(n) = ln(n) / ln(2).
float log2(float n) {
  const double ln_of_two = log(2.0);
  return static_cast<float>(log(n) / ln_of_two);
}
tomwalters@330 34 #endif
tomwalters@330 35
tomwalters@284 36 ModuleSSI::ModuleSSI(Parameters *params) : Module(params) {
tomwalters@284 37 module_description_ = "Size-shape image (aka the 'sscAI')";
tomwalters@284 38 module_identifier_ = "ssi";
tomwalters@284 39 module_type_ = "ssi";
tomwalters@284 40 module_version_ = "$Id$";
tomwalters@284 41
tomwalters@305 42 // Cut off the SSI at the end of the first cycle
tomwalters@305 43 do_pitch_cutoff_ = parameters_->DefaultBool("ssi.pitch_cutoff", false);
tomwalters@305 44
tomwalters@305 45 // Weight the values in each channel more strongly if the channel was
tomwalters@305 46 // truncated due to the pitch cutoff. This ensures that the same amount of
tomwalters@305 47 // energy remains in the SSI spectral profile
tomwalters@305 48 weight_by_cutoff_ = parameters_->DefaultBool("ssi.weight_by_cutoff", false);
tomwalters@305 49
tomwalters@305 50 // Weight the values in each channel more strongly if the channel was
tomwalters@305 51 // scaled such that the end goes off the edge of the computed SSI.
tomwalters@305 52 // Again, this ensures that the overall energy of the spectral profile
tomwalters@305 53 // remains the same.
tomwalters@305 54 weight_by_scaling_ = parameters_->DefaultBool("ssi.weight_by_scaling",
tomwalters@305 55 false);
tomwalters@305 56
tomwalters@305 57 // Time from the zero-lag line of the SAI from which to start searching
tomwalters@305 58 // for a maximum in the input SAI's temporal profile.
tomwalters@305 59 pitch_search_start_ms_ = parameters_->DefaultFloat(
tomwalters@411 60 "ssi.pitch_search_start_ms", 2.0f);
tomwalters@305 61
tomwalters@305 62 // Total width in cycles of the whole SSI
tomwalters@305 63 ssi_width_cycles_ = parameters_->DefaultFloat("ssi.width_cycles", 10.0f);
tomwalters@305 64
tomwalters@305 65 // Set to true to make the cycles axis logarithmic (ie indexing by gamma
tomwalters@305 66 // rather than by cycles)
tomwalters@305 67 log_cycles_axis_ = parameters_->DefaultBool("ssi.log_cycles_axis", true);
tomwalters@305 68
tomwalters@305 69 // The centre frequency of the channel which will just fill the complete
tomwalters@305 70 // width of the SSI buffer
tomwalters@305 71 pivot_cf_ = parameters_->DefaultFloat("ssi.pivot_cf", 1000.0f);
tomwalters@397 72
tomwalters@397 73 // Whether or not to do smooth offset when the pitch cutoff is active.
tomwalters@397 74 do_smooth_offset_ = parameters_->DefaultBool("ssi.do_smooth_offset", false);
tomwalters@397 75
tomwalters@397 76 // The number of cycles, centered on the pitch line, over which the SSI is taken
tomwalters@397 77 // to zero when doing the pitch cutoff.
tomwalters@397 78 smooth_offset_cycles_ = parameters_->DefaultFloat("ssi.smooth_offset_cycles", 3.0f);
tomwalters@284 79 }
tomwalters@284 80
tomwalters@284 81 ModuleSSI::~ModuleSSI() {
tomwalters@284 82 }
tomwalters@284 83
tomwalters@284 84 bool ModuleSSI::InitializeInternal(const SignalBank &input) {
tomwalters@284 85 // Copy the parameters of the input signal bank into internal variables, so
tomwalters@284 86 // that they can be checked later.
tomwalters@284 87 sample_rate_ = input.sample_rate();
tomwalters@284 88 buffer_length_ = input.buffer_length();
tomwalters@284 89 channel_count_ = input.channel_count();
tomwalters@284 90
tomwalters@305 91 ssi_width_samples_ = sample_rate_ * ssi_width_cycles_ / pivot_cf_;
tomwalters@287 92 if (ssi_width_samples_ > buffer_length_) {
tomwalters@287 93 ssi_width_samples_ = buffer_length_;
tomwalters@305 94 float cycles = ssi_width_samples_ * pivot_cf_ / sample_rate_;
tomwalters@287 95 LOG_INFO(_T("Requested SSI width of %f cycles is too long for the "
tomwalters@287 96 "input buffer length of %d samples. The SSI will be "
tomwalters@287 97 "truncated at %d samples wide. This corresponds to a width "
tomwalters@287 98 "of %f cycles."), ssi_width_cycles_, buffer_length_,
tomwalters@287 99 ssi_width_samples_, cycles);
tomwalters@287 100 ssi_width_cycles_ = cycles;
tomwalters@287 101 }
tomwalters@411 102 for (int i = 0; i < input.channel_count(); ++i) {
tomwalters@411 103 output_.set_centre_frequency(i, input.centre_frequency(i));
tomwalters@411 104 }
tomwalters@411 105
tom@420 106 h_.resize(ssi_width_samples_, 0.0);
tom@420 107 float gamma_min = -1.0f;
tom@420 108 float gamma_max = log2(ssi_width_cycles_);
tom@420 109 for (int i = 0; i < ssi_width_samples_; ++i) {
tom@420 110 if (log_cycles_axis_) {
tom@420 111 float gamma = gamma_min + (gamma_max - gamma_min)
tom@420 112 * static_cast<float>(i)
tom@420 113 / static_cast<float>(ssi_width_samples_);
tom@420 114 h_[i]= pow(2.0f, gamma);
tom@420 115 } else {
tom@420 116 h_[i] = static_cast<float>(i) * ssi_width_cycles_
tom@420 117 / static_cast<float>(ssi_width_samples_);
tom@420 118 }
tom@420 119 }
tom@420 120
tomwalters@287 121 output_.Initialize(channel_count_, ssi_width_samples_, sample_rate_);
tomwalters@284 122 return true;
tomwalters@284 123 }
tomwalters@284 124
tomwalters@284 125 void ModuleSSI::ResetInternal() {
tomwalters@284 126 }
tomwalters@284 127
tomwalters@305 128 int ModuleSSI::ExtractPitchIndex(const SignalBank &input) const {
tomwalters@305 129 // Generate temporal profile of the SAI
tomwalters@305 130 vector<float> sai_temporal_profile(buffer_length_, 0.0f);
tomwalters@305 131 for (int i = 0; i < buffer_length_; ++i) {
tomwalters@305 132 float val = 0.0f;
tomwalters@305 133 for (int ch = 0; ch < channel_count_; ++ch) {
tomwalters@305 134 val += input.sample(ch, i);
tomwalters@305 135 }
tomwalters@305 136 sai_temporal_profile[i] = val;
tomwalters@305 137 }
tomwalters@305 138
tomwalters@305 139 // Find pitch value
tomwalters@305 140 int start_sample = floor(pitch_search_start_ms_ * sample_rate_ / 1000.0f);
tomwalters@305 141 int max_idx = 0;
tomwalters@305 142 float max_val = 0.0f;
tomwalters@305 143 for (int i = start_sample; i < buffer_length_; ++i) {
tomwalters@305 144 if (sai_temporal_profile[i] > max_val) {
tomwalters@305 145 max_idx = i;
tomwalters@305 146 max_val = sai_temporal_profile[i];
tomwalters@305 147 }
tomwalters@305 148 }
tomwalters@305 149 return max_idx;
tomwalters@305 150 }
tomwalters@305 151
tomwalters@284 152 void ModuleSSI::Process(const SignalBank &input) {
tomwalters@284 153 // Check to see if the module has been initialized. If not, processing
tomwalters@284 154 // should not continue.
tomwalters@284 155 if (!initialized_) {
tomwalters@285 156 LOG_ERROR(_T("Module %s not initialized."), module_identifier_.c_str());
tomwalters@284 157 return;
tomwalters@284 158 }
tomwalters@284 159
tomwalters@284 160 // Check that ths input this time is the same as the input passed to
tomwalters@284 161 // Initialize()
tomwalters@284 162 if (buffer_length_ != input.buffer_length()
tomwalters@284 163 || channel_count_ != input.channel_count()) {
tomwalters@284 164 LOG_ERROR(_T("Mismatch between input to Initialize() and input to "
tomwalters@285 165 "Process() in module %s."), module_identifier_.c_str());
tomwalters@284 166 return;
tomwalters@284 167 }
tomwalters@284 168
tomwalters@287 169 output_.set_start_time(input.start_time());
tomwalters@284 170
tomwalters@305 171 int pitch_index = buffer_length_ - 1;
tomwalters@305 172 if (do_pitch_cutoff_) {
tomwalters@305 173 pitch_index = ExtractPitchIndex(input);
tomwalters@305 174 }
tomwalters@305 175
tomwalters@287 176 for (int ch = 0; ch < channel_count_; ++ch) {
tomwalters@305 177 float centre_frequency = input.centre_frequency(ch);
tomwalters@411 178 float cycle_samples = sample_rate_ / centre_frequency;
tomwalters@397 179
tomwalters@397 180 float channel_weight = 1.0f;
tomwalters@397 181 int cutoff_index = buffer_length_ - 1;
tomwalters@397 182 if (do_pitch_cutoff_) {
tomwalters@397 183 if (pitch_index < cutoff_index) {
tomwalters@397 184 if (weight_by_cutoff_) {
tomwalters@397 185 channel_weight = static_cast<float>(buffer_length_)
tomwalters@411 186 / static_cast<float>(pitch_index);
tomwalters@397 187 }
tomwalters@397 188 cutoff_index = pitch_index;
tomwalters@397 189 }
tomwalters@397 190 }
tomwalters@397 191
tomwalters@397 192 // tanh(3) is about 0.995. Seems reasonable.
tomwalters@411 193 float smooth_pitch_constant = 3.0f / smooth_offset_cycles_;
tomwalters@397 194 float pitch_h = 0.0f;
tomwalters@397 195 if (do_smooth_offset_) {
tomwalters@411 196 pitch_h = static_cast<float>(pitch_index) / cycle_samples;
tomwalters@397 197 }
tomwalters@397 198
tomwalters@411 199 // Copy the buffer from input to output, addressing by h-value.
tomwalters@287 200 for (int i = 0; i < ssi_width_samples_; ++i) {
tom@420 201
tomwalters@287 202 // The index into the input array is a floating-point number, which is
tomwalters@287 203 // split into a whole part and a fractional part. The whole part and
tomwalters@287 204 // fractional part are found, and are used to linearly interpolate
tomwalters@287 205 // between input samples to yield an output sample.
tomwalters@287 206 double whole_part;
tom@420 207 float frac_part = modf(h_[i] * cycle_samples, &whole_part);
tomwalters@305 208 int sample = floor(whole_part);
tomwalters@305 209
tomwalters@397 210 float weight = channel_weight;
tomwalters@397 211
tomwalters@397 212 if (do_smooth_offset_ && do_pitch_cutoff_) {
tomwalters@397 213 // Smoothing around the pitch cutoff line.
tom@420 214 float pitch_weight = (1.0f + tanh((pitch_h - h_[i])
tomwalters@411 215 * smooth_pitch_constant)) / 2.0f;
tomwalters@411 216 weight *= pitch_weight;
tomwalters@411 217 //LOG_INFO("Channel %d, Sample %d. Pitch weight: %f", ch, i, pitch_weight);
tomwalters@305 218 }
tomwalters@305 219
tomwalters@305 220 if (weight_by_scaling_) {
tomwalters@305 221 if (centre_frequency > pivot_cf_) {
tomwalters@305 222 weight *= (centre_frequency / pivot_cf_);
tomwalters@305 223 }
tomwalters@305 224 }
tomwalters@287 225
tomwalters@287 226 float val;
tomwalters@397 227 if (sample < cutoff_index || do_smooth_offset_) {
tomwalters@287 228 float curr_sample = input.sample(ch, sample);
tomwalters@287 229 float next_sample = input.sample(ch, sample + 1);
tomwalters@305 230 val = weight * (curr_sample
tomwalters@305 231 + frac_part * (next_sample - curr_sample));
tomwalters@287 232 } else {
tomwalters@309 233 val = 0.0f;
tomwalters@287 234 }
tomwalters@287 235 output_.set_sample(ch, i, val);
tomwalters@287 236 }
tomwalters@287 237 }
tomwalters@284 238 PushOutput();
tomwalters@284 239 }
tomwalters@284 240 } // namespace aimc
tomwalters@284 241