annotate src/Modules/SSI/ModuleSSI.cc @ 611:0fbaf443ec82

Carfac C++ revision 3, including more style improvements. The output structs are now classes again, and have separate storage methods for each output structure along with flags in the Run and RunSegment methods to allow for only storing NAPs if desired.
author alexbrandmeyer
date Fri, 17 May 2013 19:52:45 +0000
parents 0a3342606855
children
rev   line source
tomwalters@12 1 // Copyright 2010, Thomas Walters
tomwalters@12 2 //
tomwalters@12 3 // AIM-C: A C++ implementation of the Auditory Image Model
tomwalters@12 4 // http://www.acousticscale.org/AIMC
tomwalters@12 5 //
tomwalters@45 6 // Licensed under the Apache License, Version 2.0 (the "License");
tomwalters@45 7 // you may not use this file except in compliance with the License.
tomwalters@45 8 // You may obtain a copy of the License at
tomwalters@12 9 //
tomwalters@45 10 // http://www.apache.org/licenses/LICENSE-2.0
tomwalters@12 11 //
tomwalters@45 12 // Unless required by applicable law or agreed to in writing, software
tomwalters@45 13 // distributed under the License is distributed on an "AS IS" BASIS,
tomwalters@45 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
tomwalters@45 15 // See the License for the specific language governing permissions and
tomwalters@45 16 // limitations under the License.
tomwalters@12 17
tomwalters@12 18 /*!
tomwalters@12 19 * \author Thomas Walters <tom@acousticscale.org>
tomwalters@12 20 * \date created 2010/02/19
tomwalters@12 21 * \version \$Id$
tomwalters@12 22 */
tomwalters@12 23
tomwalters@15 24 #include <cmath>
tomwalters@15 25
tomwalters@12 26 #include "Modules/SSI/ModuleSSI.h"
tomwalters@12 27
tomwalters@12 28 namespace aimc {
#ifdef _MSC_VER
// Fallback base-2 logarithm for MSVC builds, whose runtime does not
// provide log2(). Uses the identity log2(n) = ln(n) / ln(2).
float log2(float n) {
  const double ln_of_two = log(2.0);
  return log(n) / ln_of_two;
}
#endif
tomwalters@163 35
tomwalters@12 36 ModuleSSI::ModuleSSI(Parameters *params) : Module(params) {
tomwalters@12 37 module_description_ = "Size-shape image (aka the 'sscAI')";
tomwalters@12 38 module_identifier_ = "ssi";
tomwalters@12 39 module_type_ = "ssi";
tomwalters@12 40 module_version_ = "$Id$";
tomwalters@12 41
tomwalters@32 42 // Cut off the SSI at the end of the first cycle
tomwalters@32 43 do_pitch_cutoff_ = parameters_->DefaultBool("ssi.pitch_cutoff", false);
tomwalters@32 44
tomwalters@32 45 // Weight the values in each channel more strongly if the channel was
tomwalters@32 46 // truncated due to the pitch cutoff. This ensures that the same amount of
tomwalters@32 47 // energy remains in the SSI spectral profile
tomwalters@32 48 weight_by_cutoff_ = parameters_->DefaultBool("ssi.weight_by_cutoff", false);
tomwalters@32 49
tomwalters@32 50 // Weight the values in each channel more strongly if the channel was
tomwalters@32 51 // scaled such that the end goes off the edge of the computed SSI.
tomwalters@32 52 // Again, this ensures that the overall energy of the spectral profile
tomwalters@32 53 // remains the same.
tomwalters@32 54 weight_by_scaling_ = parameters_->DefaultBool("ssi.weight_by_scaling",
tomwalters@32 55 false);
tomwalters@32 56
tomwalters@32 57 // Time from the zero-lag line of the SAI from which to start searching
tomwalters@32 58 // for a maximum in the input SAI's temporal profile.
tomwalters@32 59 pitch_search_start_ms_ = parameters_->DefaultFloat(
tomwalters@237 60 "ssi.pitch_search_start_ms", 2.0f);
tomwalters@32 61
tomwalters@32 62 // Total width in cycles of the whole SSI
tomwalters@32 63 ssi_width_cycles_ = parameters_->DefaultFloat("ssi.width_cycles", 10.0f);
tomwalters@32 64
tomwalters@32 65 // Set to true to make the cycles axis logarithmic (ie indexing by gamma
tomwalters@32 66 // rather than by cycles)
tomwalters@32 67 log_cycles_axis_ = parameters_->DefaultBool("ssi.log_cycles_axis", true);
tomwalters@32 68
tomwalters@32 69 // The centre frequency of the channel which will just fill the complete
tomwalters@32 70 // width of the SSI buffer
tomwalters@32 71 pivot_cf_ = parameters_->DefaultFloat("ssi.pivot_cf", 1000.0f);
tomwalters@227 72
tomwalters@227 73 // Whether or not to do smooth offset when the pitch cutoff is active.
tomwalters@227 74 do_smooth_offset_ = parameters_->DefaultBool("ssi.do_smooth_offset", false);
tomwalters@227 75
tomwalters@227 76 // The number of cycles, centered on the pitch line, over which the SSI is taken
tomwalters@227 77 // to zero when doing the pitch cutoff.
tomwalters@227 78 smooth_offset_cycles_ = parameters_->DefaultFloat("ssi.smooth_offset_cycles", 3.0f);
tomwalters@12 79 }
tomwalters@12 80
tomwalters@12 81 ModuleSSI::~ModuleSSI() {
tomwalters@12 82 }
tomwalters@12 83
tomwalters@12 84 bool ModuleSSI::InitializeInternal(const SignalBank &input) {
tomwalters@12 85 // Copy the parameters of the input signal bank into internal variables, so
tomwalters@12 86 // that they can be checked later.
tomwalters@12 87 sample_rate_ = input.sample_rate();
tomwalters@12 88 buffer_length_ = input.buffer_length();
tomwalters@12 89 channel_count_ = input.channel_count();
tomwalters@12 90
tomwalters@32 91 ssi_width_samples_ = sample_rate_ * ssi_width_cycles_ / pivot_cf_;
tomwalters@15 92 if (ssi_width_samples_ > buffer_length_) {
tomwalters@15 93 ssi_width_samples_ = buffer_length_;
tomwalters@32 94 float cycles = ssi_width_samples_ * pivot_cf_ / sample_rate_;
tomwalters@15 95 LOG_INFO(_T("Requested SSI width of %f cycles is too long for the "
tomwalters@15 96 "input buffer length of %d samples. The SSI will be "
tomwalters@15 97 "truncated at %d samples wide. This corresponds to a width "
tomwalters@15 98 "of %f cycles."), ssi_width_cycles_, buffer_length_,
tomwalters@15 99 ssi_width_samples_, cycles);
tomwalters@15 100 ssi_width_cycles_ = cycles;
tomwalters@15 101 }
tomwalters@237 102 for (int i = 0; i < input.channel_count(); ++i) {
tomwalters@237 103 output_.set_centre_frequency(i, input.centre_frequency(i));
tomwalters@237 104 }
tomwalters@237 105
tom@246 106 h_.resize(ssi_width_samples_, 0.0);
tom@246 107 float gamma_min = -1.0f;
tom@246 108 float gamma_max = log2(ssi_width_cycles_);
tom@246 109 for (int i = 0; i < ssi_width_samples_; ++i) {
tom@246 110 if (log_cycles_axis_) {
tom@246 111 float gamma = gamma_min + (gamma_max - gamma_min)
tom@246 112 * static_cast<float>(i)
tom@246 113 / static_cast<float>(ssi_width_samples_);
tom@246 114 h_[i]= pow(2.0f, gamma);
tom@246 115 } else {
tom@246 116 h_[i] = static_cast<float>(i) * ssi_width_cycles_
tom@246 117 / static_cast<float>(ssi_width_samples_);
tom@246 118 }
tom@246 119 }
tom@246 120
tomwalters@15 121 output_.Initialize(channel_count_, ssi_width_samples_, sample_rate_);
tomwalters@12 122 return true;
tomwalters@12 123 }
tomwalters@12 124
tomwalters@12 125 void ModuleSSI::ResetInternal() {
tomwalters@12 126 }
tomwalters@12 127
tomwalters@32 128 int ModuleSSI::ExtractPitchIndex(const SignalBank &input) const {
tomwalters@32 129 // Generate temporal profile of the SAI
tomwalters@32 130 vector<float> sai_temporal_profile(buffer_length_, 0.0f);
tomwalters@32 131 for (int i = 0; i < buffer_length_; ++i) {
tomwalters@32 132 float val = 0.0f;
tomwalters@32 133 for (int ch = 0; ch < channel_count_; ++ch) {
tomwalters@32 134 val += input.sample(ch, i);
tomwalters@32 135 }
tomwalters@32 136 sai_temporal_profile[i] = val;
tomwalters@32 137 }
tomwalters@32 138
tomwalters@32 139 // Find pitch value
tomwalters@32 140 int start_sample = floor(pitch_search_start_ms_ * sample_rate_ / 1000.0f);
tomwalters@32 141 int max_idx = 0;
tomwalters@32 142 float max_val = 0.0f;
tomwalters@32 143 for (int i = start_sample; i < buffer_length_; ++i) {
tomwalters@32 144 if (sai_temporal_profile[i] > max_val) {
tomwalters@32 145 max_idx = i;
tomwalters@32 146 max_val = sai_temporal_profile[i];
tomwalters@32 147 }
tomwalters@32 148 }
tomwalters@32 149 return max_idx;
tomwalters@32 150 }
tomwalters@32 151
tomwalters@12 152 void ModuleSSI::Process(const SignalBank &input) {
tomwalters@12 153 // Check to see if the module has been initialized. If not, processing
tomwalters@12 154 // should not continue.
tomwalters@12 155 if (!initialized_) {
tomwalters@13 156 LOG_ERROR(_T("Module %s not initialized."), module_identifier_.c_str());
tomwalters@12 157 return;
tomwalters@12 158 }
tomwalters@12 159
tomwalters@12 160 // Check that ths input this time is the same as the input passed to
tomwalters@12 161 // Initialize()
tomwalters@12 162 if (buffer_length_ != input.buffer_length()
tomwalters@12 163 || channel_count_ != input.channel_count()) {
tomwalters@12 164 LOG_ERROR(_T("Mismatch between input to Initialize() and input to "
tomwalters@13 165 "Process() in module %s."), module_identifier_.c_str());
tomwalters@12 166 return;
tomwalters@12 167 }
tomwalters@12 168
tomwalters@15 169 output_.set_start_time(input.start_time());
tomwalters@12 170
tomwalters@32 171 int pitch_index = buffer_length_ - 1;
tomwalters@32 172 if (do_pitch_cutoff_) {
tomwalters@32 173 pitch_index = ExtractPitchIndex(input);
tomwalters@32 174 }
tomwalters@32 175
tomwalters@15 176 for (int ch = 0; ch < channel_count_; ++ch) {
tomwalters@32 177 float centre_frequency = input.centre_frequency(ch);
tomwalters@237 178 float cycle_samples = sample_rate_ / centre_frequency;
tomwalters@227 179
tomwalters@227 180 float channel_weight = 1.0f;
tomwalters@227 181 int cutoff_index = buffer_length_ - 1;
tomwalters@227 182 if (do_pitch_cutoff_) {
tomwalters@227 183 if (pitch_index < cutoff_index) {
tomwalters@227 184 if (weight_by_cutoff_) {
tomwalters@227 185 channel_weight = static_cast<float>(buffer_length_)
tomwalters@237 186 / static_cast<float>(pitch_index);
tomwalters@227 187 }
tomwalters@227 188 cutoff_index = pitch_index;
tomwalters@227 189 }
tomwalters@227 190 }
tomwalters@227 191
tomwalters@227 192 // tanh(3) is about 0.995. Seems reasonable.
tomwalters@237 193 float smooth_pitch_constant = 3.0f / smooth_offset_cycles_;
tomwalters@227 194 float pitch_h = 0.0f;
tomwalters@227 195 if (do_smooth_offset_) {
tomwalters@237 196 pitch_h = static_cast<float>(pitch_index) / cycle_samples;
tomwalters@227 197 }
tomwalters@227 198
tomwalters@237 199 // Copy the buffer from input to output, addressing by h-value.
tomwalters@162 200 for (int i = 0; i < ssi_width_samples_; ++i) {
tom@246 201
tomwalters@15 202 // The index into the input array is a floating-point number, which is
tomwalters@15 203 // split into a whole part and a fractional part. The whole part and
tomwalters@15 204 // fractional part are found, and are used to linearly interpolate
tomwalters@15 205 // between input samples to yield an output sample.
tomwalters@15 206 double whole_part;
tom@246 207 float frac_part = modf(h_[i] * cycle_samples, &whole_part);
tomwalters@32 208 int sample = floor(whole_part);
tomwalters@32 209
tomwalters@227 210 float weight = channel_weight;
tomwalters@227 211
tomwalters@227 212 if (do_smooth_offset_ && do_pitch_cutoff_) {
tomwalters@227 213 // Smoothing around the pitch cutoff line.
tom@246 214 float pitch_weight = (1.0f + tanh((pitch_h - h_[i])
tomwalters@237 215 * smooth_pitch_constant)) / 2.0f;
tomwalters@237 216 weight *= pitch_weight;
tomwalters@237 217 //LOG_INFO("Channel %d, Sample %d. Pitch weight: %f", ch, i, pitch_weight);
tomwalters@32 218 }
tomwalters@32 219
tomwalters@32 220 if (weight_by_scaling_) {
tomwalters@32 221 if (centre_frequency > pivot_cf_) {
tomwalters@32 222 weight *= (centre_frequency / pivot_cf_);
tomwalters@32 223 }
tomwalters@32 224 }
tomwalters@15 225
tomwalters@15 226 float val;
tomwalters@227 227 if (sample < cutoff_index || do_smooth_offset_) {
tomwalters@15 228 float curr_sample = input.sample(ch, sample);
tomwalters@15 229 float next_sample = input.sample(ch, sample + 1);
tomwalters@32 230 val = weight * (curr_sample
tomwalters@32 231 + frac_part * (next_sample - curr_sample));
tomwalters@15 232 } else {
tomwalters@36 233 val = 0.0f;
tomwalters@15 234 }
tomwalters@15 235 output_.set_sample(ch, i, val);
tomwalters@15 236 }
tomwalters@15 237 }
tomwalters@12 238 PushOutput();
tomwalters@12 239 }
tomwalters@12 240 } // namespace aimc
tomwalters@12 241