annotate src/Modules/SSI/ModuleSSI.cc @ 32:9122efd2b227

-New AIMCopy main for the SSI features (temporary hack till I get a working module load system) -LocalMax strobe criterion. This is faster and better than the parabola version, which still seems buggy. -Noise generator module. Adds noise to a signal. Uses boost for the random number generator. -New options for the SSI -Slice now respects all its flags (oops!). -MATLAB functions for visualisation -Scripts for generating data to view in MATLAB -Script to download and build HTK - useful for running experiments
author tomwalters
date Thu, 25 Feb 2010 22:02:00 +0000
parents f4e712d41321
children 74196ff1cb98
rev   line source
tomwalters@12 1 // Copyright 2010, Thomas Walters
tomwalters@12 2 //
tomwalters@12 3 // AIM-C: A C++ implementation of the Auditory Image Model
tomwalters@12 4 // http://www.acousticscale.org/AIMC
tomwalters@12 5 //
tomwalters@12 6 // This program is free software: you can redistribute it and/or modify
tomwalters@12 7 // it under the terms of the GNU General Public License as published by
tomwalters@12 8 // the Free Software Foundation, either version 3 of the License, or
tomwalters@12 9 // (at your option) any later version.
tomwalters@12 10 //
tomwalters@12 11 // This program is distributed in the hope that it will be useful,
tomwalters@12 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
tomwalters@12 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
tomwalters@12 14 // GNU General Public License for more details.
tomwalters@12 15 //
tomwalters@12 16 // You should have received a copy of the GNU General Public License
tomwalters@12 17 // along with this program. If not, see <http://www.gnu.org/licenses/>.
tomwalters@12 18
tomwalters@12 19 /*!
tomwalters@12 20 * \author Thomas Walters <tom@acousticscale.org>
tomwalters@12 21 * \date created 2010/02/19
tomwalters@12 22 * \version \$Id$
tomwalters@12 23 */
tomwalters@12 24
tomwalters@15 25 #include <cmath>
tomwalters@15 26
tomwalters@12 27 #include "Modules/SSI/ModuleSSI.h"
tomwalters@12 28
tomwalters@12 29 namespace aimc {
tomwalters@12 30 ModuleSSI::ModuleSSI(Parameters *params) : Module(params) {
tomwalters@12 31 module_description_ = "Size-shape image (aka the 'sscAI')";
tomwalters@12 32 module_identifier_ = "ssi";
tomwalters@12 33 module_type_ = "ssi";
tomwalters@12 34 module_version_ = "$Id$";
tomwalters@12 35
tomwalters@32 36 // Cut off the SSI at the end of the first cycle
tomwalters@32 37 do_pitch_cutoff_ = parameters_->DefaultBool("ssi.pitch_cutoff", false);
tomwalters@32 38
tomwalters@32 39 // Weight the values in each channel more strongly if the channel was
tomwalters@32 40 // truncated due to the pitch cutoff. This ensures that the same amount of
tomwalters@32 41 // energy remains in the SSI spectral profile
tomwalters@32 42 weight_by_cutoff_ = parameters_->DefaultBool("ssi.weight_by_cutoff", false);
tomwalters@32 43
tomwalters@32 44 // Weight the values in each channel more strongly if the channel was
tomwalters@32 45 // scaled such that the end goes off the edge of the computed SSI.
tomwalters@32 46 // Again, this ensures that the overall energy of the spectral profile
tomwalters@32 47 // remains the same.
tomwalters@32 48 weight_by_scaling_ = parameters_->DefaultBool("ssi.weight_by_scaling",
tomwalters@32 49 false);
tomwalters@32 50
tomwalters@32 51 // Time from the zero-lag line of the SAI from which to start searching
tomwalters@32 52 // for a maximum in the input SAI's temporal profile.
tomwalters@32 53 pitch_search_start_ms_ = parameters_->DefaultFloat(
tomwalters@32 54 "ssi.pitch_search_start_ms", 2.0f);
tomwalters@32 55
tomwalters@32 56 // Total width in cycles of the whole SSI
tomwalters@32 57 ssi_width_cycles_ = parameters_->DefaultFloat("ssi.width_cycles", 10.0f);
tomwalters@32 58
tomwalters@32 59 // Set to true to make the cycles axis logarithmic (ie indexing by gamma
tomwalters@32 60 // rather than by cycles)
tomwalters@32 61 log_cycles_axis_ = parameters_->DefaultBool("ssi.log_cycles_axis", true);
tomwalters@32 62
tomwalters@32 63 // The centre frequency of the channel which will just fill the complete
tomwalters@32 64 // width of the SSI buffer
tomwalters@32 65 pivot_cf_ = parameters_->DefaultFloat("ssi.pivot_cf", 1000.0f);
tomwalters@12 66 }
tomwalters@12 67
tomwalters@12 68 ModuleSSI::~ModuleSSI() {
tomwalters@12 69 }
tomwalters@12 70
tomwalters@12 71 bool ModuleSSI::InitializeInternal(const SignalBank &input) {
tomwalters@12 72 // Copy the parameters of the input signal bank into internal variables, so
tomwalters@12 73 // that they can be checked later.
tomwalters@12 74 sample_rate_ = input.sample_rate();
tomwalters@12 75 buffer_length_ = input.buffer_length();
tomwalters@12 76 channel_count_ = input.channel_count();
tomwalters@12 77
tomwalters@32 78 ssi_width_samples_ = sample_rate_ * ssi_width_cycles_ / pivot_cf_;
tomwalters@15 79 if (ssi_width_samples_ > buffer_length_) {
tomwalters@15 80 ssi_width_samples_ = buffer_length_;
tomwalters@32 81 float cycles = ssi_width_samples_ * pivot_cf_ / sample_rate_;
tomwalters@15 82 LOG_INFO(_T("Requested SSI width of %f cycles is too long for the "
tomwalters@15 83 "input buffer length of %d samples. The SSI will be "
tomwalters@15 84 "truncated at %d samples wide. This corresponds to a width "
tomwalters@15 85 "of %f cycles."), ssi_width_cycles_, buffer_length_,
tomwalters@15 86 ssi_width_samples_, cycles);
tomwalters@15 87 ssi_width_cycles_ = cycles;
tomwalters@15 88 }
tomwalters@15 89 output_.Initialize(channel_count_, ssi_width_samples_, sample_rate_);
tomwalters@12 90 return true;
tomwalters@12 91 }
tomwalters@12 92
tomwalters@12 93 void ModuleSSI::ResetInternal() {
tomwalters@12 94 }
tomwalters@12 95
tomwalters@32 96 int ModuleSSI::ExtractPitchIndex(const SignalBank &input) const {
tomwalters@32 97 // Generate temporal profile of the SAI
tomwalters@32 98 vector<float> sai_temporal_profile(buffer_length_, 0.0f);
tomwalters@32 99 for (int i = 0; i < buffer_length_; ++i) {
tomwalters@32 100 float val = 0.0f;
tomwalters@32 101 for (int ch = 0; ch < channel_count_; ++ch) {
tomwalters@32 102 val += input.sample(ch, i);
tomwalters@32 103 }
tomwalters@32 104 sai_temporal_profile[i] = val;
tomwalters@32 105 }
tomwalters@32 106
tomwalters@32 107 // Find pitch value
tomwalters@32 108 int start_sample = floor(pitch_search_start_ms_ * sample_rate_ / 1000.0f);
tomwalters@32 109 int max_idx = 0;
tomwalters@32 110 float max_val = 0.0f;
tomwalters@32 111 for (int i = start_sample; i < buffer_length_; ++i) {
tomwalters@32 112 if (sai_temporal_profile[i] > max_val) {
tomwalters@32 113 max_idx = i;
tomwalters@32 114 max_val = sai_temporal_profile[i];
tomwalters@32 115 }
tomwalters@32 116 }
tomwalters@32 117 return max_idx;
tomwalters@32 118 }
tomwalters@32 119
tomwalters@12 120 void ModuleSSI::Process(const SignalBank &input) {
tomwalters@12 121 // Check to see if the module has been initialized. If not, processing
tomwalters@12 122 // should not continue.
tomwalters@12 123 if (!initialized_) {
tomwalters@13 124 LOG_ERROR(_T("Module %s not initialized."), module_identifier_.c_str());
tomwalters@12 125 return;
tomwalters@12 126 }
tomwalters@12 127
tomwalters@12 128 // Check that ths input this time is the same as the input passed to
tomwalters@12 129 // Initialize()
tomwalters@12 130 if (buffer_length_ != input.buffer_length()
tomwalters@12 131 || channel_count_ != input.channel_count()) {
tomwalters@12 132 LOG_ERROR(_T("Mismatch between input to Initialize() and input to "
tomwalters@13 133 "Process() in module %s."), module_identifier_.c_str());
tomwalters@12 134 return;
tomwalters@12 135 }
tomwalters@12 136
tomwalters@15 137 output_.set_start_time(input.start_time());
tomwalters@12 138
tomwalters@32 139 int pitch_index = buffer_length_ - 1;
tomwalters@32 140 if (do_pitch_cutoff_) {
tomwalters@32 141 pitch_index = ExtractPitchIndex(input);
tomwalters@32 142 }
tomwalters@32 143
tomwalters@15 144 for (int ch = 0; ch < channel_count_; ++ch) {
tomwalters@32 145 float centre_frequency = input.centre_frequency(ch);
tomwalters@15 146 // Copy the buffer from input to output, addressing by h-value
tomwalters@15 147 for (int i = 0; i < ssi_width_samples_; ++i) {
tomwalters@32 148 float h;
tomwalters@32 149 float cycle_samples = sample_rate_ / centre_frequency;
tomwalters@32 150 if (log_cycles_axis_) {
tomwalters@32 151 float gamma_min = -1.0f;
tomwalters@32 152 float gamma_max = log2(ssi_width_cycles_);
tomwalters@32 153 float gamma = gamma_min + (gamma_max - gamma_min)
tomwalters@32 154 * static_cast<float>(i)
tomwalters@32 155 / static_cast<float>(ssi_width_samples_);
tomwalters@32 156 h = pow(2.0f, gamma);
tomwalters@32 157 } else {
tomwalters@32 158 h = static_cast<float>(i) * ssi_width_cycles_
tomwalters@32 159 / static_cast<float>(ssi_width_samples_);
tomwalters@32 160 }
tomwalters@12 161
tomwalters@15 162 // The index into the input array is a floating-point number, which is
tomwalters@15 163 // split into a whole part and a fractional part. The whole part and
tomwalters@15 164 // fractional part are found, and are used to linearly interpolate
tomwalters@15 165 // between input samples to yield an output sample.
tomwalters@15 166 double whole_part;
tomwalters@15 167 float frac_part = modf(h * cycle_samples, &whole_part);
tomwalters@32 168 int sample = floor(whole_part);
tomwalters@32 169
tomwalters@32 170 float weight = 1.0f;
tomwalters@32 171
tomwalters@32 172 int cutoff_index = buffer_length_ - 1;
tomwalters@32 173 if (do_pitch_cutoff_) {
tomwalters@32 174 if (pitch_index < cutoff_index) {
tomwalters@32 175 if (weight_by_cutoff_) {
tomwalters@32 176 weight *= static_cast<float>(buffer_length_)
tomwalters@32 177 / static_cast<float>(pitch_index);
tomwalters@32 178 }
tomwalters@32 179 cutoff_index = pitch_index;
tomwalters@32 180 }
tomwalters@32 181 }
tomwalters@32 182
tomwalters@32 183 if (weight_by_scaling_) {
tomwalters@32 184 if (centre_frequency > pivot_cf_) {
tomwalters@32 185 weight *= (centre_frequency / pivot_cf_);
tomwalters@32 186 }
tomwalters@32 187 }
tomwalters@15 188
tomwalters@15 189 float val;
tomwalters@32 190 if (sample < cutoff_index) {
tomwalters@15 191 float curr_sample = input.sample(ch, sample);
tomwalters@15 192 float next_sample = input.sample(ch, sample + 1);
tomwalters@32 193 val = weight * (curr_sample
tomwalters@32 194 + frac_part * (next_sample - curr_sample));
tomwalters@15 195 } else {
tomwalters@32 196 // Set out-of-range values to a negative number to signify that
tomwalters@32 197 // they really don't exist, and shouldn't be used in feature
tomwalters@32 198 // calculations.
tomwalters@32 199 val = -1.0f;
tomwalters@15 200 }
tomwalters@15 201 output_.set_sample(ch, i, val);
tomwalters@15 202 }
tomwalters@15 203 }
tomwalters@12 204 PushOutput();
tomwalters@12 205 }
tomwalters@12 206 } // namespace aimc
tomwalters@12 207