tomwalters@284
|
1 // Copyright 2010, Thomas Walters
|
tomwalters@284
|
2 //
|
tomwalters@284
|
3 // AIM-C: A C++ implementation of the Auditory Image Model
|
tomwalters@284
|
4 // http://www.acousticscale.org/AIMC
|
tomwalters@284
|
5 //
|
tomwalters@318
|
6 // Licensed under the Apache License, Version 2.0 (the "License");
|
tomwalters@318
|
7 // you may not use this file except in compliance with the License.
|
tomwalters@318
|
8 // You may obtain a copy of the License at
|
tomwalters@284
|
9 //
|
tomwalters@318
|
10 // http://www.apache.org/licenses/LICENSE-2.0
|
tomwalters@284
|
11 //
|
tomwalters@318
|
12 // Unless required by applicable law or agreed to in writing, software
|
tomwalters@318
|
13 // distributed under the License is distributed on an "AS IS" BASIS,
|
tomwalters@318
|
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
tomwalters@318
|
15 // See the License for the specific language governing permissions and
|
tomwalters@318
|
16 // limitations under the License.
|
tomwalters@284
|
17
|
tomwalters@284
|
18 /*!
|
tomwalters@284
|
19 * \author Thomas Walters <tom@acousticscale.org>
|
tomwalters@284
|
20 * \date created 2010/02/19
|
tomwalters@284
|
21 * \version \$Id$
|
tomwalters@284
|
22 */
|
tomwalters@284
|
23
|
tomwalters@287
|
24 #include <cmath>
|
tomwalters@287
|
25
|
tomwalters@284
|
26 #include "Modules/SSI/ModuleSSI.h"
|
tomwalters@284
|
27
|
tomwalters@284
|
28 namespace aimc {
|
tomwalters@330
|
29 #ifdef _MSC_VER
|
tomwalters@330
|
30 // MSVC doesn't define log2()
|
tomwalters@330
|
// Base-2 logarithm, computed via the change-of-base identity
// log2(n) = ln(n) / ln(2).
float log2(float n) {
  const double ln_of_two = log(2.0);
  const double ln_of_n = log(n);
  return static_cast<float>(ln_of_n / ln_of_two);
}
|
tomwalters@330
|
34 #endif
|
tomwalters@330
|
35
|
tomwalters@284
|
36 ModuleSSI::ModuleSSI(Parameters *params) : Module(params) {
|
tomwalters@284
|
37 module_description_ = "Size-shape image (aka the 'sscAI')";
|
tomwalters@284
|
38 module_identifier_ = "ssi";
|
tomwalters@284
|
39 module_type_ = "ssi";
|
tomwalters@284
|
40 module_version_ = "$Id$";
|
tomwalters@284
|
41
|
tomwalters@305
|
42 // Cut off the SSI at the end of the first cycle
|
tomwalters@305
|
43 do_pitch_cutoff_ = parameters_->DefaultBool("ssi.pitch_cutoff", false);
|
tomwalters@305
|
44
|
tomwalters@305
|
45 // Weight the values in each channel more strongly if the channel was
|
tomwalters@305
|
46 // truncated due to the pitch cutoff. This ensures that the same amount of
|
tomwalters@305
|
47 // energy remains in the SSI spectral profile
|
tomwalters@305
|
48 weight_by_cutoff_ = parameters_->DefaultBool("ssi.weight_by_cutoff", false);
|
tomwalters@305
|
49
|
tomwalters@305
|
50 // Weight the values in each channel more strongly if the channel was
|
tomwalters@305
|
51 // scaled such that the end goes off the edge of the computed SSI.
|
tomwalters@305
|
52 // Again, this ensures that the overall energy of the spectral profile
|
tomwalters@305
|
53 // remains the same.
|
tomwalters@305
|
54 weight_by_scaling_ = parameters_->DefaultBool("ssi.weight_by_scaling",
|
tomwalters@305
|
55 false);
|
tomwalters@305
|
56
|
tomwalters@305
|
57 // Time from the zero-lag line of the SAI from which to start searching
|
tomwalters@305
|
58 // for a maximum in the input SAI's temporal profile.
|
tomwalters@305
|
59 pitch_search_start_ms_ = parameters_->DefaultFloat(
|
tomwalters@411
|
60 "ssi.pitch_search_start_ms", 2.0f);
|
tomwalters@305
|
61
|
tomwalters@305
|
62 // Total width in cycles of the whole SSI
|
tomwalters@305
|
63 ssi_width_cycles_ = parameters_->DefaultFloat("ssi.width_cycles", 10.0f);
|
tomwalters@305
|
64
|
tomwalters@305
|
65 // Set to true to make the cycles axis logarithmic (ie indexing by gamma
|
tomwalters@305
|
66 // rather than by cycles)
|
tomwalters@305
|
67 log_cycles_axis_ = parameters_->DefaultBool("ssi.log_cycles_axis", true);
|
tomwalters@305
|
68
|
tomwalters@305
|
69 // The centre frequency of the channel which will just fill the complete
|
tomwalters@305
|
70 // width of the SSI buffer
|
tomwalters@305
|
71 pivot_cf_ = parameters_->DefaultFloat("ssi.pivot_cf", 1000.0f);
|
tomwalters@397
|
72
|
tomwalters@397
|
73 // Whether or not to do smooth offset when the pitch cutoff is active.
|
tomwalters@397
|
74 do_smooth_offset_ = parameters_->DefaultBool("ssi.do_smooth_offset", false);
|
tomwalters@397
|
75
|
tomwalters@397
|
76 // The number of cycles, centered on the pitch line, over which the SSI is taken
|
tomwalters@397
|
77 // to zero when doing the pitch cutoff.
|
tomwalters@397
|
78 smooth_offset_cycles_ = parameters_->DefaultFloat("ssi.smooth_offset_cycles", 3.0f);
|
tomwalters@284
|
79 }
|
tomwalters@284
|
80
|
tomwalters@284
|
81 ModuleSSI::~ModuleSSI() {
|
tomwalters@284
|
82 }
|
tomwalters@284
|
83
|
tomwalters@284
|
84 bool ModuleSSI::InitializeInternal(const SignalBank &input) {
|
tomwalters@284
|
85 // Copy the parameters of the input signal bank into internal variables, so
|
tomwalters@284
|
86 // that they can be checked later.
|
tomwalters@284
|
87 sample_rate_ = input.sample_rate();
|
tomwalters@284
|
88 buffer_length_ = input.buffer_length();
|
tomwalters@284
|
89 channel_count_ = input.channel_count();
|
tomwalters@284
|
90
|
tomwalters@305
|
91 ssi_width_samples_ = sample_rate_ * ssi_width_cycles_ / pivot_cf_;
|
tomwalters@287
|
92 if (ssi_width_samples_ > buffer_length_) {
|
tomwalters@287
|
93 ssi_width_samples_ = buffer_length_;
|
tomwalters@305
|
94 float cycles = ssi_width_samples_ * pivot_cf_ / sample_rate_;
|
tomwalters@287
|
95 LOG_INFO(_T("Requested SSI width of %f cycles is too long for the "
|
tomwalters@287
|
96 "input buffer length of %d samples. The SSI will be "
|
tomwalters@287
|
97 "truncated at %d samples wide. This corresponds to a width "
|
tomwalters@287
|
98 "of %f cycles."), ssi_width_cycles_, buffer_length_,
|
tomwalters@287
|
99 ssi_width_samples_, cycles);
|
tomwalters@287
|
100 ssi_width_cycles_ = cycles;
|
tomwalters@287
|
101 }
|
tomwalters@411
|
102 for (int i = 0; i < input.channel_count(); ++i) {
|
tomwalters@411
|
103 output_.set_centre_frequency(i, input.centre_frequency(i));
|
tomwalters@411
|
104 }
|
tomwalters@411
|
105
|
tom@420
|
106 h_.resize(ssi_width_samples_, 0.0);
|
tom@420
|
107 float gamma_min = -1.0f;
|
tom@420
|
108 float gamma_max = log2(ssi_width_cycles_);
|
tom@420
|
109 for (int i = 0; i < ssi_width_samples_; ++i) {
|
tom@420
|
110 if (log_cycles_axis_) {
|
tom@420
|
111 float gamma = gamma_min + (gamma_max - gamma_min)
|
tom@420
|
112 * static_cast<float>(i)
|
tom@420
|
113 / static_cast<float>(ssi_width_samples_);
|
tom@420
|
114 h_[i]= pow(2.0f, gamma);
|
tom@420
|
115 } else {
|
tom@420
|
116 h_[i] = static_cast<float>(i) * ssi_width_cycles_
|
tom@420
|
117 / static_cast<float>(ssi_width_samples_);
|
tom@420
|
118 }
|
tom@420
|
119 }
|
tom@420
|
120
|
tomwalters@287
|
121 output_.Initialize(channel_count_, ssi_width_samples_, sample_rate_);
|
tomwalters@284
|
122 return true;
|
tomwalters@284
|
123 }
|
tomwalters@284
|
124
|
tomwalters@284
|
125 void ModuleSSI::ResetInternal() {
|
tomwalters@284
|
126 }
|
tomwalters@284
|
127
|
tomwalters@305
|
128 int ModuleSSI::ExtractPitchIndex(const SignalBank &input) const {
|
tomwalters@305
|
129 // Generate temporal profile of the SAI
|
tomwalters@305
|
130 vector<float> sai_temporal_profile(buffer_length_, 0.0f);
|
tomwalters@305
|
131 for (int i = 0; i < buffer_length_; ++i) {
|
tomwalters@305
|
132 float val = 0.0f;
|
tomwalters@305
|
133 for (int ch = 0; ch < channel_count_; ++ch) {
|
tomwalters@305
|
134 val += input.sample(ch, i);
|
tomwalters@305
|
135 }
|
tomwalters@305
|
136 sai_temporal_profile[i] = val;
|
tomwalters@305
|
137 }
|
tomwalters@305
|
138
|
tomwalters@305
|
139 // Find pitch value
|
tomwalters@305
|
140 int start_sample = floor(pitch_search_start_ms_ * sample_rate_ / 1000.0f);
|
tomwalters@305
|
141 int max_idx = 0;
|
tomwalters@305
|
142 float max_val = 0.0f;
|
tomwalters@305
|
143 for (int i = start_sample; i < buffer_length_; ++i) {
|
tomwalters@305
|
144 if (sai_temporal_profile[i] > max_val) {
|
tomwalters@305
|
145 max_idx = i;
|
tomwalters@305
|
146 max_val = sai_temporal_profile[i];
|
tomwalters@305
|
147 }
|
tomwalters@305
|
148 }
|
tomwalters@305
|
149 return max_idx;
|
tomwalters@305
|
150 }
|
tomwalters@305
|
151
|
tomwalters@284
|
152 void ModuleSSI::Process(const SignalBank &input) {
|
tomwalters@284
|
153 // Check to see if the module has been initialized. If not, processing
|
tomwalters@284
|
154 // should not continue.
|
tomwalters@284
|
155 if (!initialized_) {
|
tomwalters@285
|
156 LOG_ERROR(_T("Module %s not initialized."), module_identifier_.c_str());
|
tomwalters@284
|
157 return;
|
tomwalters@284
|
158 }
|
tomwalters@284
|
159
|
tomwalters@284
|
160 // Check that ths input this time is the same as the input passed to
|
tomwalters@284
|
161 // Initialize()
|
tomwalters@284
|
162 if (buffer_length_ != input.buffer_length()
|
tomwalters@284
|
163 || channel_count_ != input.channel_count()) {
|
tomwalters@284
|
164 LOG_ERROR(_T("Mismatch between input to Initialize() and input to "
|
tomwalters@285
|
165 "Process() in module %s."), module_identifier_.c_str());
|
tomwalters@284
|
166 return;
|
tomwalters@284
|
167 }
|
tomwalters@284
|
168
|
tomwalters@287
|
169 output_.set_start_time(input.start_time());
|
tomwalters@284
|
170
|
tomwalters@305
|
171 int pitch_index = buffer_length_ - 1;
|
tomwalters@305
|
172 if (do_pitch_cutoff_) {
|
tomwalters@305
|
173 pitch_index = ExtractPitchIndex(input);
|
tomwalters@305
|
174 }
|
tomwalters@305
|
175
|
tomwalters@287
|
176 for (int ch = 0; ch < channel_count_; ++ch) {
|
tomwalters@305
|
177 float centre_frequency = input.centre_frequency(ch);
|
tomwalters@411
|
178 float cycle_samples = sample_rate_ / centre_frequency;
|
tomwalters@397
|
179
|
tomwalters@397
|
180 float channel_weight = 1.0f;
|
tomwalters@397
|
181 int cutoff_index = buffer_length_ - 1;
|
tomwalters@397
|
182 if (do_pitch_cutoff_) {
|
tomwalters@397
|
183 if (pitch_index < cutoff_index) {
|
tomwalters@397
|
184 if (weight_by_cutoff_) {
|
tomwalters@397
|
185 channel_weight = static_cast<float>(buffer_length_)
|
tomwalters@411
|
186 / static_cast<float>(pitch_index);
|
tomwalters@397
|
187 }
|
tomwalters@397
|
188 cutoff_index = pitch_index;
|
tomwalters@397
|
189 }
|
tomwalters@397
|
190 }
|
tomwalters@397
|
191
|
tomwalters@397
|
192 // tanh(3) is about 0.995. Seems reasonable.
|
tomwalters@411
|
193 float smooth_pitch_constant = 3.0f / smooth_offset_cycles_;
|
tomwalters@397
|
194 float pitch_h = 0.0f;
|
tomwalters@397
|
195 if (do_smooth_offset_) {
|
tomwalters@411
|
196 pitch_h = static_cast<float>(pitch_index) / cycle_samples;
|
tomwalters@397
|
197 }
|
tomwalters@397
|
198
|
tomwalters@411
|
199 // Copy the buffer from input to output, addressing by h-value.
|
tomwalters@287
|
200 for (int i = 0; i < ssi_width_samples_; ++i) {
|
tom@420
|
201
|
tomwalters@287
|
202 // The index into the input array is a floating-point number, which is
|
tomwalters@287
|
203 // split into a whole part and a fractional part. The whole part and
|
tomwalters@287
|
204 // fractional part are found, and are used to linearly interpolate
|
tomwalters@287
|
205 // between input samples to yield an output sample.
|
tomwalters@287
|
206 double whole_part;
|
tom@420
|
207 float frac_part = modf(h_[i] * cycle_samples, &whole_part);
|
tomwalters@305
|
208 int sample = floor(whole_part);
|
tomwalters@305
|
209
|
tomwalters@397
|
210 float weight = channel_weight;
|
tomwalters@397
|
211
|
tomwalters@397
|
212 if (do_smooth_offset_ && do_pitch_cutoff_) {
|
tomwalters@397
|
213 // Smoothing around the pitch cutoff line.
|
tom@420
|
214 float pitch_weight = (1.0f + tanh((pitch_h - h_[i])
|
tomwalters@411
|
215 * smooth_pitch_constant)) / 2.0f;
|
tomwalters@411
|
216 weight *= pitch_weight;
|
tomwalters@411
|
217 //LOG_INFO("Channel %d, Sample %d. Pitch weight: %f", ch, i, pitch_weight);
|
tomwalters@305
|
218 }
|
tomwalters@305
|
219
|
tomwalters@305
|
220 if (weight_by_scaling_) {
|
tomwalters@305
|
221 if (centre_frequency > pivot_cf_) {
|
tomwalters@305
|
222 weight *= (centre_frequency / pivot_cf_);
|
tomwalters@305
|
223 }
|
tomwalters@305
|
224 }
|
tomwalters@287
|
225
|
tomwalters@287
|
226 float val;
|
tomwalters@397
|
227 if (sample < cutoff_index || do_smooth_offset_) {
|
tomwalters@287
|
228 float curr_sample = input.sample(ch, sample);
|
tomwalters@287
|
229 float next_sample = input.sample(ch, sample + 1);
|
tomwalters@305
|
230 val = weight * (curr_sample
|
tomwalters@305
|
231 + frac_part * (next_sample - curr_sample));
|
tomwalters@287
|
232 } else {
|
tomwalters@309
|
233 val = 0.0f;
|
tomwalters@287
|
234 }
|
tomwalters@287
|
235 output_.set_sample(ch, i, val);
|
tomwalters@287
|
236 }
|
tomwalters@287
|
237 }
|
tomwalters@284
|
238 PushOutput();
|
tomwalters@284
|
239 }
|
tomwalters@284
|
240 } // namespace aimc
|
tomwalters@284
|
241
|