tom@419: // Copyright 2010, Google tom@419: // tom@419: // AIM-C: A C++ implementation of the Auditory Image Model tom@419: // http://www.acousticscale.org/AIMC tom@419: // tom@419: // Licensed under the Apache License, Version 2.0 (the "License"); tom@419: // you may not use this file except in compliance with the License. tom@419: // You may obtain a copy of the License at tom@419: // tom@419: // http://www.apache.org/licenses/LICENSE-2.0 tom@419: // tom@419: // Unless required by applicable law or agreed to in writing, software tom@419: // distributed under the License is distributed on an "AS IS" BASIS, tom@419: // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. tom@419: // See the License for the specific language governing permissions and tom@419: // limitations under the License. tom@419: tom@419: /*! tom@419: * \author Thomas Walters tom@419: * \date created 2010-06-14 tom@419: * \version \$Id$ tom@419: */ tom@419: tom@419: #include "Modules/Features/ModuleBoxes.h" tom@419: tom@419: namespace aimc { tom@419: ModuleBoxes::ModuleBoxes(Parameters *params) : Module(params) { tom@419: module_description_ = "'Box-cutting' routine to generate dense features"; tom@419: module_identifier_ = "boxes"; tom@419: module_type_ = "features"; tom@419: module_version_ = "$Id$"; tom@419: tom@419: box_size_spectral_ = parameters_->DefaultInt("boxes.spectral_size", 16); tom@419: box_size_temporal_ = parameters_->DefaultInt("boxes.temporal_size", 32); tom@419: } tom@419: tom@419: ModuleBoxes::~ModuleBoxes() { tom@419: } tom@419: tom@419: bool ModuleBoxes::InitializeInternal(const SignalBank &input) { tom@419: // Copy the parameters of the input signal bank into internal variables, so tom@419: // that they can be checked later. tom@419: sample_rate_ = input.sample_rate(); tom@419: buffer_length_ = input.buffer_length(); tom@419: channel_count_ = input.channel_count(); tom@419: tom@419: int channels_height = box_size_spectral_; tom@419: while (channels_height < channel_count_ / 2) { tom@419: int top = channel_count_ - 1; tom@419: while (top - channels_height >= 0) { tom@419: box_limits_channels_.push_back(std::make_pair(top, tom@419: top - channels_height)); tom@419: LOG_INFO("ch: t %d, b %d", top, top - channels_height); tom@419: top -= channels_height / 2; tom@419: } tom@419: channels_height *= 2; tom@419: } tom@419: tom@419: int temporal_width = box_size_temporal_; tom@419: while (temporal_width < buffer_length_) { tom@419: box_limits_time_.push_back(temporal_width); tom@419: LOG_INFO("sp: %d", temporal_width); tom@419: temporal_width *= 2; tom@419: } tom@419: tom@419: box_count_ = box_limits_time_.size() * box_limits_channels_.size(); tom@419: feature_size_ = box_size_spectral_ + box_size_temporal_; tom@419: LOG_INFO("Total box count is %d", box_count_); tom@419: LOG_INFO("Total feature size is %d", feature_size_); tom@419: tom@419: output_.Initialize(box_count_, feature_size_, 1.0f); tom@419: return true; tom@419: } tom@419: tom@419: void ModuleBoxes::ResetInternal() { tom@419: } tom@419: tom@419: void ModuleBoxes::Process(const SignalBank &input) { tom@419: // Check to see if the module has been initialized. If not, processing tom@419: // should not continue. tom@419: if (!initialized_) { tom@419: LOG_ERROR(_T("Module %s not initialized."), module_identifier_.c_str()); tom@419: return; tom@419: } tom@419: tom@419: // Check that ths input this time is the same as the input passed to tom@419: // Initialize() tom@419: if (buffer_length_ != input.buffer_length() tom@419: || channel_count_ != input.channel_count()) { tom@419: LOG_ERROR(_T("Mismatch between input to Initialize() and input to " tom@419: "Process() in module %s."), module_identifier_.c_str()); tom@419: return; tom@419: } tom@419: tom@419: int box_index = 0; tom@419: for (int c = 0; c < static_cast(box_limits_channels_.size()); ++c) { tom@419: for (int s = 0; s < static_cast(box_limits_time_.size()); ++s) { tom@419: int pixel_size_channels = (box_limits_channels_[c].first tom@419: - box_limits_channels_[c].second) tom@419: / box_size_spectral_; tom@419: int pixel_size_samples = box_limits_time_[s] / box_size_temporal_; tom@419: vector > box; tom@419: vector line; tom@419: line.resize(box_size_temporal_, 0.0f); tom@419: box.resize(box_size_spectral_, line); tom@419: for (int i = 0; i < box_size_spectral_; ++i) { tom@419: for (int j = 0; j < box_size_temporal_; ++j) { tom@419: float pixel_value = 0.0f; tom@419: for (int k = i * pixel_size_channels; tom@419: k < (i + 1) * pixel_size_channels; ++k) { tom@419: for (int l = j * pixel_size_samples; tom@419: l < (j + 1) * pixel_size_samples; ++l) { tom@419: pixel_value += input.sample(k tom@419: + box_limits_channels_[c].second, l); tom@419: } tom@419: } tom@419: pixel_value /= (pixel_size_channels * pixel_size_samples); tom@419: box[i][j] = pixel_value; tom@419: } tom@419: } tom@419: int feature_index = 0; tom@419: for (int i = 0; i < box_size_spectral_; ++i) { tom@419: float feature_value = 0.0f; tom@419: for (int j = 0; j < box_size_temporal_; ++j) { tom@419: feature_value += box[i][j]; tom@419: } tom@419: feature_value /= box_size_temporal_; tom@419: output_.set_sample(box_index, feature_index, feature_value); tom@419: ++feature_index; tom@419: } tom@419: for (int j = 0; j < box_size_temporal_; ++j) { tom@419: float feature_value = 0.0f; tom@419: for (int i = 0; i < box_size_spectral_; ++i) { tom@419: feature_value += box[i][j]; tom@419: } tom@419: feature_value /= box_size_spectral_; tom@419: output_.set_sample(box_index, feature_index, feature_value); tom@419: ++feature_index; tom@419: } tom@419: ++box_index; tom@419: } tom@419: } tom@419: tom@419: PushOutput(); tom@419: } tom@419: } // namespace aimc