tom@134: // Copyright 2010, Google tom@134: // tom@134: // AIM-C: A C++ implementation of the Auditory Image Model tom@134: // http://www.acousticscale.org/AIMC tom@134: // tom@134: // Licensed under the Apache License, Version 2.0 (the "License"); tom@134: // you may not use this file except in compliance with the License. tom@134: // You may obtain a copy of the License at tom@134: // tom@134: // http://www.apache.org/licenses/LICENSE-2.0 tom@134: // tom@134: // Unless required by applicable law or agreed to in writing, software tom@134: // distributed under the License is distributed on an "AS IS" BASIS, tom@134: // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. tom@134: // See the License for the specific language governing permissions and tom@134: // limitations under the License. tom@134: tom@134: /*! tom@134: * \author Thomas Walters tom@134: * \date created 2010-06-14 tom@134: * \version \$Id$ tom@134: */ tom@134: tom@134: #include "Modules/Features/ModuleBoxes.h" tom@134: tom@134: namespace aimc { tom@134: ModuleBoxes::ModuleBoxes(Parameters *params) : Module(params) { tom@134: module_description_ = "'Box-cutting' routine to generate dense features"; tom@134: module_identifier_ = "boxes"; tom@134: module_type_ = "features"; tom@134: module_version_ = "$Id$"; tom@134: tom@134: box_size_spectral_ = parameters_->DefaultInt("boxes.spectral_size", 16); tom@134: box_size_temporal_ = parameters_->DefaultInt("boxes.temporal_size", 32); tom@134: } tom@134: tom@134: ModuleBoxes::~ModuleBoxes() { tom@134: } tom@134: tom@134: bool ModuleBoxes::InitializeInternal(const SignalBank &input) { tom@134: // Copy the parameters of the input signal bank into internal variables, so tom@134: // that they can be checked later. tom@134: sample_rate_ = input.sample_rate(); tom@134: buffer_length_ = input.buffer_length(); tom@134: channel_count_ = input.channel_count(); tom@134: tom@134: int channels_height = box_size_spectral_; tom@134: while (channels_height < channel_count_ / 2) { tom@134: int top = channel_count_ - 1; tom@134: while (top - channels_height >= 0) { tom@134: box_limits_channels_.push_back(std::make_pair(top, tom@134: top - channels_height)); tom@134: LOG_INFO("ch: t %d, b %d", top, top - channels_height); tom@134: top -= channels_height / 2; tom@134: } tom@134: channels_height *= 2; tom@134: } tom@134: tom@134: int temporal_width = box_size_temporal_; tom@134: while (temporal_width < buffer_length_) { tom@134: box_limits_time_.push_back(temporal_width); tom@134: LOG_INFO("sp: %d", temporal_width); tom@134: temporal_width *= 2; tom@134: } tom@134: tom@134: box_count_ = box_limits_time_.size() * box_limits_channels_.size(); tom@134: feature_size_ = box_size_spectral_ + box_size_temporal_; tom@134: LOG_INFO("Total box count is %d", box_count_); tom@134: LOG_INFO("Total feature size is %d", feature_size_); tom@134: tom@134: output_.Initialize(box_count_, feature_size_, 1.0f); tom@134: return true; tom@134: } tom@134: tom@134: void ModuleBoxes::ResetInternal() { tom@134: } tom@134: tom@134: void ModuleBoxes::Process(const SignalBank &input) { tom@134: // Check to see if the module has been initialized. If not, processing tom@134: // should not continue. tom@134: if (!initialized_) { tom@134: LOG_ERROR(_T("Module %s not initialized."), module_identifier_.c_str()); tom@134: return; tom@134: } tom@134: tom@134: // Check that ths input this time is the same as the input passed to tom@134: // Initialize() tom@134: if (buffer_length_ != input.buffer_length() tom@134: || channel_count_ != input.channel_count()) { tom@134: LOG_ERROR(_T("Mismatch between input to Initialize() and input to " tom@134: "Process() in module %s."), module_identifier_.c_str()); tom@134: return; tom@134: } tom@134: tom@134: int box_index = 0; tom@134: for (int c = 0; c < static_cast(box_limits_channels_.size()); ++c) { tom@134: for (int s = 0; s < static_cast(box_limits_time_.size()); ++s) { tom@134: int pixel_size_channels = (box_limits_channels_[c].first tom@134: - box_limits_channels_[c].second) tom@134: / box_size_spectral_; tom@134: int pixel_size_samples = box_limits_time_[s] / box_size_temporal_; tom@134: vector > box; tom@134: vector line; tom@134: line.resize(box_size_temporal_, 0.0f); tom@134: box.resize(box_size_spectral_, line); tom@134: for (int i = 0; i < box_size_spectral_; ++i) { tom@134: for (int j = 0; j < box_size_temporal_; ++j) { tom@134: float pixel_value = 0.0f; tom@134: for (int k = i * pixel_size_channels; tom@134: k < (i + 1) * pixel_size_channels; ++k) { tom@134: for (int l = j * pixel_size_samples; tom@134: l < (j + 1) * pixel_size_samples; ++l) { tom@134: pixel_value += input.sample(k tom@134: + box_limits_channels_[c].second, l); tom@134: } tom@134: } tom@134: pixel_value /= (pixel_size_channels * pixel_size_samples); tom@134: box[i][j] = pixel_value; tom@134: } tom@134: } tom@134: int feature_index = 0; tom@134: for (int i = 0; i < box_size_spectral_; ++i) { tom@134: float feature_value = 0.0f; tom@134: for (int j = 0; j < box_size_temporal_; ++j) { tom@134: feature_value += box[i][j]; tom@134: } tom@134: feature_value /= box_size_temporal_; tom@134: output_.set_sample(box_index, feature_index, feature_value); tom@134: ++feature_index; tom@134: } tom@134: for (int j = 0; j < box_size_temporal_; ++j) { tom@134: float feature_value = 0.0f; tom@134: for (int i = 0; i < box_size_spectral_; ++i) { tom@134: feature_value += box[i][j]; tom@134: } tom@134: feature_value /= box_size_spectral_; tom@134: output_.set_sample(box_index, feature_index, feature_value); tom@134: ++feature_index; tom@134: } tom@134: ++box_index; tom@134: } tom@134: } tom@134: tom@134: PushOutput(); tom@134: } tom@134: } // namespace aimc