tom@419
|
1 // Copyright 2010, Google
|
tom@419
|
2 //
|
tom@419
|
3 // AIM-C: A C++ implementation of the Auditory Image Model
|
tom@419
|
4 // http://www.acousticscale.org/AIMC
|
tom@419
|
5 //
|
tom@419
|
6 // Licensed under the Apache License, Version 2.0 (the "License");
|
tom@419
|
7 // you may not use this file except in compliance with the License.
|
tom@419
|
8 // You may obtain a copy of the License at
|
tom@419
|
9 //
|
tom@419
|
10 // http://www.apache.org/licenses/LICENSE-2.0
|
tom@419
|
11 //
|
tom@419
|
12 // Unless required by applicable law or agreed to in writing, software
|
tom@419
|
13 // distributed under the License is distributed on an "AS IS" BASIS,
|
tom@419
|
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
tom@419
|
15 // See the License for the specific language governing permissions and
|
tom@419
|
16 // limitations under the License.
|
tom@419
|
17
|
tom@419
|
18 /*!
|
tom@419
|
19 * \author Thomas Walters <tom@acousticscale.org>
|
tom@419
|
20 * \date created 2010-06-14
|
tom@419
|
21 * \version \$Id$
|
tom@419
|
22 */
|
tom@419
|
23
|
tom@419
|
24 #include "Modules/Features/ModuleBoxes.h"
|
tom@419
|
25
|
tom@419
|
26 namespace aimc {
|
tom@419
|
27 ModuleBoxes::ModuleBoxes(Parameters *params) : Module(params) {
|
tom@419
|
28 module_description_ = "'Box-cutting' routine to generate dense features";
|
tom@419
|
29 module_identifier_ = "boxes";
|
tom@419
|
30 module_type_ = "features";
|
tom@419
|
31 module_version_ = "$Id$";
|
tom@419
|
32
|
tom@419
|
33 box_size_spectral_ = parameters_->DefaultInt("boxes.spectral_size", 16);
|
tom@419
|
34 box_size_temporal_ = parameters_->DefaultInt("boxes.temporal_size", 32);
|
tom@419
|
35 }
|
tom@419
|
36
|
tom@419
|
37 ModuleBoxes::~ModuleBoxes() {
|
tom@419
|
38 }
|
tom@419
|
39
|
tom@419
|
40 bool ModuleBoxes::InitializeInternal(const SignalBank &input) {
|
tom@419
|
41 // Copy the parameters of the input signal bank into internal variables, so
|
tom@419
|
42 // that they can be checked later.
|
tom@419
|
43 sample_rate_ = input.sample_rate();
|
tom@419
|
44 buffer_length_ = input.buffer_length();
|
tom@419
|
45 channel_count_ = input.channel_count();
|
tom@419
|
46
|
tom@419
|
47 int channels_height = box_size_spectral_;
|
tom@419
|
48 while (channels_height < channel_count_ / 2) {
|
tom@419
|
49 int top = channel_count_ - 1;
|
tom@419
|
50 while (top - channels_height >= 0) {
|
tom@419
|
51 box_limits_channels_.push_back(std::make_pair(top,
|
tom@419
|
52 top - channels_height));
|
tom@419
|
53 LOG_INFO("ch: t %d, b %d", top, top - channels_height);
|
tom@419
|
54 top -= channels_height / 2;
|
tom@419
|
55 }
|
tom@419
|
56 channels_height *= 2;
|
tom@419
|
57 }
|
tom@419
|
58
|
tom@419
|
59 int temporal_width = box_size_temporal_;
|
tom@419
|
60 while (temporal_width < buffer_length_) {
|
tom@419
|
61 box_limits_time_.push_back(temporal_width);
|
tom@419
|
62 LOG_INFO("sp: %d", temporal_width);
|
tom@419
|
63 temporal_width *= 2;
|
tom@419
|
64 }
|
tom@419
|
65
|
tom@419
|
66 box_count_ = box_limits_time_.size() * box_limits_channels_.size();
|
tom@419
|
67 feature_size_ = box_size_spectral_ + box_size_temporal_;
|
tom@419
|
68 LOG_INFO("Total box count is %d", box_count_);
|
tom@419
|
69 LOG_INFO("Total feature size is %d", feature_size_);
|
tom@419
|
70
|
tom@419
|
71 output_.Initialize(box_count_, feature_size_, 1.0f);
|
tom@419
|
72 return true;
|
tom@419
|
73 }
|
tom@419
|
74
|
tom@419
|
75 void ModuleBoxes::ResetInternal() {
|
tom@419
|
76 }
|
tom@419
|
77
|
tom@419
|
78 void ModuleBoxes::Process(const SignalBank &input) {
|
tom@419
|
79 // Check to see if the module has been initialized. If not, processing
|
tom@419
|
80 // should not continue.
|
tom@419
|
81 if (!initialized_) {
|
tom@419
|
82 LOG_ERROR(_T("Module %s not initialized."), module_identifier_.c_str());
|
tom@419
|
83 return;
|
tom@419
|
84 }
|
tom@419
|
85
|
tom@419
|
86 // Check that ths input this time is the same as the input passed to
|
tom@419
|
87 // Initialize()
|
tom@419
|
88 if (buffer_length_ != input.buffer_length()
|
tom@419
|
89 || channel_count_ != input.channel_count()) {
|
tom@419
|
90 LOG_ERROR(_T("Mismatch between input to Initialize() and input to "
|
tom@419
|
91 "Process() in module %s."), module_identifier_.c_str());
|
tom@419
|
92 return;
|
tom@419
|
93 }
|
tom@419
|
94
|
tom@419
|
95 int box_index = 0;
|
tom@419
|
96 for (int c = 0; c < static_cast<int>(box_limits_channels_.size()); ++c) {
|
tom@419
|
97 for (int s = 0; s < static_cast<int>(box_limits_time_.size()); ++s) {
|
tom@419
|
98 int pixel_size_channels = (box_limits_channels_[c].first
|
tom@419
|
99 - box_limits_channels_[c].second)
|
tom@419
|
100 / box_size_spectral_;
|
tom@419
|
101 int pixel_size_samples = box_limits_time_[s] / box_size_temporal_;
|
tom@419
|
102 vector<vector<float> > box;
|
tom@419
|
103 vector<float> line;
|
tom@419
|
104 line.resize(box_size_temporal_, 0.0f);
|
tom@419
|
105 box.resize(box_size_spectral_, line);
|
tom@419
|
106 for (int i = 0; i < box_size_spectral_; ++i) {
|
tom@419
|
107 for (int j = 0; j < box_size_temporal_; ++j) {
|
tom@419
|
108 float pixel_value = 0.0f;
|
tom@419
|
109 for (int k = i * pixel_size_channels;
|
tom@419
|
110 k < (i + 1) * pixel_size_channels; ++k) {
|
tom@419
|
111 for (int l = j * pixel_size_samples;
|
tom@419
|
112 l < (j + 1) * pixel_size_samples; ++l) {
|
tom@419
|
113 pixel_value += input.sample(k
|
tom@419
|
114 + box_limits_channels_[c].second, l);
|
tom@419
|
115 }
|
tom@419
|
116 }
|
tom@419
|
117 pixel_value /= (pixel_size_channels * pixel_size_samples);
|
tom@419
|
118 box[i][j] = pixel_value;
|
tom@419
|
119 }
|
tom@419
|
120 }
|
tom@419
|
121 int feature_index = 0;
|
tom@419
|
122 for (int i = 0; i < box_size_spectral_; ++i) {
|
tom@419
|
123 float feature_value = 0.0f;
|
tom@419
|
124 for (int j = 0; j < box_size_temporal_; ++j) {
|
tom@419
|
125 feature_value += box[i][j];
|
tom@419
|
126 }
|
tom@419
|
127 feature_value /= box_size_temporal_;
|
tom@419
|
128 output_.set_sample(box_index, feature_index, feature_value);
|
tom@419
|
129 ++feature_index;
|
tom@419
|
130 }
|
tom@419
|
131 for (int j = 0; j < box_size_temporal_; ++j) {
|
tom@419
|
132 float feature_value = 0.0f;
|
tom@419
|
133 for (int i = 0; i < box_size_spectral_; ++i) {
|
tom@419
|
134 feature_value += box[i][j];
|
tom@419
|
135 }
|
tom@419
|
136 feature_value /= box_size_spectral_;
|
tom@419
|
137 output_.set_sample(box_index, feature_index, feature_value);
|
tom@419
|
138 ++feature_index;
|
tom@419
|
139 }
|
tom@419
|
140 ++box_index;
|
tom@419
|
141 }
|
tom@419
|
142 }
|
tom@419
|
143
|
tom@419
|
144 PushOutput();
|
tom@419
|
145 }
|
tom@419
|
146 } // namespace aimc
|