annotate trunk/src/Modules/Features/ModuleGaussians.cc @ 365:2f4530363f7a

- Added as-yet-unfinished support for a proper configuraiton file format - Added a couple of pythin scripts to generate HMM configuration files - Variable name changes and other cosmetic things - Added the option for the noise generation to do pink noise (untested)
author tomwalters
date Thu, 12 Aug 2010 11:28:11 +0000
parents 0a8e7d0c70dc
children 3b22559cd848
rev   line source
tomwalters@268 1 // Copyright 2008-2010, Thomas Walters
tomwalters@268 2 //
tomwalters@268 3 // AIM-C: A C++ implementation of the Auditory Image Model
tomwalters@268 4 // http://www.acousticscale.org/AIMC
tomwalters@268 5 //
tomwalters@318 6 // Licensed under the Apache License, Version 2.0 (the "License");
tomwalters@318 7 // you may not use this file except in compliance with the License.
tomwalters@318 8 // You may obtain a copy of the License at
tomwalters@268 9 //
tomwalters@318 10 // http://www.apache.org/licenses/LICENSE-2.0
tomwalters@268 11 //
tomwalters@318 12 // Unless required by applicable law or agreed to in writing, software
tomwalters@318 13 // distributed under the License is distributed on an "AS IS" BASIS,
tomwalters@318 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
tomwalters@318 15 // See the License for the specific language governing permissions and
tomwalters@318 16 // limitations under the License.
tomwalters@268 17
tomwalters@268 18 /*! \file
tomwalters@268 19 * \brief Gaussian features - based on MATLAB code by Christian Feldbauer
tomwalters@268 20 */
tomwalters@268 21
tomwalters@268 22 /*!
tomwalters@273 23 * \author Thomas Walters <tom@acousticscale.org>
tomwalters@268 24 * \date created 2008/06/23
tomwalters@296 25 * \version \$Id$
tomwalters@268 26 */
tomwalters@268 27
tomwalters@268 28 #include <math.h>
tomwalters@268 29
tomwalters@268 30 #include "Modules/Features/ModuleGaussians.h"
tomwalters@268 31 #include "Support/Common.h"
tomwalters@268 32
tomwalters@268 33 namespace aimc {
tomwalters@278 34 ModuleGaussians::ModuleGaussians(Parameters *params) : Module(params) {
tomwalters@268 35 // Set module metadata
tomwalters@268 36 module_description_ = "Gaussian Fitting to SSI profile";
tomwalters@273 37 module_identifier_ = "gaussians";
tomwalters@268 38 module_type_ = "features";
tomwalters@296 39 module_version_ = "$Id$";
tomwalters@268 40
tomwalters@365 41 m_iParamNComp = parameters_->DefaultInt("gaussians.ncomp", 4);
tomwalters@365 42 m_fParamVar = parameters_->DefaultFloat("gaussians.var", 115.0);
tomwalters@365 43 m_fParamPosteriorExp = parameters_->DefaultFloat("gaussians.posterior_exp",
tomwalters@365 44 6.0);
tomwalters@365 45 m_iParamMaxIt = parameters_->DefaultInt("gaussians.maxit", 250);
tomwalters@268 46
tomwalters@273 47 // The parameters system doesn't support tiny numbers well, to define this
tomwalters@273 48 // variable as a string, then convert it to a float afterwards
tomwalters@365 49 parameters_->DefaultString("gaussians.priors_converged", "1e-7");
tomwalters@365 50 priors_converged_ = parameters_->GetFloat("gaussians.priors_converged");
tomwalters@365 51 output_positions_ = parameters_->DefaultBool("gaussians.positions", false);
tomwalters@268 52 }
tomwalters@268 53
tomwalters@268 54 ModuleGaussians::~ModuleGaussians() {
tomwalters@268 55 }
tomwalters@268 56
tomwalters@268 57 bool ModuleGaussians::InitializeInternal(const SignalBank &input) {
tomwalters@268 58 m_pA.resize(m_iParamNComp, 0.0f);
tomwalters@268 59 m_pMu.resize(m_iParamNComp, 0.0f);
tomwalters@268 60
tomwalters@268 61 // Assuming the number of channels is greater than twice the number of
tomwalters@268 62 // Gaussian components, this is ok
tomwalters@365 63 output_component_count_ = 1; // Energy component
tomwalters@268 64 if (input.channel_count() >= 2 * m_iParamNComp) {
tomwalters@365 65 output_component_count_ += (m_iParamNComp - 1);
tomwalters@268 66 } else {
tomwalters@268 67 LOG_ERROR(_T("Too few channels in filterbank to produce sensible "
tomwalters@268 68 "Gaussian features. Either increase the number of filterbank"
tomwalters@268 69 " channels, or decrease the number of Gaussian components"));
tomwalters@268 70 return false;
tomwalters@268 71 }
tomwalters@268 72
tomwalters@365 73 if (output_positions_) {
tomwalters@365 74 output_component_count_ += m_iParamNComp;
tomwalters@365 75 }
tomwalters@365 76
tomwalters@365 77 output_.Initialize(output_component_count_, 1, input.sample_rate());
tomwalters@365 78
tomwalters@268 79 m_iNumChannels = input.channel_count();
tomwalters@268 80 m_pSpectralProfile.resize(m_iNumChannels, 0.0f);
tomwalters@268 81
tomwalters@268 82 return true;
tomwalters@268 83 }
tomwalters@268 84
tomwalters@275 85 void ModuleGaussians::ResetInternal() {
tomwalters@268 86 m_pSpectralProfile.clear();
tomwalters@268 87 m_pSpectralProfile.resize(m_iNumChannels, 0.0f);
tomwalters@292 88 m_pA.clear();
tomwalters@292 89 m_pA.resize(m_iParamNComp, 0.0f);
tomwalters@292 90 m_pMu.clear();
tomwalters@292 91 m_pMu.resize(m_iParamNComp, 0.0f);
tomwalters@268 92 }
tomwalters@268 93
tomwalters@268 94 void ModuleGaussians::Process(const SignalBank &input) {
tomwalters@273 95 if (!initialized_) {
tomwalters@273 96 LOG_ERROR(_T("Module ModuleGaussians not initialized."));
tomwalters@273 97 return;
tomwalters@273 98 }
tomwalters@268 99 // Calculate spectral profile
tomwalters@365 100 for (int ch = 0; ch < input.channel_count(); ++ch) {
tomwalters@365 101 m_pSpectralProfile[ch] = 0.0f;
tomwalters@365 102 for (int i = 0; i < input.buffer_length(); ++i) {
tomwalters@365 103 m_pSpectralProfile[ch] += input[ch][i];
tomwalters@268 104 }
tomwalters@365 105 m_pSpectralProfile[ch] /= static_cast<float>(input.buffer_length());
tomwalters@273 106 }
tomwalters@273 107
tomwalters@280 108 float spectral_profile_sum = 0.0f;
tomwalters@273 109 for (int i = 0; i < input.channel_count(); ++i) {
tomwalters@273 110 spectral_profile_sum += m_pSpectralProfile[i];
tomwalters@273 111 }
tomwalters@273 112
tomwalters@365 113 // Set the last component of the feature vector to be the log energy
tomwalters@280 114 float logsum = log(spectral_profile_sum);
tomwalters@273 115 if (!isinf(logsum)) {
tomwalters@365 116 output_.set_sample(output_component_count_ - 1, 0, logsum);
tomwalters@273 117 } else {
tomwalters@365 118 output_.set_sample(output_component_count_ - 1, 0, -1000.0);
tomwalters@268 119 }
tomwalters@268 120
tomwalters@365 121 for (int ch = 0; ch < input.channel_count(); ++ch) {
tomwalters@365 122 m_pSpectralProfile[ch] = pow(m_pSpectralProfile[ch], 0.8);
tomwalters@268 123 }
tomwalters@268 124
tomwalters@268 125 RubberGMMCore(2, true);
tomwalters@268 126
tomwalters@365 127 float mean1 = m_pMu[0];
tomwalters@365 128 float mean2 = m_pMu[1];
tomwalters@280 129 // LOG_INFO(_T("Orig. mean 0 = %f"), m_pMu[0]);
tomwalters@280 130 // LOG_INFO(_T("Orig. mean 1 = %f"), m_pMu[1]);
tomwalters@280 131 // LOG_INFO(_T("Orig. prob 0 = %f"), m_pA[0]);
tomwalters@280 132 // LOG_INFO(_T("Orig. prob 1 = %f"), m_pA[1]);
tomwalters@268 133
tomwalters@365 134 float a1 = 0.05 * m_pA[0];
tomwalters@365 135 float a2 = 1.0 - 0.25 * m_pA[1];
tomwalters@268 136
tomwalters@280 137 // LOG_INFO(_T("fA1 = %f"), fA1);
tomwalters@280 138 // LOG_INFO(_T("fA2 = %f"), fA2);
tomwalters@274 139
tomwalters@365 140 float gradient = (mean2 - mean1) / (a2 - a1);
tomwalters@365 141 float intercept = mean2 - gradient * a2;
tomwalters@274 142
tomwalters@280 143 // LOG_INFO(_T("fGradient = %f"), fGradient);
tomwalters@280 144 // LOG_INFO(_T("fIntercept = %f"), fIntercept);
tomwalters@268 145
tomwalters@268 146 for (int i = 0; i < m_iParamNComp; ++i) {
tomwalters@280 147 m_pMu[i] = (static_cast<float>(i)
tomwalters@280 148 / (static_cast<float>(m_iParamNComp) - 1.0f))
tomwalters@365 149 * gradient + intercept;
tomwalters@280 150 // LOG_INFO(_T("mean %d = %f"), i, m_pMu[i]);
tomwalters@268 151 }
tomwalters@268 152
tomwalters@268 153 for (int i = 0; i < m_iParamNComp; ++i) {
tomwalters@280 154 m_pA[i] = 1.0f / static_cast<float>(m_iParamNComp);
tomwalters@268 155 }
tomwalters@268 156
tomwalters@268 157 RubberGMMCore(m_iParamNComp, false);
tomwalters@268 158
tomwalters@365 159 // Amplitudes first
tomwalters@268 160 for (int i = 0; i < m_iParamNComp - 1; ++i) {
tomwalters@268 161 if (!isnan(m_pA[i])) {
tomwalters@268 162 output_.set_sample(i, 0, m_pA[i]);
tomwalters@268 163 } else {
tomwalters@268 164 output_.set_sample(i, 0, 0.0f);
tomwalters@268 165 }
tomwalters@268 166 }
tomwalters@273 167
tomwalters@365 168 // Then means if required
tomwalters@365 169 if (output_positions_) {
tomwalters@365 170 int idx = 0;
tomwalters@365 171 for (int i = m_iParamNComp - 1; i < 2 * m_iParamNComp - 1; ++i) {
tomwalters@365 172 if (!isnan(m_pMu[i])) {
tomwalters@365 173 output_.set_sample(i, 0, m_pMu[idx]);
tomwalters@365 174 } else {
tomwalters@365 175 output_.set_sample(i, 0, 0.0f);
tomwalters@365 176 }
tomwalters@365 177 ++idx;
tomwalters@365 178 }
tomwalters@365 179 }
tomwalters@365 180
tomwalters@268 181 PushOutput();
tomwalters@268 182 }
tomwalters@268 183
tomwalters@268 184 bool ModuleGaussians::RubberGMMCore(int iNComponents, bool bDoInit) {
tomwalters@268 185 int iSizeX = m_iNumChannels;
tomwalters@268 186
tomwalters@268 187 // Normalise the spectral profile
tomwalters@365 188 float SpectralProfileTotal = 0.0f;
tomwalters@268 189 for (int iCount = 0; iCount < iSizeX; iCount++) {
tomwalters@365 190 SpectralProfileTotal += m_pSpectralProfile[iCount];
tomwalters@268 191 }
tomwalters@268 192 for (int iCount = 0; iCount < iSizeX; iCount++) {
tomwalters@365 193 m_pSpectralProfile[iCount] /= SpectralProfileTotal;
tomwalters@268 194 }
tomwalters@268 195
tomwalters@268 196 if (bDoInit) {
tomwalters@268 197 // Uniformly spaced components
tomwalters@280 198 float dd = (iSizeX - 1.0f) / iNComponents;
tomwalters@268 199 for (int i = 0; i < iNComponents; i++) {
tomwalters@268 200 m_pMu[i] = dd / 2.0f + (i * dd);
tomwalters@268 201 m_pA[i] = 1.0f / iNComponents;
tomwalters@268 202 }
tomwalters@268 203 }
tomwalters@268 204
tomwalters@280 205 vector<float> pA_old;
tomwalters@268 206 pA_old.resize(iNComponents);
tomwalters@280 207 vector<float> pP_mod_X;
tomwalters@268 208 pP_mod_X.resize(iSizeX);
tomwalters@280 209 vector<float> pP_comp;
tomwalters@268 210 pP_comp.resize(iSizeX * iNComponents);
tomwalters@268 211
tomwalters@268 212 for (int iIteration = 0; iIteration < m_iParamMaxIt; iIteration++) {
tomwalters@268 213 // (re)calculate posteriors (component probability given observation)
tomwalters@268 214 // denominator: the model density at all observation points X
tomwalters@268 215 for (int i = 0; i < iSizeX; ++i) {
tomwalters@268 216 pP_mod_X[i] = 0.0f;
tomwalters@268 217 }
tomwalters@268 218
tomwalters@365 219 for (int c = 0; c < iNComponents; c++) {
tomwalters@268 220 for (int iCount = 0; iCount < iSizeX; iCount++) {
tomwalters@268 221 pP_mod_X[iCount] += 1.0f / sqrt(2.0f * M_PI * m_fParamVar)
tomwalters@280 222 * exp((-0.5f)
tomwalters@365 223 * pow(static_cast<float>(iCount+1) - m_pMu[c], 2)
tomwalters@365 224 / m_fParamVar) * m_pA[c];
tomwalters@268 225 }
tomwalters@268 226 }
tomwalters@268 227
tomwalters@268 228 for (int i = 0; i < iSizeX * iNComponents; ++i) {
tomwalters@268 229 pP_comp[i] = 0.0f;
tomwalters@268 230 }
tomwalters@268 231
tomwalters@268 232 for (int i = 0; i < iNComponents; i++) {
tomwalters@268 233 for (int iCount = 0; iCount < iSizeX; iCount++) {
tomwalters@268 234 pP_comp[iCount + i * iSizeX] =
tomwalters@268 235 1.0f / sqrt(2.0f * M_PI * m_fParamVar)
tomwalters@280 236 * exp((-0.5f) * pow((static_cast<float>(iCount + 1) - m_pMu[i]), 2)
tomwalters@280 237 / m_fParamVar);
tomwalters@268 238 pP_comp[iCount + i * iSizeX] =
tomwalters@268 239 pP_comp[iCount + i * iSizeX] * m_pA[i] / pP_mod_X[iCount];
tomwalters@268 240 }
tomwalters@268 241 }
tomwalters@268 242
tomwalters@268 243 for (int iCount = 0; iCount < iSizeX; ++iCount) {
tomwalters@280 244 float fSum = 0.0f;
tomwalters@268 245 for (int i = 0; i < iNComponents; ++i) {
tomwalters@268 246 pP_comp[iCount+i*iSizeX] = pow(pP_comp[iCount + i * iSizeX],
tomwalters@280 247 m_fParamPosteriorExp); // expansion
tomwalters@268 248 fSum += pP_comp[iCount+i*iSizeX];
tomwalters@268 249 }
tomwalters@268 250 for (int i = 0; i < iNComponents; ++i)
tomwalters@268 251 pP_comp[iCount+i*iSizeX] = pP_comp[iCount + i * iSizeX] / fSum;
tomwalters@268 252 // renormalisation
tomwalters@268 253 }
tomwalters@268 254
tomwalters@268 255 for (int i = 0; i < iNComponents; ++i) {
tomwalters@268 256 pA_old[i] = m_pA[i];
tomwalters@268 257 m_pA[i] = 0.0f;
tomwalters@268 258 for (int iCount = 0; iCount < iSizeX; ++iCount) {
tomwalters@268 259 m_pA[i] += pP_comp[iCount + i * iSizeX] * m_pSpectralProfile[iCount];
tomwalters@268 260 }
tomwalters@268 261 }
tomwalters@268 262
tomwalters@268 263 // finish when already converged
tomwalters@280 264 float fPrdist = 0.0f;
tomwalters@268 265 for (int i = 0; i < iNComponents; ++i) {
tomwalters@268 266 fPrdist += pow((m_pA[i] - pA_old[i]), 2);
tomwalters@268 267 }
tomwalters@268 268 fPrdist /= iNComponents;
tomwalters@268 269
tomwalters@365 270 if (fPrdist < priors_converged_) {
tomwalters@280 271 // LOG_INFO("Converged!");
tomwalters@268 272 break;
tomwalters@268 273 }
tomwalters@280 274 // LOG_INFO("Didn't converge!");
tomwalters@274 275
tomwalters@268 276
tomwalters@268 277 // update means (positions)
tomwalters@268 278 for (int i = 0 ; i < iNComponents; ++i) {
tomwalters@280 279 float mu_old = m_pMu[i];
tomwalters@268 280 if (m_pA[i] > 0.0f) {
tomwalters@268 281 m_pMu[i] = 0.0f;
tomwalters@268 282 for (int iCount = 0; iCount < iSizeX; ++iCount) {
tomwalters@268 283 m_pMu[i] += m_pSpectralProfile[iCount]
tomwalters@280 284 * pP_comp[iCount + i * iSizeX]
tomwalters@280 285 * static_cast<float>(iCount + 1);
tomwalters@268 286 }
tomwalters@268 287 m_pMu[i] /= m_pA[i];
tomwalters@268 288 if (isnan(m_pMu[i])) {
tomwalters@268 289 m_pMu[i] = mu_old;
tomwalters@268 290 }
tomwalters@268 291 }
tomwalters@268 292 }
tomwalters@280 293 } // loop over iterations
tomwalters@268 294
tomwalters@268 295 // Ensure they are sorted, using a really simple bubblesort
tomwalters@268 296 bool bSorted = false;
tomwalters@268 297 while (!bSorted) {
tomwalters@268 298 bSorted = true;
tomwalters@268 299 for (int i = 0; i < iNComponents - 1; ++i) {
tomwalters@268 300 if (m_pMu[i] > m_pMu[i + 1]) {
tomwalters@280 301 float fTemp = m_pMu[i];
tomwalters@268 302 m_pMu[i] = m_pMu[i + 1];
tomwalters@268 303 m_pMu[i + 1] = fTemp;
tomwalters@268 304 fTemp = m_pA[i];
tomwalters@268 305 m_pA[i] = m_pA[i + 1];
tomwalters@268 306 m_pA[i + 1] = fTemp;
tomwalters@268 307 bSorted = false;
tomwalters@268 308 }
tomwalters@268 309 }
tomwalters@268 310 }
tomwalters@268 311 return true;
tomwalters@268 312 }
tomwalters@280 313 } // namespace aimc
tomwalters@268 314