annotate src/Modules/Features/ModuleGaussians.cc @ 611:0fbaf443ec82

Carfac C++ revision 3, indluding more style improvements. The output structs are now classes again, and have separate storage methods for each output structure along with flags in the Run and RunSegment methods to allow for only storing NAPs if desired.
author alexbrandmeyer
date Fri, 17 May 2013 19:52:45 +0000
parents 988c8b6f7946
children
rev   line source
tomwalters@0 1 // Copyright 2008-2010, Thomas Walters
tomwalters@0 2 //
tomwalters@0 3 // AIM-C: A C++ implementation of the Auditory Image Model
tomwalters@0 4 // http://www.acousticscale.org/AIMC
tomwalters@0 5 //
tomwalters@45 6 // Licensed under the Apache License, Version 2.0 (the "License");
tomwalters@45 7 // you may not use this file except in compliance with the License.
tomwalters@45 8 // You may obtain a copy of the License at
tomwalters@0 9 //
tomwalters@45 10 // http://www.apache.org/licenses/LICENSE-2.0
tomwalters@0 11 //
tomwalters@45 12 // Unless required by applicable law or agreed to in writing, software
tomwalters@45 13 // distributed under the License is distributed on an "AS IS" BASIS,
tomwalters@45 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
tomwalters@45 15 // See the License for the specific language governing permissions and
tomwalters@45 16 // limitations under the License.
tomwalters@0 17
tomwalters@0 18 /*! \file
tomwalters@0 19 * \brief Gaussian features - based on MATLAB code by Christian Feldbauer
tomwalters@0 20 */
tomwalters@0 21
tomwalters@0 22 /*!
tomwalters@1 23 * \author Thomas Walters <tom@acousticscale.org>
tomwalters@0 24 * \date created 2008/06/23
tomwalters@23 25 * \version \$Id$
tomwalters@0 26 */
tomwalters@0 27
tomwalters@0 28 #include <math.h>
tomwalters@0 29
tom@253 30 #ifdef _WINDOWS
tomwalters@252 31 #include <float.h>
tomwalters@252 32 #endif
tomwalters@252 33
tomwalters@0 34 #include "Modules/Features/ModuleGaussians.h"
tomwalters@0 35 #include "Support/Common.h"
tomwalters@0 36
tomwalters@0 37 namespace aimc {
tomwalters@6 38 ModuleGaussians::ModuleGaussians(Parameters *params) : Module(params) {
tomwalters@0 39 // Set module metadata
tomwalters@0 40 module_description_ = "Gaussian Fitting to SSI profile";
tomwalters@1 41 module_identifier_ = "gaussians";
tomwalters@0 42 module_type_ = "features";
tomwalters@23 43 module_version_ = "$Id$";
tomwalters@0 44
tomwalters@195 45 m_iParamNComp = parameters_->DefaultInt("gaussians.ncomp", 4);
tomwalters@195 46 m_fParamVar = parameters_->DefaultFloat("gaussians.var", 115.0);
tomwalters@195 47 m_fParamPosteriorExp = parameters_->DefaultFloat("gaussians.posterior_exp",
tomwalters@195 48 6.0);
tomwalters@195 49 m_iParamMaxIt = parameters_->DefaultInt("gaussians.maxit", 250);
tomwalters@0 50
tomwalters@1 51 // The parameters system doesn't support tiny numbers well, to define this
tomwalters@1 52 // variable as a string, then convert it to a float afterwards
tomwalters@195 53 parameters_->DefaultString("gaussians.priors_converged", "1e-7");
tomwalters@195 54 priors_converged_ = parameters_->GetFloat("gaussians.priors_converged");
tomwalters@195 55 output_positions_ = parameters_->DefaultBool("gaussians.positions", false);
tomwalters@0 56 }
tomwalters@0 57
tomwalters@0 58 ModuleGaussians::~ModuleGaussians() {
tomwalters@0 59 }
tomwalters@0 60
tomwalters@0 61 bool ModuleGaussians::InitializeInternal(const SignalBank &input) {
tomwalters@0 62 m_pA.resize(m_iParamNComp, 0.0f);
tomwalters@0 63 m_pMu.resize(m_iParamNComp, 0.0f);
tomwalters@0 64
tomwalters@0 65 // Assuming the number of channels is greater than twice the number of
tomwalters@0 66 // Gaussian components, this is ok
tomwalters@195 67 output_component_count_ = 1; // Energy component
tomwalters@0 68 if (input.channel_count() >= 2 * m_iParamNComp) {
tomwalters@195 69 output_component_count_ += (m_iParamNComp - 1);
tomwalters@0 70 } else {
tomwalters@0 71 LOG_ERROR(_T("Too few channels in filterbank to produce sensible "
tomwalters@0 72 "Gaussian features. Either increase the number of filterbank"
tomwalters@0 73 " channels, or decrease the number of Gaussian components"));
tomwalters@0 74 return false;
tomwalters@0 75 }
tomwalters@0 76
tomwalters@195 77 if (output_positions_) {
tomwalters@195 78 output_component_count_ += m_iParamNComp;
tomwalters@195 79 }
tomwalters@195 80
tomwalters@195 81 output_.Initialize(output_component_count_, 1, input.sample_rate());
tomwalters@195 82
tomwalters@0 83 m_iNumChannels = input.channel_count();
tomwalters@0 84 m_pSpectralProfile.resize(m_iNumChannels, 0.0f);
tomwalters@0 85
tomwalters@0 86 return true;
tomwalters@0 87 }
tomwalters@0 88
tomwalters@3 89 void ModuleGaussians::ResetInternal() {
tomwalters@0 90 m_pSpectralProfile.clear();
tomwalters@0 91 m_pSpectralProfile.resize(m_iNumChannels, 0.0f);
tomwalters@20 92 m_pA.clear();
tomwalters@20 93 m_pA.resize(m_iParamNComp, 0.0f);
tomwalters@20 94 m_pMu.clear();
tomwalters@20 95 m_pMu.resize(m_iParamNComp, 0.0f);
tomwalters@0 96 }
tomwalters@0 97
tomwalters@0 98 void ModuleGaussians::Process(const SignalBank &input) {
tomwalters@1 99 if (!initialized_) {
tomwalters@1 100 LOG_ERROR(_T("Module ModuleGaussians not initialized."));
tomwalters@1 101 return;
tomwalters@1 102 }
tom@247 103 output_.set_start_time(input.start_time());
tomwalters@0 104 // Calculate spectral profile
tomwalters@195 105 for (int ch = 0; ch < input.channel_count(); ++ch) {
tomwalters@195 106 m_pSpectralProfile[ch] = 0.0f;
tomwalters@195 107 for (int i = 0; i < input.buffer_length(); ++i) {
tomwalters@195 108 m_pSpectralProfile[ch] += input[ch][i];
tomwalters@0 109 }
tomwalters@195 110 m_pSpectralProfile[ch] /= static_cast<float>(input.buffer_length());
tomwalters@1 111 }
tomwalters@1 112
tomwalters@8 113 float spectral_profile_sum = 0.0f;
tomwalters@1 114 for (int i = 0; i < input.channel_count(); ++i) {
tomwalters@1 115 spectral_profile_sum += m_pSpectralProfile[i];
tomwalters@1 116 }
tomwalters@1 117
tomwalters@195 118 // Set the last component of the feature vector to be the log energy
tomwalters@8 119 float logsum = log(spectral_profile_sum);
tomwalters@1 120 if (!isinf(logsum)) {
tomwalters@195 121 output_.set_sample(output_component_count_ - 1, 0, logsum);
tomwalters@1 122 } else {
tomwalters@195 123 output_.set_sample(output_component_count_ - 1, 0, -1000.0);
tomwalters@0 124 }
tomwalters@0 125
tomwalters@195 126 for (int ch = 0; ch < input.channel_count(); ++ch) {
tomwalters@252 127 m_pSpectralProfile[ch] = pow(m_pSpectralProfile[ch], 0.8f);
tomwalters@0 128 }
tomwalters@0 129
tomwalters@0 130 RubberGMMCore(2, true);
tomwalters@0 131
tomwalters@195 132 float mean1 = m_pMu[0];
tomwalters@195 133 float mean2 = m_pMu[1];
tomwalters@8 134 // LOG_INFO(_T("Orig. mean 0 = %f"), m_pMu[0]);
tomwalters@8 135 // LOG_INFO(_T("Orig. mean 1 = %f"), m_pMu[1]);
tomwalters@8 136 // LOG_INFO(_T("Orig. prob 0 = %f"), m_pA[0]);
tomwalters@8 137 // LOG_INFO(_T("Orig. prob 1 = %f"), m_pA[1]);
tomwalters@0 138
tomwalters@195 139 float a1 = 0.05 * m_pA[0];
tomwalters@195 140 float a2 = 1.0 - 0.25 * m_pA[1];
tomwalters@0 141
tomwalters@8 142 // LOG_INFO(_T("fA1 = %f"), fA1);
tomwalters@8 143 // LOG_INFO(_T("fA2 = %f"), fA2);
tomwalters@2 144
tomwalters@195 145 float gradient = (mean2 - mean1) / (a2 - a1);
tomwalters@195 146 float intercept = mean2 - gradient * a2;
tomwalters@2 147
tomwalters@8 148 // LOG_INFO(_T("fGradient = %f"), fGradient);
tomwalters@8 149 // LOG_INFO(_T("fIntercept = %f"), fIntercept);
tomwalters@0 150
tomwalters@0 151 for (int i = 0; i < m_iParamNComp; ++i) {
tomwalters@8 152 m_pMu[i] = (static_cast<float>(i)
tomwalters@8 153 / (static_cast<float>(m_iParamNComp) - 1.0f))
tomwalters@195 154 * gradient + intercept;
tomwalters@8 155 // LOG_INFO(_T("mean %d = %f"), i, m_pMu[i]);
tomwalters@0 156 }
tomwalters@0 157
tomwalters@0 158 for (int i = 0; i < m_iParamNComp; ++i) {
tomwalters@8 159 m_pA[i] = 1.0f / static_cast<float>(m_iParamNComp);
tomwalters@0 160 }
tomwalters@0 161
tomwalters@0 162 RubberGMMCore(m_iParamNComp, false);
tomwalters@0 163
tomwalters@195 164 // Amplitudes first
tomwalters@0 165 for (int i = 0; i < m_iParamNComp - 1; ++i) {
tomwalters@0 166 if (!isnan(m_pA[i])) {
tomwalters@0 167 output_.set_sample(i, 0, m_pA[i]);
tomwalters@0 168 } else {
tomwalters@0 169 output_.set_sample(i, 0, 0.0f);
tomwalters@0 170 }
tomwalters@0 171 }
tomwalters@1 172
tomwalters@195 173 // Then means if required
tomwalters@195 174 if (output_positions_) {
tomwalters@195 175 int idx = 0;
tomwalters@195 176 for (int i = m_iParamNComp - 1; i < 2 * m_iParamNComp - 1; ++i) {
tomwalters@195 177 if (!isnan(m_pMu[i])) {
tomwalters@195 178 output_.set_sample(i, 0, m_pMu[idx]);
tomwalters@195 179 } else {
tomwalters@195 180 output_.set_sample(i, 0, 0.0f);
tomwalters@195 181 }
tomwalters@195 182 ++idx;
tomwalters@195 183 }
tomwalters@195 184 }
tomwalters@195 185
tomwalters@0 186 PushOutput();
tomwalters@0 187 }
tomwalters@0 188
tomwalters@0 189 bool ModuleGaussians::RubberGMMCore(int iNComponents, bool bDoInit) {
tomwalters@0 190 int iSizeX = m_iNumChannels;
tomwalters@0 191
tomwalters@0 192 // Normalise the spectral profile
tomwalters@195 193 float SpectralProfileTotal = 0.0f;
tomwalters@0 194 for (int iCount = 0; iCount < iSizeX; iCount++) {
tomwalters@195 195 SpectralProfileTotal += m_pSpectralProfile[iCount];
tomwalters@0 196 }
tomwalters@0 197 for (int iCount = 0; iCount < iSizeX; iCount++) {
tomwalters@195 198 m_pSpectralProfile[iCount] /= SpectralProfileTotal;
tomwalters@0 199 }
tomwalters@0 200
tomwalters@0 201 if (bDoInit) {
tomwalters@0 202 // Uniformly spaced components
tomwalters@8 203 float dd = (iSizeX - 1.0f) / iNComponents;
tomwalters@0 204 for (int i = 0; i < iNComponents; i++) {
tomwalters@0 205 m_pMu[i] = dd / 2.0f + (i * dd);
tomwalters@0 206 m_pA[i] = 1.0f / iNComponents;
tomwalters@0 207 }
tomwalters@0 208 }
tomwalters@0 209
tomwalters@8 210 vector<float> pA_old;
tomwalters@0 211 pA_old.resize(iNComponents);
tomwalters@8 212 vector<float> pP_mod_X;
tomwalters@0 213 pP_mod_X.resize(iSizeX);
tomwalters@8 214 vector<float> pP_comp;
tomwalters@0 215 pP_comp.resize(iSizeX * iNComponents);
tomwalters@0 216
tomwalters@0 217 for (int iIteration = 0; iIteration < m_iParamMaxIt; iIteration++) {
tomwalters@0 218 // (re)calculate posteriors (component probability given observation)
tomwalters@0 219 // denominator: the model density at all observation points X
tomwalters@0 220 for (int i = 0; i < iSizeX; ++i) {
tomwalters@0 221 pP_mod_X[i] = 0.0f;
tomwalters@0 222 }
tomwalters@0 223
tomwalters@195 224 for (int c = 0; c < iNComponents; c++) {
tomwalters@0 225 for (int iCount = 0; iCount < iSizeX; iCount++) {
tomwalters@0 226 pP_mod_X[iCount] += 1.0f / sqrt(2.0f * M_PI * m_fParamVar)
tomwalters@8 227 * exp((-0.5f)
tomwalters@195 228 * pow(static_cast<float>(iCount+1) - m_pMu[c], 2)
tomwalters@195 229 / m_fParamVar) * m_pA[c];
tomwalters@0 230 }
tomwalters@0 231 }
tomwalters@0 232
tomwalters@0 233 for (int i = 0; i < iSizeX * iNComponents; ++i) {
tomwalters@0 234 pP_comp[i] = 0.0f;
tomwalters@0 235 }
tomwalters@0 236
tomwalters@0 237 for (int i = 0; i < iNComponents; i++) {
tomwalters@0 238 for (int iCount = 0; iCount < iSizeX; iCount++) {
tomwalters@0 239 pP_comp[iCount + i * iSizeX] =
tomwalters@0 240 1.0f / sqrt(2.0f * M_PI * m_fParamVar)
tomwalters@8 241 * exp((-0.5f) * pow((static_cast<float>(iCount + 1) - m_pMu[i]), 2)
tomwalters@8 242 / m_fParamVar);
tomwalters@0 243 pP_comp[iCount + i * iSizeX] =
tomwalters@0 244 pP_comp[iCount + i * iSizeX] * m_pA[i] / pP_mod_X[iCount];
tomwalters@0 245 }
tomwalters@0 246 }
tomwalters@0 247
tomwalters@0 248 for (int iCount = 0; iCount < iSizeX; ++iCount) {
tomwalters@8 249 float fSum = 0.0f;
tomwalters@0 250 for (int i = 0; i < iNComponents; ++i) {
tomwalters@0 251 pP_comp[iCount+i*iSizeX] = pow(pP_comp[iCount + i * iSizeX],
tomwalters@8 252 m_fParamPosteriorExp); // expansion
tomwalters@0 253 fSum += pP_comp[iCount+i*iSizeX];
tomwalters@0 254 }
tomwalters@0 255 for (int i = 0; i < iNComponents; ++i)
tomwalters@0 256 pP_comp[iCount+i*iSizeX] = pP_comp[iCount + i * iSizeX] / fSum;
tomwalters@0 257 // renormalisation
tomwalters@0 258 }
tomwalters@0 259
tomwalters@0 260 for (int i = 0; i < iNComponents; ++i) {
tomwalters@0 261 pA_old[i] = m_pA[i];
tomwalters@0 262 m_pA[i] = 0.0f;
tomwalters@0 263 for (int iCount = 0; iCount < iSizeX; ++iCount) {
tomwalters@0 264 m_pA[i] += pP_comp[iCount + i * iSizeX] * m_pSpectralProfile[iCount];
tomwalters@0 265 }
tomwalters@0 266 }
tomwalters@0 267
tomwalters@0 268 // finish when already converged
tomwalters@8 269 float fPrdist = 0.0f;
tomwalters@0 270 for (int i = 0; i < iNComponents; ++i) {
tomwalters@0 271 fPrdist += pow((m_pA[i] - pA_old[i]), 2);
tomwalters@0 272 }
tomwalters@0 273 fPrdist /= iNComponents;
tomwalters@0 274
tomwalters@195 275 if (fPrdist < priors_converged_) {
tomwalters@8 276 // LOG_INFO("Converged!");
tomwalters@0 277 break;
tomwalters@0 278 }
tomwalters@8 279 // LOG_INFO("Didn't converge!");
tomwalters@2 280
tomwalters@0 281
tomwalters@0 282 // update means (positions)
tomwalters@0 283 for (int i = 0 ; i < iNComponents; ++i) {
tomwalters@8 284 float mu_old = m_pMu[i];
tomwalters@0 285 if (m_pA[i] > 0.0f) {
tomwalters@0 286 m_pMu[i] = 0.0f;
tomwalters@0 287 for (int iCount = 0; iCount < iSizeX; ++iCount) {
tomwalters@0 288 m_pMu[i] += m_pSpectralProfile[iCount]
tomwalters@8 289 * pP_comp[iCount + i * iSizeX]
tomwalters@8 290 * static_cast<float>(iCount + 1);
tomwalters@0 291 }
tomwalters@0 292 m_pMu[i] /= m_pA[i];
tomwalters@0 293 if (isnan(m_pMu[i])) {
tomwalters@0 294 m_pMu[i] = mu_old;
tomwalters@0 295 }
tomwalters@0 296 }
tomwalters@0 297 }
tomwalters@8 298 } // loop over iterations
tomwalters@0 299
tomwalters@0 300 // Ensure they are sorted, using a really simple bubblesort
tomwalters@0 301 bool bSorted = false;
tomwalters@0 302 while (!bSorted) {
tomwalters@0 303 bSorted = true;
tomwalters@0 304 for (int i = 0; i < iNComponents - 1; ++i) {
tomwalters@0 305 if (m_pMu[i] > m_pMu[i + 1]) {
tomwalters@8 306 float fTemp = m_pMu[i];
tomwalters@0 307 m_pMu[i] = m_pMu[i + 1];
tomwalters@0 308 m_pMu[i + 1] = fTemp;
tomwalters@0 309 fTemp = m_pA[i];
tomwalters@0 310 m_pA[i] = m_pA[i + 1];
tomwalters@0 311 m_pA[i + 1] = fTemp;
tomwalters@0 312 bSorted = false;
tomwalters@0 313 }
tomwalters@0 314 }
tomwalters@0 315 }
tomwalters@0 316 return true;
tomwalters@0 317 }
tomwalters@8 318 } // namespace aimc
tomwalters@0 319