annotate trunk/src/Modules/Features/ModuleGaussians.cc @ 706:f8e90b5d85fd tip

Delete CARFAC code from this repository. It has been moved to https://github.com/google/carfac Please email me with your github username to get access. I've also created a new mailing list to discuss CARFAC development: https://groups.google.com/forum/#!forum/carfac-dev
author ronw@google.com
date Thu, 18 Jul 2013 20:56:51 +0000
parents 99f9bf0f7798
children
rev   line source
tomwalters@268 1 // Copyright 2008-2010, Thomas Walters
tomwalters@268 2 //
tomwalters@268 3 // AIM-C: A C++ implementation of the Auditory Image Model
tomwalters@268 4 // http://www.acousticscale.org/AIMC
tomwalters@268 5 //
tomwalters@318 6 // Licensed under the Apache License, Version 2.0 (the "License");
tomwalters@318 7 // you may not use this file except in compliance with the License.
tomwalters@318 8 // You may obtain a copy of the License at
tomwalters@268 9 //
tomwalters@318 10 // http://www.apache.org/licenses/LICENSE-2.0
tomwalters@268 11 //
tomwalters@318 12 // Unless required by applicable law or agreed to in writing, software
tomwalters@318 13 // distributed under the License is distributed on an "AS IS" BASIS,
tomwalters@318 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
tomwalters@318 15 // See the License for the specific language governing permissions and
tomwalters@318 16 // limitations under the License.
tomwalters@268 17
tomwalters@268 18 /*! \file
tomwalters@268 19 * \brief Gaussian features - based on MATLAB code by Christian Feldbauer
tomwalters@268 20 */
tomwalters@268 21
tomwalters@268 22 /*!
tomwalters@273 23 * \author Thomas Walters <tom@acousticscale.org>
tomwalters@268 24 * \date created 2008/06/23
tomwalters@296 25 * \version \$Id$
tomwalters@268 26 */
tomwalters@268 27
tomwalters@268 28 #include <math.h>
tomwalters@268 29
tom@440 30 #ifdef _WINDOWS
tomwalters@427 31 #include <float.h>
tomwalters@427 32 #endif
tomwalters@427 33
tomwalters@268 34 #include "Modules/Features/ModuleGaussians.h"
tomwalters@268 35 #include "Support/Common.h"
tomwalters@268 36
tomwalters@268 37 namespace aimc {
tomwalters@278 38 ModuleGaussians::ModuleGaussians(Parameters *params) : Module(params) {
tomwalters@268 39 // Set module metadata
tomwalters@268 40 module_description_ = "Gaussian Fitting to SSI profile";
tomwalters@273 41 module_identifier_ = "gaussians";
tomwalters@268 42 module_type_ = "features";
tomwalters@296 43 module_version_ = "$Id$";
tomwalters@268 44
tomwalters@365 45 m_iParamNComp = parameters_->DefaultInt("gaussians.ncomp", 4);
tomwalters@365 46 m_fParamVar = parameters_->DefaultFloat("gaussians.var", 115.0);
tomwalters@365 47 m_fParamPosteriorExp = parameters_->DefaultFloat("gaussians.posterior_exp",
tomwalters@365 48 6.0);
tomwalters@365 49 m_iParamMaxIt = parameters_->DefaultInt("gaussians.maxit", 250);
tomwalters@268 50
tomwalters@273 51 // The parameters system doesn't support tiny numbers well, to define this
tomwalters@273 52 // variable as a string, then convert it to a float afterwards
tomwalters@365 53 parameters_->DefaultString("gaussians.priors_converged", "1e-7");
tomwalters@365 54 priors_converged_ = parameters_->GetFloat("gaussians.priors_converged");
tomwalters@365 55 output_positions_ = parameters_->DefaultBool("gaussians.positions", false);
tomwalters@268 56 }
tomwalters@268 57
tomwalters@268 58 ModuleGaussians::~ModuleGaussians() {
tomwalters@268 59 }
tomwalters@268 60
tomwalters@268 61 bool ModuleGaussians::InitializeInternal(const SignalBank &input) {
tomwalters@268 62 m_pA.resize(m_iParamNComp, 0.0f);
tomwalters@268 63 m_pMu.resize(m_iParamNComp, 0.0f);
tomwalters@268 64
tomwalters@268 65 // Assuming the number of channels is greater than twice the number of
tomwalters@268 66 // Gaussian components, this is ok
tomwalters@365 67 output_component_count_ = 1; // Energy component
tomwalters@268 68 if (input.channel_count() >= 2 * m_iParamNComp) {
tomwalters@365 69 output_component_count_ += (m_iParamNComp - 1);
tomwalters@268 70 } else {
tomwalters@268 71 LOG_ERROR(_T("Too few channels in filterbank to produce sensible "
tomwalters@268 72 "Gaussian features. Either increase the number of filterbank"
tomwalters@268 73 " channels, or decrease the number of Gaussian components"));
tomwalters@268 74 return false;
tomwalters@268 75 }
tomwalters@268 76
tomwalters@365 77 if (output_positions_) {
tomwalters@365 78 output_component_count_ += m_iParamNComp;
tomwalters@365 79 }
tomwalters@365 80
tomwalters@365 81 output_.Initialize(output_component_count_, 1, input.sample_rate());
tomwalters@365 82
tomwalters@268 83 m_iNumChannels = input.channel_count();
tomwalters@268 84 m_pSpectralProfile.resize(m_iNumChannels, 0.0f);
tomwalters@268 85
tomwalters@268 86 return true;
tomwalters@268 87 }
tomwalters@268 88
tomwalters@275 89 void ModuleGaussians::ResetInternal() {
tomwalters@268 90 m_pSpectralProfile.clear();
tomwalters@268 91 m_pSpectralProfile.resize(m_iNumChannels, 0.0f);
tomwalters@292 92 m_pA.clear();
tomwalters@292 93 m_pA.resize(m_iParamNComp, 0.0f);
tomwalters@292 94 m_pMu.clear();
tomwalters@292 95 m_pMu.resize(m_iParamNComp, 0.0f);
tomwalters@268 96 }
tomwalters@268 97
tomwalters@268 98 void ModuleGaussians::Process(const SignalBank &input) {
tomwalters@273 99 if (!initialized_) {
tomwalters@273 100 LOG_ERROR(_T("Module ModuleGaussians not initialized."));
tomwalters@273 101 return;
tomwalters@273 102 }
tom@421 103 output_.set_start_time(input.start_time());
tomwalters@268 104 // Calculate spectral profile
tomwalters@365 105 for (int ch = 0; ch < input.channel_count(); ++ch) {
tomwalters@365 106 m_pSpectralProfile[ch] = 0.0f;
tomwalters@365 107 for (int i = 0; i < input.buffer_length(); ++i) {
tomwalters@365 108 m_pSpectralProfile[ch] += input[ch][i];
tomwalters@268 109 }
tomwalters@365 110 m_pSpectralProfile[ch] /= static_cast<float>(input.buffer_length());
tomwalters@273 111 }
tomwalters@273 112
tomwalters@280 113 float spectral_profile_sum = 0.0f;
tomwalters@273 114 for (int i = 0; i < input.channel_count(); ++i) {
tomwalters@273 115 spectral_profile_sum += m_pSpectralProfile[i];
tomwalters@273 116 }
tomwalters@273 117
tomwalters@365 118 // Set the last component of the feature vector to be the log energy
tomwalters@280 119 float logsum = log(spectral_profile_sum);
tomwalters@273 120 if (!isinf(logsum)) {
tomwalters@365 121 output_.set_sample(output_component_count_ - 1, 0, logsum);
tomwalters@273 122 } else {
tomwalters@365 123 output_.set_sample(output_component_count_ - 1, 0, -1000.0);
tomwalters@268 124 }
tomwalters@268 125
tomwalters@365 126 for (int ch = 0; ch < input.channel_count(); ++ch) {
tomwalters@427 127 m_pSpectralProfile[ch] = pow(m_pSpectralProfile[ch], 0.8f);
tomwalters@268 128 }
tomwalters@268 129
tomwalters@268 130 RubberGMMCore(2, true);
tomwalters@268 131
tomwalters@365 132 float mean1 = m_pMu[0];
tomwalters@365 133 float mean2 = m_pMu[1];
tomwalters@280 134 // LOG_INFO(_T("Orig. mean 0 = %f"), m_pMu[0]);
tomwalters@280 135 // LOG_INFO(_T("Orig. mean 1 = %f"), m_pMu[1]);
tomwalters@280 136 // LOG_INFO(_T("Orig. prob 0 = %f"), m_pA[0]);
tomwalters@280 137 // LOG_INFO(_T("Orig. prob 1 = %f"), m_pA[1]);
tomwalters@268 138
tomwalters@365 139 float a1 = 0.05 * m_pA[0];
tomwalters@365 140 float a2 = 1.0 - 0.25 * m_pA[1];
tomwalters@268 141
tomwalters@280 142 // LOG_INFO(_T("fA1 = %f"), fA1);
tomwalters@280 143 // LOG_INFO(_T("fA2 = %f"), fA2);
tomwalters@274 144
tomwalters@365 145 float gradient = (mean2 - mean1) / (a2 - a1);
tomwalters@365 146 float intercept = mean2 - gradient * a2;
tomwalters@274 147
tomwalters@280 148 // LOG_INFO(_T("fGradient = %f"), fGradient);
tomwalters@280 149 // LOG_INFO(_T("fIntercept = %f"), fIntercept);
tomwalters@268 150
tomwalters@268 151 for (int i = 0; i < m_iParamNComp; ++i) {
tomwalters@280 152 m_pMu[i] = (static_cast<float>(i)
tomwalters@280 153 / (static_cast<float>(m_iParamNComp) - 1.0f))
tomwalters@365 154 * gradient + intercept;
tomwalters@280 155 // LOG_INFO(_T("mean %d = %f"), i, m_pMu[i]);
tomwalters@268 156 }
tomwalters@268 157
tomwalters@268 158 for (int i = 0; i < m_iParamNComp; ++i) {
tomwalters@280 159 m_pA[i] = 1.0f / static_cast<float>(m_iParamNComp);
tomwalters@268 160 }
tomwalters@268 161
tomwalters@268 162 RubberGMMCore(m_iParamNComp, false);
tomwalters@268 163
tomwalters@365 164 // Amplitudes first
tomwalters@268 165 for (int i = 0; i < m_iParamNComp - 1; ++i) {
tomwalters@268 166 if (!isnan(m_pA[i])) {
tomwalters@268 167 output_.set_sample(i, 0, m_pA[i]);
tomwalters@268 168 } else {
tomwalters@268 169 output_.set_sample(i, 0, 0.0f);
tomwalters@268 170 }
tomwalters@268 171 }
tomwalters@273 172
tomwalters@365 173 // Then means if required
tomwalters@365 174 if (output_positions_) {
tomwalters@365 175 int idx = 0;
tomwalters@365 176 for (int i = m_iParamNComp - 1; i < 2 * m_iParamNComp - 1; ++i) {
tomwalters@365 177 if (!isnan(m_pMu[i])) {
tomwalters@365 178 output_.set_sample(i, 0, m_pMu[idx]);
tomwalters@365 179 } else {
tomwalters@365 180 output_.set_sample(i, 0, 0.0f);
tomwalters@365 181 }
tomwalters@365 182 ++idx;
tomwalters@365 183 }
tomwalters@365 184 }
tomwalters@365 185
tomwalters@268 186 PushOutput();
tomwalters@268 187 }
tomwalters@268 188
tomwalters@268 189 bool ModuleGaussians::RubberGMMCore(int iNComponents, bool bDoInit) {
tomwalters@268 190 int iSizeX = m_iNumChannels;
tomwalters@268 191
tomwalters@268 192 // Normalise the spectral profile
tomwalters@365 193 float SpectralProfileTotal = 0.0f;
tomwalters@268 194 for (int iCount = 0; iCount < iSizeX; iCount++) {
tomwalters@365 195 SpectralProfileTotal += m_pSpectralProfile[iCount];
tomwalters@268 196 }
tomwalters@268 197 for (int iCount = 0; iCount < iSizeX; iCount++) {
tomwalters@365 198 m_pSpectralProfile[iCount] /= SpectralProfileTotal;
tomwalters@268 199 }
tomwalters@268 200
tomwalters@268 201 if (bDoInit) {
tomwalters@268 202 // Uniformly spaced components
tomwalters@280 203 float dd = (iSizeX - 1.0f) / iNComponents;
tomwalters@268 204 for (int i = 0; i < iNComponents; i++) {
tomwalters@268 205 m_pMu[i] = dd / 2.0f + (i * dd);
tomwalters@268 206 m_pA[i] = 1.0f / iNComponents;
tomwalters@268 207 }
tomwalters@268 208 }
tomwalters@268 209
tomwalters@280 210 vector<float> pA_old;
tomwalters@268 211 pA_old.resize(iNComponents);
tomwalters@280 212 vector<float> pP_mod_X;
tomwalters@268 213 pP_mod_X.resize(iSizeX);
tomwalters@280 214 vector<float> pP_comp;
tomwalters@268 215 pP_comp.resize(iSizeX * iNComponents);
tomwalters@268 216
tomwalters@268 217 for (int iIteration = 0; iIteration < m_iParamMaxIt; iIteration++) {
tomwalters@268 218 // (re)calculate posteriors (component probability given observation)
tomwalters@268 219 // denominator: the model density at all observation points X
tomwalters@268 220 for (int i = 0; i < iSizeX; ++i) {
tomwalters@268 221 pP_mod_X[i] = 0.0f;
tomwalters@268 222 }
tomwalters@268 223
tomwalters@365 224 for (int c = 0; c < iNComponents; c++) {
tomwalters@268 225 for (int iCount = 0; iCount < iSizeX; iCount++) {
tomwalters@268 226 pP_mod_X[iCount] += 1.0f / sqrt(2.0f * M_PI * m_fParamVar)
tomwalters@280 227 * exp((-0.5f)
tomwalters@365 228 * pow(static_cast<float>(iCount+1) - m_pMu[c], 2)
tomwalters@365 229 / m_fParamVar) * m_pA[c];
tomwalters@268 230 }
tomwalters@268 231 }
tomwalters@268 232
tomwalters@268 233 for (int i = 0; i < iSizeX * iNComponents; ++i) {
tomwalters@268 234 pP_comp[i] = 0.0f;
tomwalters@268 235 }
tomwalters@268 236
tomwalters@268 237 for (int i = 0; i < iNComponents; i++) {
tomwalters@268 238 for (int iCount = 0; iCount < iSizeX; iCount++) {
tomwalters@268 239 pP_comp[iCount + i * iSizeX] =
tomwalters@268 240 1.0f / sqrt(2.0f * M_PI * m_fParamVar)
tomwalters@280 241 * exp((-0.5f) * pow((static_cast<float>(iCount + 1) - m_pMu[i]), 2)
tomwalters@280 242 / m_fParamVar);
tomwalters@268 243 pP_comp[iCount + i * iSizeX] =
tomwalters@268 244 pP_comp[iCount + i * iSizeX] * m_pA[i] / pP_mod_X[iCount];
tomwalters@268 245 }
tomwalters@268 246 }
tomwalters@268 247
tomwalters@268 248 for (int iCount = 0; iCount < iSizeX; ++iCount) {
tomwalters@280 249 float fSum = 0.0f;
tomwalters@268 250 for (int i = 0; i < iNComponents; ++i) {
tomwalters@268 251 pP_comp[iCount+i*iSizeX] = pow(pP_comp[iCount + i * iSizeX],
tomwalters@280 252 m_fParamPosteriorExp); // expansion
tomwalters@268 253 fSum += pP_comp[iCount+i*iSizeX];
tomwalters@268 254 }
tomwalters@268 255 for (int i = 0; i < iNComponents; ++i)
tomwalters@268 256 pP_comp[iCount+i*iSizeX] = pP_comp[iCount + i * iSizeX] / fSum;
tomwalters@268 257 // renormalisation
tomwalters@268 258 }
tomwalters@268 259
tomwalters@268 260 for (int i = 0; i < iNComponents; ++i) {
tomwalters@268 261 pA_old[i] = m_pA[i];
tomwalters@268 262 m_pA[i] = 0.0f;
tomwalters@268 263 for (int iCount = 0; iCount < iSizeX; ++iCount) {
tomwalters@268 264 m_pA[i] += pP_comp[iCount + i * iSizeX] * m_pSpectralProfile[iCount];
tomwalters@268 265 }
tomwalters@268 266 }
tomwalters@268 267
tomwalters@268 268 // finish when already converged
tomwalters@280 269 float fPrdist = 0.0f;
tomwalters@268 270 for (int i = 0; i < iNComponents; ++i) {
tomwalters@268 271 fPrdist += pow((m_pA[i] - pA_old[i]), 2);
tomwalters@268 272 }
tomwalters@268 273 fPrdist /= iNComponents;
tomwalters@268 274
tomwalters@365 275 if (fPrdist < priors_converged_) {
tomwalters@280 276 // LOG_INFO("Converged!");
tomwalters@268 277 break;
tomwalters@268 278 }
tomwalters@280 279 // LOG_INFO("Didn't converge!");
tomwalters@274 280
tomwalters@268 281
tomwalters@268 282 // update means (positions)
tomwalters@268 283 for (int i = 0 ; i < iNComponents; ++i) {
tomwalters@280 284 float mu_old = m_pMu[i];
tomwalters@268 285 if (m_pA[i] > 0.0f) {
tomwalters@268 286 m_pMu[i] = 0.0f;
tomwalters@268 287 for (int iCount = 0; iCount < iSizeX; ++iCount) {
tomwalters@268 288 m_pMu[i] += m_pSpectralProfile[iCount]
tomwalters@280 289 * pP_comp[iCount + i * iSizeX]
tomwalters@280 290 * static_cast<float>(iCount + 1);
tomwalters@268 291 }
tomwalters@268 292 m_pMu[i] /= m_pA[i];
tomwalters@268 293 if (isnan(m_pMu[i])) {
tomwalters@268 294 m_pMu[i] = mu_old;
tomwalters@268 295 }
tomwalters@268 296 }
tomwalters@268 297 }
tomwalters@280 298 } // loop over iterations
tomwalters@268 299
tomwalters@268 300 // Ensure they are sorted, using a really simple bubblesort
tomwalters@268 301 bool bSorted = false;
tomwalters@268 302 while (!bSorted) {
tomwalters@268 303 bSorted = true;
tomwalters@268 304 for (int i = 0; i < iNComponents - 1; ++i) {
tomwalters@268 305 if (m_pMu[i] > m_pMu[i + 1]) {
tomwalters@280 306 float fTemp = m_pMu[i];
tomwalters@268 307 m_pMu[i] = m_pMu[i + 1];
tomwalters@268 308 m_pMu[i + 1] = fTemp;
tomwalters@268 309 fTemp = m_pA[i];
tomwalters@268 310 m_pA[i] = m_pA[i + 1];
tomwalters@268 311 m_pA[i + 1] = fTemp;
tomwalters@268 312 bSorted = false;
tomwalters@268 313 }
tomwalters@268 314 }
tomwalters@268 315 }
tomwalters@268 316 return true;
tomwalters@268 317 }
tomwalters@280 318 } // namespace aimc
tomwalters@268 319