Mercurial > hg > aimc
comparison src/Modules/Features/ModuleGaussians.cc @ 84:bee31e7ebf4b
- Added as-yet-unfinished support for a proper configuration file format
- Added a couple of Python scripts to generate HMM configuration files
- Variable name changes and other cosmetic things
- Added the option for the noise generation to do pink noise (untested)
author | tomwalters |
---|---|
date | Thu, 12 Aug 2010 11:28:11 +0000 |
parents | e914b02b31b0 |
children | 4abed4cf1e87 |
comparison
equal
deleted
inserted
replaced
83:9c3cac29f300 | 84:bee31e7ebf4b |
---|---|
36 module_description_ = "Gaussian Fitting to SSI profile"; | 36 module_description_ = "Gaussian Fitting to SSI profile"; |
37 module_identifier_ = "gaussians"; | 37 module_identifier_ = "gaussians"; |
38 module_type_ = "features"; | 38 module_type_ = "features"; |
39 module_version_ = "$Id$"; | 39 module_version_ = "$Id$"; |
40 | 40 |
41 m_iParamNComp = parameters_->DefaultInt("features.gaussians.ncomp", 4); | 41 m_iParamNComp = parameters_->DefaultInt("gaussians.ncomp", 4); |
42 m_fParamVar = parameters_->DefaultFloat("features.gaussians.var", 115.0); | 42 m_fParamVar = parameters_->DefaultFloat("gaussians.var", 115.0); |
43 m_fParamPosteriorExp = | 43 m_fParamPosteriorExp = parameters_->DefaultFloat("gaussians.posterior_exp", |
44 parameters_->DefaultFloat("features.gaussians.posterior_exp", 6.0); | 44 6.0); |
45 m_iParamMaxIt = parameters_->DefaultInt("features.gaussians.maxit", 250); | 45 m_iParamMaxIt = parameters_->DefaultInt("gaussians.maxit", 250); |
46 | 46 |
47 // The parameters system doesn't support tiny numbers well, to define this | 47 // The parameters system doesn't support tiny numbers well, to define this |
48 // variable as a string, then convert it to a float afterwards | 48 // variable as a string, then convert it to a float afterwards |
49 parameters_->DefaultString("features.gaussians.priors_converged", "1e-7"); | 49 parameters_->DefaultString("gaussians.priors_converged", "1e-7"); |
50 m_fParamPriorsConverged = | 50 priors_converged_ = parameters_->GetFloat("gaussians.priors_converged"); |
51 parameters_->GetFloat("features.gaussians.priors_converged"); | 51 output_positions_ = parameters_->DefaultBool("gaussians.positions", false); |
52 } | 52 } |
53 | 53 |
54 ModuleGaussians::~ModuleGaussians() { | 54 ModuleGaussians::~ModuleGaussians() { |
55 } | 55 } |
56 | 56 |
58 m_pA.resize(m_iParamNComp, 0.0f); | 58 m_pA.resize(m_iParamNComp, 0.0f); |
59 m_pMu.resize(m_iParamNComp, 0.0f); | 59 m_pMu.resize(m_iParamNComp, 0.0f); |
60 | 60 |
61 // Assuming the number of channels is greater than twice the number of | 61 // Assuming the number of channels is greater than twice the number of |
62 // Gaussian components, this is ok | 62 // Gaussian components, this is ok |
63 output_component_count_ = 1; // Energy component | |
63 if (input.channel_count() >= 2 * m_iParamNComp) { | 64 if (input.channel_count() >= 2 * m_iParamNComp) { |
64 output_.Initialize(m_iParamNComp, 1, input.sample_rate()); | 65 output_component_count_ += (m_iParamNComp - 1); |
65 } else { | 66 } else { |
66 LOG_ERROR(_T("Too few channels in filterbank to produce sensible " | 67 LOG_ERROR(_T("Too few channels in filterbank to produce sensible " |
67 "Gaussian features. Either increase the number of filterbank" | 68 "Gaussian features. Either increase the number of filterbank" |
68 " channels, or decrease the number of Gaussian components")); | 69 " channels, or decrease the number of Gaussian components")); |
69 return false; | 70 return false; |
70 } | 71 } |
72 | |
73 if (output_positions_) { | |
74 output_component_count_ += m_iParamNComp; | |
75 } | |
76 | |
77 output_.Initialize(output_component_count_, 1, input.sample_rate()); | |
71 | 78 |
72 m_iNumChannels = input.channel_count(); | 79 m_iNumChannels = input.channel_count(); |
73 m_pSpectralProfile.resize(m_iNumChannels, 0.0f); | 80 m_pSpectralProfile.resize(m_iNumChannels, 0.0f); |
74 | 81 |
75 return true; | 82 return true; |
88 if (!initialized_) { | 95 if (!initialized_) { |
89 LOG_ERROR(_T("Module ModuleGaussians not initialized.")); | 96 LOG_ERROR(_T("Module ModuleGaussians not initialized.")); |
90 return; | 97 return; |
91 } | 98 } |
92 // Calculate spectral profile | 99 // Calculate spectral profile |
93 for (int iChannel = 0; | 100 for (int ch = 0; ch < input.channel_count(); ++ch) { |
94 iChannel < input.channel_count(); | 101 m_pSpectralProfile[ch] = 0.0f; |
95 ++iChannel) { | 102 for (int i = 0; i < input.buffer_length(); ++i) { |
96 m_pSpectralProfile[iChannel] = 0.0f; | 103 m_pSpectralProfile[ch] += input[ch][i]; |
97 for (int iSample = 0; | 104 } |
98 iSample < input.buffer_length(); | 105 m_pSpectralProfile[ch] /= static_cast<float>(input.buffer_length()); |
99 ++iSample) { | |
100 m_pSpectralProfile[iChannel] += input[iChannel][iSample]; | |
101 } | |
102 m_pSpectralProfile[iChannel] /= static_cast<float>(input.buffer_length()); | |
103 } | 106 } |
104 | 107 |
105 float spectral_profile_sum = 0.0f; | 108 float spectral_profile_sum = 0.0f; |
106 for (int i = 0; i < input.channel_count(); ++i) { | 109 for (int i = 0; i < input.channel_count(); ++i) { |
107 spectral_profile_sum += m_pSpectralProfile[i]; | 110 spectral_profile_sum += m_pSpectralProfile[i]; |
108 } | 111 } |
109 | 112 |
113 // Set the last component of the feature vector to be the log energy | |
110 float logsum = log(spectral_profile_sum); | 114 float logsum = log(spectral_profile_sum); |
111 if (!isinf(logsum)) { | 115 if (!isinf(logsum)) { |
112 output_.set_sample(m_iParamNComp - 1, 0, logsum); | 116 output_.set_sample(output_component_count_ - 1, 0, logsum); |
113 } else { | 117 } else { |
114 output_.set_sample(m_iParamNComp - 1, 0, -1000.0); | 118 output_.set_sample(output_component_count_ - 1, 0, -1000.0); |
115 } | 119 } |
116 | 120 |
117 for (int iChannel = 0; | 121 for (int ch = 0; ch < input.channel_count(); ++ch) { |
118 iChannel < input.channel_count(); | 122 m_pSpectralProfile[ch] = pow(m_pSpectralProfile[ch], 0.8); |
119 ++iChannel) { | |
120 m_pSpectralProfile[iChannel] = pow(m_pSpectralProfile[iChannel], 0.8f); | |
121 } | 123 } |
122 | 124 |
123 RubberGMMCore(2, true); | 125 RubberGMMCore(2, true); |
124 | 126 |
125 float fMean1 = m_pMu[0]; | 127 float mean1 = m_pMu[0]; |
126 float fMean2 = m_pMu[1]; | 128 float mean2 = m_pMu[1]; |
127 // LOG_INFO(_T("Orig. mean 0 = %f"), m_pMu[0]); | 129 // LOG_INFO(_T("Orig. mean 0 = %f"), m_pMu[0]); |
128 // LOG_INFO(_T("Orig. mean 1 = %f"), m_pMu[1]); | 130 // LOG_INFO(_T("Orig. mean 1 = %f"), m_pMu[1]); |
129 // LOG_INFO(_T("Orig. prob 0 = %f"), m_pA[0]); | 131 // LOG_INFO(_T("Orig. prob 0 = %f"), m_pA[0]); |
130 // LOG_INFO(_T("Orig. prob 1 = %f"), m_pA[1]); | 132 // LOG_INFO(_T("Orig. prob 1 = %f"), m_pA[1]); |
131 | 133 |
132 float fA1 = 0.05 * m_pA[0]; | 134 float a1 = 0.05 * m_pA[0]; |
133 float fA2 = 1.0 - 0.25 * m_pA[1]; | 135 float a2 = 1.0 - 0.25 * m_pA[1]; |
134 | 136 |
135 // LOG_INFO(_T("fA1 = %f"), fA1); | 137 // LOG_INFO(_T("fA1 = %f"), fA1); |
136 // LOG_INFO(_T("fA2 = %f"), fA2); | 138 // LOG_INFO(_T("fA2 = %f"), fA2); |
137 | 139 |
138 float fGradient = (fMean2 - fMean1) / (fA2 - fA1); | 140 float gradient = (mean2 - mean1) / (a2 - a1); |
139 float fIntercept = fMean2 - fGradient * fA2; | 141 float intercept = mean2 - gradient * a2; |
140 | 142 |
141 // LOG_INFO(_T("fGradient = %f"), fGradient); | 143 // LOG_INFO(_T("fGradient = %f"), fGradient); |
142 // LOG_INFO(_T("fIntercept = %f"), fIntercept); | 144 // LOG_INFO(_T("fIntercept = %f"), fIntercept); |
143 | 145 |
144 for (int i = 0; i < m_iParamNComp; ++i) { | 146 for (int i = 0; i < m_iParamNComp; ++i) { |
145 m_pMu[i] = (static_cast<float>(i) | 147 m_pMu[i] = (static_cast<float>(i) |
146 / (static_cast<float>(m_iParamNComp) - 1.0f)) | 148 / (static_cast<float>(m_iParamNComp) - 1.0f)) |
147 * fGradient + fIntercept; | 149 * gradient + intercept; |
148 // LOG_INFO(_T("mean %d = %f"), i, m_pMu[i]); | 150 // LOG_INFO(_T("mean %d = %f"), i, m_pMu[i]); |
149 } | 151 } |
150 | 152 |
151 for (int i = 0; i < m_iParamNComp; ++i) { | 153 for (int i = 0; i < m_iParamNComp; ++i) { |
152 m_pA[i] = 1.0f / static_cast<float>(m_iParamNComp); | 154 m_pA[i] = 1.0f / static_cast<float>(m_iParamNComp); |
153 } | 155 } |
154 | 156 |
155 RubberGMMCore(m_iParamNComp, false); | 157 RubberGMMCore(m_iParamNComp, false); |
156 | 158 |
159 // Amplitudes first | |
157 for (int i = 0; i < m_iParamNComp - 1; ++i) { | 160 for (int i = 0; i < m_iParamNComp - 1; ++i) { |
158 if (!isnan(m_pA[i])) { | 161 if (!isnan(m_pA[i])) { |
159 output_.set_sample(i, 0, m_pA[i]); | 162 output_.set_sample(i, 0, m_pA[i]); |
160 } else { | 163 } else { |
161 output_.set_sample(i, 0, 0.0f); | 164 output_.set_sample(i, 0, 0.0f); |
162 } | 165 } |
163 } | 166 } |
164 | 167 |
168 // Then means if required | |
169 if (output_positions_) { | |
170 int idx = 0; | |
171 for (int i = m_iParamNComp - 1; i < 2 * m_iParamNComp - 1; ++i) { | |
172 if (!isnan(m_pMu[i])) { | |
173 output_.set_sample(i, 0, m_pMu[idx]); | |
174 } else { | |
175 output_.set_sample(i, 0, 0.0f); | |
176 } | |
177 ++idx; | |
178 } | |
179 } | |
180 | |
165 PushOutput(); | 181 PushOutput(); |
166 } | 182 } |
167 | 183 |
168 bool ModuleGaussians::RubberGMMCore(int iNComponents, bool bDoInit) { | 184 bool ModuleGaussians::RubberGMMCore(int iNComponents, bool bDoInit) { |
169 int iSizeX = m_iNumChannels; | 185 int iSizeX = m_iNumChannels; |
170 | 186 |
171 // Normalise the spectral profile | 187 // Normalise the spectral profile |
172 float fSpectralProfileTotal = 0.0f; | 188 float SpectralProfileTotal = 0.0f; |
173 for (int iCount = 0; iCount < iSizeX; iCount++) { | 189 for (int iCount = 0; iCount < iSizeX; iCount++) { |
174 fSpectralProfileTotal += m_pSpectralProfile[iCount]; | 190 SpectralProfileTotal += m_pSpectralProfile[iCount]; |
175 } | 191 } |
176 for (int iCount = 0; iCount < iSizeX; iCount++) { | 192 for (int iCount = 0; iCount < iSizeX; iCount++) { |
177 m_pSpectralProfile[iCount] /= fSpectralProfileTotal; | 193 m_pSpectralProfile[iCount] /= SpectralProfileTotal; |
178 } | 194 } |
179 | 195 |
180 if (bDoInit) { | 196 if (bDoInit) { |
181 // Uniformly spaced components | 197 // Uniformly spaced components |
182 float dd = (iSizeX - 1.0f) / iNComponents; | 198 float dd = (iSizeX - 1.0f) / iNComponents; |
198 // denominator: the model density at all observation points X | 214 // denominator: the model density at all observation points X |
199 for (int i = 0; i < iSizeX; ++i) { | 215 for (int i = 0; i < iSizeX; ++i) { |
200 pP_mod_X[i] = 0.0f; | 216 pP_mod_X[i] = 0.0f; |
201 } | 217 } |
202 | 218 |
203 for (int i = 0; i < iNComponents; i++) { | 219 for (int c = 0; c < iNComponents; c++) { |
204 for (int iCount = 0; iCount < iSizeX; iCount++) { | 220 for (int iCount = 0; iCount < iSizeX; iCount++) { |
205 pP_mod_X[iCount] += 1.0f / sqrt(2.0f * M_PI * m_fParamVar) | 221 pP_mod_X[iCount] += 1.0f / sqrt(2.0f * M_PI * m_fParamVar) |
206 * exp((-0.5f) | 222 * exp((-0.5f) |
207 * pow(static_cast<float>(iCount+1) - m_pMu[i], 2) | 223 * pow(static_cast<float>(iCount+1) - m_pMu[c], 2) |
208 / m_fParamVar) * m_pA[i]; | 224 / m_fParamVar) * m_pA[c]; |
209 } | 225 } |
210 } | 226 } |
211 | 227 |
212 for (int i = 0; i < iSizeX * iNComponents; ++i) { | 228 for (int i = 0; i < iSizeX * iNComponents; ++i) { |
213 pP_comp[i] = 0.0f; | 229 pP_comp[i] = 0.0f; |
249 for (int i = 0; i < iNComponents; ++i) { | 265 for (int i = 0; i < iNComponents; ++i) { |
250 fPrdist += pow((m_pA[i] - pA_old[i]), 2); | 266 fPrdist += pow((m_pA[i] - pA_old[i]), 2); |
251 } | 267 } |
252 fPrdist /= iNComponents; | 268 fPrdist /= iNComponents; |
253 | 269 |
254 if (fPrdist < m_fParamPriorsConverged) { | 270 if (fPrdist < priors_converged_) { |
255 // LOG_INFO("Converged!"); | 271 // LOG_INFO("Converged!"); |
256 break; | 272 break; |
257 } | 273 } |
258 // LOG_INFO("Didn't converge!"); | 274 // LOG_INFO("Didn't converge!"); |
259 | 275 |