Mercurial > hg > aimc
changeset 365:2f4530363f7a
- Added as-yet-unfinished support for a proper configuration file format
- Added a couple of Python scripts to generate HMM configuration files
- Variable name changes and other cosmetic things
- Added the option for the noise generation to do pink noise (untested)
author | tomwalters |
---|---|
date | Thu, 12 Aug 2010 11:28:11 +0000 |
parents | 5a12172dcb73 |
children | 43bada33b705 |
files | trunk/experiments/scripts/cnbh-syllables/feature_generation/nap_profile_features.aimcopycfg trunk/experiments/scripts/cnbh-syllables/run_training_and_testing/gen_hhed_script.py trunk/experiments/scripts/cnbh-syllables/run_training_and_testing/gen_hmmproto.py trunk/src/Modules/Features/ModuleGaussians.cc trunk/src/Modules/Features/ModuleGaussians.h trunk/src/Modules/SNR/ModuleNoise.cc trunk/src/Modules/SNR/ModuleNoise.h trunk/src/Support/ModuleTree.cc trunk/src/Support/ModuleTree.h |
diffstat | 9 files changed, 333 insertions(+), 43 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trunk/experiments/scripts/cnbh-syllables/feature_generation/nap_profile_features.aimcopycfg Thu Aug 12 11:28:11 2010 +0000 @@ -0,0 +1,32 @@ +module1.name = FileInput +module1.id = file_input +module1.parameters = <<<ENDPARAMS +input.buffersize = 512 +ENDPARAMS +module1.child1 = GammatoneFilterbank + +module2.name = GammatoneFilterbank +module2.id = gt +module2.child1 = StandardNAP +module2.child2 = SmoothNAP + +module3.name = SmoothNAP +module3.id = nap +module3.parameters = <<<ENDPARAMS +nap.lowpass_cutoff = 100.0 +ENDPARAMS +module3.child1 = SmoothNAPProfile + +module4.name = SmoothNAPProfile +module4.id = slice +module4.parameters = <<<ENDPARAMS +slice.all = true +ENDPARAMS +module4.child1 = SmoothNAPFeatures + +module5.name = SmoothNAPFeatures +module5.id = gaussians +module5.child1 = SmoothNAPOutput + +moudule6.name = SmoothNAPOutput +module6.id = htk_out \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trunk/experiments/scripts/cnbh-syllables/run_training_and_testing/gen_hhed_script.py Thu Aug 12 11:28:11 2010 +0000 @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +gen_hhed_script.py + +Created by Thomas Walters on 2010-07-08. +""" + +import sys +import getopt + + +help_message = ''' +Generate an HTK HHed script to change the number of means in the output +distribution to num_means for the emitting states of an HMM with +total_hmm_states states +''' + + +class Usage(Exception): + def __init__(self, msg): + self.msg = msg + + +def main(argv=None): + if argv is None: + argv = sys.argv + try: + try: + opts, args = getopt.getopt(argv[1:], "hn:s:v", ["help", "num_means=", "total_hmm_states="]) + except getopt.error, msg: + raise Usage(msg) + + # defaults + num_means = 3 + total_hmm_states = 6 + + # option processing + for option, value in opts: + if option == "-v": + verbose = True + if option in ("-h", "--help"): + raise Usage(help_message) + if option in ("-n", "--num_means"): + num_means = int(value) + if option in ("-s", "--total_hmm_states"): + total_hmm_states = int(value) + + except Usage, err: + print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg) + print >> sys.stderr, "\t for help use --help" + return 2 + + out_string = "" + for state in xrange(2, total_hmm_states): + out_string += ("MU " + str(num_means) + " {*.state[" + str(state) + "].mix} ") + print out_string + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trunk/experiments/scripts/cnbh-syllables/run_training_and_testing/gen_hmmproto.py Thu Aug 12 11:28:11 2010 +0000 @@ -0,0 +1,76 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +gen_hmmproto.py + +Created by Thomas Walters on 2010-07-08. +""" + +import sys +import getopt + + +help_message = ''' +Generate an HTK HMM prototype with an input_size dimensional input and +total_hmm_states total HMM states (including start and end state) +The feature type string can be specified in feature_type +''' + + +class Usage(Exception): + def __init__(self, msg): + self.msg = msg + + +def main(argv=None): + if argv is None: + argv = sys.argv + try: + try: + opts, args = getopt.getopt(argv[1:], "hi:s:t:v", ["help", "input_size=", "total_hmm_states=", "feature_type="]) + except getopt.error, msg: + raise Usage(msg) + + # defaults + input_size = 39 + total_hmm_states = 6 + feature_type = "MFCC_0_D_A" + + # option processing + for option, value in opts: + if option == "-v": + verbose = True + if option in ("-h", "--help"): + raise Usage(help_message) + if option in ("-i", "--input_size"): + input_size = int(value) + if option in ("-s", "--total_hmm_states"): + total_hmm_states = int(value) + if option in ("-t", "--feature_type"): + feature_type = value + + except Usage, err: + print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg) + print >> sys.stderr, "\t for help use --help" + return 2 + + print "~o<VECSIZE> " + str(input_size) + "<NULLD>" + "<" + feature_type + ">" + print "~h \"proto\"" + print "<BEGINHMM>" + print "<NUMSTATES> " + str(total_hmm_states) + for state in xrange(2, total_hmm_states): + print "<State> " + str(state) + print "<Mean>" + str(input_size) + print "0 " * input_size + print "<Variance> " + str(input_size) + print "1.0 " * input_size + print + print "<TransP> " + str(total_hmm_states) + print "0.0 1.0 " + "0.0 " * (total_hmm_states - 2) + for state in xrange(1, total_hmm_states - 1): + print 
("0.0 " * state) + "0.6 0.4 " + "0.0 " * (total_hmm_states -2 - state) + print "0.0 " * total_hmm_states + print "<EndHMM>" + +if __name__ == "__main__": + sys.exit(main())
--- a/trunk/src/Modules/Features/ModuleGaussians.cc Thu Aug 12 09:15:49 2010 +0000 +++ b/trunk/src/Modules/Features/ModuleGaussians.cc Thu Aug 12 11:28:11 2010 +0000 @@ -38,17 +38,17 @@ module_type_ = "features"; module_version_ = "$Id$"; - m_iParamNComp = parameters_->DefaultInt("features.gaussians.ncomp", 4); - m_fParamVar = parameters_->DefaultFloat("features.gaussians.var", 115.0); - m_fParamPosteriorExp = - parameters_->DefaultFloat("features.gaussians.posterior_exp", 6.0); - m_iParamMaxIt = parameters_->DefaultInt("features.gaussians.maxit", 250); + m_iParamNComp = parameters_->DefaultInt("gaussians.ncomp", 4); + m_fParamVar = parameters_->DefaultFloat("gaussians.var", 115.0); + m_fParamPosteriorExp = parameters_->DefaultFloat("gaussians.posterior_exp", + 6.0); + m_iParamMaxIt = parameters_->DefaultInt("gaussians.maxit", 250); // The parameters system doesn't support tiny numbers well, to define this // variable as a string, then convert it to a float afterwards - parameters_->DefaultString("features.gaussians.priors_converged", "1e-7"); - m_fParamPriorsConverged = - parameters_->GetFloat("features.gaussians.priors_converged"); + parameters_->DefaultString("gaussians.priors_converged", "1e-7"); + priors_converged_ = parameters_->GetFloat("gaussians.priors_converged"); + output_positions_ = parameters_->DefaultBool("gaussians.positions", false); } ModuleGaussians::~ModuleGaussians() { @@ -60,8 +60,9 @@ // Assuming the number of channels is greater than twice the number of // Gaussian components, this is ok + output_component_count_ = 1; // Energy component if (input.channel_count() >= 2 * m_iParamNComp) { - output_.Initialize(m_iParamNComp, 1, input.sample_rate()); + output_component_count_ += (m_iParamNComp - 1); } else { LOG_ERROR(_T("Too few channels in filterbank to produce sensible " "Gaussian features. 
Either increase the number of filterbank" @@ -69,6 +70,12 @@ return false; } + if (output_positions_) { + output_component_count_ += m_iParamNComp; + } + + output_.Initialize(output_component_count_, 1, input.sample_rate()); + m_iNumChannels = input.channel_count(); m_pSpectralProfile.resize(m_iNumChannels, 0.0f); @@ -90,16 +97,12 @@ return; } // Calculate spectral profile - for (int iChannel = 0; - iChannel < input.channel_count(); - ++iChannel) { - m_pSpectralProfile[iChannel] = 0.0f; - for (int iSample = 0; - iSample < input.buffer_length(); - ++iSample) { - m_pSpectralProfile[iChannel] += input[iChannel][iSample]; + for (int ch = 0; ch < input.channel_count(); ++ch) { + m_pSpectralProfile[ch] = 0.0f; + for (int i = 0; i < input.buffer_length(); ++i) { + m_pSpectralProfile[ch] += input[ch][i]; } - m_pSpectralProfile[iChannel] /= static_cast<float>(input.buffer_length()); + m_pSpectralProfile[ch] /= static_cast<float>(input.buffer_length()); } float spectral_profile_sum = 0.0f; @@ -107,36 +110,35 @@ spectral_profile_sum += m_pSpectralProfile[i]; } + // Set the last component of the feature vector to be the log energy float logsum = log(spectral_profile_sum); if (!isinf(logsum)) { - output_.set_sample(m_iParamNComp - 1, 0, logsum); + output_.set_sample(output_component_count_ - 1, 0, logsum); } else { - output_.set_sample(m_iParamNComp - 1, 0, -1000.0); + output_.set_sample(output_component_count_ - 1, 0, -1000.0); } - for (int iChannel = 0; - iChannel < input.channel_count(); - ++iChannel) { - m_pSpectralProfile[iChannel] = pow(m_pSpectralProfile[iChannel], 0.8f); + for (int ch = 0; ch < input.channel_count(); ++ch) { + m_pSpectralProfile[ch] = pow(m_pSpectralProfile[ch], 0.8); } RubberGMMCore(2, true); - float fMean1 = m_pMu[0]; - float fMean2 = m_pMu[1]; + float mean1 = m_pMu[0]; + float mean2 = m_pMu[1]; // LOG_INFO(_T("Orig. mean 0 = %f"), m_pMu[0]); // LOG_INFO(_T("Orig. mean 1 = %f"), m_pMu[1]); // LOG_INFO(_T("Orig. 
prob 0 = %f"), m_pA[0]); // LOG_INFO(_T("Orig. prob 1 = %f"), m_pA[1]); - float fA1 = 0.05 * m_pA[0]; - float fA2 = 1.0 - 0.25 * m_pA[1]; + float a1 = 0.05 * m_pA[0]; + float a2 = 1.0 - 0.25 * m_pA[1]; // LOG_INFO(_T("fA1 = %f"), fA1); // LOG_INFO(_T("fA2 = %f"), fA2); - float fGradient = (fMean2 - fMean1) / (fA2 - fA1); - float fIntercept = fMean2 - fGradient * fA2; + float gradient = (mean2 - mean1) / (a2 - a1); + float intercept = mean2 - gradient * a2; // LOG_INFO(_T("fGradient = %f"), fGradient); // LOG_INFO(_T("fIntercept = %f"), fIntercept); @@ -144,7 +146,7 @@ for (int i = 0; i < m_iParamNComp; ++i) { m_pMu[i] = (static_cast<float>(i) / (static_cast<float>(m_iParamNComp) - 1.0f)) - * fGradient + fIntercept; + * gradient + intercept; // LOG_INFO(_T("mean %d = %f"), i, m_pMu[i]); } @@ -154,6 +156,7 @@ RubberGMMCore(m_iParamNComp, false); + // Amplitudes first for (int i = 0; i < m_iParamNComp - 1; ++i) { if (!isnan(m_pA[i])) { output_.set_sample(i, 0, m_pA[i]); @@ -162,6 +165,19 @@ } } + // Then means if required + if (output_positions_) { + int idx = 0; + for (int i = m_iParamNComp - 1; i < 2 * m_iParamNComp - 1; ++i) { + if (!isnan(m_pMu[i])) { + output_.set_sample(i, 0, m_pMu[idx]); + } else { + output_.set_sample(i, 0, 0.0f); + } + ++idx; + } + } + PushOutput(); } @@ -169,12 +185,12 @@ int iSizeX = m_iNumChannels; // Normalise the spectral profile - float fSpectralProfileTotal = 0.0f; + float SpectralProfileTotal = 0.0f; for (int iCount = 0; iCount < iSizeX; iCount++) { - fSpectralProfileTotal += m_pSpectralProfile[iCount]; + SpectralProfileTotal += m_pSpectralProfile[iCount]; } for (int iCount = 0; iCount < iSizeX; iCount++) { - m_pSpectralProfile[iCount] /= fSpectralProfileTotal; + m_pSpectralProfile[iCount] /= SpectralProfileTotal; } if (bDoInit) { @@ -200,12 +216,12 @@ pP_mod_X[i] = 0.0f; } - for (int i = 0; i < iNComponents; i++) { + for (int c = 0; c < iNComponents; c++) { for (int iCount = 0; iCount < iSizeX; iCount++) { pP_mod_X[iCount] += 1.0f / 
sqrt(2.0f * M_PI * m_fParamVar) * exp((-0.5f) - * pow(static_cast<float>(iCount+1) - m_pMu[i], 2) - / m_fParamVar) * m_pA[i]; + * pow(static_cast<float>(iCount+1) - m_pMu[c], 2) + / m_fParamVar) * m_pA[c]; } } @@ -251,7 +267,7 @@ } fPrdist /= iNComponents; - if (fPrdist < m_fParamPriorsConverged) { + if (fPrdist < priors_converged_) { // LOG_INFO("Converged!"); break; }
--- a/trunk/src/Modules/Features/ModuleGaussians.h Thu Aug 12 09:15:49 2010 +0000 +++ b/trunk/src/Modules/Features/ModuleGaussians.h Thu Aug 12 11:28:11 2010 +0000 @@ -75,7 +75,15 @@ /*! \brief convergence criterion */ - float m_fParamPriorsConverged; + float priors_converged_; + + /*! \brief Output component positions as well as amplitudes + */ + bool output_positions_; + + /*! \brief Total number of values in the output + */ + int output_component_count_; /*! \brief The amplitudes of the components (priors) */
--- a/trunk/src/Modules/SNR/ModuleNoise.cc Thu Aug 12 09:15:49 2010 +0000 +++ b/trunk/src/Modules/SNR/ModuleNoise.cc Thu Aug 12 11:28:11 2010 +0000 @@ -37,6 +37,7 @@ module_type_ = "snr"; module_version_ = "$Id$"; + pink_ = parameters_->DefaultBool("noise.pink", true); // Noise level relative to unit-variance Gaussian noise (ie. 0dB will give a // noise with an RMS level of 1.0) float snr_db = parameters_->DefaultFloat("noise.level_db", 0.0f); @@ -54,11 +55,14 @@ channel_count_ = input.channel_count(); output_.Initialize(input); + ResetInternal(); return true; } void ModuleNoise::ResetInternal() { - + s0_ = 0.0f; + s1_ = 0.0f; + s2_ = 0.0f; } void ModuleNoise::Process(const SignalBank &input) { @@ -81,7 +85,20 @@ for (int c = 0; c < input.channel_count(); ++c) { for (int i = 0; i < input.buffer_length(); ++i) { float s = input[c][i]; - s += (multiplier_ * gaussian_variate_()); + float n = gaussian_variate_(); + if (pink_) { + // Pink noise filter coefficients from + // ccrma.stanford.edu/~jos/sasp/Example_Synthesis_1_F_Noise.html + // Smith, Julius O. Spectral Audio Signal Processing, October 2008 + // Draft, http://ccrma.stanford.edu/~jos/sasp/, online book, + // accessed 2010-02-27. + float f = 0.049922035 * n + s0_; + s0_ = -0.095993537 * n - (-2.494956002 * f) + s1_; + s1_ = 0.050612699 * n - (2.017265875 * f) + s2_; + s2_ = -0.004408786 * n - (-0.522189400 * f); + n = f; + } + s += multiplier_ * n; output_.set_sample(c, i, s); } }
--- a/trunk/src/Modules/SNR/ModuleNoise.h Thu Aug 12 09:15:49 2010 +0000 +++ b/trunk/src/Modules/SNR/ModuleNoise.h Thu Aug 12 11:28:11 2010 +0000 @@ -53,10 +53,18 @@ int buffer_length_; int channel_count_; + // True to generate pink noise, otherwise white noise + bool pink_; + + // Filter state variables + float s0_; + float s1_; + float s2_; + float multiplier_; - // Random number generator which yeilds Gaussian-distributed values by - // The generator is a Mersenne twister + // Mersenne twister random number generator fed into a transform which + // yeilds Gaussian-distributed values boost::variate_generator<boost::mt19937, boost::normal_distribution<float> > gaussian_variate_;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trunk/src/Support/ModuleTree.cc Thu Aug 12 11:28:11 2010 +0000 @@ -0,0 +1,30 @@ +// Copyright 2010, Thomas Walters +// +// AIM-C: A C++ implementation of the Auditory Image Model +// http://www.acousticscale.org/AIMC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file + * \brief Parse a configuration file to generate a tree of modules. + */ + +/*! \author: Thomas Walters <tom@acousticscale.org> + * \date 2010/08/08 + * \version \$Id: $ + */ + +namespace aimc { + ParseConfigFile(string ) + +} // namespace aimc \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trunk/src/Support/ModuleTree.h Thu Aug 12 11:28:11 2010 +0000 @@ -0,0 +1,43 @@ +// Copyright 2010, Thomas Walters +// +// AIM-C: A C++ implementation of the Auditory Image Model +// http://www.acousticscale.org/AIMC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file + * \brief Parse a configuration file to generate a tree of modules. + */ + +/*! \author: Thomas Walters <tom@acousticscale.org> + * \date 2010/08/08 + * \version \$Id: $ + */ + +#include <string> + +namespace aimc { +using std::string; +class ModuleTree { + public: + bool ParseConfigFile(const string &filename); + bool ParseConfigText(const string &config_text); + void set_output_filename_prefix(const string &prefix); + string output_filename_prefix() { + return output_filename_prefix_; + } + private: + string output_filename_prefix_; + DISALLOW_COPY_AND_ASSIGN(ModuleTree); +}; +} // namespace aimc \ No newline at end of file