Mercurial > hg > gpsynth
diff src/mfcc_analyzer.cpp @ 0:add35537fdbb tip
Initial import
author | irh <ian.r.hobson@gmail.com> |
---|---|
date | Thu, 25 Aug 2011 11:05:55 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/mfcc_analyzer.cpp Thu Aug 25 11:05:55 2011 +0100 @@ -0,0 +1,143 @@ +// Copyright 2011, Ian Hobson. +// +// This file is part of gpsynth. +// +// gpsynth is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// gpsynth is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with gpsynth in the file COPYING. +// If not, see http://www.gnu.org/licenses/. + +#include "mfcc_analyzer.hpp" + +#include "boost_ex.hpp" + +#include <algorithm> +#include <cmath> +#include <iostream> +#include <iterator> +#include <limits> +#include <numeric> + +namespace { + +// using the scale described in the HTK book +double MelToHz(double mel) { + return 700.0 * (std::pow(10.0, mel / 2595.0) - 1.0); +} + +double HzToMel(double hz) { + return 2595.0 * std::log10(1.0 + (hz / 700.0)); +} + +} // namespace + +namespace dsp { + +MFCCAnalyzer::MFCCAnalyzer(int frame_size, + int mel_bands, + int number_of_mfccs, + double frequency_min, + double frequency_max) +: frame_size_(frame_size), + mel_bands_(mel_bands), + number_of_mfccs_(number_of_mfccs), + sample_rate_(0), + frequency_min_(frequency_min), + frequency_max_(frequency_max) +{ + InitializeMelDCT(); +} + +MFCCAnalyzer::~MFCCAnalyzer() { + fftw_free(mel_bands_dct_in_); + fftw_free(mel_bands_dct_out_); + fftw_destroy_plan(dct_plan_); +} + +void MFCCAnalyzer::SetSampleRate(double sample_rate) { + if (sample_rate_ != sample_rate) { + sample_rate_ = sample_rate; + InitializeFilters(); + } +} + +// extracts mfccs from a magnitude spectrum +const std::vector<double>& +MFCCAnalyzer::ExtractMFCCS(const std::vector<double>& spectrum) { + // filter spectrum + for (std::size_t i = 0; i < mel_bands_; i++) { + mel_bands_dct_in_[i] = std::inner_product(spectrum.begin(), + spectrum.end(), + mel_filters_[i].begin(), + 0.0); + } + // take the logs of the filter outputs (avoiding log(0) == -inf) + std::replace(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_, + 0.0, std::numeric_limits<double>::min()); + std::transform(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_, + mel_bands_dct_in_, + static_cast<double(*)(double)>(std::log10)); + // take the DCT of the mel bands + fftw_execute(dct_plan_); + // store result + mfccs_.assign(mel_bands_dct_out_, mel_bands_dct_out_ + number_of_mfccs_); + return mfccs_; +} + +void MFCCAnalyzer::InitializeMelDCT() { + int mem_size = sizeof(double) * mel_bands_; + mel_bands_dct_in_ = (double*)fftw_malloc(mem_size); + mel_bands_dct_out_ = (double*)fftw_malloc(mem_size); + dct_plan_ = fftw_plan_r2r_1d(number_of_mfccs_, + mel_bands_dct_in_, + mel_bands_dct_out_, + FFTW_REDFT10, + FFTW_ESTIMATE); +} + +void MFCCAnalyzer::InitializeFilters() { + // create a series of triangular filters mapped linearly on the mel-frequency + // scale. + double bin_frequency = sample_rate_ / (2 * frame_size_); + double mel_min = HzToMel(frequency_min_); + double mel_max = HzToMel(frequency_max_); + double mel_width = (mel_max - mel_min) / (mel_bands_ + 1.0); + double mel_left = mel_min; + mel_filters_.resize(mel_bands_); + for (int i = 0; i < mel_bands_; i++) { + double mel_centre = mel_left + mel_width; + double mel_right = mel_centre + mel_width; + double left_hz = MelToHz(mel_left); + double right_hz = MelToHz(mel_right); + int bin_left = left_hz / bin_frequency + 0.5; + int bin_right = right_hz / bin_frequency + 0.5; + int bin_centre = (bin_left + bin_right) / 2.0 + 0.5; + double filter_height = 2.0 / (bin_right - bin_left); + mel_filters_[i].assign(frame_size_, 0.0); + // generate the up ramp + double bin_delta = filter_height / (bin_centre - bin_left); + double ramp = 0.0; + for (int bin = bin_left; bin < bin_centre; bin++) { + mel_filters_[i][bin] = ramp; + ramp += bin_delta; + } + // down ramp + for (int bin = bin_centre; bin < bin_right; bin++) { + mel_filters_[i][bin] = ramp; + ramp -= bin_delta; + } + // move mel bin along + mel_left += mel_width; + } +} + +} // dsp namespace