Mercurial > hg > gpsynth
view src/mfcc_analyzer.cpp @ 0:add35537fdbb tip
Initial import
author | irh <ian.r.hobson@gmail.com> |
---|---|
date | Thu, 25 Aug 2011 11:05:55 +0100 |
parents | |
children |
line wrap: on
line source
// Copyright 2011, Ian Hobson. // // This file is part of gpsynth. // // gpsynth is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // gpsynth is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with gpsynth in the file COPYING. // If not, see http://www.gnu.org/licenses/. #include "mfcc_analyzer.hpp" #include "boost_ex.hpp" #include <algorithm> #include <cmath> #include <iostream> #include <iterator> #include <limits> #include <numeric> namespace { // using the scale described in the HTK book double MelToHz(double mel) { return 700.0 * (std::pow(10.0, mel / 2595.0) - 1.0); } double HzToMel(double hz) { return 2595.0 * std::log10(1.0 + (hz / 700.0)); } } // namespace namespace dsp { MFCCAnalyzer::MFCCAnalyzer(int frame_size, int mel_bands, int number_of_mfccs, double frequency_min, double frequency_max) : frame_size_(frame_size), mel_bands_(mel_bands), number_of_mfccs_(number_of_mfccs), sample_rate_(0), frequency_min_(frequency_min), frequency_max_(frequency_max) { InitializeMelDCT(); } MFCCAnalyzer::~MFCCAnalyzer() { fftw_free(mel_bands_dct_in_); fftw_free(mel_bands_dct_out_); fftw_destroy_plan(dct_plan_); } void MFCCAnalyzer::SetSampleRate(double sample_rate) { if (sample_rate_ != sample_rate) { sample_rate_ = sample_rate; InitializeFilters(); } } // extracts mfccs from a magnitude spectrum const std::vector<double>& MFCCAnalyzer::ExtractMFCCS(const std::vector<double>& spectrum) { // filter spectrum for (std::size_t i = 0; i < mel_bands_; i++) { mel_bands_dct_in_[i] = std::inner_product(spectrum.begin(), spectrum.end(), mel_filters_[i].begin(), 0.0); } // take the logs of the filter outputs (avoiding log(0) == -inf) std::replace(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_, 0.0, std::numeric_limits<double>::min()); std::transform(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_, mel_bands_dct_in_, static_cast<double(*)(double)>(std::log10)); // take the DCT of the mel bands fftw_execute(dct_plan_); // store result mfccs_.assign(mel_bands_dct_out_, mel_bands_dct_out_ + number_of_mfccs_); return mfccs_; } void MFCCAnalyzer::InitializeMelDCT() { int mem_size = sizeof(double) * mel_bands_; mel_bands_dct_in_ = (double*)fftw_malloc(mem_size); mel_bands_dct_out_ = (double*)fftw_malloc(mem_size); dct_plan_ = fftw_plan_r2r_1d(number_of_mfccs_, mel_bands_dct_in_, mel_bands_dct_out_, FFTW_REDFT10, FFTW_ESTIMATE); } void MFCCAnalyzer::InitializeFilters() { // create a series of triangular filters mapped linearly on the mel-frequency // scale. double bin_frequency = sample_rate_ / (2 * frame_size_); double mel_min = HzToMel(frequency_min_); double mel_max = HzToMel(frequency_max_); double mel_width = (mel_max - mel_min) / (mel_bands_ + 1.0); double mel_left = mel_min; mel_filters_.resize(mel_bands_); for (int i = 0; i < mel_bands_; i++) { double mel_centre = mel_left + mel_width; double mel_right = mel_centre + mel_width; double left_hz = MelToHz(mel_left); double right_hz = MelToHz(mel_right); int bin_left = left_hz / bin_frequency + 0.5; int bin_right = right_hz / bin_frequency + 0.5; int bin_centre = (bin_left + bin_right) / 2.0 + 0.5; double filter_height = 2.0 / (bin_right - bin_left); mel_filters_[i].assign(frame_size_, 0.0); // generate the up ramp double bin_delta = filter_height / (bin_centre - bin_left); double ramp = 0.0; for (int bin = bin_left; bin < bin_centre; bin++) { mel_filters_[i][bin] = ramp; ramp += bin_delta; } // down ramp for (int bin = bin_centre; bin < bin_right; bin++) { mel_filters_[i][bin] = ramp; ramp -= bin_delta; } // move mel bin along mel_left += mel_width; } } } // dsp namespace