ian@0: // Copyright 2011, Ian Hobson. ian@0: // ian@0: // This file is part of gpsynth. ian@0: // ian@0: // gpsynth is free software: you can redistribute it and/or modify ian@0: // it under the terms of the GNU General Public License as published by ian@0: // the Free Software Foundation, either version 3 of the License, or ian@0: // (at your option) any later version. ian@0: // ian@0: // gpsynth is distributed in the hope that it will be useful, ian@0: // but WITHOUT ANY WARRANTY; without even the implied warranty of ian@0: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ian@0: // GNU General Public License for more details. ian@0: // ian@0: // You should have received a copy of the GNU General Public License ian@0: // along with gpsynth in the file COPYING. ian@0: // If not, see http://www.gnu.org/licenses/. ian@0: ian@0: #include "mfcc_analyzer.hpp" ian@0: ian@0: #include "boost_ex.hpp" ian@0: ian@0: #include ian@0: #include ian@0: #include ian@0: #include ian@0: #include ian@0: #include ian@0: ian@0: namespace { ian@0: ian@0: // using the scale described in the HTK book ian@0: double MelToHz(double mel) { ian@0: return 700.0 * (std::pow(10.0, mel / 2595.0) - 1.0); ian@0: } ian@0: ian@0: double HzToMel(double hz) { ian@0: return 2595.0 * std::log10(1.0 + (hz / 700.0)); ian@0: } ian@0: ian@0: } // namespace ian@0: ian@0: namespace dsp { ian@0: ian@0: MFCCAnalyzer::MFCCAnalyzer(int frame_size, ian@0: int mel_bands, ian@0: int number_of_mfccs, ian@0: double frequency_min, ian@0: double frequency_max) ian@0: : frame_size_(frame_size), ian@0: mel_bands_(mel_bands), ian@0: number_of_mfccs_(number_of_mfccs), ian@0: sample_rate_(0), ian@0: frequency_min_(frequency_min), ian@0: frequency_max_(frequency_max) ian@0: { ian@0: InitializeMelDCT(); ian@0: } ian@0: ian@0: MFCCAnalyzer::~MFCCAnalyzer() { ian@0: fftw_free(mel_bands_dct_in_); ian@0: fftw_free(mel_bands_dct_out_); ian@0: fftw_destroy_plan(dct_plan_); ian@0: } ian@0: ian@0: void MFCCAnalyzer::SetSampleRate(double sample_rate) { ian@0: if (sample_rate_ != sample_rate) { ian@0: sample_rate_ = sample_rate; ian@0: InitializeFilters(); ian@0: } ian@0: } ian@0: ian@0: // extracts mfccs from a magnitude spectrum ian@0: const std::vector& ian@0: MFCCAnalyzer::ExtractMFCCS(const std::vector& spectrum) { ian@0: // filter spectrum ian@0: for (std::size_t i = 0; i < mel_bands_; i++) { ian@0: mel_bands_dct_in_[i] = std::inner_product(spectrum.begin(), ian@0: spectrum.end(), ian@0: mel_filters_[i].begin(), ian@0: 0.0); ian@0: } ian@0: // take the logs of the filter outputs (avoiding log(0) == -inf) ian@0: std::replace(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_, ian@0: 0.0, std::numeric_limits::min()); ian@0: std::transform(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_, ian@0: mel_bands_dct_in_, ian@0: static_cast(std::log10)); ian@0: // take the DCT of the mel bands ian@0: fftw_execute(dct_plan_); ian@0: // store result ian@0: mfccs_.assign(mel_bands_dct_out_, mel_bands_dct_out_ + number_of_mfccs_); ian@0: return mfccs_; ian@0: } ian@0: ian@0: void MFCCAnalyzer::InitializeMelDCT() { ian@0: int mem_size = sizeof(double) * mel_bands_; ian@0: mel_bands_dct_in_ = (double*)fftw_malloc(mem_size); ian@0: mel_bands_dct_out_ = (double*)fftw_malloc(mem_size); ian@0: dct_plan_ = fftw_plan_r2r_1d(number_of_mfccs_, ian@0: mel_bands_dct_in_, ian@0: mel_bands_dct_out_, ian@0: FFTW_REDFT10, ian@0: FFTW_ESTIMATE); ian@0: } ian@0: ian@0: void MFCCAnalyzer::InitializeFilters() { ian@0: // create a series of triangular filters mapped linearly on the mel-frequency ian@0: // scale. ian@0: double bin_frequency = sample_rate_ / (2 * frame_size_); ian@0: double mel_min = HzToMel(frequency_min_); ian@0: double mel_max = HzToMel(frequency_max_); ian@0: double mel_width = (mel_max - mel_min) / (mel_bands_ + 1.0); ian@0: double mel_left = mel_min; ian@0: mel_filters_.resize(mel_bands_); ian@0: for (int i = 0; i < mel_bands_; i++) { ian@0: double mel_centre = mel_left + mel_width; ian@0: double mel_right = mel_centre + mel_width; ian@0: double left_hz = MelToHz(mel_left); ian@0: double right_hz = MelToHz(mel_right); ian@0: int bin_left = left_hz / bin_frequency + 0.5; ian@0: int bin_right = right_hz / bin_frequency + 0.5; ian@0: int bin_centre = (bin_left + bin_right) / 2.0 + 0.5; ian@0: double filter_height = 2.0 / (bin_right - bin_left); ian@0: mel_filters_[i].assign(frame_size_, 0.0); ian@0: // generate the up ramp ian@0: double bin_delta = filter_height / (bin_centre - bin_left); ian@0: double ramp = 0.0; ian@0: for (int bin = bin_left; bin < bin_centre; bin++) { ian@0: mel_filters_[i][bin] = ramp; ian@0: ramp += bin_delta; ian@0: } ian@0: // down ramp ian@0: for (int bin = bin_centre; bin < bin_right; bin++) { ian@0: mel_filters_[i][bin] = ramp; ian@0: ramp -= bin_delta; ian@0: } ian@0: // move mel bin along ian@0: mel_left += mel_width; ian@0: } ian@0: } ian@0: ian@0: } // dsp namespace