annotate src/mfcc_analyzer.cpp @ 0:add35537fdbb tip

Initial import
author irh <ian.r.hobson@gmail.com>
date Thu, 25 Aug 2011 11:05:55 +0100
parents
children
rev   line source
ian@0 1 // Copyright 2011, Ian Hobson.
ian@0 2 //
ian@0 3 // This file is part of gpsynth.
ian@0 4 //
ian@0 5 // gpsynth is free software: you can redistribute it and/or modify
ian@0 6 // it under the terms of the GNU General Public License as published by
ian@0 7 // the Free Software Foundation, either version 3 of the License, or
ian@0 8 // (at your option) any later version.
ian@0 9 //
ian@0 10 // gpsynth is distributed in the hope that it will be useful,
ian@0 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
ian@0 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
ian@0 13 // GNU General Public License for more details.
ian@0 14 //
ian@0 15 // You should have received a copy of the GNU General Public License
ian@0 16 // along with gpsynth in the file COPYING.
ian@0 17 // If not, see http://www.gnu.org/licenses/.
ian@0 18
ian@0 19 #include "mfcc_analyzer.hpp"
ian@0 20
ian@0 21 #include "boost_ex.hpp"
ian@0 22
ian@0 23 #include <algorithm>
ian@0 24 #include <cmath>
ian@0 25 #include <iostream>
ian@0 26 #include <iterator>
ian@0 27 #include <limits>
ian@0 28 #include <numeric>
ian@0 29
ian@0 30 namespace {
ian@0 31
ian@0 32 // using the scale described in the HTK book
ian@0 33 double MelToHz(double mel) {
ian@0 34 return 700.0 * (std::pow(10.0, mel / 2595.0) - 1.0);
ian@0 35 }
ian@0 36
ian@0 37 double HzToMel(double hz) {
ian@0 38 return 2595.0 * std::log10(1.0 + (hz / 700.0));
ian@0 39 }
ian@0 40
ian@0 41 } // namespace
ian@0 42
ian@0 43 namespace dsp {
ian@0 44
ian@0 45 MFCCAnalyzer::MFCCAnalyzer(int frame_size,
ian@0 46 int mel_bands,
ian@0 47 int number_of_mfccs,
ian@0 48 double frequency_min,
ian@0 49 double frequency_max)
ian@0 50 : frame_size_(frame_size),
ian@0 51 mel_bands_(mel_bands),
ian@0 52 number_of_mfccs_(number_of_mfccs),
ian@0 53 sample_rate_(0),
ian@0 54 frequency_min_(frequency_min),
ian@0 55 frequency_max_(frequency_max)
ian@0 56 {
ian@0 57 InitializeMelDCT();
ian@0 58 }
ian@0 59
ian@0 60 MFCCAnalyzer::~MFCCAnalyzer() {
ian@0 61 fftw_free(mel_bands_dct_in_);
ian@0 62 fftw_free(mel_bands_dct_out_);
ian@0 63 fftw_destroy_plan(dct_plan_);
ian@0 64 }
ian@0 65
ian@0 66 void MFCCAnalyzer::SetSampleRate(double sample_rate) {
ian@0 67 if (sample_rate_ != sample_rate) {
ian@0 68 sample_rate_ = sample_rate;
ian@0 69 InitializeFilters();
ian@0 70 }
ian@0 71 }
ian@0 72
ian@0 73 // extracts mfccs from a magnitude spectrum
ian@0 74 const std::vector<double>&
ian@0 75 MFCCAnalyzer::ExtractMFCCS(const std::vector<double>& spectrum) {
ian@0 76 // filter spectrum
ian@0 77 for (std::size_t i = 0; i < mel_bands_; i++) {
ian@0 78 mel_bands_dct_in_[i] = std::inner_product(spectrum.begin(),
ian@0 79 spectrum.end(),
ian@0 80 mel_filters_[i].begin(),
ian@0 81 0.0);
ian@0 82 }
ian@0 83 // take the logs of the filter outputs (avoiding log(0) == -inf)
ian@0 84 std::replace(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_,
ian@0 85 0.0, std::numeric_limits<double>::min());
ian@0 86 std::transform(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_,
ian@0 87 mel_bands_dct_in_,
ian@0 88 static_cast<double(*)(double)>(std::log10));
ian@0 89 // take the DCT of the mel bands
ian@0 90 fftw_execute(dct_plan_);
ian@0 91 // store result
ian@0 92 mfccs_.assign(mel_bands_dct_out_, mel_bands_dct_out_ + number_of_mfccs_);
ian@0 93 return mfccs_;
ian@0 94 }
ian@0 95
ian@0 96 void MFCCAnalyzer::InitializeMelDCT() {
ian@0 97 int mem_size = sizeof(double) * mel_bands_;
ian@0 98 mel_bands_dct_in_ = (double*)fftw_malloc(mem_size);
ian@0 99 mel_bands_dct_out_ = (double*)fftw_malloc(mem_size);
ian@0 100 dct_plan_ = fftw_plan_r2r_1d(number_of_mfccs_,
ian@0 101 mel_bands_dct_in_,
ian@0 102 mel_bands_dct_out_,
ian@0 103 FFTW_REDFT10,
ian@0 104 FFTW_ESTIMATE);
ian@0 105 }
ian@0 106
ian@0 107 void MFCCAnalyzer::InitializeFilters() {
ian@0 108 // create a series of triangular filters mapped linearly on the mel-frequency
ian@0 109 // scale.
ian@0 110 double bin_frequency = sample_rate_ / (2 * frame_size_);
ian@0 111 double mel_min = HzToMel(frequency_min_);
ian@0 112 double mel_max = HzToMel(frequency_max_);
ian@0 113 double mel_width = (mel_max - mel_min) / (mel_bands_ + 1.0);
ian@0 114 double mel_left = mel_min;
ian@0 115 mel_filters_.resize(mel_bands_);
ian@0 116 for (int i = 0; i < mel_bands_; i++) {
ian@0 117 double mel_centre = mel_left + mel_width;
ian@0 118 double mel_right = mel_centre + mel_width;
ian@0 119 double left_hz = MelToHz(mel_left);
ian@0 120 double right_hz = MelToHz(mel_right);
ian@0 121 int bin_left = left_hz / bin_frequency + 0.5;
ian@0 122 int bin_right = right_hz / bin_frequency + 0.5;
ian@0 123 int bin_centre = (bin_left + bin_right) / 2.0 + 0.5;
ian@0 124 double filter_height = 2.0 / (bin_right - bin_left);
ian@0 125 mel_filters_[i].assign(frame_size_, 0.0);
ian@0 126 // generate the up ramp
ian@0 127 double bin_delta = filter_height / (bin_centre - bin_left);
ian@0 128 double ramp = 0.0;
ian@0 129 for (int bin = bin_left; bin < bin_centre; bin++) {
ian@0 130 mel_filters_[i][bin] = ramp;
ian@0 131 ramp += bin_delta;
ian@0 132 }
ian@0 133 // down ramp
ian@0 134 for (int bin = bin_centre; bin < bin_right; bin++) {
ian@0 135 mel_filters_[i][bin] = ramp;
ian@0 136 ramp -= bin_delta;
ian@0 137 }
ian@0 138 // move mel bin along
ian@0 139 mel_left += mel_width;
ian@0 140 }
ian@0 141 }
ian@0 142
ian@0 143 } // dsp namespace