diff src/mfcc_analyzer.cpp @ 0:add35537fdbb tip

Initial import
author irh <ian.r.hobson@gmail.com>
date Thu, 25 Aug 2011 11:05:55 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/mfcc_analyzer.cpp	Thu Aug 25 11:05:55 2011 +0100
@@ -0,0 +1,143 @@
+//  Copyright 2011, Ian Hobson.
+//
+//  This file is part of gpsynth.
+//
+//  gpsynth is free software: you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation, either version 3 of the License, or
+//  (at your option) any later version.
+//
+//  gpsynth is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//
+//  You should have received a copy of the GNU General Public License
+//  along with gpsynth in the file COPYING. 
+//  If not, see http://www.gnu.org/licenses/.
+
+#include "mfcc_analyzer.hpp"
+
+#include "boost_ex.hpp"
+
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <numeric>
+
+namespace {
+  
+// using the scale described in the HTK book
+double MelToHz(double mel) {
+  return 700.0 * (std::pow(10.0, mel / 2595.0) - 1.0);
+}
+
+double HzToMel(double hz) {
+  return 2595.0 * std::log10(1.0 + (hz / 700.0));
+}
+  
+} // namespace
+
+namespace dsp {
+
+MFCCAnalyzer::MFCCAnalyzer(int frame_size,
+                           int mel_bands,
+                           int number_of_mfccs,
+                           double frequency_min,
+                           double frequency_max)
+: frame_size_(frame_size),
+  mel_bands_(mel_bands),
+  number_of_mfccs_(number_of_mfccs),
+  sample_rate_(0),
+  frequency_min_(frequency_min),
+  frequency_max_(frequency_max)
+{
+  InitializeMelDCT();
+}
+  
+MFCCAnalyzer::~MFCCAnalyzer() {
+  fftw_free(mel_bands_dct_in_);
+  fftw_free(mel_bands_dct_out_);
+  fftw_destroy_plan(dct_plan_);
+}
+
+void MFCCAnalyzer::SetSampleRate(double sample_rate) {
+  if (sample_rate_ != sample_rate) {
+    sample_rate_ = sample_rate;
+    InitializeFilters();
+  }
+}
+
+// extracts mfccs from a magnitude spectrum
+const std::vector<double>&
+MFCCAnalyzer::ExtractMFCCS(const std::vector<double>& spectrum) {
+  // filter spectrum
+  for (std::size_t i = 0; i < mel_bands_; i++) {
+    mel_bands_dct_in_[i] = std::inner_product(spectrum.begin(),
+                                              spectrum.end(),
+                                              mel_filters_[i].begin(),
+                                              0.0);
+  }
+  // take the logs of the filter outputs (avoiding log(0) == -inf)
+  std::replace(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_,
+               0.0, std::numeric_limits<double>::min());
+  std::transform(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_,
+                 mel_bands_dct_in_,
+                 static_cast<double(*)(double)>(std::log10));
+  // take the DCT of the mel bands
+  fftw_execute(dct_plan_);
+  // store result
+  mfccs_.assign(mel_bands_dct_out_, mel_bands_dct_out_ + number_of_mfccs_);
+  return mfccs_;
+}
+
+void MFCCAnalyzer::InitializeMelDCT() {
+  int mem_size = sizeof(double) * mel_bands_;
+  mel_bands_dct_in_ = (double*)fftw_malloc(mem_size);
+  mel_bands_dct_out_ = (double*)fftw_malloc(mem_size);
+  dct_plan_ = fftw_plan_r2r_1d(number_of_mfccs_,
+                               mel_bands_dct_in_,
+                               mel_bands_dct_out_,
+                               FFTW_REDFT10,
+                               FFTW_ESTIMATE);
+}
+
+void MFCCAnalyzer::InitializeFilters() {
+  // create a series of triangular filters mapped linearly on the mel-frequency
+  // scale.
+  double bin_frequency = sample_rate_ / (2 * frame_size_);
+  double mel_min = HzToMel(frequency_min_);
+  double mel_max = HzToMel(frequency_max_);
+  double mel_width = (mel_max - mel_min) / (mel_bands_ + 1.0);
+  double mel_left = mel_min;
+  mel_filters_.resize(mel_bands_);
+  for (int i = 0; i < mel_bands_; i++) {
+    double mel_centre = mel_left + mel_width;
+    double mel_right = mel_centre + mel_width;
+    double left_hz = MelToHz(mel_left);
+    double right_hz = MelToHz(mel_right);
+    int bin_left = left_hz / bin_frequency + 0.5;
+    int bin_right = right_hz / bin_frequency + 0.5;
+    int bin_centre = (bin_left + bin_right) / 2.0 + 0.5;
+    double filter_height = 2.0 / (bin_right - bin_left);
+    mel_filters_[i].assign(frame_size_, 0.0);
+    // generate the up ramp
+    double bin_delta = filter_height / (bin_centre - bin_left);
+    double ramp = 0.0;
+    for (int bin = bin_left; bin < bin_centre; bin++) {
+      mel_filters_[i][bin] = ramp;
+      ramp += bin_delta;
+    }
+    // down ramp
+    for (int bin = bin_centre; bin < bin_right; bin++) {
+      mel_filters_[i][bin] = ramp;
+      ramp -= bin_delta;
+    }
+    // move mel bin along
+    mel_left += mel_width;
+  }
+}
+  
+} // dsp namespace