ian@0
|
1 // Copyright 2011, Ian Hobson.
|
ian@0
|
2 //
|
ian@0
|
3 // This file is part of gpsynth.
|
ian@0
|
4 //
|
ian@0
|
5 // gpsynth is free software: you can redistribute it and/or modify
|
ian@0
|
6 // it under the terms of the GNU General Public License as published by
|
ian@0
|
7 // the Free Software Foundation, either version 3 of the License, or
|
ian@0
|
8 // (at your option) any later version.
|
ian@0
|
9 //
|
ian@0
|
10 // gpsynth is distributed in the hope that it will be useful,
|
ian@0
|
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
ian@0
|
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
ian@0
|
13 // GNU General Public License for more details.
|
ian@0
|
14 //
|
ian@0
|
15 // You should have received a copy of the GNU General Public License
|
ian@0
|
16 // along with gpsynth in the file COPYING.
|
ian@0
|
17 // If not, see http://www.gnu.org/licenses/.
|
ian@0
|
18
|
ian@0
|
19 #include "mfcc_analyzer.hpp"
|
ian@0
|
20
|
ian@0
|
21 #include "boost_ex.hpp"
|
ian@0
|
22
|
ian@0
|
23 #include <algorithm>
|
ian@0
|
24 #include <cmath>
|
ian@0
|
25 #include <iostream>
|
ian@0
|
26 #include <iterator>
|
ian@0
|
27 #include <limits>
|
ian@0
|
28 #include <numeric>
|
ian@0
|
29
|
ian@0
|
30 namespace {
|
ian@0
|
31
|
ian@0
|
32 // using the scale described in the HTK book
|
ian@0
|
33 double MelToHz(double mel) {
|
ian@0
|
34 return 700.0 * (std::pow(10.0, mel / 2595.0) - 1.0);
|
ian@0
|
35 }
|
ian@0
|
36
|
ian@0
|
37 double HzToMel(double hz) {
|
ian@0
|
38 return 2595.0 * std::log10(1.0 + (hz / 700.0));
|
ian@0
|
39 }
|
ian@0
|
40
|
ian@0
|
41 } // namespace
|
ian@0
|
42
|
ian@0
|
43 namespace dsp {
|
ian@0
|
44
|
ian@0
|
45 MFCCAnalyzer::MFCCAnalyzer(int frame_size,
|
ian@0
|
46 int mel_bands,
|
ian@0
|
47 int number_of_mfccs,
|
ian@0
|
48 double frequency_min,
|
ian@0
|
49 double frequency_max)
|
ian@0
|
50 : frame_size_(frame_size),
|
ian@0
|
51 mel_bands_(mel_bands),
|
ian@0
|
52 number_of_mfccs_(number_of_mfccs),
|
ian@0
|
53 sample_rate_(0),
|
ian@0
|
54 frequency_min_(frequency_min),
|
ian@0
|
55 frequency_max_(frequency_max)
|
ian@0
|
56 {
|
ian@0
|
57 InitializeMelDCT();
|
ian@0
|
58 }
|
ian@0
|
59
|
ian@0
|
60 MFCCAnalyzer::~MFCCAnalyzer() {
|
ian@0
|
61 fftw_free(mel_bands_dct_in_);
|
ian@0
|
62 fftw_free(mel_bands_dct_out_);
|
ian@0
|
63 fftw_destroy_plan(dct_plan_);
|
ian@0
|
64 }
|
ian@0
|
65
|
ian@0
|
66 void MFCCAnalyzer::SetSampleRate(double sample_rate) {
|
ian@0
|
67 if (sample_rate_ != sample_rate) {
|
ian@0
|
68 sample_rate_ = sample_rate;
|
ian@0
|
69 InitializeFilters();
|
ian@0
|
70 }
|
ian@0
|
71 }
|
ian@0
|
72
|
ian@0
|
73 // extracts mfccs from a magnitude spectrum
|
ian@0
|
74 const std::vector<double>&
|
ian@0
|
75 MFCCAnalyzer::ExtractMFCCS(const std::vector<double>& spectrum) {
|
ian@0
|
76 // filter spectrum
|
ian@0
|
77 for (std::size_t i = 0; i < mel_bands_; i++) {
|
ian@0
|
78 mel_bands_dct_in_[i] = std::inner_product(spectrum.begin(),
|
ian@0
|
79 spectrum.end(),
|
ian@0
|
80 mel_filters_[i].begin(),
|
ian@0
|
81 0.0);
|
ian@0
|
82 }
|
ian@0
|
83 // take the logs of the filter outputs (avoiding log(0) == -inf)
|
ian@0
|
84 std::replace(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_,
|
ian@0
|
85 0.0, std::numeric_limits<double>::min());
|
ian@0
|
86 std::transform(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_,
|
ian@0
|
87 mel_bands_dct_in_,
|
ian@0
|
88 static_cast<double(*)(double)>(std::log10));
|
ian@0
|
89 // take the DCT of the mel bands
|
ian@0
|
90 fftw_execute(dct_plan_);
|
ian@0
|
91 // store result
|
ian@0
|
92 mfccs_.assign(mel_bands_dct_out_, mel_bands_dct_out_ + number_of_mfccs_);
|
ian@0
|
93 return mfccs_;
|
ian@0
|
94 }
|
ian@0
|
95
|
ian@0
|
96 void MFCCAnalyzer::InitializeMelDCT() {
|
ian@0
|
97 int mem_size = sizeof(double) * mel_bands_;
|
ian@0
|
98 mel_bands_dct_in_ = (double*)fftw_malloc(mem_size);
|
ian@0
|
99 mel_bands_dct_out_ = (double*)fftw_malloc(mem_size);
|
ian@0
|
100 dct_plan_ = fftw_plan_r2r_1d(number_of_mfccs_,
|
ian@0
|
101 mel_bands_dct_in_,
|
ian@0
|
102 mel_bands_dct_out_,
|
ian@0
|
103 FFTW_REDFT10,
|
ian@0
|
104 FFTW_ESTIMATE);
|
ian@0
|
105 }
|
ian@0
|
106
|
ian@0
|
107 void MFCCAnalyzer::InitializeFilters() {
|
ian@0
|
108 // create a series of triangular filters mapped linearly on the mel-frequency
|
ian@0
|
109 // scale.
|
ian@0
|
110 double bin_frequency = sample_rate_ / (2 * frame_size_);
|
ian@0
|
111 double mel_min = HzToMel(frequency_min_);
|
ian@0
|
112 double mel_max = HzToMel(frequency_max_);
|
ian@0
|
113 double mel_width = (mel_max - mel_min) / (mel_bands_ + 1.0);
|
ian@0
|
114 double mel_left = mel_min;
|
ian@0
|
115 mel_filters_.resize(mel_bands_);
|
ian@0
|
116 for (int i = 0; i < mel_bands_; i++) {
|
ian@0
|
117 double mel_centre = mel_left + mel_width;
|
ian@0
|
118 double mel_right = mel_centre + mel_width;
|
ian@0
|
119 double left_hz = MelToHz(mel_left);
|
ian@0
|
120 double right_hz = MelToHz(mel_right);
|
ian@0
|
121 int bin_left = left_hz / bin_frequency + 0.5;
|
ian@0
|
122 int bin_right = right_hz / bin_frequency + 0.5;
|
ian@0
|
123 int bin_centre = (bin_left + bin_right) / 2.0 + 0.5;
|
ian@0
|
124 double filter_height = 2.0 / (bin_right - bin_left);
|
ian@0
|
125 mel_filters_[i].assign(frame_size_, 0.0);
|
ian@0
|
126 // generate the up ramp
|
ian@0
|
127 double bin_delta = filter_height / (bin_centre - bin_left);
|
ian@0
|
128 double ramp = 0.0;
|
ian@0
|
129 for (int bin = bin_left; bin < bin_centre; bin++) {
|
ian@0
|
130 mel_filters_[i][bin] = ramp;
|
ian@0
|
131 ramp += bin_delta;
|
ian@0
|
132 }
|
ian@0
|
133 // down ramp
|
ian@0
|
134 for (int bin = bin_centre; bin < bin_right; bin++) {
|
ian@0
|
135 mel_filters_[i][bin] = ramp;
|
ian@0
|
136 ramp -= bin_delta;
|
ian@0
|
137 }
|
ian@0
|
138 // move mel bin along
|
ian@0
|
139 mel_left += mel_width;
|
ian@0
|
140 }
|
ian@0
|
141 }
|
ian@0
|
142
|
ian@0
|
143 } // dsp namespace
|