Mercurial > hg > gpsynth
comparison src/mfcc_analyzer.cpp @ 0:add35537fdbb tip
Initial import
author | irh <ian.r.hobson@gmail.com> |
---|---|
date | Thu, 25 Aug 2011 11:05:55 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:add35537fdbb |
---|---|
1 // Copyright 2011, Ian Hobson. | |
2 // | |
3 // This file is part of gpsynth. | |
4 // | |
5 // gpsynth is free software: you can redistribute it and/or modify | |
6 // it under the terms of the GNU General Public License as published by | |
7 // the Free Software Foundation, either version 3 of the License, or | |
8 // (at your option) any later version. | |
9 // | |
10 // gpsynth is distributed in the hope that it will be useful, | |
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 // GNU General Public License for more details. | |
14 // | |
15 // You should have received a copy of the GNU General Public License | |
16 // along with gpsynth in the file COPYING. | |
17 // If not, see http://www.gnu.org/licenses/. | |
18 | |
19 #include "mfcc_analyzer.hpp" | |
20 | |
21 #include "boost_ex.hpp" | |
22 | |
23 #include <algorithm> | |
24 #include <cmath> | |
25 #include <iostream> | |
26 #include <iterator> | |
27 #include <limits> | |
28 #include <numeric> | |
29 | |
30 namespace { | |
31 | |
32 // using the scale described in the HTK book | |
33 double MelToHz(double mel) { | |
34 return 700.0 * (std::pow(10.0, mel / 2595.0) - 1.0); | |
35 } | |
36 | |
37 double HzToMel(double hz) { | |
38 return 2595.0 * std::log10(1.0 + (hz / 700.0)); | |
39 } | |
40 | |
41 } // namespace | |
42 | |
43 namespace dsp { | |
44 | |
45 MFCCAnalyzer::MFCCAnalyzer(int frame_size, | |
46 int mel_bands, | |
47 int number_of_mfccs, | |
48 double frequency_min, | |
49 double frequency_max) | |
50 : frame_size_(frame_size), | |
51 mel_bands_(mel_bands), | |
52 number_of_mfccs_(number_of_mfccs), | |
53 sample_rate_(0), | |
54 frequency_min_(frequency_min), | |
55 frequency_max_(frequency_max) | |
56 { | |
57 InitializeMelDCT(); | |
58 } | |
59 | |
60 MFCCAnalyzer::~MFCCAnalyzer() { | |
61 fftw_free(mel_bands_dct_in_); | |
62 fftw_free(mel_bands_dct_out_); | |
63 fftw_destroy_plan(dct_plan_); | |
64 } | |
65 | |
66 void MFCCAnalyzer::SetSampleRate(double sample_rate) { | |
67 if (sample_rate_ != sample_rate) { | |
68 sample_rate_ = sample_rate; | |
69 InitializeFilters(); | |
70 } | |
71 } | |
72 | |
73 // extracts mfccs from a magnitude spectrum | |
74 const std::vector<double>& | |
75 MFCCAnalyzer::ExtractMFCCS(const std::vector<double>& spectrum) { | |
76 // filter spectrum | |
77 for (std::size_t i = 0; i < mel_bands_; i++) { | |
78 mel_bands_dct_in_[i] = std::inner_product(spectrum.begin(), | |
79 spectrum.end(), | |
80 mel_filters_[i].begin(), | |
81 0.0); | |
82 } | |
83 // take the logs of the filter outputs (avoiding log(0) == -inf) | |
84 std::replace(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_, | |
85 0.0, std::numeric_limits<double>::min()); | |
86 std::transform(mel_bands_dct_in_, mel_bands_dct_in_ + mel_bands_, | |
87 mel_bands_dct_in_, | |
88 static_cast<double(*)(double)>(std::log10)); | |
89 // take the DCT of the mel bands | |
90 fftw_execute(dct_plan_); | |
91 // store result | |
92 mfccs_.assign(mel_bands_dct_out_, mel_bands_dct_out_ + number_of_mfccs_); | |
93 return mfccs_; | |
94 } | |
95 | |
96 void MFCCAnalyzer::InitializeMelDCT() { | |
97 int mem_size = sizeof(double) * mel_bands_; | |
98 mel_bands_dct_in_ = (double*)fftw_malloc(mem_size); | |
99 mel_bands_dct_out_ = (double*)fftw_malloc(mem_size); | |
100 dct_plan_ = fftw_plan_r2r_1d(number_of_mfccs_, | |
101 mel_bands_dct_in_, | |
102 mel_bands_dct_out_, | |
103 FFTW_REDFT10, | |
104 FFTW_ESTIMATE); | |
105 } | |
106 | |
107 void MFCCAnalyzer::InitializeFilters() { | |
108 // create a series of triangular filters mapped linearly on the mel-frequency | |
109 // scale. | |
110 double bin_frequency = sample_rate_ / (2 * frame_size_); | |
111 double mel_min = HzToMel(frequency_min_); | |
112 double mel_max = HzToMel(frequency_max_); | |
113 double mel_width = (mel_max - mel_min) / (mel_bands_ + 1.0); | |
114 double mel_left = mel_min; | |
115 mel_filters_.resize(mel_bands_); | |
116 for (int i = 0; i < mel_bands_; i++) { | |
117 double mel_centre = mel_left + mel_width; | |
118 double mel_right = mel_centre + mel_width; | |
119 double left_hz = MelToHz(mel_left); | |
120 double right_hz = MelToHz(mel_right); | |
121 int bin_left = left_hz / bin_frequency + 0.5; | |
122 int bin_right = right_hz / bin_frequency + 0.5; | |
123 int bin_centre = (bin_left + bin_right) / 2.0 + 0.5; | |
124 double filter_height = 2.0 / (bin_right - bin_left); | |
125 mel_filters_[i].assign(frame_size_, 0.0); | |
126 // generate the up ramp | |
127 double bin_delta = filter_height / (bin_centre - bin_left); | |
128 double ramp = 0.0; | |
129 for (int bin = bin_left; bin < bin_centre; bin++) { | |
130 mel_filters_[i][bin] = ramp; | |
131 ramp += bin_delta; | |
132 } | |
133 // down ramp | |
134 for (int bin = bin_centre; bin < bin_right; bin++) { | |
135 mel_filters_[i][bin] = ramp; | |
136 ramp -= bin_delta; | |
137 } | |
138 // move mel bin along | |
139 mel_left += mel_width; | |
140 } | |
141 } | |
142 | |
143 } // dsp namespace |