annotate src/feature_extractor.cpp @ 0:add35537fdbb tip

Initial import
author irh <ian.r.hobson@gmail.com>
date Thu, 25 Aug 2011 11:05:55 +0100
parents
children
rev   line source
ian@0 1 // Copyright 2011, Ian Hobson.
ian@0 2 //
ian@0 3 // This file is part of gpsynth.
ian@0 4 //
ian@0 5 // gpsynth is free software: you can redistribute it and/or modify
ian@0 6 // it under the terms of the GNU General Public License as published by
ian@0 7 // the Free Software Foundation, either version 3 of the License, or
ian@0 8 // (at your option) any later version.
ian@0 9 //
ian@0 10 // gpsynth is distributed in the hope that it will be useful,
ian@0 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
ian@0 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
ian@0 13 // GNU General Public License for more details.
ian@0 14 //
ian@0 15 // You should have received a copy of the GNU General Public License
ian@0 16 // along with gpsynth in the file COPYING.
ian@0 17 // If not, see http://www.gnu.org/licenses/.
ian@0 18
ian@0 19 #include "feature_extractor.hpp"
ian@0 20
ian@0 21 #include "statistics.hpp"
ian@0 22 #include "std_ex.hpp"
ian@0 23 #include "window_makers.hpp"
ian@0 24
ian@0 25 #include "sndfile.hh"
ian@0 26
ian@0 27 #include "boost/bind.hpp"
ian@0 28 #include "boost/filesystem.hpp"
ian@0 29
ian@0 30 #include <algorithm>
ian@0 31 #include <cmath>
ian@0 32 #include <iostream>
ian@0 33 #include <stdexcept>
ian@0 34 #include <sstream>
ian@0 35
ian@0 36 namespace bf = boost::filesystem;
ian@0 37
ian@0 38 namespace dsp {
ian@0 39
ian@0 40 FeatureExtractor::FeatureExtractor(const std::string& file_path /* = "" */,
ian@0 41 int window_size /* = 2048 */,
ian@0 42 int hop_size /* = 512 */)
ian@0 43 : window_size_(-1), // initialized by SetFrameSize
ian@0 44 hop_size_(hop_size),
ian@0 45 bin_frequencies_(window_size),
ian@0 46 spectrum_analyzer_(window_size),
ian@0 47 mfcc_analyzer_(window_size, 40, 13, 30, 10000)
ian@0 48 {
ian@0 49 SetWindowSize(window_size);
ian@0 50 if (!file_path.empty()) {
ian@0 51 LoadFile(file_path);
ian@0 52 }
ian@0 53 }
ian@0 54
ian@0 55 void FeatureExtractor::ClearCaches() {
ian@0 56 spectrum_.clear();
ian@0 57 magnitude_spectrum_.clear();
ian@0 58 log_magnitude_spectrum_.clear();
ian@0 59 spectral_centroid_.clear();
ian@0 60 spectral_spread_.clear();
ian@0 61 spectral_flux_.clear();
ian@0 62 pitch_.clear();
ian@0 63 energy_.clear();
ian@0 64 mfccs_.clear();
ian@0 65 delta_mfccs_.clear();
ian@0 66 double_delta_mfccs_.clear();
ian@0 67 }
ian@0 68
ian@0 69 void FeatureExtractor::SetWindowSize(int window_size) {
ian@0 70 if (window_size_ != window_size) {
ian@0 71 window_size_ = window_size;
ian@0 72 frame_.resize(window_size);
ian@0 73 // generate new window
ian@0 74 window_.resize(window_size);
ian@0 75 dsp::HammingWindow(window_size, window_.begin());
ian@0 76 ClearCaches();
ian@0 77 }
ian@0 78 }
ian@0 79
ian@0 80 void FeatureExtractor::SetHopSize(int hop_size) {
ian@0 81 if (hop_size_ != hop_size) {
ian@0 82 hop_size_ = hop_size;
ian@0 83 ClearCaches();
ian@0 84 }
ian@0 85 }
ian@0 86
ian@0 87 void FeatureExtractor::LoadFile(const std::string& file_path) {
ian@0 88 SndfileHandle file(file_path);
ian@0 89 if (file.error()) {
ian@0 90 std::stringstream message;
ian@0 91 message << "FeatureExtractor::ExtractFeaturesFromFile: "
ian@0 92 << "Error reading from file '" << file_path << "' "
ian@0 93 << '(' << file.error() << ')';
ian@0 94 throw std::runtime_error(message.str());
ian@0 95 }
ian@0 96 sf_count_t frames = file.frames();
ian@0 97 int channels = file.channels();
ian@0 98 file_data_.resize(frames * channels);
ian@0 99 // read the audio data in to the buffer
ian@0 100 file.readf(&file_data_[0], frames);
ian@0 101 // only supporting mono at the moment, remove extra channels
ian@0 102 if (channels > 1) {
ian@0 103 for (int i = 0; i < frames; i++) {
ian@0 104 file_data_[i] = file_data_[i * channels];
ian@0 105 }
ian@0 106 file_data_.resize(frames);
ian@0 107 }
ian@0 108 // success, clear caches
ian@0 109 ClearCaches();
ian@0 110 // update file info
ian@0 111 file_path_ = file_path;
ian@0 112 sample_rate_ = file.samplerate();
ian@0 113 frames_ = static_cast<int>(file_data_.size()) / hop_size_;
ian@0 114 if (frames_ == 0) {
ian@0 115 frames_++;
ian@0 116 }
ian@0 117 // bin info
ian@0 118 bin_size_ = sample_rate_ / static_cast<Value>(window_size_);
ian@0 119 std::generate(bin_frequencies_.begin(), bin_frequencies_.end(),
ian@0 120 stdx::Stepper<Value>(bin_size_));
ian@0 121 }
ian@0 122
ian@0 123 Value FeatureExtractor::Duration() const {
ian@0 124 return file_data_.size() / static_cast<Value>(sample_rate_);
ian@0 125 }
ian@0 126
ian@0 127 const std::vector<ComplexValueList>& FeatureExtractor::Spectrum() {
ian@0 128 if (spectrum_.empty()) {
ian@0 129 const int frame_size = window_size_;
ian@0 130 const int half_frame_size = frame_size / 2;
ian@0 131 const int hop_size = hop_size_;
ian@0 132 spectrum_.resize(frames_);
ian@0 133 ValueList::const_iterator data_begin = file_data_.begin();
ian@0 134 ValueList::const_iterator data_end = file_data_.end();
ian@0 135 for (int i = 0; i < frames_; i++) {
ian@0 136 //std::cout << "frame:" << i << '\n';
ian@0 137 // find the frame bounds
ian@0 138 ValueList::const_iterator read_start = data_begin + (i * hop_size);
ian@0 139 ValueList::const_iterator read_end = read_start + frame_size;
ian@0 140 if (read_end > data_end) {
ian@0 141 read_end = data_end;
ian@0 142 }
ian@0 143 // extract the frame and apply window
ian@0 144 std::transform(read_start, read_end, window_.begin(),
ian@0 145 frame_.begin(), std::multiplies<Value>());
ian@0 146 // on the last frame we'll need to fill the rest of the frame with zeros
ian@0 147 std::size_t data_read = read_end - read_start;
ian@0 148 if (data_read < frame_size) {
ian@0 149 std::fill(frame_.begin() + data_read, frame_.end(), 0);
ian@0 150 }
ian@0 151 // get the spectrum of the current frame
ian@0 152 std::copy(frame_.begin(), frame_.end(), spectrum_analyzer_.Input());
ian@0 153 spectrum_analyzer_.Execute();
ian@0 154 spectrum_[i].assign(spectrum_analyzer_.Output(),
ian@0 155 spectrum_analyzer_.Output() + half_frame_size);
ian@0 156 }
ian@0 157 }
ian@0 158 return spectrum_;
ian@0 159 }
ian@0 160
ian@0 161 const std::vector<ValueList>& FeatureExtractor::MagnitudeSpectrum() {
ian@0 162 if (magnitude_spectrum_.empty()) {
ian@0 163 // get complex spectrum
ian@0 164 const std::vector<ComplexValueList>& spectrum = Spectrum();
ian@0 165 // get magnitude spectrum for each frame
ian@0 166 std::size_t frames = spectrum.size();
ian@0 167 magnitude_spectrum_.resize(frames);
ian@0 168 for (int i = 0; i < frames; i++) {
ian@0 169 // take the absolute value of each complex bin
ian@0 170 magnitude_spectrum_[i].resize(spectrum[i].size());
ian@0 171 std::transform(spectrum[i].begin(), spectrum[i].end(),
ian@0 172 magnitude_spectrum_[i].begin(), std::abs<Value>);
ian@0 173 }
ian@0 174 }
ian@0 175 return magnitude_spectrum_;
ian@0 176 }
ian@0 177
ian@0 178 const std::vector<ValueList>& FeatureExtractor::LogMagnitudeSpectrum() {
ian@0 179 if (log_magnitude_spectrum_.empty()) {
ian@0 180 // get magnitude spectrum
ian@0 181 const std::vector<ValueList>& magnitude_spectrum = MagnitudeSpectrum();
ian@0 182 // get magnitude spectrum for each frame
ian@0 183 std::size_t frames = magnitude_spectrum.size();
ian@0 184 log_magnitude_spectrum_.resize(frames);
ian@0 185 for (int i = 0; i < frames; i++) {
ian@0 186 log_magnitude_spectrum_[i].resize(magnitude_spectrum[i].size());
ian@0 187 // copy magnitudes to log magnitude buffer, avoiding zeros
ian@0 188 std::replace_copy(magnitude_spectrum[i].begin(),
ian@0 189 magnitude_spectrum[i].end(),
ian@0 190 log_magnitude_spectrum_[i].begin(),
ian@0 191 0.0,
ian@0 192 std::numeric_limits<Value>::min());
ian@0 193 // take the log of each magnitude (ugly casting for cmath function...)
ian@0 194 std::transform(log_magnitude_spectrum_[i].begin(),
ian@0 195 log_magnitude_spectrum_[i].end(),
ian@0 196 log_magnitude_spectrum_[i].begin(),
ian@0 197 static_cast<double(*)(double)>(std::log10));
ian@0 198 }
ian@0 199 }
ian@0 200 return log_magnitude_spectrum_;
ian@0 201 }
ian@0 202
ian@0 203 const ValueList& FeatureExtractor::SpectralCentroid() {
ian@0 204 if (spectral_centroid_.empty()) {
ian@0 205 MagnitudeSpectrum();
ian@0 206 std::size_t frames = magnitude_spectrum_.size();
ian@0 207 spectral_centroid_.resize(frames);
ian@0 208 std::size_t bins = magnitude_spectrum_[0].size();
ian@0 209 ValueList weighted_frequencies(bins);
ian@0 210 for (int i = 0; i < frames; i++) {
ian@0 211 const ValueList& magnitudes = magnitude_spectrum_[i];
ian@0 212 // get bin frequencies weighted by bin magnitudes
ian@0 213 std::transform(magnitudes.begin(),
ian@0 214 magnitudes.end(),
ian@0 215 bin_frequencies_.begin(),
ian@0 216 weighted_frequencies.begin(),
ian@0 217 std::multiplies<Value>());
ian@0 218 // divide mean of weighted frequencies by mean of bin magnitudes
ian@0 219 Value magnitude = stats::Sum(magnitudes);
ian@0 220 if (magnitude > 0) {
ian@0 221 spectral_centroid_[i] = stats::Sum(weighted_frequencies) / magnitude;
ian@0 222 } else {
ian@0 223 spectral_centroid_[i] = 0;
ian@0 224 }
ian@0 225 }
ian@0 226 }
ian@0 227 return spectral_centroid_;
ian@0 228 }
ian@0 229
ian@0 230
ian@0 231 const ValueList& FeatureExtractor::SpectralSpread() {
ian@0 232 if (spectral_spread_.empty()) {
ian@0 233 SpectralCentroid();
ian@0 234 std::size_t frames = magnitude_spectrum_.size();
ian@0 235 spectral_spread_.resize(frames);
ian@0 236 std::size_t bins = magnitude_spectrum_[0].size();
ian@0 237 ValueList weighted_difference(bins);
ian@0 238 for (int i = 0; i < frames; i++) {
ian@0 239 Value centroid = spectral_centroid_[i];
ian@0 240 const ValueList& magnitudes = magnitude_spectrum_[i];
ian@0 241 for (int bin = 0; bin < bins; bin++) {
ian@0 242 Value difference = bin_frequencies_[bin] - centroid;
ian@0 243 weighted_difference[bin] = difference * difference * magnitudes[bin];
ian@0 244 }
ian@0 245 Value magnitude = stats::Sum(magnitudes);
ian@0 246 if (magnitude > 0) {
ian@0 247 Value variance = stats::Sum(weighted_difference) / magnitude;
ian@0 248 spectral_spread_[i] = std::sqrt(variance);
ian@0 249 } else {
ian@0 250 spectral_spread_[i] = 0;
ian@0 251 }
ian@0 252 }
ian@0 253 }
ian@0 254 return spectral_spread_;
ian@0 255 }
ian@0 256
ian@0 257 const ValueList& FeatureExtractor::Pitch() {
ian@0 258 if (pitch_.empty()) {
ian@0 259 MagnitudeSpectrum();
ian@0 260 pitch_.resize(frames_);
ian@0 261 std::deque<Value> recent_pitches;
ian@0 262 const int moving_average_size = 8;
ian@0 263 for (int i = 0; i < frames_; i++) {
ian@0 264 const ValueList& mags = magnitude_spectrum_[i];
ian@0 265 // find the peak bin number
ian@0 266 std::ptrdiff_t peak_bin = std::distance(mags.begin(),
ian@0 267 std::max_element(mags.begin(),
ian@0 268 mags.end()));
ian@0 269 // interpolate peak frequency
ian@0 270 double offset;
ian@0 271 double centre = mags[peak_bin];
ian@0 272 if (centre == 0) {
ian@0 273 pitch_[i] = 0;
ian@0 274 } else {
ian@0 275 if (peak_bin == 0) {
ian@0 276 // parabolic interpolation of first 2 bin magnitudes
ian@0 277 double above = mags[peak_bin + 1];
ian@0 278 offset = above / (2 * (2 * centre - above));
ian@0 279 } else if (peak_bin == mags.size() - 1) {
ian@0 280 // parabolic interpolation of last 2 bin magnitudes
ian@0 281 double below = mags[peak_bin - 1];
ian@0 282 offset = (-below) / (2 * (2 * centre - below));
ian@0 283 } else {
ian@0 284 double below = mags[peak_bin - 1];
ian@0 285 double above = mags[peak_bin + 1];
ian@0 286 // gaussian interpolation
ian@0 287 offset = std::log(above / below)
ian@0 288 / (2 * std::log((centre * centre) / (above * below)));
ian@0 289 }
ian@0 290 Value frame_pitch = bin_size_ * (peak_bin + offset);
ian@0 291 if (recent_pitches.size() == moving_average_size) {
ian@0 292 recent_pitches.pop_front();
ian@0 293 }
ian@0 294 recent_pitches.push_back(frame_pitch);
ian@0 295 pitch_[i] = stats::Mean(recent_pitches);
ian@0 296 }
ian@0 297 }
ian@0 298 }
ian@0 299 return pitch_;
ian@0 300 }
ian@0 301
ian@0 302 const ValueList& FeatureExtractor::Energy() {
ian@0 303 if (energy_.empty()) {
ian@0 304 const int frame_size = window_size_;
ian@0 305 const int hop_size = hop_size_;
ian@0 306 energy_.resize(frames_);
ian@0 307 ValueList::const_iterator data_begin = file_data_.begin();
ian@0 308 ValueList::const_iterator data_end = file_data_.end();
ian@0 309 for (int i = 0; i < frames_; i++) {
ian@0 310 // find the frame bounds
ian@0 311 ValueList::const_iterator read_start = data_begin + (i * hop_size);
ian@0 312 ValueList::const_iterator read_end = read_start + frame_size;
ian@0 313 if (read_end > data_end) {
ian@0 314 read_end = data_end;
ian@0 315 }
ian@0 316 Value energy(0);
ian@0 317 std::ptrdiff_t samples = std::distance(read_start, read_end);
ian@0 318 while (read_start != read_end) {
ian@0 319 energy += std::pow(*read_start++, 2);
ian@0 320 }
ian@0 321 energy_[i] = energy / samples;
ian@0 322 }
ian@0 323 }
ian@0 324 return energy_;
ian@0 325 }
ian@0 326
ian@0 327 const std::vector<ValueList>& FeatureExtractor::MFCCs() {
ian@0 328 if (mfccs_.empty()) {
ian@0 329 const std::vector<ValueList>& magnitude_spectrum = MagnitudeSpectrum();
ian@0 330 mfcc_analyzer_.SetSampleRate(sample_rate_);
ian@0 331 mfccs_.resize(frames_);
ian@0 332 for (int i = 0; i < frames_; i++) {
ian@0 333 mfccs_[i] = mfcc_analyzer_.ExtractMFCCS(magnitude_spectrum[i]);
ian@0 334 }
ian@0 335 }
ian@0 336 return mfccs_;
ian@0 337 }
ian@0 338
ian@0 339 const std::vector<ValueList>& FeatureExtractor::DeltaMFCCs() {
ian@0 340 if (delta_mfccs_.empty()) {
ian@0 341 const std::vector<ValueList>& mfccs = MFCCs();
ian@0 342 // resize buffers
ian@0 343 const std::size_t number_of_mfccs = mfccs[0].size();
ian@0 344 ValueList empty_values(number_of_mfccs, 0);
ian@0 345 delta_mfccs_.resize(mfccs.size(), empty_values);
ian@0 346 // calculate deltas based on 2 neighbouring frames in either direction
ian@0 347 for (int i = 2, last_frame = frames_ - 2; i < last_frame; i++) {
ian@0 348 const ValueList& before_2 = mfccs[i - 2];
ian@0 349 const ValueList& before_1 = mfccs[i - 1];
ian@0 350 const ValueList& after_1 = mfccs[i + 1];
ian@0 351 const ValueList& after_2 = mfccs[i + 2];
ian@0 352 ValueList& delta_frame = delta_mfccs_[i];
ian@0 353 for (int m = 0; m < number_of_mfccs; m++) {
ian@0 354 Value delta = after_1[m] - before_1[m]
ian@0 355 + 2.0 * (after_2[m] - before_2[m]);
ian@0 356 delta_frame[m] = delta / 10.0;
ian@0 357 }
ian@0 358 }
ian@0 359 }
ian@0 360 return delta_mfccs_;
ian@0 361 }
ian@0 362
ian@0 363 const std::vector<ValueList>& FeatureExtractor::DoubleDeltaMFCCs() {
ian@0 364 if (double_delta_mfccs_.empty()) {
ian@0 365 const std::vector<ValueList>& delta_mfccs = DeltaMFCCs();
ian@0 366 // resize buffers
ian@0 367 const std::size_t number_of_mfccs = delta_mfccs[0].size();
ian@0 368 ValueList empty_values(number_of_mfccs, 0);
ian@0 369 double_delta_mfccs_.resize(delta_mfccs.size(), empty_values);
ian@0 370 // calculate deltas based on 2 neighbouring frames in either direction
ian@0 371 for (int i = 2, last_frame = frames_ - 2; i < last_frame; i++) {
ian@0 372 const ValueList& before_2 = delta_mfccs[i - 2];
ian@0 373 const ValueList& before_1 = delta_mfccs[i - 1];
ian@0 374 const ValueList& after_1 = delta_mfccs[i + 1];
ian@0 375 const ValueList& after_2 = delta_mfccs[i + 2];
ian@0 376 ValueList& double_delta_frame = double_delta_mfccs_[i];
ian@0 377 for (int m = 0; m < number_of_mfccs; m++) {
ian@0 378 Value delta = after_1[m] - before_1[m]
ian@0 379 + 2.0 * (after_2[m] - before_2[m]);
ian@0 380 double_delta_frame[m] = delta / 10.0;
ian@0 381 }
ian@0 382 }
ian@0 383 }
ian@0 384 return double_delta_mfccs_;
ian@0 385 }
ian@0 386
ian@0 387 const std::vector<ValueList>& FeatureExtractor::SpectralFlux() {
ian@0 388 if (spectral_flux_.empty()) {
ian@0 389 // get magnitude spectrum
ian@0 390 const std::vector<ValueList>& spectrum = MagnitudeSpectrum();
ian@0 391 // get magnitude spectrum for each frame
ian@0 392 std::size_t frames = spectrum.size();
ian@0 393 std::size_t bins = spectrum[0].size();
ian@0 394 spectral_flux_.resize(frames);
ian@0 395 // first frame always zeroed
ian@0 396 spectral_flux_[0].assign(bins, 0);
ian@0 397 for (int i = 1; i < frames; i++) {
ian@0 398 const ValueList& spectrum_frame = spectrum[i];
ian@0 399 const ValueList& previous_spectrum_frame = spectrum[i - 1];
ian@0 400 ValueList& flux_frame = spectral_flux_[i];
ian@0 401 flux_frame.resize(bins);
ian@0 402 // get the difference in magnitudes for each bin
ian@0 403 for (int bin = 0; bin < bins; bin++) {
ian@0 404 flux_frame[bin] = spectrum_frame[bin] - previous_spectrum_frame[bin];
ian@0 405 }
ian@0 406 //std::cout << i << ": " << stats::Mean(flux_frame) << '\n';
ian@0 407 }
ian@0 408 }
ian@0 409 return spectral_flux_;
ian@0 410 }
ian@0 411
ian@0 412 } // dsp namespace