ian@0: // Copyright 2011, Ian Hobson. ian@0: // ian@0: // This file is part of gpsynth. ian@0: // ian@0: // gpsynth is free software: you can redistribute it and/or modify ian@0: // it under the terms of the GNU General Public License as published by ian@0: // the Free Software Foundation, either version 3 of the License, or ian@0: // (at your option) any later version. ian@0: // ian@0: // gpsynth is distributed in the hope that it will be useful, ian@0: // but WITHOUT ANY WARRANTY; without even the implied warranty of ian@0: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ian@0: // GNU General Public License for more details. ian@0: // ian@0: // You should have received a copy of the GNU General Public License ian@0: // along with gpsynth in the file COPYING. ian@0: // If not, see http://www.gnu.org/licenses/. ian@0: ian@0: #include "feature_extractor.hpp" ian@0: ian@0: #include "statistics.hpp" ian@0: #include "std_ex.hpp" ian@0: #include "window_makers.hpp" ian@0: ian@0: #include "sndfile.hh" ian@0: ian@0: #include "boost/bind.hpp" ian@0: #include "boost/filesystem.hpp" ian@0: ian@0: #include ian@0: #include ian@0: #include ian@0: #include ian@0: #include ian@0: ian@0: namespace bf = boost::filesystem; ian@0: ian@0: namespace dsp { ian@0: ian@0: FeatureExtractor::FeatureExtractor(const std::string& file_path /* = "" */, ian@0: int window_size /* = 2048 */, ian@0: int hop_size /* = 512 */) ian@0: : window_size_(-1), // initialized by SetFrameSize ian@0: hop_size_(hop_size), ian@0: bin_frequencies_(window_size), ian@0: spectrum_analyzer_(window_size), ian@0: mfcc_analyzer_(window_size, 40, 13, 30, 10000) ian@0: { ian@0: SetWindowSize(window_size); ian@0: if (!file_path.empty()) { ian@0: LoadFile(file_path); ian@0: } ian@0: } ian@0: ian@0: void FeatureExtractor::ClearCaches() { ian@0: spectrum_.clear(); ian@0: magnitude_spectrum_.clear(); ian@0: log_magnitude_spectrum_.clear(); ian@0: spectral_centroid_.clear(); ian@0: spectral_spread_.clear(); ian@0: spectral_flux_.clear(); ian@0: pitch_.clear(); ian@0: energy_.clear(); ian@0: mfccs_.clear(); ian@0: delta_mfccs_.clear(); ian@0: double_delta_mfccs_.clear(); ian@0: } ian@0: ian@0: void FeatureExtractor::SetWindowSize(int window_size) { ian@0: if (window_size_ != window_size) { ian@0: window_size_ = window_size; ian@0: frame_.resize(window_size); ian@0: // generate new window ian@0: window_.resize(window_size); ian@0: dsp::HammingWindow(window_size, window_.begin()); ian@0: ClearCaches(); ian@0: } ian@0: } ian@0: ian@0: void FeatureExtractor::SetHopSize(int hop_size) { ian@0: if (hop_size_ != hop_size) { ian@0: hop_size_ = hop_size; ian@0: ClearCaches(); ian@0: } ian@0: } ian@0: ian@0: void FeatureExtractor::LoadFile(const std::string& file_path) { ian@0: SndfileHandle file(file_path); ian@0: if (file.error()) { ian@0: std::stringstream message; ian@0: message << "FeatureExtractor::ExtractFeaturesFromFile: " ian@0: << "Error reading from file '" << file_path << "' " ian@0: << '(' << file.error() << ')'; ian@0: throw std::runtime_error(message.str()); ian@0: } ian@0: sf_count_t frames = file.frames(); ian@0: int channels = file.channels(); ian@0: file_data_.resize(frames * channels); ian@0: // read the audio data in to the buffer ian@0: file.readf(&file_data_[0], frames); ian@0: // only supporting mono at the moment, remove extra channels ian@0: if (channels > 1) { ian@0: for (int i = 0; i < frames; i++) { ian@0: file_data_[i] = file_data_[i * channels]; ian@0: } ian@0: file_data_.resize(frames); ian@0: } ian@0: // success, clear caches ian@0: ClearCaches(); ian@0: // update file info ian@0: file_path_ = file_path; ian@0: sample_rate_ = file.samplerate(); ian@0: frames_ = static_cast(file_data_.size()) / hop_size_; ian@0: if (frames_ == 0) { ian@0: frames_++; ian@0: } ian@0: // bin info ian@0: bin_size_ = sample_rate_ / static_cast(window_size_); ian@0: std::generate(bin_frequencies_.begin(), bin_frequencies_.end(), ian@0: stdx::Stepper(bin_size_)); ian@0: } ian@0: ian@0: Value FeatureExtractor::Duration() const { ian@0: return file_data_.size() / static_cast(sample_rate_); ian@0: } ian@0: ian@0: const std::vector& FeatureExtractor::Spectrum() { ian@0: if (spectrum_.empty()) { ian@0: const int frame_size = window_size_; ian@0: const int half_frame_size = frame_size / 2; ian@0: const int hop_size = hop_size_; ian@0: spectrum_.resize(frames_); ian@0: ValueList::const_iterator data_begin = file_data_.begin(); ian@0: ValueList::const_iterator data_end = file_data_.end(); ian@0: for (int i = 0; i < frames_; i++) { ian@0: //std::cout << "frame:" << i << '\n'; ian@0: // find the frame bounds ian@0: ValueList::const_iterator read_start = data_begin + (i * hop_size); ian@0: ValueList::const_iterator read_end = read_start + frame_size; ian@0: if (read_end > data_end) { ian@0: read_end = data_end; ian@0: } ian@0: // extract the frame and apply window ian@0: std::transform(read_start, read_end, window_.begin(), ian@0: frame_.begin(), std::multiplies()); ian@0: // on the last frame we'll need to fill the rest of the frame with zeros ian@0: std::size_t data_read = read_end - read_start; ian@0: if (data_read < frame_size) { ian@0: std::fill(frame_.begin() + data_read, frame_.end(), 0); ian@0: } ian@0: // get the spectrum of the current frame ian@0: std::copy(frame_.begin(), frame_.end(), spectrum_analyzer_.Input()); ian@0: spectrum_analyzer_.Execute(); ian@0: spectrum_[i].assign(spectrum_analyzer_.Output(), ian@0: spectrum_analyzer_.Output() + half_frame_size); ian@0: } ian@0: } ian@0: return spectrum_; ian@0: } ian@0: ian@0: const std::vector& FeatureExtractor::MagnitudeSpectrum() { ian@0: if (magnitude_spectrum_.empty()) { ian@0: // get complex spectrum ian@0: const std::vector& spectrum = Spectrum(); ian@0: // get magnitude spectrum for each frame ian@0: std::size_t frames = spectrum.size(); ian@0: magnitude_spectrum_.resize(frames); ian@0: for (int i = 0; i < frames; i++) { ian@0: // take the absolute value of each complex bin ian@0: magnitude_spectrum_[i].resize(spectrum[i].size()); ian@0: std::transform(spectrum[i].begin(), spectrum[i].end(), ian@0: magnitude_spectrum_[i].begin(), std::abs); ian@0: } ian@0: } ian@0: return magnitude_spectrum_; ian@0: } ian@0: ian@0: const std::vector& FeatureExtractor::LogMagnitudeSpectrum() { ian@0: if (log_magnitude_spectrum_.empty()) { ian@0: // get magnitude spectrum ian@0: const std::vector& magnitude_spectrum = MagnitudeSpectrum(); ian@0: // get magnitude spectrum for each frame ian@0: std::size_t frames = magnitude_spectrum.size(); ian@0: log_magnitude_spectrum_.resize(frames); ian@0: for (int i = 0; i < frames; i++) { ian@0: log_magnitude_spectrum_[i].resize(magnitude_spectrum[i].size()); ian@0: // copy magnitudes to log magnitude buffer, avoiding zeros ian@0: std::replace_copy(magnitude_spectrum[i].begin(), ian@0: magnitude_spectrum[i].end(), ian@0: log_magnitude_spectrum_[i].begin(), ian@0: 0.0, ian@0: std::numeric_limits::min()); ian@0: // take the log of each magnitude (ugly casting for cmath function...) ian@0: std::transform(log_magnitude_spectrum_[i].begin(), ian@0: log_magnitude_spectrum_[i].end(), ian@0: log_magnitude_spectrum_[i].begin(), ian@0: static_cast(std::log10)); ian@0: } ian@0: } ian@0: return log_magnitude_spectrum_; ian@0: } ian@0: ian@0: const ValueList& FeatureExtractor::SpectralCentroid() { ian@0: if (spectral_centroid_.empty()) { ian@0: MagnitudeSpectrum(); ian@0: std::size_t frames = magnitude_spectrum_.size(); ian@0: spectral_centroid_.resize(frames); ian@0: std::size_t bins = magnitude_spectrum_[0].size(); ian@0: ValueList weighted_frequencies(bins); ian@0: for (int i = 0; i < frames; i++) { ian@0: const ValueList& magnitudes = magnitude_spectrum_[i]; ian@0: // get bin frequencies weighted by bin magnitudes ian@0: std::transform(magnitudes.begin(), ian@0: magnitudes.end(), ian@0: bin_frequencies_.begin(), ian@0: weighted_frequencies.begin(), ian@0: std::multiplies()); ian@0: // divide mean of weighted frequencies by mean of bin magnitudes ian@0: Value magnitude = stats::Sum(magnitudes); ian@0: if (magnitude > 0) { ian@0: spectral_centroid_[i] = stats::Sum(weighted_frequencies) / magnitude; ian@0: } else { ian@0: spectral_centroid_[i] = 0; ian@0: } ian@0: } ian@0: } ian@0: return spectral_centroid_; ian@0: } ian@0: ian@0: ian@0: const ValueList& FeatureExtractor::SpectralSpread() { ian@0: if (spectral_spread_.empty()) { ian@0: SpectralCentroid(); ian@0: std::size_t frames = magnitude_spectrum_.size(); ian@0: spectral_spread_.resize(frames); ian@0: std::size_t bins = magnitude_spectrum_[0].size(); ian@0: ValueList weighted_difference(bins); ian@0: for (int i = 0; i < frames; i++) { ian@0: Value centroid = spectral_centroid_[i]; ian@0: const ValueList& magnitudes = magnitude_spectrum_[i]; ian@0: for (int bin = 0; bin < bins; bin++) { ian@0: Value difference = bin_frequencies_[bin] - centroid; ian@0: weighted_difference[bin] = difference * difference * magnitudes[bin]; ian@0: } ian@0: Value magnitude = stats::Sum(magnitudes); ian@0: if (magnitude > 0) { ian@0: Value variance = stats::Sum(weighted_difference) / magnitude; ian@0: spectral_spread_[i] = std::sqrt(variance); ian@0: } else { ian@0: spectral_spread_[i] = 0; ian@0: } ian@0: } ian@0: } ian@0: return spectral_spread_; ian@0: } ian@0: ian@0: const ValueList& FeatureExtractor::Pitch() { ian@0: if (pitch_.empty()) { ian@0: MagnitudeSpectrum(); ian@0: pitch_.resize(frames_); ian@0: std::deque recent_pitches; ian@0: const int moving_average_size = 8; ian@0: for (int i = 0; i < frames_; i++) { ian@0: const ValueList& mags = magnitude_spectrum_[i]; ian@0: // find the peak bin number ian@0: std::ptrdiff_t peak_bin = std::distance(mags.begin(), ian@0: std::max_element(mags.begin(), ian@0: mags.end())); ian@0: // interpolate peak frequency ian@0: double offset; ian@0: double centre = mags[peak_bin]; ian@0: if (centre == 0) { ian@0: pitch_[i] = 0; ian@0: } else { ian@0: if (peak_bin == 0) { ian@0: // parabolic interpolation of first 2 bin magnitudes ian@0: double above = mags[peak_bin + 1]; ian@0: offset = above / (2 * (2 * centre - above)); ian@0: } else if (peak_bin == mags.size() - 1) { ian@0: // parabolic interpolation of last 2 bin magnitudes ian@0: double below = mags[peak_bin - 1]; ian@0: offset = (-below) / (2 * (2 * centre - below)); ian@0: } else { ian@0: double below = mags[peak_bin - 1]; ian@0: double above = mags[peak_bin + 1]; ian@0: // gaussian interpolation ian@0: offset = std::log(above / below) ian@0: / (2 * std::log((centre * centre) / (above * below))); ian@0: } ian@0: Value frame_pitch = bin_size_ * (peak_bin + offset); ian@0: if (recent_pitches.size() == moving_average_size) { ian@0: recent_pitches.pop_front(); ian@0: } ian@0: recent_pitches.push_back(frame_pitch); ian@0: pitch_[i] = stats::Mean(recent_pitches); ian@0: } ian@0: } ian@0: } ian@0: return pitch_; ian@0: } ian@0: ian@0: const ValueList& FeatureExtractor::Energy() { ian@0: if (energy_.empty()) { ian@0: const int frame_size = window_size_; ian@0: const int hop_size = hop_size_; ian@0: energy_.resize(frames_); ian@0: ValueList::const_iterator data_begin = file_data_.begin(); ian@0: ValueList::const_iterator data_end = file_data_.end(); ian@0: for (int i = 0; i < frames_; i++) { ian@0: // find the frame bounds ian@0: ValueList::const_iterator read_start = data_begin + (i * hop_size); ian@0: ValueList::const_iterator read_end = read_start + frame_size; ian@0: if (read_end > data_end) { ian@0: read_end = data_end; ian@0: } ian@0: Value energy(0); ian@0: std::ptrdiff_t samples = std::distance(read_start, read_end); ian@0: while (read_start != read_end) { ian@0: energy += std::pow(*read_start++, 2); ian@0: } ian@0: energy_[i] = energy / samples; ian@0: } ian@0: } ian@0: return energy_; ian@0: } ian@0: ian@0: const std::vector& FeatureExtractor::MFCCs() { ian@0: if (mfccs_.empty()) { ian@0: const std::vector& magnitude_spectrum = MagnitudeSpectrum(); ian@0: mfcc_analyzer_.SetSampleRate(sample_rate_); ian@0: mfccs_.resize(frames_); ian@0: for (int i = 0; i < frames_; i++) { ian@0: mfccs_[i] = mfcc_analyzer_.ExtractMFCCS(magnitude_spectrum[i]); ian@0: } ian@0: } ian@0: return mfccs_; ian@0: } ian@0: ian@0: const std::vector& FeatureExtractor::DeltaMFCCs() { ian@0: if (delta_mfccs_.empty()) { ian@0: const std::vector& mfccs = MFCCs(); ian@0: // resize buffers ian@0: const std::size_t number_of_mfccs = mfccs[0].size(); ian@0: ValueList empty_values(number_of_mfccs, 0); ian@0: delta_mfccs_.resize(mfccs.size(), empty_values); ian@0: // calculate deltas based on 2 neighbouring frames in either direction ian@0: for (int i = 2, last_frame = frames_ - 2; i < last_frame; i++) { ian@0: const ValueList& before_2 = mfccs[i - 2]; ian@0: const ValueList& before_1 = mfccs[i - 1]; ian@0: const ValueList& after_1 = mfccs[i + 1]; ian@0: const ValueList& after_2 = mfccs[i + 2]; ian@0: ValueList& delta_frame = delta_mfccs_[i]; ian@0: for (int m = 0; m < number_of_mfccs; m++) { ian@0: Value delta = after_1[m] - before_1[m] ian@0: + 2.0 * (after_2[m] - before_2[m]); ian@0: delta_frame[m] = delta / 10.0; ian@0: } ian@0: } ian@0: } ian@0: return delta_mfccs_; ian@0: } ian@0: ian@0: const std::vector& FeatureExtractor::DoubleDeltaMFCCs() { ian@0: if (double_delta_mfccs_.empty()) { ian@0: const std::vector& delta_mfccs = DeltaMFCCs(); ian@0: // resize buffers ian@0: const std::size_t number_of_mfccs = delta_mfccs[0].size(); ian@0: ValueList empty_values(number_of_mfccs, 0); ian@0: double_delta_mfccs_.resize(delta_mfccs.size(), empty_values); ian@0: // calculate deltas based on 2 neighbouring frames in either direction ian@0: for (int i = 2, last_frame = frames_ - 2; i < last_frame; i++) { ian@0: const ValueList& before_2 = delta_mfccs[i - 2]; ian@0: const ValueList& before_1 = delta_mfccs[i - 1]; ian@0: const ValueList& after_1 = delta_mfccs[i + 1]; ian@0: const ValueList& after_2 = delta_mfccs[i + 2]; ian@0: ValueList& double_delta_frame = double_delta_mfccs_[i]; ian@0: for (int m = 0; m < number_of_mfccs; m++) { ian@0: Value delta = after_1[m] - before_1[m] ian@0: + 2.0 * (after_2[m] - before_2[m]); ian@0: double_delta_frame[m] = delta / 10.0; ian@0: } ian@0: } ian@0: } ian@0: return double_delta_mfccs_; ian@0: } ian@0: ian@0: const std::vector& FeatureExtractor::SpectralFlux() { ian@0: if (spectral_flux_.empty()) { ian@0: // get magnitude spectrum ian@0: const std::vector& spectrum = MagnitudeSpectrum(); ian@0: // get magnitude spectrum for each frame ian@0: std::size_t frames = spectrum.size(); ian@0: std::size_t bins = spectrum[0].size(); ian@0: spectral_flux_.resize(frames); ian@0: // first frame always zeroed ian@0: spectral_flux_[0].assign(bins, 0); ian@0: for (int i = 1; i < frames; i++) { ian@0: const ValueList& spectrum_frame = spectrum[i]; ian@0: const ValueList& previous_spectrum_frame = spectrum[i - 1]; ian@0: ValueList& flux_frame = spectral_flux_[i]; ian@0: flux_frame.resize(bins); ian@0: // get the difference in magnitudes for each bin ian@0: for (int bin = 0; bin < bins; bin++) { ian@0: flux_frame[bin] = spectrum_frame[bin] - previous_spectrum_frame[bin]; ian@0: } ian@0: //std::cout << i << ": " << stats::Mean(flux_frame) << '\n'; ian@0: } ian@0: } ian@0: return spectral_flux_; ian@0: } ian@0: ian@0: } // dsp namespace