Chris@37: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@37: Chris@37: /* Chris@37: Vamp feature extraction plugin using the MATCH audio alignment Chris@37: algorithm. Chris@37: Chris@37: Centre for Digital Music, Queen Mary, University of London. Chris@236: Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary Chris@230: University of London, Copyright (c) 2014-2015 Tido GmbH. Chris@37: Chris@37: This program is free software; you can redistribute it and/or Chris@37: modify it under the terms of the GNU General Public License as Chris@37: published by the Free Software Foundation; either version 2 of the Chris@37: License, or (at your option) any later version. See the file Chris@37: COPYING included with this distribution for more information. Chris@37: */ Chris@37: Chris@37: #ifndef FEATURE_EXTRACTOR_H Chris@37: #define FEATURE_EXTRACTOR_H Chris@37: Chris@187: #include "MatchTypes.h" Chris@37: Chris@37: /** Chris@37: * Convert frequency-domain audio frames into features suitable for Chris@125: * MATCH alignment calculation. Chris@37: * Chris@125: * The default feature is a warping of the frequency data to map FFT Chris@125: * frequency bins into feature bins. The mapping is linear (1-1) until Chris@125: * the resolution reaches 2 points per semitone, then logarithmic with Chris@125: * a semitone resolution. e.g. for 44.1kHz sampling rate and fftSize Chris@125: * of 2048 (46ms), bin spacing is 21.5Hz, which is mapped linearly for Chris@125: * bins 0-34 (0 to 732Hz), and logarithmically for the remaining bins Chris@125: * (midi notes 79 to 127, bins 35 to 83), where all energy above note Chris@125: * 127 is mapped into the final bin. Chris@125: * Chris@125: * Alternatively a chroma mapping is also available. This produces a Chris@125: * 13-bin feature by mapping all FFT bins into bin 0 until the Chris@125: * resolution reaches 1 point per semitone, then mapping each Chris@125: * subsequent bin into its corresponding semitone in the remaining 12 Chris@125: * bins (where bin 1 is C). e.g. e.g. for 44.1kHz sampling rate and Chris@125: * fftSize of 2048 (46ms), frequencies up to 361 Hz go to bin 0, Chris@125: * subsequent frequencies to the chroma bins. Chris@37: */ Chris@37: class FeatureExtractor Chris@37: { Chris@37: public: Chris@37: struct Parameters { Chris@37: Chris@216: Parameters(float rate_) : Chris@37: sampleRate(rate_), Chris@37: useChromaFrequencyMap(false), Chris@216: fftSize(2048), Chris@176: referenceFrequency(440.0), Chris@219: minFrequency(150.), Chris@176: maxFrequency(rate_/2.) Chris@37: {} Chris@37: Chris@37: /** Sample rate of audio */ Chris@37: float sampleRate; Chris@37: Chris@37: /** Flag indicating whether to use a chroma frequency map (12 Chris@37: * bins) instead of the default warped spectrogram */ Chris@37: bool useChromaFrequencyMap; Chris@37: Chris@37: /** Size of an FFT frame in samples. Note that the data passed Chris@37: * in is already in the frequency domain, so this expresses Chris@37: * the size of the frame that the caller will be providing. */ Chris@37: int fftSize; Chris@159: Chris@159: /** Frequency of concert A */ Chris@159: double referenceFrequency; Chris@176: Chris@176: /** Minimum frequency cutoff to include in feature */ Chris@176: double minFrequency; Chris@176: Chris@176: /** Maximum frequency cutoff to include in feature */ Chris@176: double maxFrequency; Chris@37: }; Chris@37: Chris@37: /** Chris@37: * Construct a FeatureExtractor with the given parameters. Chris@37: * Chris@37: * Note that FeatureExtractor maintains internal frame-to-frame Chris@37: * state: use one FeatureExtractor per audio source, and construct Chris@37: * a new one for each new source. Chris@37: */ Chris@37: FeatureExtractor(Parameters params); Chris@37: Chris@37: /** Chris@37: * Return the feature vector size that will be returned from process(). Chris@37: */ Chris@37: int getFeatureSize() const { return m_featureSize; } Chris@74: Chris@74: /** Chris@74: * Return the feature vector size that would be returned from Chris@74: * process() with these parameters. Chris@74: */ Chris@74: static int getFeatureSizeFor(Parameters params); Chris@37: Chris@37: /** Chris@201: * Process one frequency-domain audio frame, provided as real & Chris@201: * imaginary components from the FFT output. Return a feature Chris@38: * vector of size given by getFeatureSize(). Input vectors must Chris@38: * have at least params.fftSize/2+1 elements each. Chris@37: * Chris@37: * Operates by mapping the frequency bins into a part-linear Chris@103: * part-logarithmic array, unless useChromaFrequencyMap is true in Chris@103: * which case they are mapped into chroma bins. Chris@37: */ Chris@183: feature_t process(const std::vector &real, Chris@183: const std::vector &imag); Chris@37: Chris@74: /** Chris@201: * Process one frequency-domain audio frame, provided as real & Chris@201: * imaginary components from the FFT output. Return a feature Chris@184: * vector of size given by getFeatureSize(). Input vectors must Chris@184: * have at least params.fftSize/2+1 elements each. Chris@184: * Chris@184: * Operates by mapping the frequency bins into a part-linear Chris@184: * part-logarithmic array, unless useChromaFrequencyMap is true in Chris@184: * which case they are mapped into chroma bins. Chris@184: */ Chris@184: feature_t process(const std::vector &real, Chris@184: const std::vector &imag); Chris@184: Chris@184: /** Chris@201: * Process one frequency-domain audio frame, provided as real & Chris@201: * imaginary components from the FFT output. Return a feature Chris@201: * vector of size given by getFeatureSize(). Input arrays must Chris@201: * have at least params.fftSize/2+1 elements each. Chris@201: * Chris@201: * Operates by mapping the frequency bins into a part-linear Chris@201: * part-logarithmic array, unless useChromaFrequencyMap is true in Chris@201: * which case they are mapped into chroma bins. Chris@201: */ Chris@201: feature_t process(const float *real, const float *imag); Chris@201: Chris@201: /** Chris@74: * Process one frequency-domain audio frame, provided as a single Chris@74: * array of alternating real and imaginary components. Input array Chris@74: * must have at least 2 * (params.fftSize/2 + 1) elements. Chris@74: * Chris@74: * Operates by mapping the frequency bins into a part-linear Chris@103: * part-logarithmic array, unless useChromaFrequencyMap is true in Chris@103: * which case they are mapped into chroma bins. Chris@74: */ Chris@184: feature_t process(const float *carray); Chris@74: Chris@37: protected: Chris@37: /** Make either standard or chroma map, depending on m_params */ Chris@37: void makeFreqMap(); Chris@37: Chris@37: /** Creates a map of FFT frequency bins to comparison bins. Where Chris@37: * the spacing of FFT bins is less than 0.5 semitones, the Chris@37: * mapping is one to one. Where the spacing is greater than 0.5 Chris@37: * semitones, the FFT energy is mapped into semitone-wide Chris@37: * bins. No scaling is performed; that is the energy is summed Chris@37: * into the comparison bins. */ Chris@37: void makeStandardFrequencyMap(); Chris@37: Chris@37: /** Creates a map of FFT frequency bins to semitone chroma bins. */ Chris@37: void makeChromaFrequencyMap(); Chris@37: Chris@37: /** Configuration parameters */ Chris@37: Parameters m_params; Chris@37: Chris@37: /** A mapping function for mapping FFT bins to final frequency Chris@37: * bins. The mapping is linear (1-1) until the resolution Chris@37: * reaches 2 points per semitone, then logarithmic with a Chris@37: * semitone resolution. e.g. for 44.1kHz sampling rate and Chris@37: * fftSize of 2048 (46ms), bin spacing is 21.5Hz, which is mapped Chris@37: * linearly for bins 0-34 (0 to 732Hz), and logarithmically for Chris@37: * the remaining bins (midi notes 79 to 127, bins 35 to 83), Chris@37: * where all energy above note 127 is mapped into the final Chris@176: * bin. Chris@176: * Chris@176: * If a bin's frequency is outside the minFrequency->maxFrequency Chris@176: * range, it will be mapped to a target bin of -1 and should be Chris@176: * discarded. Chris@176: */ Chris@37: std::vector m_freqMap; Chris@37: Chris@184: feature_t processMags(const std::vector &mags); Chris@184: std::vector scaleMags(const std::vector &mags); Chris@169: Chris@37: /** The size of a returned feature. */ Chris@37: int m_featureSize; Chris@37: }; Chris@37: Chris@37: #endif Chris@37: