match-vamp: src/FeatureExtractor.h annotate

annotate src/FeatureExtractor.h @ 176:50bf5c5bca34 refactors

Add minimum and maximum frequency parameters to FeatureExtractor

author	Chris Cannam
date	Fri, 13 Feb 2015 15:18:47 +0000
parents	cdbee79699b0
children	1440773da492

rev	line source
Chris@37	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
Chris@37	2
Chris@37	3 /*
Chris@37	4 Vamp feature extraction plugin using the MATCH audio alignment
Chris@37	5 algorithm.
Chris@37	6
Chris@37	7 Centre for Digital Music, Queen Mary, University of London.
Chris@37	8 This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
Chris@37	9
Chris@37	10 This program is free software; you can redistribute it and/or
Chris@37	11 modify it under the terms of the GNU General Public License as
Chris@37	12 published by the Free Software Foundation; either version 2 of the
Chris@37	13 License, or (at your option) any later version. See the file
Chris@37	14 COPYING included with this distribution for more information.
Chris@37	15 */
Chris@37	16
Chris@37	17 #ifndef FEATURE_EXTRACTOR_H
Chris@37	18 #define FEATURE_EXTRACTOR_H
Chris@37	19
Chris@37	20 #include <vector>
Chris@37	21
Chris@37	22 /**
Chris@37	23 * Convert frequency-domain audio frames into features suitable for
Chris@125	24 * MATCH alignment calculation.
Chris@37	25 *
Chris@125	26 * The default feature is a warping of the frequency data to map FFT
Chris@125	27 * frequency bins into feature bins. The mapping is linear (1-1) until
Chris@125	28 * the resolution reaches 2 points per semitone, then logarithmic with
Chris@125	29 * a semitone resolution. e.g. for 44.1kHz sampling rate and fftSize
Chris@125	30 * of 2048 (46ms), bin spacing is 21.5Hz, which is mapped linearly for
Chris@125	31 * bins 0-34 (0 to 732Hz), and logarithmically for the remaining bins
Chris@125	32 * (midi notes 79 to 127, bins 35 to 83), where all energy above note
Chris@125	33 * 127 is mapped into the final bin.
Chris@125	34 *
Chris@125	35 * Alternatively a chroma mapping is also available. This produces a
Chris@125	36 * 13-bin feature by mapping all FFT bins into bin 0 until the
Chris@125	37 * resolution reaches 1 point per semitone, then mapping each
Chris@125	38 * subsequent bin into its corresponding semitone in the remaining 12
Chris@125	39 * bins (where bin 1 is C). e.g. for 44.1kHz sampling rate and
Chris@125	40 * fftSize of 2048 (46ms), frequencies up to 361 Hz go to bin 0,
Chris@125	41 * subsequent frequencies to the chroma bins.
Chris@37	42 */
Chris@37	43 class FeatureExtractor
Chris@37	44 {
Chris@37	45 public:
Chris@37	46 struct Parameters {
Chris@37	47
Chris@37	48 Parameters(float rate_, int fftSize_) :
Chris@37	49 sampleRate(rate_),
Chris@37	50 useChromaFrequencyMap(false),
Chris@159	51 fftSize(fftSize_),
Chris@176	52 referenceFrequency(440.0),
Chris@176	53 minFrequency(0.),
Chris@176	54 maxFrequency(rate_/2.)
Chris@37	55 {}
Chris@37	56
Chris@37	57 /** Sample rate of audio, as passed to the constructor */
Chris@37	58 float sampleRate;
Chris@37	59
Chris@37	60 /** Flag indicating whether to use a chroma frequency map (13
Chris@37	61 * bins) instead of the default warped spectrogram; false by default */
Chris@37	62 bool useChromaFrequencyMap;
Chris@37	63
Chris@37	64 /** Size of an FFT frame in samples. Note that the data passed
Chris@37	65 * in is already in the frequency domain, so this expresses
Chris@37	66 * the size of the frame that the caller will be providing. */
Chris@37	67 int fftSize;
Chris@159	68
Chris@159	69 /** Frequency of concert A; defaults to 440.0 */
Chris@159	70 double referenceFrequency;
Chris@176	71
Chris@176	72 /** Minimum frequency cutoff to include in feature; defaults to 0 */
Chris@176	73 double minFrequency;
Chris@176	74
Chris@176	75 /** Maximum frequency cutoff to include in feature; defaults to half the sample rate */
Chris@176	76 double maxFrequency;
Chris@37	77 };
Chris@37	78
Chris@37	79 /**
Chris@37	80 * Construct a FeatureExtractor with the given parameters.
Chris@37	81 *
Chris@37	82 * Note that FeatureExtractor maintains internal frame-to-frame
Chris@37	83 * state: use one FeatureExtractor per audio source, and construct
Chris@37	84 * a new one for each new source.
Chris@37	85 */
Chris@37	86 FeatureExtractor(Parameters params);
Chris@37	87
Chris@37	88 /**
Chris@37	89 * Return the feature vector size that will be returned from process().
Chris@37	90 */
Chris@37	91 int getFeatureSize() const { return m_featureSize; }
Chris@74	92
Chris@74	93 /**
Chris@74	94 * Return the feature vector size that would be returned from
Chris@74	95 * process() with these parameters.
Chris@74	96 */
Chris@74	97 static int getFeatureSizeFor(Parameters params);
Chris@37	98
Chris@37	99 /**
Chris@37	100 * Process one frequency-domain audio frame (provided as real &
Chris@37	101 * imaginary components from the FFT output). Return a feature
Chris@38	102 * vector of size given by getFeatureSize(). Input vectors must
Chris@38	103 * have at least params.fftSize/2+1 elements each.
Chris@37	104 *
Chris@37	105 * Operates by mapping the frequency bins into a part-linear
Chris@103	106 * part-logarithmic array, unless useChromaFrequencyMap is true in
Chris@103	107 * which case they are mapped into chroma bins.
Chris@37	108 */
Chris@37	109 std::vector<double> process(const std::vector<double> &real,
Chris@37	110 const std::vector<double> &imag);
Chris@37	111
Chris@74	112 /**
Chris@74	113 * Process one frequency-domain audio frame, provided as a single
Chris@74	114 * array of alternating real and imaginary components. Input array
Chris@74	115 * must have at least 2 * (params.fftSize/2 + 1) elements.
Chris@74	116 *
Chris@74	117 * Operates by mapping the frequency bins into a part-linear
Chris@103	118 * part-logarithmic array, unless useChromaFrequencyMap is true in
Chris@103	119 * which case they are mapped into chroma bins.
Chris@74	120 */
Chris@74	121 std::vector<double> process(const float *carray);
Chris@74	122
Chris@37	123 protected:
Chris@37	124 /** Make either standard or chroma map, depending on m_params */
Chris@37	125 void makeFreqMap();
Chris@37	126
Chris@37	127 /** Creates a map of FFT frequency bins to comparison bins. Where
Chris@37	128 * the spacing of FFT bins is less than 0.5 semitones, the
Chris@37	129 * mapping is one to one. Where the spacing is greater than 0.5
Chris@37	130 * semitones, the FFT energy is mapped into semitone-wide
Chris@37	131 * bins. No scaling is performed; that is, the energy is summed
Chris@37	132 * into the comparison bins. */
Chris@37	133 void makeStandardFrequencyMap();
Chris@37	134
Chris@37	135 /** Creates a map of FFT frequency bins to semitone chroma bins. */
Chris@37	136 void makeChromaFrequencyMap();
Chris@37	137
Chris@37	138 /** Configuration parameters */
Chris@37	139 Parameters m_params;
Chris@37	140
Chris@37	141 /** A mapping function for mapping FFT bins to final frequency
Chris@37	142 * bins. The mapping is linear (1-1) until the resolution
Chris@37	143 * reaches 2 points per semitone, then logarithmic with a
Chris@37	144 * semitone resolution. e.g. for 44.1kHz sampling rate and
Chris@37	145 * fftSize of 2048 (46ms), bin spacing is 21.5Hz, which is mapped
Chris@37	146 * linearly for bins 0-34 (0 to 732Hz), and logarithmically for
Chris@37	147 * the remaining bins (midi notes 79 to 127, bins 35 to 83),
Chris@37	148 * where all energy above note 127 is mapped into the final
Chris@176	149 * bin.
Chris@176	150 *
Chris@176	151 * If a bin's frequency is outside the minFrequency to maxFrequency
Chris@176	152 * range, it will be mapped to a target bin of -1 and should be
Chris@176	153 * discarded.
Chris@176	154 */
Chris@37	155 std::vector<int> m_freqMap;
Chris@37	156
Chris@37	157 /** The size of a returned feature. */
Chris@37	158 int m_featureSize;
Chris@37	159 };
Chris@37	160
Chris@37	161 #endif
Chris@37	162

Mercurial > hg > match-vamp

annotate src/FeatureExtractor.h @ 176:50bf5c5bca34 refactors