view src/FeatureExtractor.h @ 103:593054bf6476 feature_conditioner

Pull out normalisation and specdiff stuff into FeatureConditioner
author Chris Cannam
date Thu, 04 Dec 2014 13:05:16 +0000
parents b9aa663a607b
children 3792bcd34470
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    Vamp feature extraction plugin using the MATCH audio alignment
    algorithm.

    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2007 Simon Dixon, Chris Cannam and QMUL.
    
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

#ifndef FEATURE_EXTRACTOR_H
#define FEATURE_EXTRACTOR_H

#include <vector>

/**
 * Convert frequency-domain audio frames into features suitable for
 * MATCH alignment calculation. The default feature is a warping of
 * the frequency data to map higher frequencies into a linear scale. A
 * chroma mapping is also available.
 *
 * Note that FeatureExtractor may maintain internal frame-to-frame
 * state: use one FeatureExtractor per audio source, and construct a
 * new one for each new source.
 */
class FeatureExtractor
{
public:
    /**
     * Configuration for a FeatureExtractor. Only the sample rate and
     * FFT size are taken by the constructor; the remaining fields
     * receive defaults and may be assigned directly afterwards.
     */
    struct Parameters {

        /**
         * Construct a parameter set for the given sample rate and FFT
         * frame size. The chroma map is disabled and hopTime is set
         * to 0.0. Every field is initialised here so that copying a
         * Parameters object never reads an indeterminate value.
         */
        Parameters(float rate_, int fftSize_) :
            sampleRate(rate_),
            useChromaFrequencyMap(false),
            hopTime(0.0),
            fftSize(fftSize_)
        {}

        /** Sample rate of audio */
        float sampleRate;

        /** Flag indicating whether to use a chroma frequency map (12
         *  bins) instead of the default warped spectrogram */
        bool useChromaFrequencyMap;

        /** Spacing of audio frames (determines the amount of overlap or
         *  skip between frames). This value is expressed in
         *  seconds. It is initialised to 0.0 by the constructor;
         *  callers that depend on it must assign the real hop time
         *  explicitly. */
        double hopTime;

        /** Size of an FFT frame in samples. Note that the data passed
         *  in is already in the frequency domain, so this expresses
         *  the size of the frame that the caller will be providing. */
        int fftSize;
    };

    /**
     * Construct a FeatureExtractor with the given parameters.
     *
     * Note that FeatureExtractor maintains internal frame-to-frame
     * state: use one FeatureExtractor per audio source, and construct
     * a new one for each new source.
     */
    FeatureExtractor(Parameters params);

    /**
     * Return the feature vector size that will be returned from process().
     */
    int getFeatureSize() const { return m_featureSize; }

    /**
     * Return the feature vector size that would be returned from
     * process() with these parameters.
     */
    static int getFeatureSizeFor(Parameters params);
    
    /**
     * Process one frequency-domain audio frame (provided as real &
     * imaginary components from the FFT output). Return a feature
     * vector of size given by getFeatureSize(). Input vectors must
     * have at least params.fftSize/2+1 elements each.
     *
     * Operates by mapping the frequency bins into a part-linear
     * part-logarithmic array, unless useChromaFrequencyMap is true in
     * which case they are mapped into chroma bins.
     */
    std::vector<double> process(const std::vector<double> &real,
                                const std::vector<double> &imag);
    
    /**
     * Process one frequency-domain audio frame, provided as a single
     * array of alternating real and imaginary components. Input array
     * must have at least 2 * (params.fftSize/2 + 1) elements.
     *
     * Operates by mapping the frequency bins into a part-linear
     * part-logarithmic array, unless useChromaFrequencyMap is true in
     * which case they are mapped into chroma bins.
     */
    std::vector<double> process(const float *carray);
    
protected:
    /** Make either standard or chroma map, depending on m_params */
    void makeFreqMap();

    /** Creates a map of FFT frequency bins to comparison bins.  Where
     *  the spacing of FFT bins is less than 0.5 semitones, the
     *  mapping is one to one. Where the spacing is greater than 0.5
     *  semitones, the FFT energy is mapped into semitone-wide
     *  bins. No scaling is performed; that is the energy is summed
     *  into the comparison bins. */
    void makeStandardFrequencyMap();

    /** Creates a map of FFT frequency bins to semitone chroma bins. */
    void makeChromaFrequencyMap();

    /** Configuration parameters */
    Parameters m_params;

    /** A mapping function for mapping FFT bins to final frequency
     *  bins.  The mapping is linear (1-1) until the resolution
     *  reaches 2 points per semitone, then logarithmic with a
     *  semitone resolution.  e.g. for 44.1kHz sampling rate and
     *  fftSize of 2048 (46ms), bin spacing is 21.5Hz, which is mapped
     *  linearly for bins 0-34 (0 to 732Hz), and logarithmically for
     *  the remaining bins (midi notes 79 to 127, bins 35 to 83),
     *  where all energy above note 127 is mapped into the final
     *  bin. */
    std::vector<int> m_freqMap;

    /** The size of a returned feature. */
    int m_featureSize;
};

#endif