Chris@1: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@1: Chris@1: /* Chris@3: Vamp feature extraction plugin for the BeatRoot beat tracker. Chris@1: Chris@3: Centre for Digital Music, Queen Mary, University of London. Chris@3: This file copyright 2011 Simon Dixon, Chris Cannam and QMUL. Chris@1: Chris@3: This program is free software; you can redistribute it and/or Chris@3: modify it under the terms of the GNU General Public License as Chris@3: published by the Free Software Foundation; either version 2 of the Chris@3: License, or (at your option) any later version. See the file Chris@3: COPYING included with this distribution for more information. Chris@1: */ Chris@1: Chris@1: #ifndef _BEATROOT_PROCESSOR_H_ Chris@1: #define _BEATROOT_PROCESSOR_H_ Chris@1: Chris@4: #include "Peaks.h" Chris@6: #include "Event.h" Chris@6: #include "BeatTracker.h" Chris@4: Chris@2: #include Chris@3: #include Chris@2: Chris@12: #ifdef DEBUG_BEATROOT Chris@12: #include Chris@12: #endif Chris@12: Chris@2: using std::vector; Chris@2: Chris@1: class BeatRootProcessor Chris@1: { Chris@9: public: Chris@9: int getFFTSize() const { return fftSize; } Chris@9: int getHopSize() const { return hopSize; } Chris@9: Chris@1: protected: Chris@1: /** Sample rate of audio */ Chris@1: float sampleRate; Chris@1: Chris@1: /** Spacing of audio frames (determines the amount of overlap or Chris@1: * skip between frames). This value is expressed in Chris@1: * seconds. (Default = 0.020s) */ Chris@1: double hopTime; Chris@1: Chris@1: /** The approximate size of an FFT frame in seconds. (Default = Chris@1: * 0.04644s). The value is adjusted so that fftSize Chris@1: * is always power of 2. */ Chris@1: double fftTime; Chris@1: Chris@1: /** Spacing of audio frames in samples (see hopTime) */ Chris@1: int hopSize; Chris@1: Chris@1: /** The size of an FFT frame in samples (see fftTime) */ Chris@1: int fftSize; Chris@1: Chris@1: /** Spectral flux onset detection function, indexed by frame. */ Chris@4: vector spectralFlux; Chris@1: Chris@1: /** A mapping function for mapping FFT bins to final frequency bins. Chris@1: * The mapping is linear (1-1) until the resolution reaches 2 points per Chris@1: * semitone, then logarithmic with a semitone resolution. e.g. for Chris@1: * 44.1kHz sampling rate and fftSize of 2048 (46ms), bin spacing is Chris@1: * 21.5Hz, which is mapped linearly for bins 0-34 (0 to 732Hz), and Chris@1: * logarithmically for the remaining bins (midi notes 79 to 127, bins 35 to Chris@1: * 83), where all energy above note 127 is mapped into the final bin. */ Chris@1: vector freqMap; Chris@1: Chris@1: /** The number of entries in freqMap. Note that the length of Chris@1: * the array is greater, because its size is not known at creation time. */ Chris@1: int freqMapSize; Chris@1: Chris@1: /** The magnitude spectrum of the most recent frame. Used for Chris@1: * calculating the spectral flux. */ Chris@1: vector prevFrame; Chris@1: Chris@1: /** The estimated onset times from peak-picking the onset Chris@1: * detection function(s). */ Chris@1: vector onsets; Chris@1: Chris@1: /** The estimated onset times and their saliences. */ Chris@6: EventList onsetList; Chris@23: Chris@23: /** User-specifiable processing parameters. */ Chris@23: AgentParameters agentParameters; Chris@1: Chris@1: /** Flag for suppressing all standard output messages except results. */ Chris@2: static bool silent; Chris@1: Chris@1: public: Chris@1: Chris@1: /** Constructor: note that streams are not opened until the input Chris@1: * file is set (see setInputFile()). */ Chris@23: BeatRootProcessor(float sr, AgentParameters parameters) : Chris@23: sampleRate(sr), Chris@31: hopTime(0.010), Chris@31: fftTime(0.04644), Chris@23: hopSize(0), Chris@23: fftSize(0), Chris@23: agentParameters(parameters) Chris@23: { Chris@9: hopSize = lrint(sampleRate * hopTime); Chris@9: fftSize = lrint(pow(2, lrint( log(fftTime * sampleRate) / log(2)))); Chris@24: init(); Chris@1: } // constructor Chris@1: Chris@9: void reset() { Chris@9: init(); Chris@9: } Chris@9: Chris@10: /** Processes a frame of frequency-domain audio data by mapping Chris@10: * the frequency bins into a part-linear part-logarithmic array, Chris@10: * then computing the spectral flux then (optionally) normalising Chris@10: * and calculating onsets. Chris@10: */ Chris@15: void processFrame(const float *const *inputBuffers); Chris@10: Chris@10: /** Tracks beats once all frames have been processed by processFrame Chris@10: */ Chris@36: EventList beatTrack(EventList *optionalUnfilledBeatReturn); Chris@10: Chris@2: protected: Chris@24: /** Allocates or re-allocates memory for arrays, based on parameter settings */ Chris@3: void init() { Chris@22: #ifdef DEBUG_BEATROOT Chris@22: std::cerr << "BeatRootProcessor::init()" << std::endl; Chris@22: #endif Chris@3: makeFreqMap(fftSize, sampleRate); Chris@3: prevFrame.clear(); Chris@10: for (int i = 0; i <= fftSize/2; i++) prevFrame.push_back(0); Chris@3: spectralFlux.clear(); Chris@22: onsets.clear(); Chris@22: onsetList.clear(); Chris@3: } // init() Chris@1: Chris@3: /** Creates a map of FFT frequency bins to comparison bins. Chris@3: * Where the spacing of FFT bins is less than 0.5 semitones, the mapping is Chris@3: * one to one. Where the spacing is greater than 0.5 semitones, the FFT Chris@3: * energy is mapped into semitone-wide bins. No scaling is performed; that Chris@3: * is the energy is summed into the comparison bins. See also Chris@3: * processFrame() Chris@3: */ Chris@3: void makeFreqMap(int fftSize, float sampleRate) { Chris@3: freqMap.resize(fftSize/2+1); Chris@3: double binWidth = sampleRate / fftSize; Chris@3: int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); Chris@3: int crossoverMidi = (int)lrint(log(crossoverBin*binWidth/440)/ Chris@3: log(2) * 12 + 69); Chris@3: int i = 0; Chris@22: while (i <= crossoverBin && i <= fftSize/2) { Chris@22: freqMap[i] = i; Chris@22: ++i; Chris@22: } Chris@3: while (i <= fftSize/2) { Chris@3: double midi = log(i*binWidth/440) / log(2) * 12 + 69; Chris@3: if (midi > 127) Chris@3: midi = 127; Chris@22: freqMap[i] = crossoverBin + (int)lrint(midi) - crossoverMidi; Chris@22: ++i; Chris@3: } Chris@3: freqMapSize = freqMap[i-1] + 1; Chris@3: } // makeFreqMap() Chris@1: Chris@3: }; // class AudioProcessor Chris@1: Chris@1: Chris@1: #endif