To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.
root / BeatRootProcessor.h @ 31:b9c2f444cdaa
History | View | Annotate | Download (5.48 KB)
| 1 |
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
|---|---|
| 2 |
|
| 3 |
/*
|
| 4 |
Vamp feature extraction plugin for the BeatRoot beat tracker.
|
| 5 |
|
| 6 |
Centre for Digital Music, Queen Mary, University of London.
|
| 7 |
This file copyright 2011 Simon Dixon, Chris Cannam and QMUL.
|
| 8 |
|
| 9 |
This program is free software; you can redistribute it and/or
|
| 10 |
modify it under the terms of the GNU General Public License as
|
| 11 |
published by the Free Software Foundation; either version 2 of the
|
| 12 |
License, or (at your option) any later version. See the file
|
| 13 |
COPYING included with this distribution for more information.
|
| 14 |
*/
|
| 15 |
|
| 16 |
#ifndef _BEATROOT_PROCESSOR_H_
|
| 17 |
#define _BEATROOT_PROCESSOR_H_
|
| 18 |
|
| 19 |
#include "Peaks.h" |
| 20 |
#include "Event.h" |
| 21 |
#include "BeatTracker.h" |
| 22 |
|
| 23 |
#include <vector> |
| 24 |
#include <cmath> |
| 25 |
|
| 26 |
#ifdef DEBUG_BEATROOT
|
| 27 |
#include <iostream> |
| 28 |
#endif
|
| 29 |
|
| 30 |
using std::vector; |
| 31 |
|
| 32 |
class BeatRootProcessor |
| 33 |
{
|
| 34 |
public:
|
| 35 |
int getFFTSize() const { return fftSize; } |
| 36 |
int getHopSize() const { return hopSize; } |
| 37 |
|
| 38 |
protected:
|
| 39 |
/** Sample rate of audio */
|
| 40 |
float sampleRate;
|
| 41 |
|
| 42 |
/** Spacing of audio frames (determines the amount of overlap or
|
| 43 |
* skip between frames). This value is expressed in
|
| 44 |
* seconds. (Default = 0.020s) */
|
| 45 |
double hopTime;
|
| 46 |
|
| 47 |
/** The approximate size of an FFT frame in seconds. (Default =
|
| 48 |
* 0.04644s). The value is adjusted so that <code>fftSize</code>
|
| 49 |
* is always power of 2. */
|
| 50 |
double fftTime;
|
| 51 |
|
| 52 |
/** Spacing of audio frames in samples (see <code>hopTime</code>) */
|
| 53 |
int hopSize;
|
| 54 |
|
| 55 |
/** The size of an FFT frame in samples (see <code>fftTime</code>) */
|
| 56 |
int fftSize;
|
| 57 |
|
| 58 |
/** Spectral flux onset detection function, indexed by frame. */
|
| 59 |
vector<double> spectralFlux;
|
| 60 |
|
| 61 |
/** A mapping function for mapping FFT bins to final frequency bins.
|
| 62 |
* The mapping is linear (1-1) until the resolution reaches 2 points per
|
| 63 |
* semitone, then logarithmic with a semitone resolution. e.g. for
|
| 64 |
* 44.1kHz sampling rate and fftSize of 2048 (46ms), bin spacing is
|
| 65 |
* 21.5Hz, which is mapped linearly for bins 0-34 (0 to 732Hz), and
|
| 66 |
* logarithmically for the remaining bins (midi notes 79 to 127, bins 35 to
|
| 67 |
* 83), where all energy above note 127 is mapped into the final bin. */
|
| 68 |
vector<int> freqMap;
|
| 69 |
|
| 70 |
/** The number of entries in <code>freqMap</code>. Note that the length of
|
| 71 |
* the array is greater, because its size is not known at creation time. */
|
| 72 |
int freqMapSize;
|
| 73 |
|
| 74 |
/** The magnitude spectrum of the most recent frame. Used for
|
| 75 |
* calculating the spectral flux. */
|
| 76 |
vector<double> prevFrame;
|
| 77 |
|
| 78 |
/** The estimated onset times from peak-picking the onset
|
| 79 |
* detection function(s). */
|
| 80 |
vector<double> onsets;
|
| 81 |
|
| 82 |
/** The estimated onset times and their saliences. */
|
| 83 |
EventList onsetList; |
| 84 |
|
| 85 |
/** User-specifiable processing parameters. */
|
| 86 |
AgentParameters agentParameters; |
| 87 |
|
| 88 |
/** Flag for suppressing all standard output messages except results. */
|
| 89 |
static bool silent; |
| 90 |
|
| 91 |
public:
|
| 92 |
|
| 93 |
/** Constructor: note that streams are not opened until the input
|
| 94 |
* file is set (see <code>setInputFile()</code>). */
|
| 95 |
BeatRootProcessor(float sr, AgentParameters parameters) :
|
| 96 |
sampleRate(sr), |
| 97 |
hopTime(0.010), |
| 98 |
fftTime(0.04644), |
| 99 |
hopSize(0),
|
| 100 |
fftSize(0),
|
| 101 |
agentParameters(parameters) |
| 102 |
{
|
| 103 |
hopSize = lrint(sampleRate * hopTime); |
| 104 |
fftSize = lrint(pow(2, lrint( log(fftTime * sampleRate) / log(2)))); |
| 105 |
init(); |
| 106 |
} // constructor
|
| 107 |
|
| 108 |
void reset() {
|
| 109 |
init(); |
| 110 |
} |
| 111 |
|
| 112 |
/** Processes a frame of frequency-domain audio data by mapping
|
| 113 |
* the frequency bins into a part-linear part-logarithmic array,
|
| 114 |
* then computing the spectral flux then (optionally) normalising
|
| 115 |
* and calculating onsets.
|
| 116 |
*/
|
| 117 |
void processFrame(const float *const *inputBuffers); |
| 118 |
|
| 119 |
/** Tracks beats once all frames have been processed by processFrame
|
| 120 |
*/
|
| 121 |
EventList beatTrack(); |
| 122 |
|
| 123 |
protected:
|
| 124 |
/** Allocates or re-allocates memory for arrays, based on parameter settings */
|
| 125 |
void init() {
|
| 126 |
#ifdef DEBUG_BEATROOT
|
| 127 |
std::cerr << "BeatRootProcessor::init()" << std::endl;
|
| 128 |
#endif
|
| 129 |
makeFreqMap(fftSize, sampleRate); |
| 130 |
prevFrame.clear(); |
| 131 |
for (int i = 0; i <= fftSize/2; i++) prevFrame.push_back(0); |
| 132 |
spectralFlux.clear(); |
| 133 |
onsets.clear(); |
| 134 |
onsetList.clear(); |
| 135 |
} // init()
|
| 136 |
|
| 137 |
/** Creates a map of FFT frequency bins to comparison bins.
|
| 138 |
* Where the spacing of FFT bins is less than 0.5 semitones, the mapping is
|
| 139 |
* one to one. Where the spacing is greater than 0.5 semitones, the FFT
|
| 140 |
* energy is mapped into semitone-wide bins. No scaling is performed; that
|
| 141 |
* is the energy is summed into the comparison bins. See also
|
| 142 |
* processFrame()
|
| 143 |
*/
|
| 144 |
void makeFreqMap(int fftSize, float sampleRate) { |
| 145 |
freqMap.resize(fftSize/2+1); |
| 146 |
double binWidth = sampleRate / fftSize;
|
| 147 |
int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); |
| 148 |
int crossoverMidi = (int)lrint(log(crossoverBin*binWidth/440)/ |
| 149 |
log(2) * 12 + 69); |
| 150 |
int i = 0; |
| 151 |
while (i <= crossoverBin && i <= fftSize/2) { |
| 152 |
freqMap[i] = i; |
| 153 |
++i; |
| 154 |
} |
| 155 |
while (i <= fftSize/2) { |
| 156 |
double midi = log(i*binWidth/440) / log(2) * 12 + 69; |
| 157 |
if (midi > 127) |
| 158 |
midi = 127;
|
| 159 |
freqMap[i] = crossoverBin + (int)lrint(midi) - crossoverMidi;
|
| 160 |
++i; |
| 161 |
} |
| 162 |
freqMapSize = freqMap[i-1] + 1; |
| 163 |
} // makeFreqMap()
|
| 164 |
|
| 165 |
}; // class AudioProcessor
|
| 166 |
|
| 167 |
|
| 168 |
#endif
|