Chris@1
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@1
|
2
|
Chris@1
|
3 /*
|
Chris@3
|
4 Vamp feature extraction plugin for the BeatRoot beat tracker.
|
Chris@1
|
5
|
Chris@3
|
6 Centre for Digital Music, Queen Mary, University of London.
|
Chris@3
|
7 This file copyright 2011 Simon Dixon, Chris Cannam and QMUL.
|
Chris@1
|
8
|
Chris@3
|
9 This program is free software; you can redistribute it and/or
|
Chris@3
|
10 modify it under the terms of the GNU General Public License as
|
Chris@3
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@3
|
12 License, or (at your option) any later version. See the file
|
Chris@3
|
13 COPYING included with this distribution for more information.
|
Chris@1
|
14 */
|
Chris@1
|
15
|
Chris@1
|
16 #ifndef _BEATROOT_PROCESSOR_H_
|
Chris@1
|
17 #define _BEATROOT_PROCESSOR_H_
|
Chris@1
|
18
|
Chris@4
|
19 #include "Peaks.h"
|
Chris@6
|
20 #include "Event.h"
|
Chris@6
|
21 #include "BeatTracker.h"
|
Chris@4
|
22
|
Chris@2
|
23 #include <vector>
|
Chris@3
|
24 #include <cmath>
|
Chris@2
|
25
|
Chris@12
|
26 #ifdef DEBUG_BEATROOT
|
Chris@12
|
27 #include <iostream>
|
Chris@12
|
28 #endif
|
Chris@12
|
29
|
Chris@2
|
30 using std::vector;
|
Chris@2
|
31
|
Chris@1
|
32 class BeatRootProcessor
|
Chris@1
|
33 {
|
Chris@9
|
34 public:
|
Chris@9
|
35 int getFFTSize() const { return fftSize; }
|
Chris@9
|
36 int getHopSize() const { return hopSize; }
|
Chris@9
|
37
|
Chris@1
|
38 protected:
|
Chris@1
|
39 /** Sample rate of audio */
|
Chris@1
|
40 float sampleRate;
|
Chris@1
|
41
|
Chris@1
|
42 /** Spacing of audio frames (determines the amount of overlap or
|
Chris@1
|
43 * skip between frames). This value is expressed in
|
Chris@1
|
44 * seconds. (Default = 0.020s) */
|
Chris@1
|
45 double hopTime;
|
Chris@1
|
46
|
Chris@1
|
47 /** The approximate size of an FFT frame in seconds. (Default =
|
Chris@1
|
48 * 0.04644s). The value is adjusted so that <code>fftSize</code>
|
Chris@1
|
49 * is always power of 2. */
|
Chris@1
|
50 double fftTime;
|
Chris@1
|
51
|
Chris@1
|
52 /** Spacing of audio frames in samples (see <code>hopTime</code>) */
|
Chris@1
|
53 int hopSize;
|
Chris@1
|
54
|
Chris@1
|
55 /** The size of an FFT frame in samples (see <code>fftTime</code>) */
|
Chris@1
|
56 int fftSize;
|
Chris@1
|
57
|
Chris@1
|
58 /** Spectral flux onset detection function, indexed by frame. */
|
Chris@4
|
59 vector<double> spectralFlux;
|
Chris@1
|
60
|
Chris@1
|
61 /** A mapping function for mapping FFT bins to final frequency bins.
|
Chris@1
|
62 * The mapping is linear (1-1) until the resolution reaches 2 points per
|
Chris@1
|
63 * semitone, then logarithmic with a semitone resolution. e.g. for
|
Chris@1
|
64 * 44.1kHz sampling rate and fftSize of 2048 (46ms), bin spacing is
|
Chris@1
|
65 * 21.5Hz, which is mapped linearly for bins 0-34 (0 to 732Hz), and
|
Chris@1
|
66 * logarithmically for the remaining bins (midi notes 79 to 127, bins 35 to
|
Chris@1
|
67 * 83), where all energy above note 127 is mapped into the final bin. */
|
Chris@1
|
68 vector<int> freqMap;
|
Chris@1
|
69
|
Chris@1
|
70 /** The number of entries in <code>freqMap</code>. Note that the length of
|
Chris@1
|
71 * the array is greater, because its size is not known at creation time. */
|
Chris@1
|
72 int freqMapSize;
|
Chris@1
|
73
|
Chris@1
|
74 /** The magnitude spectrum of the most recent frame. Used for
|
Chris@1
|
75 * calculating the spectral flux. */
|
Chris@1
|
76 vector<double> prevFrame;
|
Chris@1
|
77
|
Chris@1
|
78 /** The estimated onset times from peak-picking the onset
|
Chris@1
|
79 * detection function(s). */
|
Chris@1
|
80 vector<double> onsets;
|
Chris@1
|
81
|
Chris@1
|
82 /** The estimated onset times and their saliences. */
|
Chris@6
|
83 EventList onsetList;
|
Chris@1
|
84
|
Chris@1
|
85 /** Flag for suppressing all standard output messages except results. */
|
Chris@2
|
86 static bool silent;
|
Chris@1
|
87
|
Chris@1
|
88 public:
|
Chris@1
|
89
|
Chris@1
|
90 /** Constructor: note that streams are not opened until the input
|
Chris@1
|
91 * file is set (see <code>setInputFile()</code>). */
|
Chris@8
|
92 BeatRootProcessor(float sr) :
|
Chris@8
|
93 sampleRate(sr) {
|
Chris@1
|
94 hopSize = 0;
|
Chris@1
|
95 fftSize = 0;
|
Chris@9
|
96 hopTime = 0.010;
|
Chris@9
|
97 fftTime = 0.04644;
|
Chris@9
|
98 hopSize = lrint(sampleRate * hopTime);
|
Chris@9
|
99 fftSize = lrint(pow(2, lrint( log(fftTime * sampleRate) / log(2))));
|
Chris@1
|
100 } // constructor
|
Chris@1
|
101
|
Chris@9
|
102 void reset() {
|
Chris@9
|
103 init();
|
Chris@9
|
104 }
|
Chris@9
|
105
|
Chris@10
|
106 /** Processes a frame of frequency-domain audio data by mapping
|
Chris@10
|
107 * the frequency bins into a part-linear part-logarithmic array,
|
Chris@10
|
108 * then computing the spectral flux then (optionally) normalising
|
Chris@10
|
109 * and calculating onsets.
|
Chris@10
|
110 */
|
Chris@15
|
111 void processFrame(const float *const *inputBuffers);
|
Chris@10
|
112
|
Chris@10
|
113 /** Tracks beats once all frames have been processed by processFrame
|
Chris@10
|
114 */
|
Chris@15
|
115 EventList beatTrack();
|
Chris@10
|
116
|
Chris@2
|
117 protected:
|
Chris@3
|
118 /** Allocates memory for arrays, based on parameter settings */
|
Chris@3
|
119 void init() {
|
Chris@22
|
120 #ifdef DEBUG_BEATROOT
|
Chris@22
|
121 std::cerr << "BeatRootProcessor::init()" << std::endl;
|
Chris@22
|
122 #endif
|
Chris@3
|
123 makeFreqMap(fftSize, sampleRate);
|
Chris@3
|
124 prevFrame.clear();
|
Chris@10
|
125 for (int i = 0; i <= fftSize/2; i++) prevFrame.push_back(0);
|
Chris@3
|
126 spectralFlux.clear();
|
Chris@22
|
127 onsets.clear();
|
Chris@22
|
128 onsetList.clear();
|
Chris@3
|
129 } // init()
|
Chris@1
|
130
|
Chris@3
|
131 /** Creates a map of FFT frequency bins to comparison bins.
|
Chris@3
|
132 * Where the spacing of FFT bins is less than 0.5 semitones, the mapping is
|
Chris@3
|
133 * one to one. Where the spacing is greater than 0.5 semitones, the FFT
|
Chris@3
|
134 * energy is mapped into semitone-wide bins. No scaling is performed; that
|
Chris@3
|
135 * is the energy is summed into the comparison bins. See also
|
Chris@3
|
136 * processFrame()
|
Chris@3
|
137 */
|
Chris@3
|
138 void makeFreqMap(int fftSize, float sampleRate) {
|
Chris@3
|
139 freqMap.resize(fftSize/2+1);
|
Chris@3
|
140 double binWidth = sampleRate / fftSize;
|
Chris@3
|
141 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1));
|
Chris@3
|
142 int crossoverMidi = (int)lrint(log(crossoverBin*binWidth/440)/
|
Chris@3
|
143 log(2) * 12 + 69);
|
Chris@3
|
144 int i = 0;
|
Chris@22
|
145 while (i <= crossoverBin && i <= fftSize/2) {
|
Chris@22
|
146 freqMap[i] = i;
|
Chris@22
|
147 ++i;
|
Chris@22
|
148 }
|
Chris@3
|
149 while (i <= fftSize/2) {
|
Chris@3
|
150 double midi = log(i*binWidth/440) / log(2) * 12 + 69;
|
Chris@3
|
151 if (midi > 127)
|
Chris@3
|
152 midi = 127;
|
Chris@22
|
153 freqMap[i] = crossoverBin + (int)lrint(midi) - crossoverMidi;
|
Chris@22
|
154 ++i;
|
Chris@3
|
155 }
|
Chris@3
|
156 freqMapSize = freqMap[i-1] + 1;
|
Chris@3
|
157 } // makeFreqMap()
|
Chris@1
|
158
|
Chris@3
|
159 }; // class AudioProcessor
|
Chris@1
|
160
|
Chris@1
|
161
|
Chris@1
|
162 #endif
|