Chris@1
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@1
|
2
|
Chris@1
|
3 /*
|
Chris@3
|
4 Vamp feature extraction plugin for the BeatRoot beat tracker.
|
Chris@1
|
5
|
Chris@3
|
6 Centre for Digital Music, Queen Mary, University of London.
|
Chris@3
|
7 This file copyright 2011 Simon Dixon, Chris Cannam and QMUL.
|
Chris@1
|
8
|
Chris@3
|
9 This program is free software; you can redistribute it and/or
|
Chris@3
|
10 modify it under the terms of the GNU General Public License as
|
Chris@3
|
11 published by the Free Software Foundation; either version 2 of the
|
Chris@3
|
12 License, or (at your option) any later version. See the file
|
Chris@3
|
13 COPYING included with this distribution for more information.
|
Chris@1
|
14 */
|
Chris@1
|
15
|
Chris@1
|
16 #ifndef _BEATROOT_PROCESSOR_H_
|
Chris@1
|
17 #define _BEATROOT_PROCESSOR_H_
|
Chris@1
|
18
|
Chris@4
|
19 #include "Peaks.h"
|
Chris@6
|
20 #include "Event.h"
|
Chris@6
|
21 #include "BeatTracker.h"
|
Chris@4
|
22
|
Chris@2
|
23 #include <vector>
|
Chris@3
|
24 #include <cmath>
|
Chris@2
|
25
|
Chris@12
|
26 #ifdef DEBUG_BEATROOT
|
Chris@12
|
27 #include <iostream>
|
Chris@12
|
28 #endif
|
Chris@12
|
29
|
Chris@2
|
30 using std::vector;
|
Chris@2
|
31
|
Chris@1
|
32 class BeatRootProcessor
|
Chris@1
|
33 {
|
Chris@9
|
34 public:
|
Chris@9
|
35 int getFFTSize() const { return fftSize; }
|
Chris@9
|
36 int getHopSize() const { return hopSize; }
|
Chris@9
|
37
|
Chris@1
|
38 protected:
|
Chris@1
|
39 /** Sample rate of audio */
|
Chris@1
|
40 float sampleRate;
|
Chris@1
|
41
|
Chris@1
|
42 /** Spacing of audio frames (determines the amount of overlap or
|
Chris@1
|
43 * skip between frames). This value is expressed in
|
Chris@1
|
44 * seconds. (Default = 0.020s) */
|
Chris@1
|
45 double hopTime;
|
Chris@1
|
46
|
Chris@1
|
47 /** The approximate size of an FFT frame in seconds. (Default =
|
Chris@1
|
48 * 0.04644s). The value is adjusted so that <code>fftSize</code>
|
Chris@1
|
49 * is always power of 2. */
|
Chris@1
|
50 double fftTime;
|
Chris@1
|
51
|
Chris@1
|
52 /** Spacing of audio frames in samples (see <code>hopTime</code>) */
|
Chris@1
|
53 int hopSize;
|
Chris@1
|
54
|
Chris@1
|
55 /** The size of an FFT frame in samples (see <code>fftTime</code>) */
|
Chris@1
|
56 int fftSize;
|
Chris@1
|
57
|
Chris@1
|
58 /** Spectral flux onset detection function, indexed by frame. */
|
Chris@4
|
59 vector<double> spectralFlux;
|
Chris@1
|
60
|
Chris@1
|
61 /** A mapping function for mapping FFT bins to final frequency bins.
|
Chris@1
|
62 * The mapping is linear (1-1) until the resolution reaches 2 points per
|
Chris@1
|
63 * semitone, then logarithmic with a semitone resolution. e.g. for
|
Chris@1
|
64 * 44.1kHz sampling rate and fftSize of 2048 (46ms), bin spacing is
|
Chris@1
|
65 * 21.5Hz, which is mapped linearly for bins 0-34 (0 to 732Hz), and
|
Chris@1
|
66 * logarithmically for the remaining bins (midi notes 79 to 127, bins 35 to
|
Chris@1
|
67 * 83), where all energy above note 127 is mapped into the final bin. */
|
Chris@1
|
68 vector<int> freqMap;
|
Chris@1
|
69
|
Chris@1
|
70 /** The number of entries in <code>freqMap</code>. Note that the length of
|
Chris@1
|
71 * the array is greater, because its size is not known at creation time. */
|
Chris@1
|
72 int freqMapSize;
|
Chris@1
|
73
|
Chris@1
|
74 /** The magnitude spectrum of the most recent frame. Used for
|
Chris@1
|
75 * calculating the spectral flux. */
|
Chris@1
|
76 vector<double> prevFrame;
|
Chris@1
|
77
|
Chris@1
|
78 /** The estimated onset times from peak-picking the onset
|
Chris@1
|
79 * detection function(s). */
|
Chris@1
|
80 vector<double> onsets;
|
Chris@1
|
81
|
Chris@1
|
82 /** The estimated onset times and their saliences. */
|
Chris@6
|
83 EventList onsetList;
|
Chris@23
|
84
|
Chris@23
|
85 /** User-specifiable processing parameters. */
|
Chris@23
|
86 AgentParameters agentParameters;
|
Chris@1
|
87
|
Chris@1
|
88 /** Flag for suppressing all standard output messages except results. */
|
Chris@2
|
89 static bool silent;
|
Chris@1
|
90
|
Chris@1
|
91 public:
|
Chris@1
|
92
|
Chris@1
|
93 /** Constructor: note that streams are not opened until the input
|
Chris@1
|
94 * file is set (see <code>setInputFile()</code>). */
|
Chris@23
|
95 BeatRootProcessor(float sr, AgentParameters parameters) :
|
Chris@23
|
96 sampleRate(sr),
|
Chris@31
|
97 hopTime(0.010),
|
Chris@31
|
98 fftTime(0.04644),
|
Chris@23
|
99 hopSize(0),
|
Chris@23
|
100 fftSize(0),
|
Chris@23
|
101 agentParameters(parameters)
|
Chris@23
|
102 {
|
Chris@9
|
103 hopSize = lrint(sampleRate * hopTime);
|
Chris@9
|
104 fftSize = lrint(pow(2, lrint( log(fftTime * sampleRate) / log(2))));
|
Chris@24
|
105 init();
|
Chris@1
|
106 } // constructor
|
Chris@1
|
107
|
Chris@9
|
108 void reset() {
|
Chris@9
|
109 init();
|
Chris@9
|
110 }
|
Chris@9
|
111
|
Chris@10
|
112 /** Processes a frame of frequency-domain audio data by mapping
|
Chris@10
|
113 * the frequency bins into a part-linear part-logarithmic array,
|
Chris@10
|
114 * then computing the spectral flux then (optionally) normalising
|
Chris@10
|
115 * and calculating onsets.
|
Chris@10
|
116 */
|
Chris@15
|
117 void processFrame(const float *const *inputBuffers);
|
Chris@10
|
118
|
Chris@10
|
119 /** Tracks beats once all frames have been processed by processFrame
|
Chris@10
|
120 */
|
Chris@36
|
121 EventList beatTrack(EventList *optionalUnfilledBeatReturn);
|
Chris@10
|
122
|
Chris@2
|
123 protected:
|
Chris@24
|
124 /** Allocates or re-allocates memory for arrays, based on parameter settings */
|
Chris@3
|
125 void init() {
|
Chris@22
|
126 #ifdef DEBUG_BEATROOT
|
Chris@22
|
127 std::cerr << "BeatRootProcessor::init()" << std::endl;
|
Chris@22
|
128 #endif
|
Chris@3
|
129 makeFreqMap(fftSize, sampleRate);
|
Chris@3
|
130 prevFrame.clear();
|
Chris@10
|
131 for (int i = 0; i <= fftSize/2; i++) prevFrame.push_back(0);
|
Chris@3
|
132 spectralFlux.clear();
|
Chris@22
|
133 onsets.clear();
|
Chris@22
|
134 onsetList.clear();
|
Chris@3
|
135 } // init()
|
Chris@1
|
136
|
Chris@3
|
137 /** Creates a map of FFT frequency bins to comparison bins.
|
Chris@3
|
138 * Where the spacing of FFT bins is less than 0.5 semitones, the mapping is
|
Chris@3
|
139 * one to one. Where the spacing is greater than 0.5 semitones, the FFT
|
Chris@3
|
140 * energy is mapped into semitone-wide bins. No scaling is performed; that
|
Chris@3
|
141 * is the energy is summed into the comparison bins. See also
|
Chris@3
|
142 * processFrame()
|
Chris@3
|
143 */
|
Chris@3
|
144 void makeFreqMap(int fftSize, float sampleRate) {
|
Chris@3
|
145 freqMap.resize(fftSize/2+1);
|
Chris@3
|
146 double binWidth = sampleRate / fftSize;
|
Chris@3
|
147 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1));
|
Chris@3
|
148 int crossoverMidi = (int)lrint(log(crossoverBin*binWidth/440)/
|
Chris@3
|
149 log(2) * 12 + 69);
|
Chris@3
|
150 int i = 0;
|
Chris@22
|
151 while (i <= crossoverBin && i <= fftSize/2) {
|
Chris@22
|
152 freqMap[i] = i;
|
Chris@22
|
153 ++i;
|
Chris@22
|
154 }
|
Chris@3
|
155 while (i <= fftSize/2) {
|
Chris@3
|
156 double midi = log(i*binWidth/440) / log(2) * 12 + 69;
|
Chris@3
|
157 if (midi > 127)
|
Chris@3
|
158 midi = 127;
|
Chris@22
|
159 freqMap[i] = crossoverBin + (int)lrint(midi) - crossoverMidi;
|
Chris@22
|
160 ++i;
|
Chris@3
|
161 }
|
Chris@3
|
162 freqMapSize = freqMap[i-1] + 1;
|
Chris@3
|
163 } // makeFreqMap()
|
Chris@1
|
164
|
Chris@3
|
165 }; // class AudioProcessor
|
Chris@1
|
166
|
Chris@1
|
167
|
Chris@1
|
168 #endif
|