comparison dsp/tempotracking/DownBeat.cpp @ 55:7fe29d8a7eaf

* Various fixes related to the bar estimator code
author cannam
date Tue, 10 Feb 2009 16:37:11 +0000
parents 5bec06ecc88a
children a0f987c06bec
comparison
equal deleted inserted replaced
54:5bec06ecc88a 55:7fe29d8a7eaf
10 10
11 #include "DownBeat.h" 11 #include "DownBeat.h"
12 12
13 #include "maths/MathAliases.h" 13 #include "maths/MathAliases.h"
14 #include "maths/MathUtilities.h" 14 #include "maths/MathUtilities.h"
15 #include "maths/KLDivergence.h"
15 #include "dsp/transforms/FFT.h" 16 #include "dsp/transforms/FFT.h"
16 17
17 #include <iostream> 18 #include <iostream>
18 #include <cstdlib> 19 #include <cstdlib>
19 20
20 DownBeat::DownBeat(float originalSampleRate, 21 DownBeat::DownBeat(float originalSampleRate,
21 size_t decimationFactor, 22 size_t decimationFactor,
22 size_t dfIncrement) : 23 size_t dfIncrement) :
24 m_bpb(0),
23 m_rate(originalSampleRate), 25 m_rate(originalSampleRate),
24 m_factor(decimationFactor), 26 m_factor(decimationFactor),
25 m_increment(dfIncrement), 27 m_increment(dfIncrement),
26 m_decimator1(0), 28 m_decimator1(0),
27 m_decimator2(0), 29 m_decimator2(0),
32 m_beatframe(0) 34 m_beatframe(0)
33 { 35 {
34 // beat frame size is next power of two up from 1.3 seconds at the 36 // beat frame size is next power of two up from 1.3 seconds at the
35 // downsampled rate (happens to produce 4096 for 44100 or 48000 at 37 // downsampled rate (happens to produce 4096 for 44100 or 48000 at
36 // 16x decimation, which is our expected normal situation) 38 // 16x decimation, which is our expected normal situation)
37 int bfs = int((m_rate / decimationFactor) * 1.3); 39 m_beatframesize = MathUtilities::nextPowerOfTwo
38 m_beatframesize = 1; 40 (int((m_rate / decimationFactor) * 1.3));
39 while (bfs) { bfs >>= 1; m_beatframesize <<= 1; }
40 std::cerr << "rate = " << m_rate << ", bfs = " << m_beatframesize << std::endl; 41 std::cerr << "rate = " << m_rate << ", bfs = " << m_beatframesize << std::endl;
41 m_beatframe = new double[m_beatframesize]; 42 m_beatframe = new double[m_beatframesize];
42 m_fftRealOut = new double[m_beatframesize]; 43 m_fftRealOut = new double[m_beatframesize];
43 m_fftImagOut = new double[m_beatframesize]; 44 m_fftImagOut = new double[m_beatframesize];
44 } 45 }
53 delete[] m_fftRealOut; 54 delete[] m_fftRealOut;
54 delete[] m_fftImagOut; 55 delete[] m_fftImagOut;
55 } 56 }
56 57
57 void 58 void
59 DownBeat::setBeatsPerBar(int bpb)
60 {
61 m_bpb = bpb;
62 }
63
64 void
58 DownBeat::makeDecimators() 65 DownBeat::makeDecimators()
59 { 66 {
60 if (m_factor < 2) return; 67 if (m_factor < 2) return;
61 int highest = Decimator::getHighestSupportedFactor(); 68 int highest = Decimator::getHighestSupportedFactor();
62 if (m_factor <= highest) { 69 if (m_factor <= highest) {
63 m_decimator1 = new Decimator(m_increment, m_factor); 70 m_decimator1 = new Decimator(m_increment, m_factor);
71 std::cerr << "DownBeat: decimator 1 factor " << m_factor << ", size " << m_increment << std::endl;
64 return; 72 return;
65 } 73 }
66 m_decimator1 = new Decimator(m_increment, highest); 74 m_decimator1 = new Decimator(m_increment, highest);
75 std::cerr << "DownBeat: decimator 1 factor " << highest << ", size " << m_increment << std::endl;
67 m_decimator2 = new Decimator(m_increment / highest, m_factor / highest); 76 m_decimator2 = new Decimator(m_increment / highest, m_factor / highest);
68 m_decbuf = new double[m_factor / highest]; 77 std::cerr << "DownBeat: decimator 2 factor " << m_factor / highest << ", size " << m_increment / highest << std::endl;
69 } 78 m_decbuf = new float[m_increment / highest];
70 79 }
71 void 80
72 DownBeat::pushAudioBlock(const double *audio) 81 void
82 DownBeat::pushAudioBlock(const float *audio)
73 { 83 {
74 if (m_buffill + (m_increment / m_factor) > m_bufsiz) { 84 if (m_buffill + (m_increment / m_factor) > m_bufsiz) {
75 if (m_bufsiz == 0) m_bufsiz = m_increment * 16; 85 if (m_bufsiz == 0) m_bufsiz = m_increment * 16;
76 else m_bufsiz = m_bufsiz * 2; 86 else m_bufsiz = m_bufsiz * 2;
77 if (!m_buffer) { 87 if (!m_buffer) {
78 m_buffer = (double *)malloc(m_bufsiz * sizeof(double)); 88 m_buffer = (float *)malloc(m_bufsiz * sizeof(float));
79 } else { 89 } else {
80 std::cerr << "DownBeat::pushAudioBlock: realloc m_buffer to " << m_bufsiz << std::endl; 90 std::cerr << "DownBeat::pushAudioBlock: realloc m_buffer to " << m_bufsiz << std::endl;
81 m_buffer = (double *)realloc(m_buffer, m_bufsiz * sizeof(double)); 91 m_buffer = (float *)realloc(m_buffer, m_bufsiz * sizeof(float));
82 } 92 }
83 } 93 }
84 if (!m_decimator1) makeDecimators(); 94 if (!m_decimator1) makeDecimators();
95 float rmsin = 0, rmsout = 0;
96 for (int i = 0; i < m_increment; ++i) {
97 rmsin += audio[i] * audio[i];
98 }
85 if (m_decimator2) { 99 if (m_decimator2) {
86 m_decimator1->process(audio, m_decbuf); 100 m_decimator1->process(audio, m_decbuf);
87 m_decimator2->process(m_decbuf, m_buffer + m_buffill); 101 m_decimator2->process(m_decbuf, m_buffer + m_buffill);
88 } else { 102 } else {
89 m_decimator1->process(audio, m_buffer + m_buffill); 103 m_decimator1->process(audio, m_buffer + m_buffill);
90 } 104 }
105 for (int i = 0; i < m_increment / m_factor; ++i) {
106 rmsout += m_buffer[m_buffill + i] * m_buffer[m_buffill + i];
107 }
108 std::cerr << "pushAudioBlock: rms in " << sqrt(rmsin) << ", out " << sqrt(rmsout) << std::endl;
91 m_buffill += m_increment / m_factor; 109 m_buffill += m_increment / m_factor;
92 } 110 }
93 111
94 const double * 112 const float *
95 DownBeat::getBufferedAudio(size_t &length) const 113 DownBeat::getBufferedAudio(size_t &length) const
96 { 114 {
97 length = m_buffill; 115 length = m_buffill;
98 return m_buffer; 116 return m_buffer;
99 } 117 }
100 118
101 void 119 void
102 DownBeat::findDownBeats(const double *audio, 120 DownBeat::resetAudioBuffer()
121 {
122 if (m_buffer) free(m_buffer);
123 m_buffill = 0;
124 m_bufsiz = 0;
125 }
126
127 void
128 DownBeat::findDownBeats(const float *audio,
103 size_t audioLength, 129 size_t audioLength,
104 const d_vec_t &beats, 130 const d_vec_t &beats,
105 i_vec_t &downbeats) 131 i_vec_t &downbeats)
106 { 132 {
107 // FIND DOWNBEATS BY PARTITIONING THE INPUT AUDIO FILE INTO BEAT SEGMENTS 133 // FIND DOWNBEATS BY PARTITIONING THE INPUT AUDIO FILE INTO BEAT SEGMENTS
122 148
123 // Copy the extents of the current beat from downsampled array 149 // Copy the extents of the current beat from downsampled array
124 // into beat frame buffer 150 // into beat frame buffer
125 151
126 size_t beatstart = (beats[i] * m_increment) / m_factor; 152 size_t beatstart = (beats[i] * m_increment) / m_factor;
127 size_t beatend = (beats[i] * m_increment) / m_factor; 153 size_t beatend = (beats[i+1] * m_increment) / m_factor;
128 if (beatend >= audioLength) beatend = audioLength - 1; 154 if (beatend >= audioLength) beatend = audioLength - 1;
129 if (beatend < beatstart) beatend = beatstart; 155 if (beatend < beatstart) beatend = beatstart;
130 size_t beatlen = beatend - beatstart; 156 size_t beatlen = beatend - beatstart;
131 157
132 // Also apply a Hanning window to the beat frame buffer, sized 158 // Also apply a Hanning window to the beat frame buffer, sized
133 // to the beat extents rather than the frame size. (Because 159 // to the beat extents rather than the frame size. (Because
134 // the size varies, it's easier to do this by hand than use 160 // the size varies, it's easier to do this by hand than use
135 // our Window abstraction.) 161 // our Window abstraction.)
136 162
163 float rms = 0;
137 for (size_t j = 0; j < beatlen; ++j) { 164 for (size_t j = 0; j < beatlen; ++j) {
138 double mul = 0.5 * (1.0 - cos(TWO_PI * (double(j) / double(beatlen)))); 165 double mul = 0.5 * (1.0 - cos(TWO_PI * (double(j) / double(beatlen))));
139 m_beatframe[j] = audio[beatstart + j] * mul; 166 m_beatframe[j] = audio[beatstart + j] * mul;
140 } 167 rms += m_beatframe[j] * m_beatframe[j];
168 }
169 rms = sqrt(rms);
170 std::cerr << "beat " << i << ": audio rms " << rms << std::endl;
141 171
142 for (size_t j = beatlen; j < m_beatframesize; ++j) { 172 for (size_t j = beatlen; j < m_beatframesize; ++j) {
143 m_beatframe[j] = 0.0; 173 m_beatframe[j] = 0.0;
144 } 174 }
145 175
160 MathUtilities::adaptiveThreshold(newspec); 190 MathUtilities::adaptiveThreshold(newspec);
161 191
162 // Calculate JS divergence between new and old spectral frames 192 // Calculate JS divergence between new and old spectral frames
163 193
164 specdiff.push_back(measureSpecDiff(oldspec, newspec)); 194 specdiff.push_back(measureSpecDiff(oldspec, newspec));
195 // specdiff.push_back(KLDivergence().distanceDistribution(oldspec, newspec, false));
196
197 std::cerr << "specdiff: " << specdiff[specdiff.size()-1] << std::endl;
165 198
166 // Copy newspec across to old 199 // Copy newspec across to old
167 200
168 for (size_t j = 0; j < m_beatframesize/2; ++j) { 201 for (size_t j = 0; j < m_beatframesize/2; ++j) {
169 oldspec[j] = newspec[j]; 202 oldspec[j] = newspec[j];
170 } 203 }
171 } 204 }
172 205
173 // We now have all spectral difference measures in specdiff 206 // We now have all spectral difference measures in specdiff
174 207
175 uint timesig = 4; // SHOULD REPLACE THIS WITH A FIND_METER FUNCTION - OR USER PARAMETER 208 uint timesig = m_bpb;
209 if (timesig == 0) timesig = 4;
210
176 d_vec_t dbcand(timesig); // downbeat candidates 211 d_vec_t dbcand(timesig); // downbeat candidates
212
213 for (int beat = 0; beat < timesig; ++beat) {
214 dbcand[beat] = 0;
215 }
177 216
178 // look for beat transition which leads to greatest spectral change 217 // look for beat transition which leads to greatest spectral change
179 for (int beat = 0; beat < timesig; ++beat) { 218 for (int beat = 0; beat < timesig; ++beat) {
180 for (int example = beat; example < specdiff.size(); ++example) { 219 for (int example = beat; example < specdiff.size(); example += timesig) {
181 dbcand[beat] += (specdiff[example]) / timesig; 220 dbcand[beat] += (specdiff[example]) / timesig;
182 } 221 }
183 } 222 std::cerr << "dbcand[" << beat << "] = " << dbcand[beat] << std::endl;
223 }
224
184 225
185 // first downbeat is beat at index of maximum value of dbcand 226 // first downbeat is beat at index of maximum value of dbcand
186 int dbind = MathUtilities::getMax(dbcand); 227 int dbind = MathUtilities::getMax(dbcand);
187 228
188 // remaining downbeats are at timesig intervals from the first 229 // remaining downbeats are at timesig intervals from the first