Mercurial > hg > qm-dsp
comparison dsp/tempotracking/DownBeat.cpp @ 55:7fe29d8a7eaf
* Various fixes related to the bar estimator code
author | cannam |
---|---|
date | Tue, 10 Feb 2009 16:37:11 +0000 |
parents | 5bec06ecc88a |
children | a0f987c06bec |
comparison
equal
deleted
inserted
replaced
54:5bec06ecc88a | 55:7fe29d8a7eaf |
---|---|
10 | 10 |
11 #include "DownBeat.h" | 11 #include "DownBeat.h" |
12 | 12 |
13 #include "maths/MathAliases.h" | 13 #include "maths/MathAliases.h" |
14 #include "maths/MathUtilities.h" | 14 #include "maths/MathUtilities.h" |
15 #include "maths/KLDivergence.h" | |
15 #include "dsp/transforms/FFT.h" | 16 #include "dsp/transforms/FFT.h" |
16 | 17 |
17 #include <iostream> | 18 #include <iostream> |
18 #include <cstdlib> | 19 #include <cstdlib> |
19 | 20 |
20 DownBeat::DownBeat(float originalSampleRate, | 21 DownBeat::DownBeat(float originalSampleRate, |
21 size_t decimationFactor, | 22 size_t decimationFactor, |
22 size_t dfIncrement) : | 23 size_t dfIncrement) : |
24 m_bpb(0), | |
23 m_rate(originalSampleRate), | 25 m_rate(originalSampleRate), |
24 m_factor(decimationFactor), | 26 m_factor(decimationFactor), |
25 m_increment(dfIncrement), | 27 m_increment(dfIncrement), |
26 m_decimator1(0), | 28 m_decimator1(0), |
27 m_decimator2(0), | 29 m_decimator2(0), |
32 m_beatframe(0) | 34 m_beatframe(0) |
33 { | 35 { |
34 // beat frame size is next power of two up from 1.3 seconds at the | 36 // beat frame size is next power of two up from 1.3 seconds at the |
35 // downsampled rate (happens to produce 4096 for 44100 or 48000 at | 37 // downsampled rate (happens to produce 4096 for 44100 or 48000 at |
36 // 16x decimation, which is our expected normal situation) | 38 // 16x decimation, which is our expected normal situation) |
37 int bfs = int((m_rate / decimationFactor) * 1.3); | 39 m_beatframesize = MathUtilities::nextPowerOfTwo |
38 m_beatframesize = 1; | 40 (int((m_rate / decimationFactor) * 1.3)); |
39 while (bfs) { bfs >>= 1; m_beatframesize <<= 1; } | |
40 std::cerr << "rate = " << m_rate << ", bfs = " << m_beatframesize << std::endl; | 41 std::cerr << "rate = " << m_rate << ", bfs = " << m_beatframesize << std::endl; |
41 m_beatframe = new double[m_beatframesize]; | 42 m_beatframe = new double[m_beatframesize]; |
42 m_fftRealOut = new double[m_beatframesize]; | 43 m_fftRealOut = new double[m_beatframesize]; |
43 m_fftImagOut = new double[m_beatframesize]; | 44 m_fftImagOut = new double[m_beatframesize]; |
44 } | 45 } |
53 delete[] m_fftRealOut; | 54 delete[] m_fftRealOut; |
54 delete[] m_fftImagOut; | 55 delete[] m_fftImagOut; |
55 } | 56 } |
56 | 57 |
57 void | 58 void |
59 DownBeat::setBeatsPerBar(int bpb) | |
60 { | |
61 m_bpb = bpb; | |
62 } | |
63 | |
64 void | |
58 DownBeat::makeDecimators() | 65 DownBeat::makeDecimators() |
59 { | 66 { |
60 if (m_factor < 2) return; | 67 if (m_factor < 2) return; |
61 int highest = Decimator::getHighestSupportedFactor(); | 68 int highest = Decimator::getHighestSupportedFactor(); |
62 if (m_factor <= highest) { | 69 if (m_factor <= highest) { |
63 m_decimator1 = new Decimator(m_increment, m_factor); | 70 m_decimator1 = new Decimator(m_increment, m_factor); |
71 std::cerr << "DownBeat: decimator 1 factor " << m_factor << ", size " << m_increment << std::endl; | |
64 return; | 72 return; |
65 } | 73 } |
66 m_decimator1 = new Decimator(m_increment, highest); | 74 m_decimator1 = new Decimator(m_increment, highest); |
75 std::cerr << "DownBeat: decimator 1 factor " << highest << ", size " << m_increment << std::endl; | |
67 m_decimator2 = new Decimator(m_increment / highest, m_factor / highest); | 76 m_decimator2 = new Decimator(m_increment / highest, m_factor / highest); |
68 m_decbuf = new double[m_factor / highest]; | 77 std::cerr << "DownBeat: decimator 2 factor " << m_factor / highest << ", size " << m_increment / highest << std::endl; |
69 } | 78 m_decbuf = new float[m_increment / highest]; |
70 | 79 } |
71 void | 80 |
72 DownBeat::pushAudioBlock(const double *audio) | 81 void |
82 DownBeat::pushAudioBlock(const float *audio) | |
73 { | 83 { |
74 if (m_buffill + (m_increment / m_factor) > m_bufsiz) { | 84 if (m_buffill + (m_increment / m_factor) > m_bufsiz) { |
75 if (m_bufsiz == 0) m_bufsiz = m_increment * 16; | 85 if (m_bufsiz == 0) m_bufsiz = m_increment * 16; |
76 else m_bufsiz = m_bufsiz * 2; | 86 else m_bufsiz = m_bufsiz * 2; |
77 if (!m_buffer) { | 87 if (!m_buffer) { |
78 m_buffer = (double *)malloc(m_bufsiz * sizeof(double)); | 88 m_buffer = (float *)malloc(m_bufsiz * sizeof(float)); |
79 } else { | 89 } else { |
80 std::cerr << "DownBeat::pushAudioBlock: realloc m_buffer to " << m_bufsiz << std::endl; | 90 std::cerr << "DownBeat::pushAudioBlock: realloc m_buffer to " << m_bufsiz << std::endl; |
81 m_buffer = (double *)realloc(m_buffer, m_bufsiz * sizeof(double)); | 91 m_buffer = (float *)realloc(m_buffer, m_bufsiz * sizeof(float)); |
82 } | 92 } |
83 } | 93 } |
84 if (!m_decimator1) makeDecimators(); | 94 if (!m_decimator1) makeDecimators(); |
95 float rmsin = 0, rmsout = 0; | |
96 for (int i = 0; i < m_increment; ++i) { | |
97 rmsin += audio[i] * audio[i]; | |
98 } | |
85 if (m_decimator2) { | 99 if (m_decimator2) { |
86 m_decimator1->process(audio, m_decbuf); | 100 m_decimator1->process(audio, m_decbuf); |
87 m_decimator2->process(m_decbuf, m_buffer + m_buffill); | 101 m_decimator2->process(m_decbuf, m_buffer + m_buffill); |
88 } else { | 102 } else { |
89 m_decimator1->process(audio, m_buffer + m_buffill); | 103 m_decimator1->process(audio, m_buffer + m_buffill); |
90 } | 104 } |
105 for (int i = 0; i < m_increment / m_factor; ++i) { | |
106 rmsout += m_buffer[m_buffill + i] * m_buffer[m_buffill + i]; | |
107 } | |
108 std::cerr << "pushAudioBlock: rms in " << sqrt(rmsin) << ", out " << sqrt(rmsout) << std::endl; | |
91 m_buffill += m_increment / m_factor; | 109 m_buffill += m_increment / m_factor; |
92 } | 110 } |
93 | 111 |
94 const double * | 112 const float * |
95 DownBeat::getBufferedAudio(size_t &length) const | 113 DownBeat::getBufferedAudio(size_t &length) const |
96 { | 114 { |
97 length = m_buffill; | 115 length = m_buffill; |
98 return m_buffer; | 116 return m_buffer; |
99 } | 117 } |
100 | 118 |
101 void | 119 void |
102 DownBeat::findDownBeats(const double *audio, | 120 DownBeat::resetAudioBuffer() |
121 { | |
122 if (m_buffer) free(m_buffer); | |
123 m_buffill = 0; | |
124 m_bufsiz = 0; | |
125 } | |
126 | |
127 void | |
128 DownBeat::findDownBeats(const float *audio, | |
103 size_t audioLength, | 129 size_t audioLength, |
104 const d_vec_t &beats, | 130 const d_vec_t &beats, |
105 i_vec_t &downbeats) | 131 i_vec_t &downbeats) |
106 { | 132 { |
107 // FIND DOWNBEATS BY PARTITIONING THE INPUT AUDIO FILE INTO BEAT SEGMENTS | 133 // FIND DOWNBEATS BY PARTITIONING THE INPUT AUDIO FILE INTO BEAT SEGMENTS |
122 | 148 |
123 // Copy the extents of the current beat from downsampled array | 149 // Copy the extents of the current beat from downsampled array |
124 // into beat frame buffer | 150 // into beat frame buffer |
125 | 151 |
126 size_t beatstart = (beats[i] * m_increment) / m_factor; | 152 size_t beatstart = (beats[i] * m_increment) / m_factor; |
127 size_t beatend = (beats[i] * m_increment) / m_factor; | 153 size_t beatend = (beats[i+1] * m_increment) / m_factor; |
128 if (beatend >= audioLength) beatend = audioLength - 1; | 154 if (beatend >= audioLength) beatend = audioLength - 1; |
129 if (beatend < beatstart) beatend = beatstart; | 155 if (beatend < beatstart) beatend = beatstart; |
130 size_t beatlen = beatend - beatstart; | 156 size_t beatlen = beatend - beatstart; |
131 | 157 |
132 // Also apply a Hanning window to the beat frame buffer, sized | 158 // Also apply a Hanning window to the beat frame buffer, sized |
133 // to the beat extents rather than the frame size. (Because | 159 // to the beat extents rather than the frame size. (Because |
134 // the size varies, it's easier to do this by hand than use | 160 // the size varies, it's easier to do this by hand than use |
135 // our Window abstraction.) | 161 // our Window abstraction.) |
136 | 162 |
163 float rms = 0; | |
137 for (size_t j = 0; j < beatlen; ++j) { | 164 for (size_t j = 0; j < beatlen; ++j) { |
138 double mul = 0.5 * (1.0 - cos(TWO_PI * (double(j) / double(beatlen)))); | 165 double mul = 0.5 * (1.0 - cos(TWO_PI * (double(j) / double(beatlen)))); |
139 m_beatframe[j] = audio[beatstart + j] * mul; | 166 m_beatframe[j] = audio[beatstart + j] * mul; |
140 } | 167 rms += m_beatframe[j] * m_beatframe[j]; |
168 } | |
169 rms = sqrt(rms); | |
170 std::cerr << "beat " << i << ": audio rms " << rms << std::endl; | |
141 | 171 |
142 for (size_t j = beatlen; j < m_beatframesize; ++j) { | 172 for (size_t j = beatlen; j < m_beatframesize; ++j) { |
143 m_beatframe[j] = 0.0; | 173 m_beatframe[j] = 0.0; |
144 } | 174 } |
145 | 175 |
160 MathUtilities::adaptiveThreshold(newspec); | 190 MathUtilities::adaptiveThreshold(newspec); |
161 | 191 |
162 // Calculate JS divergence between new and old spectral frames | 192 // Calculate JS divergence between new and old spectral frames |
163 | 193 |
164 specdiff.push_back(measureSpecDiff(oldspec, newspec)); | 194 specdiff.push_back(measureSpecDiff(oldspec, newspec)); |
195 // specdiff.push_back(KLDivergence().distanceDistribution(oldspec, newspec, false)); | |
196 | |
197 std::cerr << "specdiff: " << specdiff[specdiff.size()-1] << std::endl; | |
165 | 198 |
166 // Copy newspec across to old | 199 // Copy newspec across to old |
167 | 200 |
168 for (size_t j = 0; j < m_beatframesize/2; ++j) { | 201 for (size_t j = 0; j < m_beatframesize/2; ++j) { |
169 oldspec[j] = newspec[j]; | 202 oldspec[j] = newspec[j]; |
170 } | 203 } |
171 } | 204 } |
172 | 205 |
173 // We now have all spectral difference measures in specdiff | 206 // We now have all spectral difference measures in specdiff |
174 | 207 |
175 uint timesig = 4; // SHOULD REPLACE THIS WITH A FIND_METER FUNCTION - OR USER PARAMETER | 208 uint timesig = m_bpb; |
209 if (timesig == 0) timesig = 4; | |
210 | |
176 d_vec_t dbcand(timesig); // downbeat candidates | 211 d_vec_t dbcand(timesig); // downbeat candidates |
212 | |
213 for (int beat = 0; beat < timesig; ++beat) { | |
214 dbcand[beat] = 0; | |
215 } | |
177 | 216 |
178 // look for beat transition which leads to greatest spectral change | 217 // look for beat transition which leads to greatest spectral change |
179 for (int beat = 0; beat < timesig; ++beat) { | 218 for (int beat = 0; beat < timesig; ++beat) { |
180 for (int example = beat; example < specdiff.size(); ++example) { | 219 for (int example = beat; example < specdiff.size(); example += timesig) { |
181 dbcand[beat] += (specdiff[example]) / timesig; | 220 dbcand[beat] += (specdiff[example]) / timesig; |
182 } | 221 } |
183 } | 222 std::cerr << "dbcand[" << beat << "] = " << dbcand[beat] << std::endl; |
223 } | |
224 | |
184 | 225 |
185 // first downbeat is beat at index of maximum value of dbcand | 226 // first downbeat is beat at index of maximum value of dbcand |
186 int dbind = MathUtilities::getMax(dbcand); | 227 int dbind = MathUtilities::getMax(dbcand); |
187 | 228 |
188 // remaining downbeats are at timesig intervals from the first | 229 // remaining downbeats are at timesig intervals from the first |