Mercurial > hg > beatroot-vamp
comparison BeatRootProcessor.h @ 3:a821f49c42f0
More pruning, etc
author | Chris Cannam |
---|---|
date | Mon, 20 Jun 2011 16:32:11 +0100 |
parents | 7d4e6b1ff3d1 |
children | c06cf6f7cb04 |
comparison
equal
deleted
inserted
replaced
2:7d4e6b1ff3d1 | 3:a821f49c42f0 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ |
2 | 2 |
3 /* | 3 /* |
4 Vamp feature extraction plugin for the BeatRoot beat tracker. | 4 Vamp feature extraction plugin for the BeatRoot beat tracker. |
5 | 5 |
6 Centre for Digital Music, Queen Mary, University of London. | 6 Centre for Digital Music, Queen Mary, University of London. |
7 This file copyright 2011 Simon Dixon, Chris Cannam and QMUL. | 7 This file copyright 2011 Simon Dixon, Chris Cannam and QMUL. |
8 | 8 |
9 This program is free software; you can redistribute it and/or | 9 This program is free software; you can redistribute it and/or |
10 modify it under the terms of the GNU General Public License as | 10 modify it under the terms of the GNU General Public License as |
11 published by the Free Software Foundation; either version 2 of the | 11 published by the Free Software Foundation; either version 2 of the |
12 License, or (at your option) any later version. See the file | 12 License, or (at your option) any later version. See the file |
13 COPYING included with this distribution for more information. | 13 COPYING included with this distribution for more information. |
14 */ | 14 */ |
15 | 15 |
16 #ifndef _BEATROOT_PROCESSOR_H_ | 16 #ifndef _BEATROOT_PROCESSOR_H_ |
17 #define _BEATROOT_PROCESSOR_H_ | 17 #define _BEATROOT_PROCESSOR_H_ |
18 | 18 |
19 #include <vector> | 19 #include <vector> |
20 #include <cmath> | |
20 | 21 |
21 using std::vector; | 22 using std::vector; |
22 | 23 |
23 class BeatRootProcessor | 24 class BeatRootProcessor |
24 { | 25 { |
76 | 77 |
77 /** The magnitude spectra of all frames, used for plotting the spectrogram. */ | 78 /** The magnitude spectra of all frames, used for plotting the spectrogram. */ |
78 vector<vector<double> > frames; //!!! do we need this? much cheaper to lose it if we don't | 79 vector<vector<double> > frames; //!!! do we need this? much cheaper to lose it if we don't |
79 | 80 |
80 /** The RMS energy of all frames. */ | 81 /** The RMS energy of all frames. */ |
81 vector<double> energy; //!!! unused in beat tracking? | 82 // vector<double> energy; //!!! unused in beat tracking? |
82 | 83 |
83 /** The estimated onset times from peak-picking the onset | 84 /** The estimated onset times from peak-picking the onset |
84 * detection function(s). */ | 85 * detection function(s). */ |
85 vector<double> onsets; | 86 vector<double> onsets; |
86 | 87 |
116 */ | 117 */ |
117 static int normaliseMode; | 118 static int normaliseMode; |
118 | 119 |
119 /** Ratio between rate of sampling the signal energy (for the | 120 /** Ratio between rate of sampling the signal energy (for the |
120 * amplitude envelope) and the hop size */ | 121 * amplitude envelope) and the hop size */ |
121 static int energyOversampleFactor; //!!! not used? | 122 // static int energyOversampleFactor; //!!! not used? |
122 | 123 |
123 public: | 124 public: |
124 | 125 |
125 /** Constructor: note that streams are not opened until the input | 126 /** Constructor: note that streams are not opened until the input |
126 * file is set (see <code>setInputFile()</code>). */ | 127 * file is set (see <code>setInputFile()</code>). */ |
127 BeatRootProcessor() { | 128 BeatRootProcessor() { |
128 cbIndex = 0; | |
129 frameRMS = 0; | 129 frameRMS = 0; |
130 ltAverage = 0; | 130 ltAverage = 0; |
131 frameCount = 0; | 131 frameCount = 0; |
132 hopSize = 0; | 132 hopSize = 0; |
133 fftSize = 0; | 133 fftSize = 0; |
134 hopTime = 0.010; // DEFAULT, overridden with -h | 134 hopTime = 0.010; // DEFAULT, overridden with -h |
135 fftTime = 0.04644; // DEFAULT, overridden with -f | 135 fftTime = 0.04644; // DEFAULT, overridden with -f |
136 totalFrames = -1; //!!! not needed? | |
136 } // constructor | 137 } // constructor |
137 | 138 |
138 protected: | 139 protected: |
139 /** Allocates memory for arrays, based on parameter settings */ | 140 /** Allocates memory for arrays, based on parameter settings */ |
140 void init() { | 141 void init() { |
141 hopSize = (int) Math.round(sampleRate * hopTime); | 142 hopSize = lrint(sampleRate * hopTime); |
142 fftSize = (int) Math.round(Math.pow(2, | 143 fftSize = lrint(pow(2, lrint( log(fftTime * sampleRate) / log(2)))); |
143 Math.round( Math.log(fftTime * sampleRate) / Math.log(2)))); | 144 makeFreqMap(fftSize, sampleRate); |
144 makeFreqMap(fftSize, sampleRate); | 145 prevFrame.clear(); |
145 int buffSize = hopSize * channels * 2; | 146 for (int i = 0; i < freqMapSize; i++) prevFrame.push_back(0); |
146 if ((inputBuffer == null) || (inputBuffer.length != buffSize)) | 147 frameCount = 0; |
147 inputBuffer = new byte[buffSize]; | 148 frameRMS = 0; |
148 if ((circBuffer == null) || (circBuffer.length != fftSize)) { | 149 ltAverage = 0; |
149 circBuffer = new double[fftSize]; | 150 spectralFlux.clear(); |
150 reBuffer = new double[fftSize]; | 151 } // init() |
151 imBuffer = new double[fftSize]; | 152 |
152 prevPhase = new double[fftSize]; | 153 /** Creates a map of FFT frequency bins to comparison bins. |
153 prevPrevPhase = new double[fftSize]; | 154 * Where the spacing of FFT bins is less than 0.5 semitones, the mapping is |
154 prevFrame = new double[fftSize]; | 155 * one to one. Where the spacing is greater than 0.5 semitones, the FFT |
155 window = FFT.makeWindow(FFT.HAMMING, fftSize, fftSize); | 156 * energy is mapped into semitone-wide bins. No scaling is performed; that |
156 for (int i=0; i < fftSize; i++) | 157 * is the energy is summed into the comparison bins. See also |
157 window[i] *= Math.sqrt(fftSize); | 158 * processFrame() |
158 } | 159 */ |
159 if (pcmInputStream == rawInputStream) | 160 void makeFreqMap(int fftSize, float sampleRate) { |
160 totalFrames = (int)(pcmInputStream.getFrameLength() / hopSize); | 161 freqMap.resize(fftSize/2+1); |
161 else | 162 double binWidth = sampleRate / fftSize; |
162 totalFrames = (int) (MAX_LENGTH / hopTime); | 163 int crossoverBin = (int)(2 / (pow(2, 1/12.0) - 1)); |
163 if ((newFrame == null) || (newFrame.length != freqMapSize)) { | 164 int crossoverMidi = (int)lrint(log(crossoverBin*binWidth/440)/ |
164 newFrame = new double[freqMapSize]; | 165 log(2) * 12 + 69); |
165 frames = new double[totalFrames][freqMapSize]; | 166 int i = 0; |
166 } else if (frames.length != totalFrames) | 167 while (i <= crossoverBin) |
167 frames = new double[totalFrames][freqMapSize]; | 168 freqMap[i++] = i; |
168 energy = new double[totalFrames*energyOversampleFactor]; | 169 while (i <= fftSize/2) { |
169 phaseDeviation = new double[totalFrames]; | 170 double midi = log(i*binWidth/440) / log(2) * 12 + 69; |
170 spectralFlux = new double[totalFrames]; | 171 if (midi > 127) |
171 frameCount = 0; | 172 midi = 127; |
172 cbIndex = 0; | 173 freqMap[i++] = crossoverBin + (int)lrint(midi) - crossoverMidi; |
173 frameRMS = 0; | 174 } |
174 ltAverage = 0; | 175 freqMapSize = freqMap[i-1] + 1; |
175 } // init() | 176 } // makeFreqMap() |
176 | 177 |
177 /** Closes the input stream(s) associated with this object. */ | 178 /** Processes a frame of audio data by first computing the STFT with a |
178 void closeStreams() { | 179 * Hamming window, then mapping the frequency bins into a part-linear |
179 if (pcmInputStream != null) { | 180 * part-logarithmic array, then computing the spectral flux |
180 try { | 181 * then (optionally) normalising and calculating onsets. |
181 pcmInputStream.close(); | 182 */ |
182 if (pcmInputStream != rawInputStream) | 183 void processFrame(const float *const *inputBuffers) { |
183 rawInputStream.close(); | 184 newFrame.clear(); |
184 if (audioOut != null) { | 185 for (int i = 0; i < freqMapSize; i++) { |
185 audioOut.drain(); | 186 newFrame.push_back(0); |
186 audioOut.close(); | 187 } |
187 } | 188 double flux = 0; |
188 } catch (Exception e) {} | 189 for (int i = 0; i <= fftSize/2; i++) { |
189 pcmInputStream = null; | 190 double mag = sqrt(inputBuffers[0][i*2] * inputBuffers[0][i*2] + |
190 audioOut = null; | 191 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]); |
191 } | 192 if (mag > prevFrame[i]) flux += mag - prevFrame[i]; |
192 } // closeStreams() | 193 prevFrame[i] = mag; |
193 | 194 newFrame[freqMap[i]] += mag; |
194 /** Creates a map of FFT frequency bins to comparison bins. | 195 } |
195 * Where the spacing of FFT bins is less than 0.5 semitones, the mapping is | 196 spectralFlux.push_back(flux); |
196 * one to one. Where the spacing is greater than 0.5 semitones, the FFT | 197 frames.push_back(newFrame); |
197 * energy is mapped into semitone-wide bins. No scaling is performed; that | 198 // for (int i = 0; i < freqMapSize; i++) |
198 * is the energy is summed into the comparison bins. See also | 199 // [frameCount][i] = newFrame[i]; |
199 * processFrame() | 200 /* |
200 */ | 201 int index = cbIndex - (fftSize - hopSize); |
201 void makeFreqMap(int fftSize, float sampleRate) { | 202 if (index < 0) |
202 freqMap = new int[fftSize/2+1]; | 203 index += fftSize; |
203 double binWidth = sampleRate / fftSize; | 204 int sz = (fftSize - hopSize) / energyOversampleFactor; |
204 int crossoverBin = (int)(2 / (Math.pow(2, 1/12.0) - 1)); | 205 for (int j = 0; j < energyOversampleFactor; j++) { |
205 int crossoverMidi = (int)Math.round(Math.log(crossoverBin*binWidth/440)/ | 206 double newEnergy = 0; |
206 Math.log(2) * 12 + 69); | 207 for (int i = 0; i < sz; i++) { |
207 // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth; | 208 newEnergy += circBuffer[index] * circBuffer[index]; |
208 int i = 0; | 209 if (++index == fftSize) |
209 while (i <= crossoverBin) | 210 index = 0; |
210 freqMap[i++] = i; | 211 } |
211 while (i <= fftSize/2) { | 212 energy[frameCount * energyOversampleFactor + j] = |
212 double midi = Math.log(i*binWidth/440) / Math.log(2) * 12 + 69; | 213 newEnergy / sz <= 1e-6? 0: log(newEnergy / sz) + 13.816; |
213 if (midi > 127) | 214 }*/ |
214 midi = 127; | 215 |
215 freqMap[i++] = crossoverBin + (int)Math.round(midi) - crossoverMidi; | 216 double decay = frameCount >= 200? 0.99: |
216 } | 217 (frameCount < 100? 0: (frameCount - 100) / 100.0); |
217 freqMapSize = freqMap[i-1] + 1; | 218 |
218 } // makeFreqMap() | 219 //!!! uh-oh -- frameRMS has not been calculated (it came from time-domain signal) -- will always appear silent |
219 | 220 |
220 /** Calculates the weighted phase deviation onset detection function. | 221 if (ltAverage == 0) |
221 * Not used. | 222 ltAverage = frameRMS; |
222 * TODO: Test the change to WPD fn */ | 223 else |
223 void weightedPhaseDeviation() { | 224 ltAverage = ltAverage * decay + frameRMS * (1.0 - decay); |
224 if (frameCount < 2) | 225 if (frameRMS <= silenceThreshold) |
225 phaseDeviation[frameCount] = 0; | 226 for (int i = 0; i < freqMapSize; i++) |
226 else { | 227 frames[frameCount][i] = 0; |
227 for (int i = 0; i < fftSize; i++) { | 228 else { |
228 double pd = imBuffer[i] - 2 * prevPhase[i] + prevPrevPhase[i]; | 229 if (normaliseMode == 1) |
229 double pd1 = Math.abs(Math.IEEEremainder(pd, 2 * Math.PI)); | 230 for (int i = 0; i < freqMapSize; i++) |
230 phaseDeviation[frameCount] += pd1 * reBuffer[i]; | 231 frames[frameCount][i] /= frameRMS; |
231 // System.err.printf("%7.3f %7.3f\n", pd/Math.PI, pd1/Math.PI); | 232 else if (normaliseMode == 2) |
232 } | 233 for (int i = 0; i < freqMapSize; i++) |
233 } | 234 frames[frameCount][i] /= ltAverage; |
234 phaseDeviation[frameCount] /= fftSize * Math.PI; | 235 for (int i = 0; i < freqMapSize; i++) { |
235 double[] tmp = prevPrevPhase; | 236 frames[frameCount][i] = log(frames[frameCount][i]) + rangeThreshold; |
236 prevPrevPhase = prevPhase; | 237 if (frames[frameCount][i] < 0) |
237 prevPhase = imBuffer; | 238 frames[frameCount][i] = 0; |
238 imBuffer = tmp; | 239 } |
239 } // weightedPhaseDeviation() | 240 } |
240 | |
241 /** Reads a frame of input data, averages the channels to mono, scales | |
242 * to a maximum possible absolute value of 1, and stores the audio data | |
243 * in a circular input buffer. | |
244 * @return true if a frame (or part of a frame, if it is the final frame) | |
245 * is read. If a complete frame cannot be read, the InputStream is set | |
246 * to null. | |
247 */ | |
248 bool getFrame() { | |
249 if (pcmInputStream == null) | |
250 return false; | |
251 try { | |
252 int bytesRead = (int) pcmInputStream.read(inputBuffer); | |
253 if ((audioOut != null) && (bytesRead > 0)) | |
254 if (audioOut.write(inputBuffer, 0, bytesRead) != bytesRead) | |
255 System.err.println("Error writing to audio device"); | |
256 if (bytesRead < inputBuffer.length) { | |
257 if (!silent) | |
258 System.err.println("End of input: " + audioFileName); | |
259 closeStreams(); | |
260 return false; | |
261 } | |
262 } catch (IOException e) { | |
263 e.printStackTrace(); | |
264 closeStreams(); | |
265 return false; | |
266 } | |
267 frameRMS = 0; | |
268 double sample; | |
269 switch(channels) { | |
270 case 1: | |
271 for (int i = 0; i < inputBuffer.length; i += 2) { | |
272 sample = ((inputBuffer[i+1]<<8) | | |
273 (inputBuffer[i]&0xff)) / 32768.0; | |
274 frameRMS += sample * sample; | |
275 circBuffer[cbIndex++] = sample; | |
276 if (cbIndex == fftSize) | |
277 cbIndex = 0; | |
278 } | |
279 break; | |
280 case 2: // saves ~0.1% of RT (total input overhead ~0.4%) :) | |
281 for (int i = 0; i < inputBuffer.length; i += 4) { | |
282 sample = (((inputBuffer[i+1]<<8) | (inputBuffer[i]&0xff)) + | |
283 ((inputBuffer[i+3]<<8) | (inputBuffer[i+2]&0xff))) | |
284 / 65536.0; | |
285 frameRMS += sample * sample; | |
286 circBuffer[cbIndex++] = sample; | |
287 if (cbIndex == fftSize) | |
288 cbIndex = 0; | |
289 } | |
290 break; | |
291 default: | |
292 for (int i = 0; i < inputBuffer.length; ) { | |
293 sample = 0; | |
294 for (int j = 0; j < channels; j++, i+=2) | |
295 sample += (inputBuffer[i+1]<<8) | (inputBuffer[i]&0xff); | |
296 sample /= 32768.0 * channels; | |
297 frameRMS += sample * sample; | |
298 circBuffer[cbIndex++] = sample; | |
299 if (cbIndex == fftSize) | |
300 cbIndex = 0; | |
301 } | |
302 } | |
303 frameRMS = Math.sqrt(frameRMS / inputBuffer.length * 2 * channels); | |
304 return true; | |
305 } // getFrame() | |
306 | |
307 /** Processes a frame of audio data by first computing the STFT with a | |
308 * Hamming window, then mapping the frequency bins into a part-linear | |
309 * part-logarithmic array, then computing the spectral flux | |
310 * then (optionally) normalising and calculating onsets. | |
311 */ | |
312 void processFrame() { | |
313 if (getFrame()) { | |
314 for (int i = 0; i < fftSize; i++) { | |
315 reBuffer[i] = window[i] * circBuffer[cbIndex]; | |
316 if (++cbIndex == fftSize) | |
317 cbIndex = 0; | |
318 } | |
319 Arrays.fill(imBuffer, 0); | |
320 FFT.magnitudePhaseFFT(reBuffer, imBuffer); | |
321 Arrays.fill(newFrame, 0); | |
322 double flux = 0; | |
323 for (int i = 0; i <= fftSize/2; i++) { | |
324 if (reBuffer[i] > prevFrame[i]) | |
325 flux += reBuffer[i] - prevFrame[i]; | |
326 newFrame[freqMap[i]] += reBuffer[i]; | |
327 } | |
328 spectralFlux[frameCount] = flux; | |
329 for (int i = 0; i < freqMapSize; i++) | |
330 frames[frameCount][i] = newFrame[i]; | |
331 int index = cbIndex - (fftSize - hopSize); | |
332 if (index < 0) | |
333 index += fftSize; | |
334 int sz = (fftSize - hopSize) / energyOversampleFactor; | |
335 for (int j = 0; j < energyOversampleFactor; j++) { | |
336 double newEnergy = 0; | |
337 for (int i = 0; i < sz; i++) { | |
338 newEnergy += circBuffer[index] * circBuffer[index]; | |
339 if (++index == fftSize) | |
340 index = 0; | |
341 } | |
342 energy[frameCount * energyOversampleFactor + j] = | |
343 newEnergy / sz <= 1e-6? 0: Math.log(newEnergy / sz) + 13.816; | |
344 } | |
345 double decay = frameCount >= 200? 0.99: | |
346 (frameCount < 100? 0: (frameCount - 100) / 100.0); | |
347 if (ltAverage == 0) | |
348 ltAverage = frameRMS; | |
349 else | |
350 ltAverage = ltAverage * decay + frameRMS * (1.0 - decay); | |
351 if (frameRMS <= silenceThreshold) | |
352 for (int i = 0; i < freqMapSize; i++) | |
353 frames[frameCount][i] = 0; | |
354 else { | |
355 if (normaliseMode == 1) | |
356 for (int i = 0; i < freqMapSize; i++) | |
357 frames[frameCount][i] /= frameRMS; | |
358 else if (normaliseMode == 2) | |
359 for (int i = 0; i < freqMapSize; i++) | |
360 frames[frameCount][i] /= ltAverage; | |
361 for (int i = 0; i < freqMapSize; i++) { | |
362 frames[frameCount][i] = Math.log(frames[frameCount][i]) + rangeThreshold; | |
363 if (frames[frameCount][i] < 0) | |
364 frames[frameCount][i] = 0; | |
365 } | |
366 } | |
367 // weightedPhaseDeviation(); | 241 // weightedPhaseDeviation(); |
368 // if (debug) | 242 // if (debug) |
369 // System.err.printf("PhaseDev: t=%7.3f phDev=%7.3f RMS=%7.3f\n", | 243 // System.err.printf("PhaseDev: t=%7.3f phDev=%7.3f RMS=%7.3f\n", |
370 // frameCount * hopTime, | 244 // frameCount * hopTime, |
371 // phaseDeviation[frameCount], | 245 // phaseDeviation[frameCount], |
372 // frameRMS); | 246 // frameRMS); |
373 double[] tmp = prevFrame; | 247 frameCount++; |
374 prevFrame = reBuffer; | 248 } // processFrame() |
375 reBuffer = tmp; | 249 |
376 frameCount++; | 250 /** Processes a complete file of audio data. */ |
377 if ((frameCount % 100) == 0) { | 251 void processFile() { |
378 if (!silent) { | 252 /* |
379 System.err.printf("Progress: %1d %5.3f %5.3f\n", | 253 while (pcmInputStream != null) { |
380 frameCount, frameRMS, ltAverage); | 254 // Profile.start(0); |
381 Profile.report(); | 255 processFrame(); |
382 } | 256 // Profile.log(0); |
383 if ((progressCallback != null) && (totalFrames > 0)) | 257 if (Thread.currentThread().isInterrupted()) { |
384 progressCallback.setFraction((double)frameCount/totalFrames); | 258 System.err.println("info: INTERRUPTED in processFile()"); |
385 } | 259 return; |
386 } | 260 } |
387 } // processFrame() | 261 } |
388 | 262 */ |
389 /** Processes a complete file of audio data. */ | |
390 void processFile() { | |
391 while (pcmInputStream != null) { | |
392 // Profile.start(0); | |
393 processFrame(); | |
394 // Profile.log(0); | |
395 if (Thread.currentThread().isInterrupted()) { | |
396 System.err.println("info: INTERRUPTED in processFile()"); | |
397 return; | |
398 } | |
399 } | |
400 | |
401 // double[] x1 = new double[phaseDeviation.length]; | 263 // double[] x1 = new double[phaseDeviation.length]; |
402 // for (int i = 0; i < x1.length; i++) { | 264 // for (int i = 0; i < x1.length; i++) { |
403 // x1[i] = i * hopTime; | 265 // x1[i] = i * hopTime; |
404 // phaseDeviation[i] = (phaseDeviation[i] - 0.4) * 100; | 266 // phaseDeviation[i] = (phaseDeviation[i] - 0.4) * 100; |
405 // } | 267 // } |
413 // plot.fitAxes(); | 275 // plot.fitAxes(); |
414 | 276 |
415 // double[] slope = new double[energy.length]; | 277 // double[] slope = new double[energy.length]; |
416 // double hop = hopTime / energyOversampleFactor; | 278 // double hop = hopTime / energyOversampleFactor; |
417 // Peaks.getSlope(energy, hop, 15, slope); | 279 // Peaks.getSlope(energy, hop, 15, slope); |
418 // LinkedList<Integer> peaks = Peaks.findPeaks(slope, (int)Math.round(0.06 / hop), 10); | 280 // LinkedList<Integer> peaks = Peaks.findPeaks(slope, (int)lrint(0.06 / hop), 10); |
419 | 281 |
420 double hop = hopTime; | 282 double hop = hopTime; |
421 Peaks.normalise(spectralFlux); | 283 Peaks.normalise(spectralFlux); |
422 LinkedList<Integer> peaks = Peaks.findPeaks(spectralFlux, (int)Math.round(0.06 / hop), 0.35, 0.84, true); | 284 LinkedList<Integer> peaks = Peaks.findPeaks(spectralFlux, (int)lrint(0.06 / hop), 0.35, 0.84, true); |
423 onsets = new double[peaks.size()]; | 285 onsets = new double[peaks.size()]; |
424 double[] y2 = new double[onsets.length]; | 286 double[] y2 = new double[onsets.length]; |
425 Iterator<Integer> it = peaks.iterator(); | 287 Iterator<Integer> it = peaks.iterator(); |
426 onsetList = new EventList(); | 288 onsetList = new EventList(); |
427 double minSalience = Peaks.min(spectralFlux); | 289 double minSalience = Peaks.min(spectralFlux); |
428 for (int i = 0; i < onsets.length; i++) { | 290 for (int i = 0; i < onsets.length; i++) { |
429 int index = it.next(); | 291 int index = it.next(); |
430 onsets[i] = index * hop; | 292 onsets[i] = index * hop; |
431 y2[i] = spectralFlux[index]; | 293 y2[i] = spectralFlux[index]; |
432 Event e = BeatTrackDisplay.newBeat(onsets[i], 0); | 294 Event e = BeatTrackDisplay.newBeat(onsets[i], 0); |
433 // if (debug) | 295 // if (debug) |
434 // System.err.printf("Onset: %8.3f %8.3f %8.3f\n", | 296 // System.err.printf("Onset: %8.3f %8.3f %8.3f\n", |
435 // onsets[i], energy[index], slope[index]); | 297 // onsets[i], energy[index], slope[index]); |
436 // e.salience = slope[index]; // or combination of energy + slope?? | 298 // e.salience = slope[index]; // or combination of energy + slope?? |
437 // Note that salience must be non-negative or the beat tracking system fails! | 299 // Note that salience must be non-negative or the beat tracking system fails! |
438 e.salience = spectralFlux[index] - minSalience; | 300 e.salience = spectralFlux[index] - minSalience; |
439 onsetList.add(e); | 301 onsetList.add(e); |
440 } | 302 } |
441 if (progressCallback != null) | 303 |
442 progressCallback.setFraction(1.0); | 304 //!!! This onsetList is then fed in to BeatTrackDisplay::beatTrack |
443 if (doOnsetPlot) { | 305 |
444 double[] x1 = new double[spectralFlux.length]; | 306 } // processFile() |
445 for (int i = 0; i < x1.length; i++) | 307 |
446 x1[i] = i * hopTime; | 308 }; // class AudioProcessor |
447 plot.addPlot(x1, spectralFlux, Color.red, 4); | |
448 plot.addPlot(onsets, y2, Color.green, 3); | |
449 plot.setTitle("Spectral flux and onsets"); | |
450 plot.fitAxes(); | |
451 } | |
452 if (debug) { | |
453 System.err.printf("Onsets: %d\nContinue? ", onsets.length); | |
454 readLine(); | |
455 } | |
456 } // processFile() | |
457 | |
458 /** Reads a text file containing a list of whitespace-separated feature values. | |
459 * Created for paper submitted to ICASSP'07. | |
460 * @param fileName File containing the data | |
461 * @return An array containing the feature values | |
462 */ | |
463 static double[] getFeatures(String fileName) { | |
464 ArrayList<Double> l = new ArrayList<Double>(); | |
465 try { | |
466 BufferedReader b = new BufferedReader(new FileReader(fileName)); | |
467 while (true) { | |
468 String s = b.readLine(); | |
469 if (s == null) | |
470 break; | |
471 int start = 0; | |
472 while (start < s.length()) { | |
473 int len = s.substring(start).indexOf(' '); | |
474 String t = null; | |
475 if (len < 0) | |
476 t = s.substring(start); | |
477 else if (len > 0) { | |
478 t = s.substring(start, start + len); | |
479 } | |
480 if (t != null) | |
481 try { | |
482 l.add(Double.parseDouble(t)); | |
483 } catch (NumberFormatException e) { | |
484 System.err.println(e); | |
485 if (l.size() == 0) | |
486 l.add(new Double(0)); | |
487 else | |
488 l.add(new Double(l.get(l.size()-1))); | |
489 } | |
490 start += len + 1; | |
491 if (len < 0) | |
492 break; | |
493 } | |
494 } | |
495 double[] features = new double[l.size()]; | |
496 Iterator<Double> it = l.iterator(); | |
497 for (int i = 0; it.hasNext(); i++) | |
498 features[i] = it.next().doubleValue(); | |
499 return features; | |
500 } catch (FileNotFoundException e) { | |
501 e.printStackTrace(); | |
502 return null; | |
503 } catch (IOException e) { | |
504 e.printStackTrace(); | |
505 return null; | |
506 } catch (NumberFormatException e) { | |
507 e.printStackTrace(); | |
508 return null; | |
509 } | |
510 } // getFeatures() | |
511 | |
512 /** Reads a file of feature values, treated as an onset detection function, | |
513 * and finds peaks, which are stored in <code>onsetList</code> and <code>onsets</code>. | |
514 * @param fileName The file of feature values | |
515 * @param hopTime The spacing of feature values in time | |
516 */ | |
517 void processFeatures(String fileName, double hopTime) { | |
518 double hop = hopTime; | |
519 double[] features = getFeatures(fileName); | |
520 Peaks.normalise(features); | |
521 LinkedList<Integer> peaks = Peaks.findPeaks(features, (int)Math.round(0.06 / hop), 0.35, 0.84, true); | |
522 onsets = new double[peaks.size()]; | |
523 double[] y2 = new double[onsets.length]; | |
524 Iterator<Integer> it = peaks.iterator(); | |
525 onsetList = new EventList(); | |
526 double minSalience = Peaks.min(features); | |
527 for (int i = 0; i < onsets.length; i++) { | |
528 int index = it.next(); | |
529 onsets[i] = index * hop; | |
530 y2[i] = features[index]; | |
531 Event e = BeatTrackDisplay.newBeat(onsets[i], 0); | |
532 e.salience = features[index] - minSalience; | |
533 onsetList.add(e); | |
534 } | |
535 } // processFeatures() | |
536 | |
537 /** Copies output of audio processing to the display panel. */ | |
538 void setDisplay(BeatTrackDisplay btd) { | |
539 int energy2[] = new int[totalFrames*energyOversampleFactor]; | |
540 double time[] = new double[totalFrames*energyOversampleFactor]; | |
541 for (int i = 0; i < totalFrames*energyOversampleFactor; i++) { | |
542 energy2[i] = (int) (energy[i] * 4 * energyOversampleFactor); | |
543 time[i] = i * hopTime / energyOversampleFactor; | |
544 } | |
545 btd.setMagnitudes(energy2); | |
546 btd.setEnvTimes(time); | |
547 btd.setSpectro(frames, totalFrames, hopTime, 0);//fftTime/hopTime); | |
548 btd.setOnsets(onsets); | |
549 btd.setOnsetList(onsetList); | |
550 } // setDisplay() | |
551 | |
552 } // class AudioProcessor | |
553 | 309 |
554 | 310 |
555 #endif | 311 #endif |