beatroot-vamp: comparison of BeatRootProcessor.h @ 1:791398eaf639

Some half-digested Java/C++ mishmash

author | Chris Cannam
---|---
date | Mon, 24 Jan 2011 16:44:27 +0000
parents |
children | 7d4e6b1ff3d1

comparing 0:886f11e41417 with 1:791398eaf639

/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */

/*
    Vamp feature extraction plugin for the BeatRoot beat tracker.

    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2011 Simon Dixon, Chris Cannam and QMUL.

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version. See the file
    COPYING included with this distribution for more information.
*/

#ifndef _BEATROOT_PROCESSOR_H_
#define _BEATROOT_PROCESSOR_H_

#include <vector>

using std::vector;

class BeatRootProcessor
{
protected:
    /** Sample rate of audio */
    float sampleRate;

    /** Spacing of audio frames (determines the amount of overlap or
     *  skip between frames). This value is expressed in
     *  seconds. (Default = 0.020s) */
    double hopTime;

    /** The approximate size of an FFT frame in seconds. (Default =
     *  0.04644s). The value is adjusted so that <code>fftSize</code>
     *  is always a power of 2. */
    double fftTime;

    /** Spacing of audio frames in samples (see <code>hopTime</code>) */
    int hopSize;

    /** The size of an FFT frame in samples (see <code>fftTime</code>) */
    int fftSize;

    /** The number of overlapping frames of audio data which have been read. */
    int frameCount;

    /** RMS amplitude of the current frame. */
    double frameRMS;

    /** Long term average frame energy (in frequency domain representation). */
    double ltAverage;

    /** Spectral flux onset detection function, indexed by frame. */
    vector<double> spectralFlux;

    /** A mapping function for mapping FFT bins to final frequency bins.
     *  The mapping is linear (1-1) until the resolution reaches 2 points per
     *  semitone, then logarithmic with a semitone resolution. E.g., for a
     *  44.1kHz sampling rate and fftSize of 2048 (46ms), bin spacing is
     *  21.5Hz, which is mapped linearly for bins 0-34 (0 to 732Hz), and
     *  logarithmically for the remaining bins (MIDI notes 79 to 127, bins 35
     *  to 83), where all energy above note 127 is mapped into the final bin. */
    vector<int> freqMap;

    /** The number of entries in <code>freqMap</code>. Note that the length of
     *  the array is greater, because its size is not known at creation time. */
    int freqMapSize;

    /** The magnitude spectrum of the most recent frame. Used for
     *  calculating the spectral flux. */
    vector<double> prevFrame;

    /** The magnitude spectrum of the current frame. */
    vector<double> newFrame;

    /** The magnitude spectra of all frames, used for plotting the spectrogram. */
    vector<vector<double> > frames; //!!! do we need this? much cheaper to lose it if we don't

    /** The RMS energy of all frames. */
    vector<double> energy; //!!! unused in beat tracking?

    /** The estimated onset times from peak-picking the onset
     *  detection function(s). */
    vector<double> onsets;

    /** The estimated onset times and their saliences. */
    //!!!EventList onsetList;
    vector<double> onsetList; //!!! corresponding to keyDown member of events in list

    /** Total number of audio frames if known, or -1 for live or compressed input. */
    int totalFrames;

    /** Flag for enabling or disabling debugging output */
    static bool debug = false;

    /** Flag for suppressing all standard output messages except results. */
    static bool silent = true;

    /** RMS frame energy below this value results in the frame being
     *  set to zero, so that normalisation does not have undesired
     *  side-effects. */
    static double silenceThreshold = 0.0004; //!!!??? energy of what? should not be static?

    /** For dynamic range compression, this value is added to the log
     *  magnitude in each frequency bin and any remaining negative
     *  values are then set to zero.
     */
    static double rangeThreshold = 10; //!!! sim

    /** Determines method of normalisation. Values can be:<ul>
     *  <li>0: no normalisation</li>
     *  <li>1: normalisation by current frame energy</li>
     *  <li>2: normalisation by exponential average of frame energy</li>
     *  </ul>
     */
    static int normaliseMode = 2;
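
As a sketch of the three modes just listed, applied to one frequency-mapped frame (the function name and signature are illustrative, not part of this class):

    #include <cstddef>
    #include <vector>

    // Illustrative sketch of normaliseMode: 0 leaves the frame alone,
    // 1 divides by the current frame's RMS energy, 2 divides by the
    // exponentially smoothed long-term energy.
    void normaliseFrame(std::vector<double> &frame, int normaliseMode,
                        double frameRMS, double ltAverage)
    {
        for (std::size_t i = 0; i < frame.size(); ++i) {
            if (normaliseMode == 1) frame[i] /= frameRMS;
            else if (normaliseMode == 2) frame[i] /= ltAverage;
        }
    }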

    /** Ratio between rate of sampling the signal energy (for the
     *  amplitude envelope) and the hop size */
    static int energyOversampleFactor = 2; //!!! not used?

public:

    /** Constructor: note that streams are not opened until the input
     *  file is set (see <code>setInputFile()</code>). */
    AudioProcessor() {
        cbIndex = 0;
        frameRMS = 0;
        ltAverage = 0;
        frameCount = 0;
        hopSize = 0;
        fftSize = 0;
        hopTime = 0.010;   // DEFAULT, overridden with -h
        fftTime = 0.04644; // DEFAULT, overridden with -f
        progressCallback = null;
        stdIn = new BufferedReader(new InputStreamReader(System.in));
        if (doOnsetPlot)
            plot = new Plot();
    } // constructor

    /** For debugging, outputs information about the AudioProcessor to
     *  standard error.
     */
    public void print() {
        System.err.println(this);
    } // print()

    /** For interactive pause - wait for user to hit Enter */
    public String readLine() {
        try { return stdIn.readLine(); } catch (Exception e) { return null; }
    } // readLine()

    /** Gives some basic information about the audio being processed. */
    public String toString() {
        return "AudioProcessor\n" +
            String.format("\tFile: %s (%3.1f kHz, %1d channels)\n",
                          audioFileName, sampleRate/1000, channels) +
            String.format("\tHop / FFT sizes: %5.3f / %5.3f",
                          hopTime, hopTime * fftSize / hopSize);
    } // toString()

    /** Adds a link to the GUI component which shows the progress of matching.
     *  @param c the AudioProcessor representing the other performance
     */
    public void setProgressCallback(ProgressIndicator c) {
        progressCallback = c;
    } // setProgressCallback()

    /** Sets up the streams and buffers for live audio input (CD quality).
     *  If any Exception is thrown within this method, it is caught, and any
     *  opened streams are closed, and <code>pcmInputStream</code> is set to
     *  <code>null</code>, indicating that the method did not complete
     *  successfully.
     */
    public void setLiveInput() {
        try {
            channels = 2;
            sampleRate = 44100;
            AudioFormat desiredFormat = new AudioFormat(
                    AudioFormat.Encoding.PCM_SIGNED, sampleRate, 16,
                    channels, channels * 2, sampleRate, false);
            TargetDataLine tdl = AudioSystem.getTargetDataLine(desiredFormat);
            tdl.open(desiredFormat, liveInputBufferSize);
            pcmInputStream = new AudioInputStream(tdl);
            audioFormat = pcmInputStream.getFormat();
            init();
            tdl.start();
        } catch (Exception e) {
            e.printStackTrace();
            closeStreams(); // make sure it exits in a consistent state
        }
    } // setLiveInput()

    /** Sets up the streams and buffers for audio file input.
     *  If any Exception is thrown within this method, it is caught, and any
     *  opened streams are closed, and <code>pcmInputStream</code> is set to
     *  <code>null</code>, indicating that the method did not complete
     *  successfully.
     *  @param fileName The path name of the input audio file.
     */
    public void setInputFile(String fileName) {
        closeStreams(); // release previously allocated resources
        audioFileName = fileName;
        try {
            if (audioFileName == null)
                throw new Exception("No input file specified");
            File audioFile = new File(audioFileName);
            if (!audioFile.isFile())
                throw new FileNotFoundException(
                        "Requested file does not exist: " + audioFileName);
            rawInputStream = AudioSystem.getAudioInputStream(audioFile);
            audioFormat = rawInputStream.getFormat();
            channels = audioFormat.getChannels();
            sampleRate = audioFormat.getSampleRate();
            pcmInputStream = rawInputStream;
            if ((audioFormat.getEncoding() != AudioFormat.Encoding.PCM_SIGNED) ||
                    (audioFormat.getFrameSize() != channels * 2) ||
                    audioFormat.isBigEndian()) {
                AudioFormat desiredFormat = new AudioFormat(
                        AudioFormat.Encoding.PCM_SIGNED, sampleRate, 16,
                        channels, channels * 2, sampleRate, false);
                pcmInputStream = AudioSystem.getAudioInputStream(desiredFormat,
                                                                 rawInputStream);
                audioFormat = desiredFormat;
            }
            init();
        } catch (Exception e) {
            e.printStackTrace();
            closeStreams(); // make sure it exits in a consistent state
        }
    } // setInputFile()

    /** Allocates memory for arrays, based on parameter settings */
    protected void init() {
        hopSize = (int) Math.round(sampleRate * hopTime);
        fftSize = (int) Math.round(Math.pow(2,
                Math.round(Math.log(fftTime * sampleRate) / Math.log(2))));
        makeFreqMap(fftSize, sampleRate);
        int buffSize = hopSize * channels * 2;
        if ((inputBuffer == null) || (inputBuffer.length != buffSize))
            inputBuffer = new byte[buffSize];
        if ((circBuffer == null) || (circBuffer.length != fftSize)) {
            circBuffer = new double[fftSize];
            reBuffer = new double[fftSize];
            imBuffer = new double[fftSize];
            prevPhase = new double[fftSize];
            prevPrevPhase = new double[fftSize];
            prevFrame = new double[fftSize];
            window = FFT.makeWindow(FFT.HAMMING, fftSize, fftSize);
            for (int i = 0; i < fftSize; i++)
                window[i] *= Math.sqrt(fftSize);
        }
        if (pcmInputStream == rawInputStream)
            totalFrames = (int) (pcmInputStream.getFrameLength() / hopSize);
        else
            totalFrames = (int) (MAX_LENGTH / hopTime);
        if ((newFrame == null) || (newFrame.length != freqMapSize)) {
            newFrame = new double[freqMapSize];
            frames = new double[totalFrames][freqMapSize];
        } else if (frames.length != totalFrames)
            frames = new double[totalFrames][freqMapSize];
        energy = new double[totalFrames * energyOversampleFactor];
        phaseDeviation = new double[totalFrames];
        spectralFlux = new double[totalFrames];
        frameCount = 0;
        cbIndex = 0;
        frameRMS = 0;
        ltAverage = 0;
        progressCallback = null;
    } // init()
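
As a worked example of the sizing above: with the constructor defaults of hopTime = 0.010 s and fftTime = 0.04644 s at 44.1 kHz, hopSize = round(44100 × 0.010) = 441 samples, and fftTime × sampleRate ≈ 2048.0, whose base-2 log rounds to 11, giving fftSize = 2048. A minimal C++ check of the same arithmetic:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        double sampleRate = 44100, hopTime = 0.010, fftTime = 0.04644;
        int hopSize = (int)(sampleRate * hopTime + 0.5);                      // 441
        int power = (int)(std::log(fftTime * sampleRate) / std::log(2.0) + 0.5);
        int fftSize = 1 << power;                                             // 2048
        std::printf("hopSize = %d, fftSize = %d\n", hopSize, fftSize);
        return 0;
    }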

    /** Closes the input stream(s) associated with this object. */
    public void closeStreams() {
        if (pcmInputStream != null) {
            try {
                pcmInputStream.close();
                if (pcmInputStream != rawInputStream)
                    rawInputStream.close();
                if (audioOut != null) {
                    audioOut.drain();
                    audioOut.close();
                }
            } catch (Exception e) {}
            pcmInputStream = null;
            audioOut = null;
        }
    } // closeStreams()

    /** Creates a map of FFT frequency bins to comparison bins.
     *  Where the spacing of FFT bins is less than 0.5 semitones, the mapping is
     *  one to one. Where the spacing is greater than 0.5 semitones, the FFT
     *  energy is mapped into semitone-wide bins. No scaling is performed; that
     *  is, the energy is summed into the comparison bins. See also
     *  processFrame().
     */
    protected void makeFreqMap(int fftSize, float sampleRate) {
        freqMap = new int[fftSize/2+1];
        double binWidth = sampleRate / fftSize;
        int crossoverBin = (int)(2 / (Math.pow(2, 1/12.0) - 1));
        int crossoverMidi = (int)Math.round(Math.log(crossoverBin*binWidth/440) /
                                            Math.log(2) * 12 + 69);
        // freq = 440 * Math.pow(2, (midi-69)/12.0) / binWidth;
        int i = 0;
        while (i <= crossoverBin)
            freqMap[i++] = i;
        while (i <= fftSize/2) {
            double midi = Math.log(i*binWidth/440) / Math.log(2) * 12 + 69;
            if (midi > 127)
                midi = 127;
            freqMap[i++] = crossoverBin + (int)Math.round(midi) - crossoverMidi;
        }
        freqMapSize = freqMap[i-1] + 1;
    } // makeFreqMap()
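
Plugging in the documented defaults: binWidth = 44100 / 2048 ≈ 21.53 Hz, and crossoverBin = (int)(2 / (2^(1/12) − 1)) = (int)33.63 = 33, so roughly the first 34 bins (up to about 730 Hz) map one-to-one and everything above collapses into semitone-wide bins capped at MIDI note 127. A quick C++ check of those two numbers:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        double binWidth = 44100.0 / 2048;                           // ~21.53 Hz
        int crossoverBin = (int)(2 / (std::pow(2.0, 1/12.0) - 1));  // 33
        std::printf("bin width %.2f Hz, crossover bin %d (~%.0f Hz)\n",
                    binWidth, crossoverBin, crossoverBin * binWidth);
        return 0;
    }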

    /** Calculates the weighted phase deviation onset detection function.
     *  Not used.
     *  TODO: Test the change to WPD fn */
    protected void weightedPhaseDeviation() {
        if (frameCount < 2)
            phaseDeviation[frameCount] = 0;
        else {
            for (int i = 0; i < fftSize; i++) {
                double pd = imBuffer[i] - 2 * prevPhase[i] + prevPrevPhase[i];
                double pd1 = Math.abs(Math.IEEEremainder(pd, 2 * Math.PI));
                phaseDeviation[frameCount] += pd1 * reBuffer[i];
                // System.err.printf("%7.3f %7.3f\n", pd/Math.PI, pd1/Math.PI);
            }
        }
        phaseDeviation[frameCount] /= fftSize * Math.PI;
        double[] tmp = prevPrevPhase;
        prevPrevPhase = prevPhase;
        prevPhase = imBuffer;
        imBuffer = tmp;
    } // weightedPhaseDeviation()
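
What this computes is a magnitude-weighted second difference of phase: per bin k, pd = φ_k(n) − 2φ_k(n−1) + φ_k(n−2), wrapped into (−π, π] by IEEEremainder, weighted by the bin magnitude and averaged over the frame. A standalone C++ sketch of the same measure, assuming separate magnitude and phase buffers (the real code reuses reBuffer/imBuffer as magnitude/phase after magnitudePhaseFFT):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Sketch: weighted phase deviation for one frame. mag holds bin
    // magnitudes; phase, prevPhase, prevPrevPhase hold the phase
    // spectra of the current and two preceding frames.
    double weightedPhaseDeviation(const std::vector<double> &mag,
                                  const std::vector<double> &phase,
                                  const std::vector<double> &prevPhase,
                                  const std::vector<double> &prevPrevPhase)
    {
        const double PI = 3.14159265358979323846;
        double sum = 0;
        for (std::size_t k = 0; k < mag.size(); ++k) {
            double pd = phase[k] - 2 * prevPhase[k] + prevPrevPhase[k];
            double wrapped = std::fmod(pd, 2 * PI);   // wrap into (-pi, pi]
            if (wrapped > PI)  wrapped -= 2 * PI;
            if (wrapped < -PI) wrapped += 2 * PI;
            sum += std::fabs(wrapped) * mag[k];
        }
        return sum / (mag.size() * PI);
    }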

    /** Reads a frame of input data, averages the channels to mono, scales
     *  to a maximum possible absolute value of 1, and stores the audio data
     *  in a circular input buffer.
     *  @return true if a frame (or part of a frame, if it is the final frame)
     *  is read. If a complete frame cannot be read, the InputStream is set
     *  to null.
     */
    public boolean getFrame() {
        if (pcmInputStream == null)
            return false;
        try {
            int bytesRead = (int) pcmInputStream.read(inputBuffer);
            if ((audioOut != null) && (bytesRead > 0))
                if (audioOut.write(inputBuffer, 0, bytesRead) != bytesRead)
                    System.err.println("Error writing to audio device");
            if (bytesRead < inputBuffer.length) {
                if (!silent)
                    System.err.println("End of input: " + audioFileName);
                closeStreams();
                return false;
            }
        } catch (IOException e) {
            e.printStackTrace();
            closeStreams();
            return false;
        }
        frameRMS = 0;
        double sample;
        switch (channels) {
        case 1:
            for (int i = 0; i < inputBuffer.length; i += 2) {
                sample = ((inputBuffer[i+1]<<8) |
                          (inputBuffer[i]&0xff)) / 32768.0;
                frameRMS += sample * sample;
                circBuffer[cbIndex++] = sample;
                if (cbIndex == fftSize)
                    cbIndex = 0;
            }
            break;
        case 2: // saves ~0.1% of RT (total input overhead ~0.4%) :)
            for (int i = 0; i < inputBuffer.length; i += 4) {
                sample = (((inputBuffer[i+1]<<8) | (inputBuffer[i]&0xff)) +
                          ((inputBuffer[i+3]<<8) | (inputBuffer[i+2]&0xff)))
                         / 65536.0;
                frameRMS += sample * sample;
                circBuffer[cbIndex++] = sample;
                if (cbIndex == fftSize)
                    cbIndex = 0;
            }
            break;
        default:
            for (int i = 0; i < inputBuffer.length; ) {
                sample = 0;
                for (int j = 0; j < channels; j++, i += 2)
                    sample += (inputBuffer[i+1]<<8) | (inputBuffer[i]&0xff);
                sample /= 32768.0 * channels;
                frameRMS += sample * sample;
                circBuffer[cbIndex++] = sample;
                if (cbIndex == fftSize)
                    cbIndex = 0;
            }
        }
        frameRMS = Math.sqrt(frameRMS / inputBuffer.length * 2 * channels);
        return true;
    } // getFrame()
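
The byte-twiddling above reconstructs signed 16-bit little-endian samples and scales them to [−1, 1). A self-contained C++ sketch of the same decode and RMS accumulation (names and signature are illustrative):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Sketch: mix interleaved 16-bit little-endian PCM down to mono
    // doubles in [-1, 1) and return the frame's RMS, as getFrame() does.
    double decodeToMono(const unsigned char *buf, std::size_t bytes,
                        int channels, std::vector<double> &out)
    {
        std::size_t nFrames = bytes / (channels * 2);
        if (nFrames == 0) return 0;
        double sumSquares = 0;
        out.resize(nFrames);
        for (std::size_t f = 0; f < nFrames; ++f) {
            double sample = 0;
            for (int c = 0; c < channels; ++c) {
                const unsigned char *p = buf + (f * channels + c) * 2;
                sample += (short)((p[1] << 8) | p[0]);  // signed 16-bit LE
            }
            sample /= 32768.0 * channels;               // scale to [-1, 1)
            sumSquares += sample * sample;
            out[f] = sample;
        }
        return std::sqrt(sumSquares / nFrames);
    }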

    /** Processes a frame of audio data by first computing the STFT with a
     *  Hamming window, then mapping the frequency bins into a part-linear
     *  part-logarithmic array, then computing the spectral flux,
     *  then (optionally) normalising and calculating onsets.
     */
    protected void processFrame() {
        if (getFrame()) {
            for (int i = 0; i < fftSize; i++) {
                reBuffer[i] = window[i] * circBuffer[cbIndex];
                if (++cbIndex == fftSize)
                    cbIndex = 0;
            }
            Arrays.fill(imBuffer, 0);
            FFT.magnitudePhaseFFT(reBuffer, imBuffer);
            Arrays.fill(newFrame, 0);
            double flux = 0;
            for (int i = 0; i <= fftSize/2; i++) {
                if (reBuffer[i] > prevFrame[i])
                    flux += reBuffer[i] - prevFrame[i];
                newFrame[freqMap[i]] += reBuffer[i];
            }
            spectralFlux[frameCount] = flux;
            for (int i = 0; i < freqMapSize; i++)
                frames[frameCount][i] = newFrame[i];
            int index = cbIndex - (fftSize - hopSize);
            if (index < 0)
                index += fftSize;
            int sz = (fftSize - hopSize) / energyOversampleFactor;
            for (int j = 0; j < energyOversampleFactor; j++) {
                double newEnergy = 0;
                for (int i = 0; i < sz; i++) {
                    newEnergy += circBuffer[index] * circBuffer[index];
                    if (++index == fftSize)
                        index = 0;
                }
                energy[frameCount * energyOversampleFactor + j] =
                    newEnergy / sz <= 1e-6 ? 0 : Math.log(newEnergy / sz) + 13.816;
            }
            double decay = frameCount >= 200 ? 0.99 :
                    (frameCount < 100 ? 0 : (frameCount - 100) / 100.0);
            if (ltAverage == 0)
                ltAverage = frameRMS;
            else
                ltAverage = ltAverage * decay + frameRMS * (1.0 - decay);
            if (frameRMS <= silenceThreshold)
                for (int i = 0; i < freqMapSize; i++)
                    frames[frameCount][i] = 0;
            else {
                if (normaliseMode == 1)
                    for (int i = 0; i < freqMapSize; i++)
                        frames[frameCount][i] /= frameRMS;
                else if (normaliseMode == 2)
                    for (int i = 0; i < freqMapSize; i++)
                        frames[frameCount][i] /= ltAverage;
                for (int i = 0; i < freqMapSize; i++) {
                    frames[frameCount][i] = Math.log(frames[frameCount][i]) + rangeThreshold;
                    if (frames[frameCount][i] < 0)
                        frames[frameCount][i] = 0;
                }
            }
            // weightedPhaseDeviation();
            // if (debug)
            //     System.err.printf("PhaseDev: t=%7.3f phDev=%7.3f RMS=%7.3f\n",
            //                       frameCount * hopTime,
            //                       phaseDeviation[frameCount],
            //                       frameRMS);
            double[] tmp = prevFrame;
            prevFrame = reBuffer;
            reBuffer = tmp;
            frameCount++;
            if ((frameCount % 100) == 0) {
                if (!silent) {
                    System.err.printf("Progress: %1d %5.3f %5.3f\n",
                                      frameCount, frameRMS, ltAverage);
                    Profile.report();
                }
                if ((progressCallback != null) && (totalFrames > 0))
                    progressCallback.setFraction((double)frameCount/totalFrames);
            }
        }
    } // processFrame()
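
Two details worth noting above: the spectral flux is the half-wave rectified increase in magnitude, SF(n) = Σ_k max(0, |X_k(n)| − |X_k(n−1)|), and the constant 13.816 ≈ −ln(10⁻⁶), which floors the log energy at zero for frames sitting at the 10⁻⁶ threshold. A minimal sketch of the flux computation on its own:

    #include <cstddef>
    #include <vector>

    // Sketch: half-wave rectified spectral flux between the current and
    // previous magnitude spectra -- the onset detection function used here.
    double spectralFluxFrame(const std::vector<double> &mag,
                             const std::vector<double> &prevMag)
    {
        double flux = 0;
        for (std::size_t k = 0; k < mag.size(); ++k)
            if (mag[k] > prevMag[k])
                flux += mag[k] - prevMag[k];
        return flux;
    }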

    /** Processes a complete file of audio data. */
    public void processFile() {
        while (pcmInputStream != null) {
            // Profile.start(0);
            processFrame();
            // Profile.log(0);
            if (Thread.currentThread().isInterrupted()) {
                System.err.println("info: INTERRUPTED in processFile()");
                return;
            }
        }

        // double[] x1 = new double[phaseDeviation.length];
        // for (int i = 0; i < x1.length; i++) {
        //     x1[i] = i * hopTime;
        //     phaseDeviation[i] = (phaseDeviation[i] - 0.4) * 100;
        // }
        // double[] x2 = new double[energy.length];
        // for (int i = 0; i < x2.length; i++)
        //     x2[i] = i * hopTime / energyOversampleFactor;
        // // plot.clear();
        // plot.addPlot(x1, phaseDeviation, Color.green, 7);
        // plot.addPlot(x2, energy, Color.red, 7);
        // plot.setTitle("Test phase deviation");
        // plot.fitAxes();

        // double[] slope = new double[energy.length];
        // double hop = hopTime / energyOversampleFactor;
        // Peaks.getSlope(energy, hop, 15, slope);
        // LinkedList<Integer> peaks = Peaks.findPeaks(slope, (int)Math.round(0.06 / hop), 10);

        double hop = hopTime;
        Peaks.normalise(spectralFlux);
        LinkedList<Integer> peaks = Peaks.findPeaks(spectralFlux, (int)Math.round(0.06 / hop), 0.35, 0.84, true);
        onsets = new double[peaks.size()];
        double[] y2 = new double[onsets.length];
        Iterator<Integer> it = peaks.iterator();
        onsetList = new EventList();
        double minSalience = Peaks.min(spectralFlux);
        for (int i = 0; i < onsets.length; i++) {
            int index = it.next();
            onsets[i] = index * hop;
            y2[i] = spectralFlux[index];
            Event e = BeatTrackDisplay.newBeat(onsets[i], 0);
            // if (debug)
            //     System.err.printf("Onset: %8.3f %8.3f %8.3f\n",
            //                       onsets[i], energy[index], slope[index]);
            // e.salience = slope[index]; // or combination of energy + slope??
            // Note that salience must be non-negative or the beat tracking system fails!
            e.salience = spectralFlux[index] - minSalience;
            onsetList.add(e);
        }
        if (progressCallback != null)
            progressCallback.setFraction(1.0);
        if (doOnsetPlot) {
            double[] x1 = new double[spectralFlux.length];
            for (int i = 0; i < x1.length; i++)
                x1[i] = i * hopTime;
            plot.addPlot(x1, spectralFlux, Color.red, 4);
            plot.addPlot(onsets, y2, Color.green, 3);
            plot.setTitle("Spectral flux and onsets");
            plot.fitAxes();
        }
        if (debug) {
            System.err.printf("Onsets: %d\nContinue? ", onsets.length);
            readLine();
        }
    } // processFile()
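
Peaks.findPeaks is called with a minimum peak spacing of round(0.06 / hop) frames, i.e. 60 ms; the meaning of the 0.35 and 0.84 arguments lives in the Peaks class and is not shown here. A generic stand-in sketch of peak picking with a minimum separation and an absolute threshold:

    #include <cstddef>
    #include <vector>

    // Sketch: indices of local maxima at least minGap frames apart and
    // above threshold. A simplified stand-in for Peaks.findPeaks, which
    // takes two further threshold parameters not modelled here.
    std::vector<int> findPeaks(const std::vector<double> &df,
                               int minGap, double threshold)
    {
        std::vector<int> peaks;
        for (std::size_t i = 1; i + 1 < df.size(); ++i) {
            if (df[i] < threshold) continue;
            if (df[i] >= df[i-1] && df[i] > df[i+1]) {       // local maximum
                if (!peaks.empty() && (int)i - peaks.back() < minGap) {
                    if (df[i] > df[(std::size_t)peaks.back()])
                        peaks.back() = (int)i;               // keep the larger peak
                } else {
                    peaks.push_back((int)i);
                }
            }
        }
        return peaks;
    }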

    /** Reads a text file containing a list of whitespace-separated feature values.
     *  Created for paper submitted to ICASSP'07.
     *  @param fileName File containing the data
     *  @return An array containing the feature values
     */
    public static double[] getFeatures(String fileName) {
        ArrayList<Double> l = new ArrayList<Double>();
        try {
            BufferedReader b = new BufferedReader(new FileReader(fileName));
            while (true) {
                String s = b.readLine();
                if (s == null)
                    break;
                int start = 0;
                while (start < s.length()) {
                    int len = s.substring(start).indexOf(' ');
                    String t = null;
                    if (len < 0)
                        t = s.substring(start);
                    else if (len > 0) {
                        t = s.substring(start, start + len);
                    }
                    if (t != null)
                        try {
                            l.add(Double.parseDouble(t));
                        } catch (NumberFormatException e) {
                            System.err.println(e);
                            if (l.size() == 0)
                                l.add(new Double(0));
                            else
                                l.add(new Double(l.get(l.size()-1)));
                        }
                    start += len + 1;
                    if (len < 0)
                        break;
                }
            }
            double[] features = new double[l.size()];
            Iterator<Double> it = l.iterator();
            for (int i = 0; it.hasNext(); i++)
                features[i] = it.next().doubleValue();
            return features;
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return null;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } catch (NumberFormatException e) {
            e.printStackTrace();
            return null;
        }
    } // getFeatures()
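
For comparison, the same reader is much shorter in C++, since stream extraction already splits on whitespace. This sketch drops the Java version's policy of substituting the previous value for unparseable tokens:

    #include <fstream>
    #include <string>
    #include <vector>

    // Sketch: read whitespace-separated feature values from a text file.
    std::vector<double> getFeatures(const std::string &fileName)
    {
        std::vector<double> features;
        std::ifstream in(fileName.c_str());
        double v;
        while (in >> v)
            features.push_back(v);
        return features;
    }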

    /** Reads a file of feature values, treated as an onset detection function,
     *  and finds peaks, which are stored in <code>onsetList</code> and <code>onsets</code>.
     *  @param fileName The file of feature values
     *  @param hopTime The spacing of feature values in time
     */
    public void processFeatures(String fileName, double hopTime) {
        double hop = hopTime;
        double[] features = getFeatures(fileName);
        Peaks.normalise(features);
        LinkedList<Integer> peaks = Peaks.findPeaks(features, (int)Math.round(0.06 / hop), 0.35, 0.84, true);
        onsets = new double[peaks.size()];
        double[] y2 = new double[onsets.length];
        Iterator<Integer> it = peaks.iterator();
        onsetList = new EventList();
        double minSalience = Peaks.min(features);
        for (int i = 0; i < onsets.length; i++) {
            int index = it.next();
            onsets[i] = index * hop;
            y2[i] = features[index];
            Event e = BeatTrackDisplay.newBeat(onsets[i], 0);
            e.salience = features[index] - minSalience;
            onsetList.add(e);
        }
    } // processFeatures()

    /** Copies output of audio processing to the display panel. */
    public void setDisplay(BeatTrackDisplay btd) {
        int energy2[] = new int[totalFrames*energyOversampleFactor];
        double time[] = new double[totalFrames*energyOversampleFactor];
        for (int i = 0; i < totalFrames*energyOversampleFactor; i++) {
            energy2[i] = (int) (energy[i] * 4 * energyOversampleFactor);
            time[i] = i * hopTime / energyOversampleFactor;
        }
        btd.setMagnitudes(energy2);
        btd.setEnvTimes(time);
        btd.setSpectro(frames, totalFrames, hopTime, 0); //fftTime/hopTime);
        btd.setOnsets(onsets);
        btd.setOnsetList(onsetList);
    } // setDisplay()

} // class AudioProcessor


#endif