Mercurial > hg > beatroot-vamp
comparison BeatRootProcessor.h @ 2:7d4e6b1ff3d1
Minor further updates
author | Chris Cannam |
---|---|
date | Fri, 17 Jun 2011 18:17:16 +0100 |
parents | 791398eaf639 |
children | a821f49c42f0 |
comparison
equal
deleted
inserted
replaced
1:791398eaf639 | 2:7d4e6b1ff3d1 |
---|---|
14 */ | 14 */ |
15 | 15 |
16 #ifndef _BEATROOT_PROCESSOR_H_ | 16 #ifndef _BEATROOT_PROCESSOR_H_ |
17 #define _BEATROOT_PROCESSOR_H_ | 17 #define _BEATROOT_PROCESSOR_H_ |
18 | 18 |
19 #include <vector> | |
20 | |
21 using std::vector; | |
22 | |
19 class BeatRootProcessor | 23 class BeatRootProcessor |
20 { | 24 { |
21 protected: | 25 protected: |
22 /** Sample rate of audio */ | 26 /** Sample rate of audio */ |
23 float sampleRate; | 27 float sampleRate; |
86 | 90 |
87 /** Total number of audio frames if known, or -1 for live or compressed input. */ | 91 /** Total number of audio frames if known, or -1 for live or compressed input. */ |
88 int totalFrames; | 92 int totalFrames; |
89 | 93 |
90 /** Flag for enabling or disabling debugging output */ | 94 /** Flag for enabling or disabling debugging output */ |
91 static bool debug = false; | 95 static bool debug; |
92 | 96 |
93 /** Flag for suppressing all standard output messages except results. */ | 97 /** Flag for suppressing all standard output messages except results. */ |
94 static bool silent = true; | 98 static bool silent; |
95 | 99 |
96 /** RMS frame energy below this value results in the frame being | 100 /** RMS frame energy below this value results in the frame being |
97 * set to zero, so that normalisation does not have undesired | 101 * set to zero, so that normalisation does not have undesired |
98 * side-effects. */ | 102 * side-effects. */ |
99 static double silenceThreshold = 0.0004; //!!!??? energy of what? should not be static? | 103 static double silenceThreshold; //!!!??? energy of what? should not be static? |
100 | 104 |
101 /** For dynamic range compression, this value is added to the log | 105 /** For dynamic range compression, this value is added to the log |
102 * magnitude in each frequency bin and any remaining negative | 106 * magnitude in each frequency bin and any remaining negative |
103 * values are then set to zero. | 107 * values are then set to zero. |
104 */ | 108 */ |
105 static double rangeThreshold = 10; //!!! sim | 109 static double rangeThreshold; //!!! sim |
106 | 110 |
107 /** Determines method of normalisation. Values can be:<ul> | 111 /** Determines method of normalisation. Values can be:<ul> |
108 * <li>0: no normalisation</li> | 112 * <li>0: no normalisation</li> |
109 * <li>1: normalisation by current frame energy</li> | 113 * <li>1: normalisation by current frame energy</li> |
110 * <li>2: normalisation by exponential average of frame energy</li> | 114 * <li>2: normalisation by exponential average of frame energy</li> |
111 * </ul> | 115 * </ul> |
112 */ | 116 */ |
113 static int normaliseMode = 2; | 117 static int normaliseMode; |
114 | 118 |
115 /** Ratio between rate of sampling the signal energy (for the | 119 /** Ratio between rate of sampling the signal energy (for the |
116 * amplitude envelope) and the hop size */ | 120 * amplitude envelope) and the hop size */ |
117 static int energyOversampleFactor = 2; //!!! not used? | 121 static int energyOversampleFactor; //!!! not used? |
118 | 122 |
119 public: | 123 public: |
120 | 124 |
121 /** Constructor: note that streams are not opened until the input | 125 /** Constructor: note that streams are not opened until the input |
122 * file is set (see <code>setInputFile()</code>). */ | 126 * file is set (see <code>setInputFile()</code>). */ |
123 AudioProcessor() { | 127 BeatRootProcessor() { |
124 cbIndex = 0; | 128 cbIndex = 0; |
125 frameRMS = 0; | 129 frameRMS = 0; |
126 ltAverage = 0; | 130 ltAverage = 0; |
127 frameCount = 0; | 131 frameCount = 0; |
128 hopSize = 0; | 132 hopSize = 0; |
129 fftSize = 0; | 133 fftSize = 0; |
130 hopTime = 0.010; // DEFAULT, overridden with -h | 134 hopTime = 0.010; // DEFAULT, overridden with -h |
131 fftTime = 0.04644; // DEFAULT, overridden with -f | 135 fftTime = 0.04644; // DEFAULT, overridden with -f |
132 progressCallback = null; | |
133 stdIn = new BufferedReader(new InputStreamReader(System.in)); | |
134 if (doOnsetPlot) | |
135 plot = new Plot(); | |
136 } // constructor | 136 } // constructor |
137 | 137 |
138 /** For debugging, outputs information about the AudioProcessor to | 138 protected: |
139 * standard error. | |
140 */ | |
141 public void print() { | |
142 System.err.println(this); | |
143 } // print() | |
144 | |
145 /** For interactive pause - wait for user to hit Enter */ | |
146 public String readLine() { | |
147 try { return stdIn.readLine(); } catch (Exception e) { return null; } | |
148 } // readLine() | |
149 | |
150 /** Gives some basic information about the audio being processed. */ | |
151 public String toString() { | |
152 return "AudioProcessor\n" + | |
153 String.format("\tFile: %s (%3.1f kHz, %1d channels)\n", | |
154 audioFileName, sampleRate/1000, channels) + | |
155 String.format("\tHop / FFT sizes: %5.3f / %5.3f", | |
156 hopTime, hopTime * fftSize / hopSize); | |
157 } // toString() | |
158 | |
159 /** Adds a link to the GUI component which shows the progress of matching. | |
160 * @param c the AudioProcessor representing the other performance | |
161 */ | |
162 public void setProgressCallback(ProgressIndicator c) { | |
163 progressCallback = c; | |
164 } // setProgressCallback() | |
165 | |
166 /** Sets up the streams and buffers for live audio input (CD quality). | |
167 * If any Exception is thrown within this method, it is caught, and any | |
168 * opened streams are closed, and <code>pcmInputStream</code> is set to | |
169 * <code>null</code>, indicating that the method did not complete | |
170 * successfully. | |
171 */ | |
172 public void setLiveInput() { | |
173 try { | |
174 channels = 2; | |
175 sampleRate = 44100; | |
176 AudioFormat desiredFormat = new AudioFormat( | |
177 AudioFormat.Encoding.PCM_SIGNED, sampleRate, 16, | |
178 channels, channels * 2, sampleRate, false); | |
179 TargetDataLine tdl = AudioSystem.getTargetDataLine(desiredFormat); | |
180 tdl.open(desiredFormat, liveInputBufferSize); | |
181 pcmInputStream = new AudioInputStream(tdl); | |
182 audioFormat = pcmInputStream.getFormat(); | |
183 init(); | |
184 tdl.start(); | |
185 } catch (Exception e) { | |
186 e.printStackTrace(); | |
187 closeStreams(); // make sure it exits in a consistent state | |
188 } | |
189 } // setLiveInput() | |
190 | |
191 /** Sets up the streams and buffers for audio file input. | |
192 * If any Exception is thrown within this method, it is caught, and any | |
193 * opened streams are closed, and <code>pcmInputStream</code> is set to | |
194 * <code>null</code>, indicating that the method did not complete | |
195 * successfully. | |
196 * @param fileName The path name of the input audio file. | |
197 */ | |
198 public void setInputFile(String fileName) { | |
199 closeStreams(); // release previously allocated resources | |
200 audioFileName = fileName; | |
201 try { | |
202 if (audioFileName == null) | |
203 throw new Exception("No input file specified"); | |
204 File audioFile = new File(audioFileName); | |
205 if (!audioFile.isFile()) | |
206 throw new FileNotFoundException( | |
207 "Requested file does not exist: " + audioFileName); | |
208 rawInputStream = AudioSystem.getAudioInputStream(audioFile); | |
209 audioFormat = rawInputStream.getFormat(); | |
210 channels = audioFormat.getChannels(); | |
211 sampleRate = audioFormat.getSampleRate(); | |
212 pcmInputStream = rawInputStream; | |
213 if ((audioFormat.getEncoding()!=AudioFormat.Encoding.PCM_SIGNED) || | |
214 (audioFormat.getFrameSize() != channels * 2) || | |
215 audioFormat.isBigEndian()) { | |
216 AudioFormat desiredFormat = new AudioFormat( | |
217 AudioFormat.Encoding.PCM_SIGNED, sampleRate, 16, | |
218 channels, channels * 2, sampleRate, false); | |
219 pcmInputStream = AudioSystem.getAudioInputStream(desiredFormat, | |
220 rawInputStream); | |
221 audioFormat = desiredFormat; | |
222 } | |
223 init(); | |
224 } catch (Exception e) { | |
225 e.printStackTrace(); | |
226 closeStreams(); // make sure it exits in a consistent state | |
227 } | |
228 } // setInputFile() | |
229 | |
230 /** Allocates memory for arrays, based on parameter settings */ | 139 /** Allocates memory for arrays, based on parameter settings */ |
231 protected void init() { | 140 void init() { |
232 hopSize = (int) Math.round(sampleRate * hopTime); | 141 hopSize = (int) Math.round(sampleRate * hopTime); |
233 fftSize = (int) Math.round(Math.pow(2, | 142 fftSize = (int) Math.round(Math.pow(2, |
234 Math.round( Math.log(fftTime * sampleRate) / Math.log(2)))); | 143 Math.round( Math.log(fftTime * sampleRate) / Math.log(2)))); |
235 makeFreqMap(fftSize, sampleRate); | 144 makeFreqMap(fftSize, sampleRate); |
236 int buffSize = hopSize * channels * 2; | 145 int buffSize = hopSize * channels * 2; |
261 spectralFlux = new double[totalFrames]; | 170 spectralFlux = new double[totalFrames]; |
262 frameCount = 0; | 171 frameCount = 0; |
263 cbIndex = 0; | 172 cbIndex = 0; |
264 frameRMS = 0; | 173 frameRMS = 0; |
265 ltAverage = 0; | 174 ltAverage = 0; |
266 progressCallback = null; | |
267 } // init() | 175 } // init() |
268 | 176 |
269 /** Closes the input stream(s) associated with this object. */ | 177 /** Closes the input stream(s) associated with this object. */ |
270 public void closeStreams() { | 178 void closeStreams() { |
271 if (pcmInputStream != null) { | 179 if (pcmInputStream != null) { |
272 try { | 180 try { |
273 pcmInputStream.close(); | 181 pcmInputStream.close(); |
274 if (pcmInputStream != rawInputStream) | 182 if (pcmInputStream != rawInputStream) |
275 rawInputStream.close(); | 183 rawInputStream.close(); |
288 * one to one. Where the spacing is greater than 0.5 semitones, the FFT | 196 * one to one. Where the spacing is greater than 0.5 semitones, the FFT |
289 * energy is mapped into semitone-wide bins. No scaling is performed; that | 197 * energy is mapped into semitone-wide bins. No scaling is performed; that |
290 * is the energy is summed into the comparison bins. See also | 198 * is the energy is summed into the comparison bins. See also |
291 * processFrame() | 199 * processFrame() |
292 */ | 200 */ |
293 protected void makeFreqMap(int fftSize, float sampleRate) { | 201 void makeFreqMap(int fftSize, float sampleRate) { |
294 freqMap = new int[fftSize/2+1]; | 202 freqMap = new int[fftSize/2+1]; |
295 double binWidth = sampleRate / fftSize; | 203 double binWidth = sampleRate / fftSize; |
296 int crossoverBin = (int)(2 / (Math.pow(2, 1/12.0) - 1)); | 204 int crossoverBin = (int)(2 / (Math.pow(2, 1/12.0) - 1)); |
297 int crossoverMidi = (int)Math.round(Math.log(crossoverBin*binWidth/440)/ | 205 int crossoverMidi = (int)Math.round(Math.log(crossoverBin*binWidth/440)/ |
298 Math.log(2) * 12 + 69); | 206 Math.log(2) * 12 + 69); |
310 } // makeFreqMap() | 218 } // makeFreqMap() |
311 | 219 |
312 /** Calculates the weighted phase deviation onset detection function. | 220 /** Calculates the weighted phase deviation onset detection function. |
313 * Not used. | 221 * Not used. |
314 * TODO: Test the change to WPD fn */ | 222 * TODO: Test the change to WPD fn */ |
315 protected void weightedPhaseDeviation() { | 223 void weightedPhaseDeviation() { |
316 if (frameCount < 2) | 224 if (frameCount < 2) |
317 phaseDeviation[frameCount] = 0; | 225 phaseDeviation[frameCount] = 0; |
318 else { | 226 else { |
319 for (int i = 0; i < fftSize; i++) { | 227 for (int i = 0; i < fftSize; i++) { |
320 double pd = imBuffer[i] - 2 * prevPhase[i] + prevPrevPhase[i]; | 228 double pd = imBuffer[i] - 2 * prevPhase[i] + prevPrevPhase[i]; |
335 * in a circular input buffer. | 243 * in a circular input buffer. |
336 * @return true if a frame (or part of a frame, if it is the final frame) | 244 * @return true if a frame (or part of a frame, if it is the final frame) |
337 * is read. If a complete frame cannot be read, the InputStream is set | 245 * is read. If a complete frame cannot be read, the InputStream is set |
338 * to null. | 246 * to null. |
339 */ | 247 */ |
340 public boolean getFrame() { | 248 bool getFrame() { |
341 if (pcmInputStream == null) | 249 if (pcmInputStream == null) |
342 return false; | 250 return false; |
343 try { | 251 try { |
344 int bytesRead = (int) pcmInputStream.read(inputBuffer); | 252 int bytesRead = (int) pcmInputStream.read(inputBuffer); |
345 if ((audioOut != null) && (bytesRead > 0)) | 253 if ((audioOut != null) && (bytesRead > 0)) |
399 /** Processes a frame of audio data by first computing the STFT with a | 307 /** Processes a frame of audio data by first computing the STFT with a |
400 * Hamming window, then mapping the frequency bins into a part-linear | 308 * Hamming window, then mapping the frequency bins into a part-linear |
401 * part-logarithmic array, then computing the spectral flux | 309 * part-logarithmic array, then computing the spectral flux |
402 * then (optionally) normalising and calculating onsets. | 310 * then (optionally) normalising and calculating onsets. |
403 */ | 311 */ |
404 protected void processFrame() { | 312 void processFrame() { |
405 if (getFrame()) { | 313 if (getFrame()) { |
406 for (int i = 0; i < fftSize; i++) { | 314 for (int i = 0; i < fftSize; i++) { |
407 reBuffer[i] = window[i] * circBuffer[cbIndex]; | 315 reBuffer[i] = window[i] * circBuffer[cbIndex]; |
408 if (++cbIndex == fftSize) | 316 if (++cbIndex == fftSize) |
409 cbIndex = 0; | 317 cbIndex = 0; |
477 } | 385 } |
478 } | 386 } |
479 } // processFrame() | 387 } // processFrame() |
480 | 388 |
481 /** Processes a complete file of audio data. */ | 389 /** Processes a complete file of audio data. */ |
482 public void processFile() { | 390 void processFile() { |
483 while (pcmInputStream != null) { | 391 while (pcmInputStream != null) { |
484 // Profile.start(0); | 392 // Profile.start(0); |
485 processFrame(); | 393 processFrame(); |
486 // Profile.log(0); | 394 // Profile.log(0); |
487 if (Thread.currentThread().isInterrupted()) { | 395 if (Thread.currentThread().isInterrupted()) { |
550 /** Reads a text file containing a list of whitespace-separated feature values. | 458 /** Reads a text file containing a list of whitespace-separated feature values. |
551 * Created for paper submitted to ICASSP'07. | 459 * Created for paper submitted to ICASSP'07. |
552 * @param fileName File containing the data | 460 * @param fileName File containing the data |
553 * @return An array containing the feature values | 461 * @return An array containing the feature values |
554 */ | 462 */ |
555 public static double[] getFeatures(String fileName) { | 463 static double[] getFeatures(String fileName) { |
556 ArrayList<Double> l = new ArrayList<Double>(); | 464 ArrayList<Double> l = new ArrayList<Double>(); |
557 try { | 465 try { |
558 BufferedReader b = new BufferedReader(new FileReader(fileName)); | 466 BufferedReader b = new BufferedReader(new FileReader(fileName)); |
559 while (true) { | 467 while (true) { |
560 String s = b.readLine(); | 468 String s = b.readLine(); |
604 /** Reads a file of feature values, treated as an onset detection function, | 512 /** Reads a file of feature values, treated as an onset detection function, |
605 * and finds peaks, which are stored in <code>onsetList</code> and <code>onsets</code>. | 513 * and finds peaks, which are stored in <code>onsetList</code> and <code>onsets</code>. |
606 * @param fileName The file of feature values | 514 * @param fileName The file of feature values |
607 * @param hopTime The spacing of feature values in time | 515 * @param hopTime The spacing of feature values in time |
608 */ | 516 */ |
609 public void processFeatures(String fileName, double hopTime) { | 517 void processFeatures(String fileName, double hopTime) { |
610 double hop = hopTime; | 518 double hop = hopTime; |
611 double[] features = getFeatures(fileName); | 519 double[] features = getFeatures(fileName); |
612 Peaks.normalise(features); | 520 Peaks.normalise(features); |
613 LinkedList<Integer> peaks = Peaks.findPeaks(features, (int)Math.round(0.06 / hop), 0.35, 0.84, true); | 521 LinkedList<Integer> peaks = Peaks.findPeaks(features, (int)Math.round(0.06 / hop), 0.35, 0.84, true); |
614 onsets = new double[peaks.size()]; | 522 onsets = new double[peaks.size()]; |
625 onsetList.add(e); | 533 onsetList.add(e); |
626 } | 534 } |
627 } // processFeatures() | 535 } // processFeatures() |
628 | 536 |
629 /** Copies output of audio processing to the display panel. */ | 537 /** Copies output of audio processing to the display panel. */ |
630 public void setDisplay(BeatTrackDisplay btd) { | 538 void setDisplay(BeatTrackDisplay btd) { |
631 int energy2[] = new int[totalFrames*energyOversampleFactor]; | 539 int energy2[] = new int[totalFrames*energyOversampleFactor]; |
632 double time[] = new double[totalFrames*energyOversampleFactor]; | 540 double time[] = new double[totalFrames*energyOversampleFactor]; |
633 for (int i = 0; i < totalFrames*energyOversampleFactor; i++) { | 541 for (int i = 0; i < totalFrames*energyOversampleFactor; i++) { |
634 energy2[i] = (int) (energy[i] * 4 * energyOversampleFactor); | 542 energy2[i] = (int) (energy[i] * 4 * energyOversampleFactor); |
635 time[i] = i * hopTime / energyOversampleFactor; | 543 time[i] = i * hopTime / energyOversampleFactor; |