comparison BeatRootProcessor.h @ 2:7d4e6b1ff3d1

Minor further updates
author Chris Cannam
date Fri, 17 Jun 2011 18:17:16 +0100
parents 791398eaf639
children a821f49c42f0
--- BeatRootProcessor.h (1:791398eaf639)
+++ BeatRootProcessor.h (2:7d4e6b1ff3d1)
@@ -14,10 +14,14 @@
  */
 
 #ifndef _BEATROOT_PROCESSOR_H_
 #define _BEATROOT_PROCESSOR_H_
 
+#include <vector>
+
+using std::vector;
+
 class BeatRootProcessor
 {
 protected:
     /** Sample rate of audio */
     float sampleRate;
@@ -86,151 +90,56 @@
 
     /** Total number of audio frames if known, or -1 for live or compressed input. */
     int totalFrames;
 
     /** Flag for enabling or disabling debugging output */
-    static bool debug = false;
+    static bool debug;
 
     /** Flag for suppressing all standard output messages except results. */
-    static bool silent = true;
+    static bool silent;
 
     /** RMS frame energy below this value results in the frame being
      * set to zero, so that normalisation does not have undesired
      * side-effects. */
-    static double silenceThreshold = 0.0004; //!!!??? energy of what? should not be static?
+    static double silenceThreshold; //!!!??? energy of what? should not be static?
 
     /** For dynamic range compression, this value is added to the log
      * magnitude in each frequency bin and any remaining negative
      * values are then set to zero.
      */
-    static double rangeThreshold = 10; //!!! sim
+    static double rangeThreshold; //!!! sim
 
     /** Determines method of normalisation. Values can be:<ul>
      * <li>0: no normalisation</li>
      * <li>1: normalisation by current frame energy</li>
      * <li>2: normalisation by exponential average of frame energy</li>
      * </ul>
      */
-    static int normaliseMode = 2;
+    static int normaliseMode;
 
     /** Ratio between rate of sampling the signal energy (for the
      * amplitude envelope) and the hop size */
-    static int energyOversampleFactor = 2; //!!! not used?
+    static int energyOversampleFactor; //!!! not used?
 
 public:
 
     /** Constructor: note that streams are not opened until the input
      * file is set (see <code>setInputFile()</code>). */
-    AudioProcessor() {
+    BeatRootProcessor() {
         cbIndex = 0;
         frameRMS = 0;
         ltAverage = 0;
         frameCount = 0;
         hopSize = 0;
         fftSize = 0;
         hopTime = 0.010; // DEFAULT, overridden with -h
         fftTime = 0.04644; // DEFAULT, overridden with -f
-        progressCallback = null;
-        stdIn = new BufferedReader(new InputStreamReader(System.in));
-        if (doOnsetPlot)
-            plot = new Plot();
     } // constructor
 
-    /** For debugging, outputs information about the AudioProcessor to
-     * standard error.
-     */
-    public void print() {
-        System.err.println(this);
-    } // print()
-
-    /** For interactive pause - wait for user to hit Enter */
-    public String readLine() {
-        try { return stdIn.readLine(); } catch (Exception e) { return null; }
-    } // readLine()
-
-    /** Gives some basic information about the audio being processed. */
-    public String toString() {
-        return "AudioProcessor\n" +
-            String.format("\tFile: %s (%3.1f kHz, %1d channels)\n",
-                audioFileName, sampleRate/1000, channels) +
-            String.format("\tHop / FFT sizes: %5.3f / %5.3f",
-                hopTime, hopTime * fftSize / hopSize);
-    } // toString()
-
-    /** Adds a link to the GUI component which shows the progress of matching.
-     * @param c the AudioProcessor representing the other performance
-     */
-    public void setProgressCallback(ProgressIndicator c) {
-        progressCallback = c;
-    } // setProgressCallback()
-
-    /** Sets up the streams and buffers for live audio input (CD quality).
-     * If any Exception is thrown within this method, it is caught, and any
-     * opened streams are closed, and <code>pcmInputStream</code> is set to
-     * <code>null</code>, indicating that the method did not complete
-     * successfully.
-     */
-    public void setLiveInput() {
-        try {
-            channels = 2;
-            sampleRate = 44100;
-            AudioFormat desiredFormat = new AudioFormat(
-                AudioFormat.Encoding.PCM_SIGNED, sampleRate, 16,
-                channels, channels * 2, sampleRate, false);
-            TargetDataLine tdl = AudioSystem.getTargetDataLine(desiredFormat);
-            tdl.open(desiredFormat, liveInputBufferSize);
-            pcmInputStream = new AudioInputStream(tdl);
-            audioFormat = pcmInputStream.getFormat();
-            init();
-            tdl.start();
-        } catch (Exception e) {
-            e.printStackTrace();
-            closeStreams(); // make sure it exits in a consistent state
-        }
-    } // setLiveInput()
-
-    /** Sets up the streams and buffers for audio file input.
-     * If any Exception is thrown within this method, it is caught, and any
-     * opened streams are closed, and <code>pcmInputStream</code> is set to
-     * <code>null</code>, indicating that the method did not complete
-     * successfully.
-     * @param fileName The path name of the input audio file.
-     */
-    public void setInputFile(String fileName) {
-        closeStreams(); // release previously allocated resources
-        audioFileName = fileName;
-        try {
-            if (audioFileName == null)
-                throw new Exception("No input file specified");
-            File audioFile = new File(audioFileName);
-            if (!audioFile.isFile())
-                throw new FileNotFoundException(
-                    "Requested file does not exist: " + audioFileName);
-            rawInputStream = AudioSystem.getAudioInputStream(audioFile);
-            audioFormat = rawInputStream.getFormat();
-            channels = audioFormat.getChannels();
-            sampleRate = audioFormat.getSampleRate();
-            pcmInputStream = rawInputStream;
-            if ((audioFormat.getEncoding()!=AudioFormat.Encoding.PCM_SIGNED) ||
-                (audioFormat.getFrameSize() != channels * 2) ||
-                audioFormat.isBigEndian()) {
-                AudioFormat desiredFormat = new AudioFormat(
-                    AudioFormat.Encoding.PCM_SIGNED, sampleRate, 16,
-                    channels, channels * 2, sampleRate, false);
-                pcmInputStream = AudioSystem.getAudioInputStream(desiredFormat,
-                    rawInputStream);
-                audioFormat = desiredFormat;
-            }
-            init();
-        } catch (Exception e) {
-            e.printStackTrace();
-            closeStreams(); // make sure it exits in a consistent state
-        }
-    } // setInputFile()
-
+protected:
     /** Allocates memory for arrays, based on parameter settings */
-    protected void init() {
+    void init() {
         hopSize = (int) Math.round(sampleRate * hopTime);
         fftSize = (int) Math.round(Math.pow(2,
             Math.round( Math.log(fftTime * sampleRate) / Math.log(2))));
         makeFreqMap(fftSize, sampleRate);
         int buffSize = hopSize * channels * 2;
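The bulk of this hunk strips the Java-style in-class initialisers from the non-const static members and drops the Java I/O plumbing from the constructor. In C++ those defaults have to move to out-of-class definitions in a translation unit; a minimal sketch, assuming a hypothetical BeatRootProcessor.cpp and keeping the values from the left-hand revision:

    #include "BeatRootProcessor.h"

    // Out-of-class definitions for the statics declared above, carrying the
    // defaults that revision 1:791398eaf639 expressed as in-class initialisers.
    bool   BeatRootProcessor::debug = false;
    bool   BeatRootProcessor::silent = true;
    double BeatRootProcessor::silenceThreshold = 0.0004;
    double BeatRootProcessor::rangeThreshold = 10;
    int    BeatRootProcessor::normaliseMode = 2;
    int    BeatRootProcessor::energyOversampleFactor = 2;

For the constructor defaults shown above (hopTime = 0.010 s, fftTime = 0.04644 s) at a 44100 Hz sample rate, init() gives hopSize = round(441.0) = 441 samples and fftSize = 2^round(log2(0.04644 * 44100)) = 2^round(log2(2048.0)) = 2048.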
@@ -261,15 +170,14 @@
         spectralFlux = new double[totalFrames];
         frameCount = 0;
         cbIndex = 0;
         frameRMS = 0;
         ltAverage = 0;
-        progressCallback = null;
     } // init()
 
     /** Closes the input stream(s) associated with this object. */
-    public void closeStreams() {
+    void closeStreams() {
         if (pcmInputStream != null) {
             try {
                 pcmInputStream.close();
                 if (pcmInputStream != rawInputStream)
                     rawInputStream.close();
@@ -288,11 +196,11 @@
      * one to one. Where the spacing is greater than 0.5 semitones, the FFT
      * energy is mapped into semitone-wide bins. No scaling is performed; that
      * is the energy is summed into the comparison bins. See also
      * processFrame()
      */
-    protected void makeFreqMap(int fftSize, float sampleRate) {
+    void makeFreqMap(int fftSize, float sampleRate) {
         freqMap = new int[fftSize/2+1];
         double binWidth = sampleRate / fftSize;
         int crossoverBin = (int)(2 / (Math.pow(2, 1/12.0) - 1));
         int crossoverMidi = (int)Math.round(Math.log(crossoverBin*binWidth/440)/
             Math.log(2) * 12 + 69);
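The two crossover values fix where the map switches from one-to-one bins to semitone-wide bins: below bin 2/(2^(1/12) - 1) ≈ 33.6 adjacent FFT bins are more than half a semitone apart, and crossoverMidi is the MIDI pitch of the crossover frequency. The loop that fills freqMap lies outside this hunk; the sketch below is an assumed reconstruction of the mapping the comment describes, not the file's own code:

    #include <cmath>
    #include <vector>

    static int roundToInt(double x) { return (int)std::floor(x + 0.5); }

    // Assumed reconstruction of the part-linear, part-logarithmic frequency map.
    std::vector<int> sketchFreqMap(int fftSize, float sampleRate)
    {
        std::vector<int> freqMap(fftSize / 2 + 1);
        double binWidth = sampleRate / fftSize;
        int crossoverBin = (int)(2 / (std::pow(2, 1 / 12.0) - 1));
        int crossoverMidi = roundToInt(std::log(crossoverBin * binWidth / 440.0) /
                                       std::log(2.0) * 12 + 69);
        int i = 0;
        // Linear region: one output bin per FFT bin.
        while (i <= crossoverBin && i <= fftSize / 2) {
            freqMap[i] = i;
            ++i;
        }
        // Logarithmic region: sum each FFT bin into a semitone-wide bin,
        // indexed so the two regions join up at the crossover.
        while (i <= fftSize / 2) {
            double midi = std::log(i * binWidth / 440.0) / std::log(2.0) * 12 + 69;
            if (midi > 127) midi = 127;
            freqMap[i] = crossoverBin + roundToInt(midi) - crossoverMidi;
            ++i;
        }
        return freqMap;
    }

With fftSize = 2048 at 44100 Hz (binWidth ≈ 21.5 Hz), the crossover falls at bin 33, roughly 711 Hz, i.e. MIDI pitch 77.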
@@ -310,11 +218,11 @@
     } // makeFreqMap()
 
     /** Calculates the weighted phase deviation onset detection function.
      * Not used.
      * TODO: Test the change to WPD fn */
-    protected void weightedPhaseDeviation() {
+    void weightedPhaseDeviation() {
         if (frameCount < 2)
             phaseDeviation[frameCount] = 0;
         else {
             for (int i = 0; i < fftSize; i++) {
                 double pd = imBuffer[i] - 2 * prevPhase[i] + prevPrevPhase[i];
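The expression above is the second difference of the phase in each bin (here held in imBuffer, prevPhase and prevPrevPhase); for a steady sinusoid it is close to zero, so large values indicate an onset. The comment marks the function as unused, but a minimal per-frame sketch of the weighted form, using illustrative names rather than the class's own members, would be:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Weighted phase deviation for one frame: mean over bins of the magnitude-
    // weighted absolute second difference of phase. Names are illustrative.
    double weightedPhaseDeviationFrame(const std::vector<double> &mag,
                                       const std::vector<double> &phase,
                                       const std::vector<double> &prevPhase,
                                       const std::vector<double> &prevPrevPhase)
    {
        const double PI = 3.14159265358979323846;
        double sum = 0;
        for (std::size_t i = 0; i < phase.size(); ++i) {
            double pd = phase[i] - 2 * prevPhase[i] + prevPrevPhase[i];
            pd = std::remainder(pd, 2 * PI);    // wrap to the principal range
            sum += std::fabs(pd) * mag[i];      // weight by bin magnitude
        }
        return phase.empty() ? 0.0 : sum / (double)phase.size();
    }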
@@ -335,11 +243,11 @@
      * in a circular input buffer.
      * @return true if a frame (or part of a frame, if it is the final frame)
      * is read. If a complete frame cannot be read, the InputStream is set
      * to null.
      */
-    public boolean getFrame() {
+    bool getFrame() {
         if (pcmInputStream == null)
             return false;
         try {
             int bytesRead = (int) pcmInputStream.read(inputBuffer);
             if ((audioOut != null) && (bytesRead > 0))
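Only the start of getFrame() falls inside this hunk, but the surrounding comment spells out the contract: read one hop of PCM into the circular buffer, report a partial final frame as read, and stop once nothing more can be read. As a rough illustration of that pattern (not the class's own implementation, and assuming interleaved 16-bit little-endian PCM):

    #include <fstream>
    #include <vector>

    // Illustrative only: read one hop of interleaved 16-bit little-endian PCM,
    // mix to mono and append to a circular buffer. Returns false when no data
    // could be read at all.
    bool readHop(std::ifstream &in, int hopSize, int channels,
                 std::vector<double> &circBuffer, int &cbIndex)
    {
        std::vector<char> bytes(hopSize * channels * 2);
        in.read(&bytes[0], (std::streamsize)bytes.size());
        std::streamsize got = in.gcount();
        if (got <= 0) return false;                  // end of stream: no frame read
        int samples = (int)(got / (channels * 2));   // may be a partial final frame
        for (int s = 0; s < samples; ++s) {
            double mix = 0;
            for (int c = 0; c < channels; ++c) {
                int off = (s * channels + c) * 2;
                int lo = (unsigned char)bytes[off];
                int hi = (signed char)bytes[off + 1];  // high byte carries the sign
                mix += (hi * 256 + lo) / 32768.0;
            }
            circBuffer[cbIndex] = mix / channels;      // average the channels
            if (++cbIndex == (int)circBuffer.size())
                cbIndex = 0;                           // wrap the circular buffer
        }
        return true;
    }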
@@ -399,11 +307,11 @@
     /** Processes a frame of audio data by first computing the STFT with a
      * Hamming window, then mapping the frequency bins into a part-linear
      * part-logarithmic array, then computing the spectral flux
      * then (optionally) normalising and calculating onsets.
      */
-    protected void processFrame() {
+    void processFrame() {
         if (getFrame()) {
             for (int i = 0; i < fftSize; i++) {
                 reBuffer[i] = window[i] * circBuffer[cbIndex];
                 if (++cbIndex == fftSize)
                     cbIndex = 0;
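The loop above pulls one FFT frame out of the circular buffer, applying the analysis window as it copies. The comment also names the other two ingredients, the Hamming window and the spectral flux; a compact sketch of both, using the textbook definitions (BeatRoot's own scaling may differ):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Textbook Hamming window of length n.
    std::vector<double> hammingWindow(int n)
    {
        const double PI = 3.14159265358979323846;
        std::vector<double> w(n);
        for (int i = 0; i < n; ++i)
            w[i] = 0.54 - 0.46 * std::cos(2 * PI * i / (n - 1));
        return w;
    }

    // Copy one windowed frame out of the circular buffer, oldest sample first.
    void windowFrame(const std::vector<double> &window,
                     const std::vector<double> &circBuffer, int cbIndex,
                     std::vector<double> &reBuffer)
    {
        int n = (int)window.size();
        for (int i = 0; i < n; ++i) {
            reBuffer[i] = window[i] * circBuffer[cbIndex];
            if (++cbIndex == n)
                cbIndex = 0;                 // wrap, as in the loop above
        }
    }

    // Spectral flux: sum of the positive magnitude increases since the last frame.
    double spectralFluxFrame(const std::vector<double> &mag,
                             const std::vector<double> &prevMag)
    {
        double flux = 0;
        for (std::size_t i = 0; i < mag.size(); ++i)
            if (mag[i] > prevMag[i])
                flux += mag[i] - prevMag[i];
        return flux;
    }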
@@ -477,11 +385,11 @@
             }
         }
     } // processFrame()
 
     /** Processes a complete file of audio data. */
-    public void processFile() {
+    void processFile() {
         while (pcmInputStream != null) {
             // Profile.start(0);
             processFrame();
             // Profile.log(0);
             if (Thread.currentThread().isInterrupted()) {
@@ -550,11 +458,11 @@
     /** Reads a text file containing a list of whitespace-separated feature values.
      * Created for paper submitted to ICASSP'07.
      * @param fileName File containing the data
      * @return An array containing the feature values
      */
-    public static double[] getFeatures(String fileName) {
+    static double[] getFeatures(String fileName) {
         ArrayList<Double> l = new ArrayList<Double>();
         try {
             BufferedReader b = new BufferedReader(new FileReader(fileName));
             while (true) {
                 String s = b.readLine();
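The right-hand column still carries the Java body (BufferedReader, ArrayList<Double>) under a C++-style signature, so this routine is presumably still waiting to be ported. A sketch of the obvious C++ equivalent, with an assumed vector return type in place of the Java double[]:

    #include <fstream>
    #include <string>
    #include <vector>

    // Read every whitespace-separated numeric value from a text file.
    std::vector<double> getFeatures(const std::string &fileName)
    {
        std::vector<double> features;
        std::ifstream in(fileName.c_str());
        double value;
        while (in >> value)              // operator>> skips spaces and newlines alike
            features.push_back(value);
        return features;                 // empty if the file could not be opened
    }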
@@ -604,11 +512,11 @@
     /** Reads a file of feature values, treated as an onset detection function,
      * and finds peaks, which are stored in <code>onsetList</code> and <code>onsets</code>.
      * @param fileName The file of feature values
      * @param hopTime The spacing of feature values in time
      */
-    public void processFeatures(String fileName, double hopTime) {
+    void processFeatures(String fileName, double hopTime) {
         double hop = hopTime;
         double[] features = getFeatures(fileName);
         Peaks.normalise(features);
         LinkedList<Integer> peaks = Peaks.findPeaks(features, (int)Math.round(0.06 / hop), 0.35, 0.84, true);
         onsets = new double[peaks.size()];
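Here the detection function is normalised and peak-picked with a window of round(0.06 / hop) frames, about 60 ms at the default 10 ms hop, and the peak indices are then turned into onset times. The conversion itself falls outside the hunk, but the usual mapping is simply index * hop, as in this assumed sketch:

    #include <cstddef>
    #include <vector>

    // Convert peak-picked frame indices of a detection function sampled every
    // `hop` seconds into onset times in seconds (assumed reconstruction).
    std::vector<double> peakIndicesToTimes(const std::vector<int> &peaks, double hop)
    {
        std::vector<double> onsets(peaks.size());
        for (std::size_t i = 0; i < peaks.size(); ++i)
            onsets[i] = peaks[i] * hop;
        return onsets;
    }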
@@ -625,11 +533,11 @@
             onsetList.add(e);
         }
     } // processFeatures()
 
     /** Copies output of audio processing to the display panel. */
-    public void setDisplay(BeatTrackDisplay btd) {
+    void setDisplay(BeatTrackDisplay btd) {
         int energy2[] = new int[totalFrames*energyOversampleFactor];
         double time[] = new double[totalFrames*energyOversampleFactor];
         for (int i = 0; i < totalFrames*energyOversampleFactor; i++) {
             energy2[i] = (int) (energy[i] * 4 * energyOversampleFactor);
             time[i] = i * hopTime / energyOversampleFactor;