@@ -16,6 +16,10 @@
 #ifndef _BEATROOT_PROCESSOR_H_
 #define _BEATROOT_PROCESSOR_H_
 
+#include <vector>
+
+using std::vector;
+
 class BeatRootProcessor
 {
 protected:
@@ -88,21 +92,21 @@
     int totalFrames;
 
     /** Flag for enabling or disabling debugging output */
-    static bool debug = false;
+    static bool debug;
 
     /** Flag for suppressing all standard output messages except results. */
-    static bool silent = true;
+    static bool silent;
 
     /** RMS frame energy below this value results in the frame being
      * set to zero, so that normalisation does not have undesired
      * side-effects. */
-    static double silenceThreshold = 0.0004; //!!!??? energy of what? should not be static?
+    static double silenceThreshold; //!!!??? energy of what? should not be static?
 
     /** For dynamic range compression, this value is added to the log
      * magnitude in each frequency bin and any remaining negative
      * values are then set to zero.
      */
-    static double rangeThreshold = 10; //!!! sim
+    static double rangeThreshold; //!!! sim
 
     /** Determines method of normalisation. Values can be:<ul>
      * <li>0: no normalisation</li>
@@ -110,17 +114,17 @@
      * <li>2: normalisation by exponential average of frame energy</li>
      * </ul>
      */
-    static int normaliseMode = 2;
+    static int normaliseMode;
 
     /** Ratio between rate of sampling the signal energy (for the
      * amplitude envelope) and the hop size */
-    static int energyOversampleFactor = 2; //!!! not used?
+    static int energyOversampleFactor; //!!! not used?
 
 public:
 
     /** Constructor: note that streams are not opened until the input
      * file is set (see <code>setInputFile()</code>). */
-    AudioProcessor() {
+    BeatRootProcessor() {
         cbIndex = 0;
         frameRMS = 0;
         ltAverage = 0;
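
The comments in the two hunks above document how each frame is conditioned before onset detection: a frame whose RMS falls below silenceThreshold is zeroed, rangeThreshold is added to the log magnitude of every bin and any remaining negatives are clamped to zero, and normaliseMode selects the energy normalisation (mode 2, the removed default, uses an exponential average of frame energy, presumably the ltAverage member initialised above). A minimal standalone C++ sketch of that conditioning, assuming base-10 logs and the default values visible in the removed initialisers (0.0004 and 10); it illustrates the documented behaviour, not the literal processFrame() code:

    #include <cmath>
    #include <vector>

    // Gate near-silent frames, then compress dynamic range per bin:
    // out[k] = max(0, log10(mag[k]) + rangeThreshold), as the comments above
    // describe.  Assumed helper, not part of BeatRootProcessor.
    std::vector<double> conditionFrame(const std::vector<double> &mag,
                                       double frameRMS,
                                       double silenceThreshold = 0.0004,
                                       double rangeThreshold = 10)
    {
        std::vector<double> out(mag.size(), 0.0);
        if (frameRMS <= silenceThreshold) return out;   // treat frame as silence
        for (size_t k = 0; k < mag.size(); ++k) {
            double v = std::log10(mag[k] + 1e-12) + rangeThreshold;
            out[k] = (v > 0.0) ? v : 0.0;               // clamp remaining negatives
        }
        return out;
    }
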
@@ -129,106 +133,11 @@
         fftSize = 0;
         hopTime = 0.010; // DEFAULT, overridden with -h
         fftTime = 0.04644; // DEFAULT, overridden with -f
-        progressCallback = null;
-        stdIn = new BufferedReader(new InputStreamReader(System.in));
-        if (doOnsetPlot)
-            plot = new Plot();
     } // constructor
 
-    /** For debugging, outputs information about the AudioProcessor to
-     * standard error.
-     */
-    public void print() {
-        System.err.println(this);
-    } // print()
-
-    /** For interactive pause - wait for user to hit Enter */
-    public String readLine() {
-        try { return stdIn.readLine(); } catch (Exception e) { return null; }
-    } // readLine()
-
-    /** Gives some basic information about the audio being processed. */
-    public String toString() {
-        return "AudioProcessor\n" +
-            String.format("\tFile: %s (%3.1f kHz, %1d channels)\n",
-                audioFileName, sampleRate/1000, channels) +
-            String.format("\tHop / FFT sizes: %5.3f / %5.3f",
-                hopTime, hopTime * fftSize / hopSize);
-    } // toString()
-
-    /** Adds a link to the GUI component which shows the progress of matching.
-     * @param c the AudioProcessor representing the other performance
-     */
-    public void setProgressCallback(ProgressIndicator c) {
-        progressCallback = c;
-    } // setProgressCallback()
-
-    /** Sets up the streams and buffers for live audio input (CD quality).
-     * If any Exception is thrown within this method, it is caught, and any
-     * opened streams are closed, and <code>pcmInputStream</code> is set to
-     * <code>null</code>, indicating that the method did not complete
-     * successfully.
-     */
-    public void setLiveInput() {
-        try {
-            channels = 2;
-            sampleRate = 44100;
-            AudioFormat desiredFormat = new AudioFormat(
-                AudioFormat.Encoding.PCM_SIGNED, sampleRate, 16,
-                channels, channels * 2, sampleRate, false);
-            TargetDataLine tdl = AudioSystem.getTargetDataLine(desiredFormat);
-            tdl.open(desiredFormat, liveInputBufferSize);
-            pcmInputStream = new AudioInputStream(tdl);
-            audioFormat = pcmInputStream.getFormat();
-            init();
-            tdl.start();
-        } catch (Exception e) {
-            e.printStackTrace();
-            closeStreams(); // make sure it exits in a consistent state
-        }
-    } // setLiveInput()
-
-    /** Sets up the streams and buffers for audio file input.
-     * If any Exception is thrown within this method, it is caught, and any
-     * opened streams are closed, and <code>pcmInputStream</code> is set to
-     * <code>null</code>, indicating that the method did not complete
-     * successfully.
-     * @param fileName The path name of the input audio file.
-     */
-    public void setInputFile(String fileName) {
-        closeStreams(); // release previously allocated resources
-        audioFileName = fileName;
-        try {
-            if (audioFileName == null)
-                throw new Exception("No input file specified");
-            File audioFile = new File(audioFileName);
-            if (!audioFile.isFile())
-                throw new FileNotFoundException(
-                    "Requested file does not exist: " + audioFileName);
-            rawInputStream = AudioSystem.getAudioInputStream(audioFile);
-            audioFormat = rawInputStream.getFormat();
-            channels = audioFormat.getChannels();
-            sampleRate = audioFormat.getSampleRate();
-            pcmInputStream = rawInputStream;
-            if ((audioFormat.getEncoding()!=AudioFormat.Encoding.PCM_SIGNED) ||
-                (audioFormat.getFrameSize() != channels * 2) ||
-                audioFormat.isBigEndian()) {
-                AudioFormat desiredFormat = new AudioFormat(
-                    AudioFormat.Encoding.PCM_SIGNED, sampleRate, 16,
-                    channels, channels * 2, sampleRate, false);
-                pcmInputStream = AudioSystem.getAudioInputStream(desiredFormat,
-                    rawInputStream);
-                audioFormat = desiredFormat;
-            }
-            init();
-        } catch (Exception e) {
-            e.printStackTrace();
-            closeStreams(); // make sure it exits in a consistent state
-        }
-    } // setInputFile()
-
+protected:
     /** Allocates memory for arrays, based on parameter settings */
-    protected void init() {
+    void init() {
         hopSize = (int) Math.round(sampleRate * hopTime);
         fftSize = (int) Math.round(Math.pow(2,
             Math.round( Math.log(fftTime * sampleRate) / Math.log(2))));
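
With the defaults set in the constructor (hopTime = 0.010 s, fftTime = 0.04644 s) and a 44100 Hz input, the two statements above give hopSize = round(44100 * 0.010) = 441 samples and fftSize = 2^round(log2(0.04644 * 44100)) = 2^round(log2(2048.004)) = 2048 samples, i.e. the FFT length is snapped to the nearest power of two of the requested window duration. A small self-contained C++ check of that arithmetic, mirroring the expressions above:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        float sampleRate = 44100;
        double hopTime = 0.010, fftTime = 0.04644;   // constructor defaults
        int hopSize = (int)std::lround(sampleRate * hopTime);
        int fftSize = (int)std::lround(std::pow(2.0,
            std::lround(std::log(fftTime * sampleRate) / std::log(2.0))));
        std::printf("hopSize = %d, fftSize = %d\n", hopSize, fftSize);  // 441, 2048
        return 0;
    }
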
@@ -263,11 +172,10 @@
         cbIndex = 0;
         frameRMS = 0;
         ltAverage = 0;
-        progressCallback = null;
     } // init()
 
     /** Closes the input stream(s) associated with this object. */
-    public void closeStreams() {
+    void closeStreams() {
         if (pcmInputStream != null) {
             try {
                 pcmInputStream.close();
@@ -290,7 +198,7 @@
      * is the energy is summed into the comparison bins. See also
      * processFrame()
      */
-    protected void makeFreqMap(int fftSize, float sampleRate) {
+    void makeFreqMap(int fftSize, float sampleRate) {
         freqMap = new int[fftSize/2+1];
         double binWidth = sampleRate / fftSize;
         int crossoverBin = (int)(2 / (Math.pow(2, 1/12.0) - 1));
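
The comment and the crossover computation above describe a part-linear, part-logarithmic frequency axis: FFT bins are mapped one-to-one up to crossoverBin = 2 / (2^(1/12) - 1) ≈ 33, roughly the point above which one semitone spans more than two FFT bins, and higher bins are pooled into semitone-wide bins with their energy summed. A hedged C++ reconstruction of such a map; the offset arithmetic of the real makeFreqMap() may differ in detail:

    #include <cmath>
    #include <vector>

    // Sketch of the bin map described above: identity below the crossover,
    // semitone-wide pooling (MIDI-style numbering assumed) above it.
    std::vector<int> makeFreqMapSketch(int fftSize, float sampleRate)
    {
        std::vector<int> freqMap(fftSize / 2 + 1);
        double binWidth = sampleRate / fftSize;
        int crossoverBin = (int)(2 / (std::pow(2, 1 / 12.0) - 1));   // ~33
        int crossoverMidi = (int)std::lround(
            std::log(crossoverBin * binWidth / 440.0) / std::log(2.0) * 12 + 69);
        int i = 0;
        for (; i <= crossoverBin && i <= fftSize / 2; ++i)
            freqMap[i] = i;                                  // one-to-one region
        for (; i <= fftSize / 2; ++i) {
            double midi = std::log(i * binWidth / 440.0) / std::log(2.0) * 12 + 69;
            if (midi > 127) midi = 127;
            freqMap[i] = crossoverBin + (int)std::lround(midi) - crossoverMidi;
        }
        return freqMap;
    }
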
@@ -312,7 +220,7 @@
     /** Calculates the weighted phase deviation onset detection function.
      * Not used.
      * TODO: Test the change to WPD fn */
-    protected void weightedPhaseDeviation() {
+    void weightedPhaseDeviation() {
         if (frameCount < 2)
             phaseDeviation[frameCount] = 0;
         else {
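
Weighted phase deviation, in Dixon's onset-detection terminology, weights the second difference of each bin's phase by the bin's magnitude and averages over bins; the frameCount < 2 guard above reflects that two earlier frames are needed for the second difference. A C++ sketch of that definition with assumed buffer names; the class's own weightedPhaseDeviation() works on its internal magnitude/phase arrays instead:

    #include <cmath>
    #include <vector>

    // Wrap a phase difference into the range [-pi, pi).
    static double princarg(double phi)
    {
        const double twoPi = 2.0 * 3.14159265358979323846;
        return phi - twoPi * std::floor(phi / twoPi + 0.5);
    }

    // Weighted phase deviation of one frame: mean over bins of
    // |X_k| * |second difference of the phase in bin k|.
    double weightedPhaseDeviationSketch(const std::vector<double> &mag,
                                        const std::vector<double> &phase,         // frame n
                                        const std::vector<double> &prevPhase,     // frame n-1
                                        const std::vector<double> &prevPrevPhase) // frame n-2
    {
        if (mag.empty()) return 0;
        double sum = 0;
        for (size_t k = 0; k < mag.size(); ++k) {
            double d2 = princarg(phase[k] - 2 * prevPhase[k] + prevPrevPhase[k]);
            sum += mag[k] * std::fabs(d2);
        }
        return sum / mag.size();
    }
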
@@ -337,7 +245,7 @@
      * is read. If a complete frame cannot be read, the InputStream is set
      * to null.
      */
-    public boolean getFrame() {
+    bool getFrame() {
         if (pcmInputStream == null)
             return false;
         try {
@@ -401,7 +309,7 @@
      * part-logarithmic array, then computing the spectral flux
      * then (optionally) normalising and calculating onsets.
      */
-    protected void processFrame() {
+    void processFrame() {
         if (getFrame()) {
             for (int i = 0; i < fftSize; i++) {
                 reBuffer[i] = window[i] * circBuffer[cbIndex];
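
Per the comment above, the onset detection function computed in processFrame() is the spectral flux of the part-logarithmic magnitude array. In its usual half-wave-rectified form this is the sum of positive bin-wise increases from one frame to the next; a minimal C++ sketch, with the two mapped magnitude frames passed in explicitly rather than read from the class's members:

    #include <algorithm>
    #include <vector>

    // Half-wave-rectified spectral flux between consecutive magnitude frames:
    // only bins whose energy increased contribute.
    double spectralFluxSketch(const std::vector<double> &prevFrame,
                              const std::vector<double> &newFrame)
    {
        double flux = 0;
        const size_t n = std::min(prevFrame.size(), newFrame.size());
        for (size_t k = 0; k < n; ++k)
            flux += std::max(0.0, newFrame[k] - prevFrame[k]);
        return flux;
    }
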
@@ -479,7 +387,7 @@
     } // processFrame()
 
     /** Processes a complete file of audio data. */
-    public void processFile() {
+    void processFile() {
         while (pcmInputStream != null) {
             // Profile.start(0);
             processFrame();
@@ -552,7 +460,7 @@
      * @param fileName File containing the data
      * @return An array containing the feature values
      */
-    public static double[] getFeatures(String fileName) {
+    static double[] getFeatures(String fileName) {
         ArrayList<Double> l = new ArrayList<Double>();
         try {
             BufferedReader b = new BufferedReader(new FileReader(fileName));
@@ -606,7 +514,7 @@
      * @param fileName The file of feature values
      * @param hopTime The spacing of feature values in time
      */
-    public void processFeatures(String fileName, double hopTime) {
+    void processFeatures(String fileName, double hopTime) {
         double hop = hopTime;
         double[] features = getFeatures(fileName);
         Peaks.normalise(features);
@@ -627,7 +535,7 @@
     } // processFeatures()
 
     /** Copies output of audio processing to the display panel. */
-    public void setDisplay(BeatTrackDisplay btd) {
+    void setDisplay(BeatTrackDisplay btd) {
         int energy2[] = new int[totalFrames*energyOversampleFactor];
         double time[] = new double[totalFrames*energyOversampleFactor];
         for (int i = 0; i < totalFrames*energyOversampleFactor; i++) {
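
setDisplay() sizes its arrays as totalFrames * energyOversampleFactor, matching the earlier comment that energyOversampleFactor is the ratio between the energy-envelope sampling rate and the hop rate: the envelope holds energyOversampleFactor samples per hop, so consecutive samples are hopTime / energyOversampleFactor seconds apart. A tiny C++ illustration of that time axis (names here are illustrative, not members of the class):

    #include <vector>

    // Time stamps for an energy envelope sampled energyOversampleFactor
    // times per analysis hop of hopTime seconds.
    std::vector<double> envelopeTimes(int totalFrames,
                                      int energyOversampleFactor,
                                      double hopTime)
    {
        std::vector<double> t(totalFrames * energyOversampleFactor);
        for (size_t i = 0; i < t.size(); ++i)
            t[i] = i * hopTime / energyOversampleFactor;
        return t;
    }
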