Revision 2:7d4e6b1ff3d1 BeatRootProcessor.h
| BeatRootProcessor.h | ||
|---|---|---|
| 16 | 16 |
#ifndef _BEATROOT_PROCESSOR_H_ |
| 17 | 17 |
#define _BEATROOT_PROCESSOR_H_ |
| 18 | 18 |
|
| 19 |
#include <vector> |
|
| 20 |
|
|
| 21 |
using std::vector; |
|
| 22 |
|
|
| 19 | 23 |
class BeatRootProcessor |
| 20 | 24 |
{
|
| 21 | 25 |
protected: |
| ... | ... | |
| 88 | 92 |
int totalFrames; |
| 89 | 93 |
|
| 90 | 94 |
/** Flag for enabling or disabling debugging output */ |
| 91 |
static bool debug = false;
|
|
| 95 |
static bool debug; |
|
| 92 | 96 |
|
| 93 | 97 |
/** Flag for suppressing all standard output messages except results. */ |
| 94 |
static bool silent = true;
|
|
| 98 |
static bool silent; |
|
| 95 | 99 |
|
| 96 | 100 |
/** RMS frame energy below this value results in the frame being |
| 97 | 101 |
* set to zero, so that normalisation does not have undesired |
| 98 | 102 |
* side-effects. */ |
| 99 |
static double silenceThreshold = 0.0004; //!!!??? energy of what? should not be static?
|
|
| 103 |
static double silenceThreshold; //!!!??? energy of what? should not be static? |
|
| 100 | 104 |
|
| 101 | 105 |
/** For dynamic range compression, this value is added to the log |
| 102 | 106 |
* magnitude in each frequency bin and any remaining negative |
| 103 | 107 |
* values are then set to zero. |
| 104 | 108 |
*/ |
| 105 |
static double rangeThreshold = 10; //!!! sim
|
|
| 109 |
static double rangeThreshold; //!!! sim |
|
| 106 | 110 |
|
| 107 | 111 |
/** Determines method of normalisation. Values can be:<ul> |
| 108 | 112 |
* <li>0: no normalisation</li> |
| ... | ... | |
| 110 | 114 |
* <li>2: normalisation by exponential average of frame energy</li> |
| 111 | 115 |
* </ul> |
| 112 | 116 |
*/ |
| 113 |
static int normaliseMode = 2;
|
|
| 117 |
static int normaliseMode; |
|
| 114 | 118 |
|
| 115 | 119 |
/** Ratio between rate of sampling the signal energy (for the |
| 116 | 120 |
* amplitude envelope) and the hop size */ |
| 117 |
static int energyOversampleFactor = 2; //!!! not used?
|
|
| 121 |
static int energyOversampleFactor; //!!! not used? |
|
| 118 | 122 |
|
| 119 | 123 |
public: |
| 120 | 124 |
|
| 121 | 125 |
/** Constructor: note that streams are not opened until the input |
| 122 | 126 |
* file is set (see <code>setInputFile()</code>). */ |
| 123 |
AudioProcessor() {
|
|
| 127 |
BeatRootProcessor() {
|
|
| 124 | 128 |
cbIndex = 0; |
| 125 | 129 |
frameRMS = 0; |
| 126 | 130 |
ltAverage = 0; |
| ... | ... | |
| 129 | 133 |
fftSize = 0; |
| 130 | 134 |
hopTime = 0.010; // DEFAULT, overridden with -h |
| 131 | 135 |
fftTime = 0.04644; // DEFAULT, overridden with -f |
| 132 |
progressCallback = null; |
|
| 133 |
stdIn = new BufferedReader(new InputStreamReader(System.in)); |
|
| 134 |
if (doOnsetPlot) |
|
| 135 |
plot = new Plot(); |
|
| 136 | 136 |
} // constructor |
| 137 | 137 |
|
| 138 |
/** For debugging, outputs information about the AudioProcessor to |
|
| 139 |
* standard error. |
|
| 140 |
*/ |
|
| 141 |
public void print() {
|
|
| 142 |
System.err.println(this); |
|
| 143 |
} // print() |
|
| 144 |
|
|
| 145 |
/** For interactive pause - wait for user to hit Enter */ |
|
| 146 |
public String readLine() {
|
|
| 147 |
try { return stdIn.readLine(); } catch (Exception e) { return null; }
|
|
| 148 |
} // readLine() |
|
| 149 |
|
|
| 150 |
/** Gives some basic information about the audio being processed. */ |
|
| 151 |
public String toString() {
|
|
| 152 |
return "AudioProcessor\n" + |
|
| 153 |
String.format("\tFile: %s (%3.1f kHz, %1d channels)\n",
|
|
| 154 |
audioFileName, sampleRate/1000, channels) + |
|
| 155 |
String.format("\tHop / FFT sizes: %5.3f / %5.3f",
|
|
| 156 |
hopTime, hopTime * fftSize / hopSize); |
|
| 157 |
} // toString() |
|
| 158 |
|
|
| 159 |
/** Adds a link to the GUI component which shows the progress of matching. |
|
| 160 |
* @param c the ProgressIndicator to store as the progress callback |
|
| 161 |
*/ |
|
| 162 |
public void setProgressCallback(ProgressIndicator c) {
|
|
| 163 |
progressCallback = c; |
|
| 164 |
} // setProgressCallback() |
|
| 165 |
|
|
| 166 |
/** Sets up the streams and buffers for live audio input (CD quality). |
|
| 167 |
* If any Exception is thrown within this method, it is caught, and any |
|
| 168 |
* opened streams are closed, and <code>pcmInputStream</code> is set to |
|
| 169 |
* <code>null</code>, indicating that the method did not complete |
|
| 170 |
* successfully. |
|
| 171 |
*/ |
|
| 172 |
public void setLiveInput() {
|
|
| 173 |
try {
|
|
| 174 |
channels = 2; |
|
| 175 |
sampleRate = 44100; |
|
| 176 |
AudioFormat desiredFormat = new AudioFormat( |
|
| 177 |
AudioFormat.Encoding.PCM_SIGNED, sampleRate, 16, |
|
| 178 |
channels, channels * 2, sampleRate, false); |
|
| 179 |
TargetDataLine tdl = AudioSystem.getTargetDataLine(desiredFormat); |
|
| 180 |
tdl.open(desiredFormat, liveInputBufferSize); |
|
| 181 |
pcmInputStream = new AudioInputStream(tdl); |
|
| 182 |
audioFormat = pcmInputStream.getFormat(); |
|
| 183 |
init(); |
|
| 184 |
tdl.start(); |
|
| 185 |
} catch (Exception e) {
|
|
| 186 |
e.printStackTrace(); |
|
| 187 |
closeStreams(); // make sure it exits in a consistent state |
|
| 188 |
} |
|
| 189 |
} // setLiveInput() |
|
| 190 |
|
|
| 191 |
/** Sets up the streams and buffers for audio file input. |
|
| 192 |
* If any Exception is thrown within this method, it is caught, and any |
|
| 193 |
* opened streams are closed, and <code>pcmInputStream</code> is set to |
|
| 194 |
* <code>null</code>, indicating that the method did not complete |
|
| 195 |
* successfully. |
|
| 196 |
* @param fileName The path name of the input audio file. |
|
| 197 |
*/ |
|
| 198 |
public void setInputFile(String fileName) {
|
|
| 199 |
closeStreams(); // release previously allocated resources |
|
| 200 |
audioFileName = fileName; |
|
| 201 |
try {
|
|
| 202 |
if (audioFileName == null) |
|
| 203 |
throw new Exception("No input file specified");
|
|
| 204 |
File audioFile = new File(audioFileName); |
|
| 205 |
if (!audioFile.isFile()) |
|
| 206 |
throw new FileNotFoundException( |
|
| 207 |
"Requested file does not exist: " + audioFileName); |
|
| 208 |
rawInputStream = AudioSystem.getAudioInputStream(audioFile); |
|
| 209 |
audioFormat = rawInputStream.getFormat(); |
|
| 210 |
channels = audioFormat.getChannels(); |
|
| 211 |
sampleRate = audioFormat.getSampleRate(); |
|
| 212 |
pcmInputStream = rawInputStream; |
|
| 213 |
if ((audioFormat.getEncoding()!=AudioFormat.Encoding.PCM_SIGNED) || |
|
| 214 |
(audioFormat.getFrameSize() != channels * 2) || |
|
| 215 |
audioFormat.isBigEndian()) {
|
|
| 216 |
AudioFormat desiredFormat = new AudioFormat( |
|
| 217 |
AudioFormat.Encoding.PCM_SIGNED, sampleRate, 16, |
|
| 218 |
channels, channels * 2, sampleRate, false); |
|
| 219 |
pcmInputStream = AudioSystem.getAudioInputStream(desiredFormat, |
|
| 220 |
rawInputStream); |
|
| 221 |
audioFormat = desiredFormat; |
|
| 222 |
} |
|
| 223 |
init(); |
|
| 224 |
} catch (Exception e) {
|
|
| 225 |
e.printStackTrace(); |
|
| 226 |
closeStreams(); // make sure it exits in a consistent state |
|
| 227 |
} |
|
| 228 |
} // setInputFile() |
|
| 229 |
|
|
| 138 |
protected: |
|
| 230 | 139 |
/** Allocates memory for arrays, based on parameter settings */ |
| 231 |
protected void init() {
|
|
| 140 |
void init() {
|
|
| 232 | 141 |
hopSize = (int) Math.round(sampleRate * hopTime); |
| 233 | 142 |
fftSize = (int) Math.round(Math.pow(2, |
| 234 | 143 |
Math.round( Math.log(fftTime * sampleRate) / Math.log(2)))); |
| ... | ... | |
| 263 | 172 |
cbIndex = 0; |
| 264 | 173 |
frameRMS = 0; |
| 265 | 174 |
ltAverage = 0; |
| 266 |
progressCallback = null; |
|
| 267 | 175 |
} // init() |
| 268 | 176 |
|
| 269 | 177 |
/** Closes the input stream(s) associated with this object. */ |
| 270 |
public void closeStreams() {
|
|
| 178 |
void closeStreams() {
|
|
| 271 | 179 |
if (pcmInputStream != null) {
|
| 272 | 180 |
try {
|
| 273 | 181 |
pcmInputStream.close(); |
| ... | ... | |
| 290 | 198 |
* is the energy is summed into the comparison bins. See also |
| 291 | 199 |
* processFrame() |
| 292 | 200 |
*/ |
| 293 |
protected void makeFreqMap(int fftSize, float sampleRate) {
|
|
| 201 |
void makeFreqMap(int fftSize, float sampleRate) {
|
|
| 294 | 202 |
freqMap = new int[fftSize/2+1]; |
| 295 | 203 |
double binWidth = sampleRate / fftSize; |
| 296 | 204 |
int crossoverBin = (int)(2 / (Math.pow(2, 1/12.0) - 1)); |
| ... | ... | |
| 312 | 220 |
/** Calculates the weighted phase deviation onset detection function. |
| 313 | 221 |
* Not used. |
| 314 | 222 |
* TODO: Test the change to WPD fn */ |
| 315 |
protected void weightedPhaseDeviation() {
|
|
| 223 |
void weightedPhaseDeviation() {
|
|
| 316 | 224 |
if (frameCount < 2) |
| 317 | 225 |
phaseDeviation[frameCount] = 0; |
| 318 | 226 |
else {
|
| ... | ... | |
| 337 | 245 |
* is read. If a complete frame cannot be read, the InputStream is set |
| 338 | 246 |
* to null. |
| 339 | 247 |
*/ |
| 340 |
public boolean getFrame() {
|
|
| 248 |
bool getFrame() {
|
|
| 341 | 249 |
if (pcmInputStream == null) |
| 342 | 250 |
return false; |
| 343 | 251 |
try {
|
| ... | ... | |
| 401 | 309 |
* part-logarithmic array, then computing the spectral flux |
| 402 | 310 |
* then (optionally) normalising and calculating onsets. |
| 403 | 311 |
*/ |
| 404 |
protected void processFrame() {
|
|
| 312 |
void processFrame() {
|
|
| 405 | 313 |
if (getFrame()) {
|
| 406 | 314 |
for (int i = 0; i < fftSize; i++) {
|
| 407 | 315 |
reBuffer[i] = window[i] * circBuffer[cbIndex]; |
| ... | ... | |
| 479 | 387 |
} // processFrame() |
| 480 | 388 |
|
| 481 | 389 |
/** Processes a complete file of audio data. */ |
| 482 |
public void processFile() {
|
|
| 390 |
void processFile() {
|
|
| 483 | 391 |
while (pcmInputStream != null) {
|
| 484 | 392 |
// Profile.start(0); |
| 485 | 393 |
processFrame(); |
| ... | ... | |
| 552 | 460 |
* @param fileName File containing the data |
| 553 | 461 |
* @return An array containing the feature values |
| 554 | 462 |
*/ |
| 555 |
public static double[] getFeatures(String fileName) {
|
|
| 463 |
static double[] getFeatures(String fileName) {
|
|
| 556 | 464 |
ArrayList<Double> l = new ArrayList<Double>(); |
| 557 | 465 |
try {
|
| 558 | 466 |
BufferedReader b = new BufferedReader(new FileReader(fileName)); |
| ... | ... | |
| 606 | 514 |
* @param fileName The file of feature values |
| 607 | 515 |
* @param hopTime The spacing of feature values in time |
| 608 | 516 |
*/ |
| 609 |
public void processFeatures(String fileName, double hopTime) {
|
|
| 517 |
void processFeatures(String fileName, double hopTime) {
|
|
| 610 | 518 |
double hop = hopTime; |
| 611 | 519 |
double[] features = getFeatures(fileName); |
| 612 | 520 |
Peaks.normalise(features); |
| ... | ... | |
| 627 | 535 |
} // processFeatures() |
| 628 | 536 |
|
| 629 | 537 |
/** Copies output of audio processing to the display panel. */ |
| 630 |
public void setDisplay(BeatTrackDisplay btd) {
|
|
| 538 |
void setDisplay(BeatTrackDisplay btd) {
|
|
| 631 | 539 |
int energy2[] = new int[totalFrames*energyOversampleFactor]; |
| 632 | 540 |
double time[] = new double[totalFrames*energyOversampleFactor]; |
| 633 | 541 |
for (int i = 0; i < totalFrames*energyOversampleFactor; i++) {
|
Also available in: Unified diff