comparison BeatRootProcessor.h @ 10:1c1e98cd1b2e

Fixes so as to run and, in theory, return results without crashing -- still doesn't actually produce meaningful data though
author Chris Cannam
date Fri, 30 Sep 2011 23:08:42 +0100
parents 4f6626f9ffac
children 59520cd6abac
comparison
equal deleted inserted replaced
9:4f6626f9ffac 10:1c1e98cd1b2e
49 int hopSize; 49 int hopSize;
50 50
51 /** The size of an FFT frame in samples (see <code>fftTime</code>) */ 51 /** The size of an FFT frame in samples (see <code>fftTime</code>) */
52 int fftSize; 52 int fftSize;
53 53
54 /** The number of overlapping frames of audio data which have been read. */
55 int frameCount;
56
57 /** RMS amplitude of the current frame. */
58 double frameRMS;
59
60 /** Long term average frame energy (in frequency domain representation). */
61 double ltAverage;
62
63 /** Spectral flux onset detection function, indexed by frame. */ 54 /** Spectral flux onset detection function, indexed by frame. */
64 vector<double> spectralFlux; 55 vector<double> spectralFlux;
65 56
66 /** A mapping function for mapping FFT bins to final frequency bins. 57 /** A mapping function for mapping FFT bins to final frequency bins.
67 * The mapping is linear (1-1) until the resolution reaches 2 points per 58 * The mapping is linear (1-1) until the resolution reaches 2 points per
77 int freqMapSize; 68 int freqMapSize;
78 69
79 /** The magnitude spectrum of the most recent frame. Used for 70 /** The magnitude spectrum of the most recent frame. Used for
80 * calculating the spectral flux. */ 71 * calculating the spectral flux. */
81 vector<double> prevFrame; 72 vector<double> prevFrame;
82
83 /** The magnitude spectrum of the current frame. */
84 vector<double> newFrame;
85 73
86 /** The magnitude spectra of all frames, used for plotting the spectrogram. */
87 vector<vector<double> > frames; //!!! do we need this? much cheaper to lose it if we don't
88
89 /** The RMS energy of all frames. */
90 // vector<double> energy; //!!! unused in beat tracking?
91
92 /** The estimated onset times from peak-picking the onset 74 /** The estimated onset times from peak-picking the onset
93 * detection function(s). */ 75 * detection function(s). */
94 vector<double> onsets; 76 vector<double> onsets;
95 77
96 /** The estimated onset times and their saliences. */ 78 /** The estimated onset times and their saliences. */
97 EventList onsetList; 79 EventList onsetList;
98 80
99 /** Flag for suppressing all standard output messages except results. */ 81 /** Flag for suppressing all standard output messages except results. */
100 static bool silent; 82 static bool silent;
101 83
102 /** RMS frame energy below this value results in the frame being
103 * set to zero, so that normalisation does not have undesired
104 * side-effects. */
105 static double silenceThreshold; //!!!??? energy of what? should not be static?
106
107 /** For dynamic range compression, this value is added to the log
108 * magnitude in each frequency bin and any remaining negative
109 * values are then set to zero.
110 */
111 static double rangeThreshold; //!!! sim
112
113 /** Determines method of normalisation. Values can be:<ul>
114 * <li>0: no normalisation</li>
115 * <li>1: normalisation by current frame energy</li>
116 * <li>2: normalisation by exponential average of frame energy</li>
117 * </ul>
118 */
119 static int normaliseMode;
120
121 /** Ratio between rate of sampling the signal energy (for the
122 * amplitude envelope) and the hop size */
123 // static int energyOversampleFactor; //!!! not used?
124
125 public: 84 public:
126 85
127 /** Constructor: note that streams are not opened until the input 86 /** Constructor: note that streams are not opened until the input
128 * file is set (see <code>setInputFile()</code>). */ 87 * file is set (see <code>setInputFile()</code>). */
129 BeatRootProcessor(float sr) : 88 BeatRootProcessor(float sr) :
130 sampleRate(sr) { 89 sampleRate(sr) {
131 frameRMS = 0;
132 ltAverage = 0;
133 frameCount = 0;
134 hopSize = 0; 90 hopSize = 0;
135 fftSize = 0; 91 fftSize = 0;
136 hopTime = 0.010; 92 hopTime = 0.010;
137 fftTime = 0.04644; 93 fftTime = 0.04644;
138 hopSize = lrint(sampleRate * hopTime); 94 hopSize = lrint(sampleRate * hopTime);
141 97
142 void reset() { 98 void reset() {
143 init(); 99 init();
144 } 100 }
145 101
102 /** Processes a frame of frequency-domain audio data by mapping
103 * the frequency bins into a part-linear part-logarithmic array,
104 * then computing the spectral flux then (optionally) normalising
105 * and calculating onsets.
106 */
107 void processFrame(const float *const *inputBuffers) {
108 double flux = 0;
109 for (int i = 0; i <= fftSize/2; i++) {
110 double mag = sqrt(inputBuffers[0][i*2] * inputBuffers[0][i*2] +
111 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]);
112 if (mag > prevFrame[i]) flux += mag - prevFrame[i];
113 prevFrame[i] = mag;
114 }
115
116 spectralFlux.push_back(flux);
117
118 } // processFrame()
119
120 /** Tracks beats once all frames have been processed by processFrame
121 */
122 EventList beatTrack() {
123
124 double hop = hopTime;
125 Peaks::normalise(spectralFlux);
126 vector<int> peaks = Peaks::findPeaks(spectralFlux, (int)lrint(0.06 / hop), 0.35, 0.84, true);
127 onsets.clear();
128 onsets.resize(peaks.size(), 0);
129 vector<int>::iterator it = peaks.begin();
130 onsetList.clear();
131 double minSalience = Peaks::min(spectralFlux);
132 for (int i = 0; i < onsets.size(); i++) {
133 int index = *it;
134 ++it;
135 onsets[i] = index * hop;
136 Event e = BeatTracker::newBeat(onsets[i], 0);
137 // if (debug)
138 // System.err.printf("Onset: %8.3f %8.3f %8.3f\n",
139 // onsets[i], energy[index], slope[index]);
140 // e.salience = slope[index]; // or combination of energy + slope??
141 // Note that salience must be non-negative or the beat tracking system fails!
142 e.salience = spectralFlux[index] - minSalience;
143 onsetList.push_back(e);
144 }
145
146 return BeatTracker::beatTrack(onsetList);
147
148 } // processFile()
149
146 protected: 150 protected:
147 /** Allocates memory for arrays, based on parameter settings */ 151 /** Allocates memory for arrays, based on parameter settings */
148 void init() { 152 void init() {
149 makeFreqMap(fftSize, sampleRate); 153 makeFreqMap(fftSize, sampleRate);
150 prevFrame.clear(); 154 prevFrame.clear();
151 for (int i = 0; i < freqMapSize; i++) prevFrame.push_back(0); 155 for (int i = 0; i <= fftSize/2; i++) prevFrame.push_back(0);
152 frameCount = 0;
153 frameRMS = 0;
154 ltAverage = 0;
155 spectralFlux.clear(); 156 spectralFlux.clear();
156 } // init() 157 } // init()
157 158
158 /** Creates a map of FFT frequency bins to comparison bins. 159 /** Creates a map of FFT frequency bins to comparison bins.
159 * Where the spacing of FFT bins is less than 0.5 semitones, the mapping is 160 * Where the spacing of FFT bins is less than 0.5 semitones, the mapping is
178 freqMap[i++] = crossoverBin + (int)lrint(midi) - crossoverMidi; 179 freqMap[i++] = crossoverBin + (int)lrint(midi) - crossoverMidi;
179 } 180 }
180 freqMapSize = freqMap[i-1] + 1; 181 freqMapSize = freqMap[i-1] + 1;
181 } // makeFreqMap() 182 } // makeFreqMap()
182 183
183 /** Processes a frame of frequency-domain audio data by mapping
184 * the frequency bins into a part-linear part-logarithmic array,
185 * then computing the spectral flux then (optionally) normalising
186 * and calculating onsets.
187 */
188 void processFrame(const float *const *inputBuffers) {
189 newFrame.clear();
190 for (int i = 0; i < freqMapSize; i++) {
191 newFrame.push_back(0);
192 }
193 double flux = 0;
194 for (int i = 0; i <= fftSize/2; i++) {
195 double mag = sqrt(inputBuffers[0][i*2] * inputBuffers[0][i*2] +
196 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]);
197 if (mag > prevFrame[i]) flux += mag - prevFrame[i];
198 prevFrame[i] = mag;
199 newFrame[freqMap[i]] += mag;
200 }
201 spectralFlux.push_back(flux);
202 frames.push_back(newFrame);
203 // for (int i = 0; i < freqMapSize; i++)
204 // [frameCount][i] = newFrame[i];
205 /*
206 int index = cbIndex - (fftSize - hopSize);
207 if (index < 0)
208 index += fftSize;
209 int sz = (fftSize - hopSize) / energyOversampleFactor;
210 for (int j = 0; j < energyOversampleFactor; j++) {
211 double newEnergy = 0;
212 for (int i = 0; i < sz; i++) {
213 newEnergy += circBuffer[index] * circBuffer[index];
214 if (++index == fftSize)
215 index = 0;
216 }
217 energy[frameCount * energyOversampleFactor + j] =
218 newEnergy / sz <= 1e-6? 0: log(newEnergy / sz) + 13.816;
219 }*/
220
221 double decay = frameCount >= 200? 0.99:
222 (frameCount < 100? 0: (frameCount - 100) / 100.0);
223
224 //!!! uh-oh -- frameRMS has not been calculated (it came from time-domain signal) -- will always appear silent
225
226 if (ltAverage == 0)
227 ltAverage = frameRMS;
228 else
229 ltAverage = ltAverage * decay + frameRMS * (1.0 - decay);
230 if (frameRMS <= silenceThreshold)
231 for (int i = 0; i < freqMapSize; i++)
232 frames[frameCount][i] = 0;
233 else {
234 if (normaliseMode == 1)
235 for (int i = 0; i < freqMapSize; i++)
236 frames[frameCount][i] /= frameRMS;
237 else if (normaliseMode == 2)
238 for (int i = 0; i < freqMapSize; i++)
239 frames[frameCount][i] /= ltAverage;
240 for (int i = 0; i < freqMapSize; i++) {
241 frames[frameCount][i] = log(frames[frameCount][i]) + rangeThreshold;
242 if (frames[frameCount][i] < 0)
243 frames[frameCount][i] = 0;
244 }
245 }
246 // weightedPhaseDeviation();
247 // if (debug)
248 // System.err.printf("PhaseDev: t=%7.3f phDev=%7.3f RMS=%7.3f\n",
249 // frameCount * hopTime,
250 // phaseDeviation[frameCount],
251 // frameRMS);
252 frameCount++;
253 } // processFrame()
254
255 /** Processes a complete file of audio data. */
256 void processFile() {
257 /*
258 while (pcmInputStream != null) {
259 // Profile.start(0);
260 processFrame();
261 // Profile.log(0);
262 if (Thread.currentThread().isInterrupted()) {
263 System.err.println("info: INTERRUPTED in processFile()");
264 return;
265 }
266 }
267 */
268 // double[] x1 = new double[phaseDeviation.length];
269 // for (int i = 0; i < x1.length; i++) {
270 // x1[i] = i * hopTime;
271 // phaseDeviation[i] = (phaseDeviation[i] - 0.4) * 100;
272 // }
273 // double[] x2 = new double[energy.length];
274 // for (int i = 0; i < x2.length; i++)
275 // x2[i] = i * hopTime / energyOversampleFactor;
276 // // plot.clear();
277 // plot.addPlot(x1, phaseDeviation, Color.green, 7);
278 // plot.addPlot(x2, energy, Color.red, 7);
279 // plot.setTitle("Test phase deviation");
280 // plot.fitAxes();
281
282 // double[] slope = new double[energy.length];
283 // double hop = hopTime / energyOversampleFactor;
284 // Peaks.getSlope(energy, hop, 15, slope);
285 // vector<Integer> peaks = Peaks.findPeaks(slope, (int)lrint(0.06 / hop), 10);
286
287 double hop = hopTime;
288 Peaks::normalise(spectralFlux);
289 vector<int> peaks = Peaks::findPeaks(spectralFlux, (int)lrint(0.06 / hop), 0.35, 0.84, true);
290 onsets.clear();
291 onsets.resize(peaks.size(), 0);
292 vector<int>::iterator it = peaks.begin();
293 onsetList.clear();
294 double minSalience = Peaks::min(spectralFlux);
295 for (int i = 0; i < onsets.size(); i++) {
296 int index = *it;
297 ++it;
298 onsets[i] = index * hop;
299 Event e = BeatTracker::newBeat(onsets[i], 0);
300 // if (debug)
301 // System.err.printf("Onset: %8.3f %8.3f %8.3f\n",
302 // onsets[i], energy[index], slope[index]);
303 // e.salience = slope[index]; // or combination of energy + slope??
304 // Note that salience must be non-negative or the beat tracking system fails!
305 e.salience = spectralFlux[index] - minSalience;
306 onsetList.push_back(e);
307 }
308
309 //!!! This onsetList is then fed in to BeatTrackDisplay::beatTrack
310
311 } // processFile()
312
313 }; // class AudioProcessor 184 }; // class AudioProcessor
314 185
315 186
316 #endif 187 #endif