Mercurial > hg > beatroot-vamp
comparison BeatRootProcessor.h @ 10:1c1e98cd1b2e
Fixes so as to run and, in theory, return results without crashing -- still doesn't actually produce meaningful data though
author | Chris Cannam |
---|---|
date | Fri, 30 Sep 2011 23:08:42 +0100 |
parents | 4f6626f9ffac |
children | 59520cd6abac |
comparison
equal
deleted
inserted
replaced
9:4f6626f9ffac | 10:1c1e98cd1b2e |
---|---|
49 int hopSize; | 49 int hopSize; |
50 | 50 |
51 /** The size of an FFT frame in samples (see <code>fftTime</code>) */ | 51 /** The size of an FFT frame in samples (see <code>fftTime</code>) */ |
52 int fftSize; | 52 int fftSize; |
53 | 53 |
54 /** The number of overlapping frames of audio data which have been read. */ | |
55 int frameCount; | |
56 | |
57 /** RMS amplitude of the current frame. */ | |
58 double frameRMS; | |
59 | |
60 /** Long term average frame energy (in frequency domain representation). */ | |
61 double ltAverage; | |
62 | |
63 /** Spectral flux onset detection function, indexed by frame. */ | 54 /** Spectral flux onset detection function, indexed by frame. */ |
64 vector<double> spectralFlux; | 55 vector<double> spectralFlux; |
65 | 56 |
66 /** A mapping function for mapping FFT bins to final frequency bins. | 57 /** A mapping function for mapping FFT bins to final frequency bins. |
67 * The mapping is linear (1-1) until the resolution reaches 2 points per | 58 * The mapping is linear (1-1) until the resolution reaches 2 points per |
77 int freqMapSize; | 68 int freqMapSize; |
78 | 69 |
79 /** The magnitude spectrum of the most recent frame. Used for | 70 /** The magnitude spectrum of the most recent frame. Used for |
80 * calculating the spectral flux. */ | 71 * calculating the spectral flux. */ |
81 vector<double> prevFrame; | 72 vector<double> prevFrame; |
82 | |
83 /** The magnitude spectrum of the current frame. */ | |
84 vector<double> newFrame; | |
85 | 73 |
86 /** The magnitude spectra of all frames, used for plotting the spectrogram. */ | |
87 vector<vector<double> > frames; //!!! do we need this? much cheaper to lose it if we don't | |
88 | |
89 /** The RMS energy of all frames. */ | |
90 // vector<double> energy; //!!! unused in beat tracking? | |
91 | |
92 /** The estimated onset times from peak-picking the onset | 74 /** The estimated onset times from peak-picking the onset |
93 * detection function(s). */ | 75 * detection function(s). */ |
94 vector<double> onsets; | 76 vector<double> onsets; |
95 | 77 |
96 /** The estimated onset times and their saliences. */ | 78 /** The estimated onset times and their saliences. */ |
97 EventList onsetList; | 79 EventList onsetList; |
98 | 80 |
99 /** Flag for suppressing all standard output messages except results. */ | 81 /** Flag for suppressing all standard output messages except results. */ |
100 static bool silent; | 82 static bool silent; |
101 | 83 |
102 /** RMS frame energy below this value results in the frame being | |
103 * set to zero, so that normalisation does not have undesired | |
104 * side-effects. */ | |
105 static double silenceThreshold; //!!!??? energy of what? should not be static? | |
106 | |
107 /** For dynamic range compression, this value is added to the log | |
108 * magnitude in each frequency bin and any remaining negative | |
109 * values are then set to zero. | |
110 */ | |
111 static double rangeThreshold; //!!! sim | |
112 | |
113 /** Determines method of normalisation. Values can be:<ul> | |
114 * <li>0: no normalisation</li> | |
115 * <li>1: normalisation by current frame energy</li> | |
116 * <li>2: normalisation by exponential average of frame energy</li> | |
117 * </ul> | |
118 */ | |
119 static int normaliseMode; | |
120 | |
121 /** Ratio between rate of sampling the signal energy (for the | |
122 * amplitude envelope) and the hop size */ | |
123 // static int energyOversampleFactor; //!!! not used? | |
124 | |
125 public: | 84 public: |
126 | 85 |
127 /** Constructor: note that streams are not opened until the input | 86 /** Constructor: note that streams are not opened until the input |
128 * file is set (see <code>setInputFile()</code>). */ | 87 * file is set (see <code>setInputFile()</code>). */ |
129 BeatRootProcessor(float sr) : | 88 BeatRootProcessor(float sr) : |
130 sampleRate(sr) { | 89 sampleRate(sr) { |
131 frameRMS = 0; | |
132 ltAverage = 0; | |
133 frameCount = 0; | |
134 hopSize = 0; | 90 hopSize = 0; |
135 fftSize = 0; | 91 fftSize = 0; |
136 hopTime = 0.010; | 92 hopTime = 0.010; |
137 fftTime = 0.04644; | 93 fftTime = 0.04644; |
138 hopSize = lrint(sampleRate * hopTime); | 94 hopSize = lrint(sampleRate * hopTime); |
141 | 97 |
142 void reset() { | 98 void reset() { |
143 init(); | 99 init(); |
144 } | 100 } |
145 | 101 |
102 /** Processes a frame of frequency-domain audio data by computing | |
103 * the magnitude of each FFT bin, summing the increases in magnitude | |
104 * relative to the previous frame (the spectral flux), and appending | |
105 * that sum to <code>spectralFlux</code>. | |
106 */ | |
107 void processFrame(const float *const *inputBuffers) { | |
108 double flux = 0; | |
109 for (int i = 0; i <= fftSize/2; i++) { | |
110 double mag = sqrt(inputBuffers[0][i*2] * inputBuffers[0][i*2] + | |
111 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]); | |
112 if (mag > prevFrame[i]) flux += mag - prevFrame[i]; | |
113 prevFrame[i] = mag; | |
114 } | |
115 | |
116 spectralFlux.push_back(flux); | |
117 | |
118 } // processFrame() | |
119 | |
120 /** Tracks beats once all frames have been processed by processFrame | |
121 */ | |
122 EventList beatTrack() { | |
123 | |
124 double hop = hopTime; | |
125 Peaks::normalise(spectralFlux); | |
126 vector<int> peaks = Peaks::findPeaks(spectralFlux, (int)lrint(0.06 / hop), 0.35, 0.84, true); | |
127 onsets.clear(); | |
128 onsets.resize(peaks.size(), 0); | |
129 vector<int>::iterator it = peaks.begin(); | |
130 onsetList.clear(); | |
131 double minSalience = Peaks::min(spectralFlux); | |
132 for (int i = 0; i < onsets.size(); i++) { | |
133 int index = *it; | |
134 ++it; | |
135 onsets[i] = index * hop; | |
136 Event e = BeatTracker::newBeat(onsets[i], 0); | |
137 // if (debug) | |
138 // System.err.printf("Onset: %8.3f %8.3f %8.3f\n", | |
139 // onsets[i], energy[index], slope[index]); | |
140 // e.salience = slope[index]; // or combination of energy + slope?? | |
141 // Note that salience must be non-negative or the beat tracking system fails! | |
142 e.salience = spectralFlux[index] - minSalience; | |
143 onsetList.push_back(e); | |
144 } | |
145 | |
146 return BeatTracker::beatTrack(onsetList); | |
147 | |
148 } // beatTrack() | |
149 | |
146 protected: | 150 protected: |
147 /** Allocates memory for arrays, based on parameter settings */ | 151 /** Allocates memory for arrays, based on parameter settings */ |
148 void init() { | 152 void init() { |
149 makeFreqMap(fftSize, sampleRate); | 153 makeFreqMap(fftSize, sampleRate); |
150 prevFrame.clear(); | 154 prevFrame.clear(); |
151 for (int i = 0; i < freqMapSize; i++) prevFrame.push_back(0); | 155 for (int i = 0; i <= fftSize/2; i++) prevFrame.push_back(0); |
152 frameCount = 0; | |
153 frameRMS = 0; | |
154 ltAverage = 0; | |
155 spectralFlux.clear(); | 156 spectralFlux.clear(); |
156 } // init() | 157 } // init() |
157 | 158 |
158 /** Creates a map of FFT frequency bins to comparison bins. | 159 /** Creates a map of FFT frequency bins to comparison bins. |
159 * Where the spacing of FFT bins is less than 0.5 semitones, the mapping is | 160 * Where the spacing of FFT bins is less than 0.5 semitones, the mapping is |
178 freqMap[i++] = crossoverBin + (int)lrint(midi) - crossoverMidi; | 179 freqMap[i++] = crossoverBin + (int)lrint(midi) - crossoverMidi; |
179 } | 180 } |
180 freqMapSize = freqMap[i-1] + 1; | 181 freqMapSize = freqMap[i-1] + 1; |
181 } // makeFreqMap() | 182 } // makeFreqMap() |
182 | 183 |
183 /** Processes a frame of frequency-domain audio data by mapping | |
184 * the frequency bins into a part-linear part-logarithmic array, | |
185 * then computing the spectral flux then (optionally) normalising | |
186 * and calculating onsets. | |
187 */ | |
188 void processFrame(const float *const *inputBuffers) { | |
189 newFrame.clear(); | |
190 for (int i = 0; i < freqMapSize; i++) { | |
191 newFrame.push_back(0); | |
192 } | |
193 double flux = 0; | |
194 for (int i = 0; i <= fftSize/2; i++) { | |
195 double mag = sqrt(inputBuffers[0][i*2] * inputBuffers[0][i*2] + | |
196 inputBuffers[0][i*2+1] * inputBuffers[0][i*2+1]); | |
197 if (mag > prevFrame[i]) flux += mag - prevFrame[i]; | |
198 prevFrame[i] = mag; | |
199 newFrame[freqMap[i]] += mag; | |
200 } | |
201 spectralFlux.push_back(flux); | |
202 frames.push_back(newFrame); | |
203 // for (int i = 0; i < freqMapSize; i++) | |
204 // [frameCount][i] = newFrame[i]; | |
205 /* | |
206 int index = cbIndex - (fftSize - hopSize); | |
207 if (index < 0) | |
208 index += fftSize; | |
209 int sz = (fftSize - hopSize) / energyOversampleFactor; | |
210 for (int j = 0; j < energyOversampleFactor; j++) { | |
211 double newEnergy = 0; | |
212 for (int i = 0; i < sz; i++) { | |
213 newEnergy += circBuffer[index] * circBuffer[index]; | |
214 if (++index == fftSize) | |
215 index = 0; | |
216 } | |
217 energy[frameCount * energyOversampleFactor + j] = | |
218 newEnergy / sz <= 1e-6? 0: log(newEnergy / sz) + 13.816; | |
219 }*/ | |
220 | |
221 double decay = frameCount >= 200? 0.99: | |
222 (frameCount < 100? 0: (frameCount - 100) / 100.0); | |
223 | |
224 //!!! uh-oh -- frameRMS has not been calculated (it came from time-domain signal) -- will always appear silent | |
225 | |
226 if (ltAverage == 0) | |
227 ltAverage = frameRMS; | |
228 else | |
229 ltAverage = ltAverage * decay + frameRMS * (1.0 - decay); | |
230 if (frameRMS <= silenceThreshold) | |
231 for (int i = 0; i < freqMapSize; i++) | |
232 frames[frameCount][i] = 0; | |
233 else { | |
234 if (normaliseMode == 1) | |
235 for (int i = 0; i < freqMapSize; i++) | |
236 frames[frameCount][i] /= frameRMS; | |
237 else if (normaliseMode == 2) | |
238 for (int i = 0; i < freqMapSize; i++) | |
239 frames[frameCount][i] /= ltAverage; | |
240 for (int i = 0; i < freqMapSize; i++) { | |
241 frames[frameCount][i] = log(frames[frameCount][i]) + rangeThreshold; | |
242 if (frames[frameCount][i] < 0) | |
243 frames[frameCount][i] = 0; | |
244 } | |
245 } | |
246 // weightedPhaseDeviation(); | |
247 // if (debug) | |
248 // System.err.printf("PhaseDev: t=%7.3f phDev=%7.3f RMS=%7.3f\n", | |
249 // frameCount * hopTime, | |
250 // phaseDeviation[frameCount], | |
251 // frameRMS); | |
252 frameCount++; | |
253 } // processFrame() | |
254 | |
255 /** Processes a complete file of audio data. */ | |
256 void processFile() { | |
257 /* | |
258 while (pcmInputStream != null) { | |
259 // Profile.start(0); | |
260 processFrame(); | |
261 // Profile.log(0); | |
262 if (Thread.currentThread().isInterrupted()) { | |
263 System.err.println("info: INTERRUPTED in processFile()"); | |
264 return; | |
265 } | |
266 } | |
267 */ | |
268 // double[] x1 = new double[phaseDeviation.length]; | |
269 // for (int i = 0; i < x1.length; i++) { | |
270 // x1[i] = i * hopTime; | |
271 // phaseDeviation[i] = (phaseDeviation[i] - 0.4) * 100; | |
272 // } | |
273 // double[] x2 = new double[energy.length]; | |
274 // for (int i = 0; i < x2.length; i++) | |
275 // x2[i] = i * hopTime / energyOversampleFactor; | |
276 // // plot.clear(); | |
277 // plot.addPlot(x1, phaseDeviation, Color.green, 7); | |
278 // plot.addPlot(x2, energy, Color.red, 7); | |
279 // plot.setTitle("Test phase deviation"); | |
280 // plot.fitAxes(); | |
281 | |
282 // double[] slope = new double[energy.length]; | |
283 // double hop = hopTime / energyOversampleFactor; | |
284 // Peaks.getSlope(energy, hop, 15, slope); | |
285 // vector<Integer> peaks = Peaks.findPeaks(slope, (int)lrint(0.06 / hop), 10); | |
286 | |
287 double hop = hopTime; | |
288 Peaks::normalise(spectralFlux); | |
289 vector<int> peaks = Peaks::findPeaks(spectralFlux, (int)lrint(0.06 / hop), 0.35, 0.84, true); | |
290 onsets.clear(); | |
291 onsets.resize(peaks.size(), 0); | |
292 vector<int>::iterator it = peaks.begin(); | |
293 onsetList.clear(); | |
294 double minSalience = Peaks::min(spectralFlux); | |
295 for (int i = 0; i < onsets.size(); i++) { | |
296 int index = *it; | |
297 ++it; | |
298 onsets[i] = index * hop; | |
299 Event e = BeatTracker::newBeat(onsets[i], 0); | |
300 // if (debug) | |
301 // System.err.printf("Onset: %8.3f %8.3f %8.3f\n", | |
302 // onsets[i], energy[index], slope[index]); | |
303 // e.salience = slope[index]; // or combination of energy + slope?? | |
304 // Note that salience must be non-negative or the beat tracking system fails! | |
305 e.salience = spectralFlux[index] - minSalience; | |
306 onsetList.push_back(e); | |
307 } | |
308 | |
309 //!!! This onsetList is then fed in to BeatTrackDisplay::beatTrack | |
310 | |
311 } // processFile() | |
312 | |
313 }; // class BeatRootProcessor | 184 }; // class BeatRootProcessor |
314 | 185 |
315 | 186 |
316 #endif | 187 #endif |