Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "Chordino.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
matthiasm@43
|
22 #include "viterbi.h"
|
Chris@27
|
23
|
Chris@27
|
24 #include <cstdlib>
|
Chris@27
|
25 #include <fstream>
|
matthiasm@0
|
26 #include <cmath>
|
matthiasm@9
|
27
|
Chris@27
|
28 #include <algorithm>
|
matthiasm@0
|
29
|
// Compile-time switch for the "--> ..." trace messages written to cerr by the
// methods in this file. File-local: namespace-scope const objects already have
// internal linkage, the explicit 'static' only spells that out.
static const bool debug_on = false;
matthiasm@0
|
31
|
Chris@35
|
32 Chordino::Chordino(float inputSampleRate) :
|
Chris@35
|
33 NNLSBase(inputSampleRate)
|
matthiasm@0
|
34 {
|
Chris@35
|
35 if (debug_on) cerr << "--> Chordino" << endl;
|
matthiasm@0
|
36 }
|
matthiasm@0
|
37
|
Chris@35
|
38 Chordino::~Chordino()
|
matthiasm@0
|
39 {
|
Chris@35
|
40 if (debug_on) cerr << "--> ~Chordino" << endl;
|
matthiasm@0
|
41 }
|
matthiasm@0
|
42
|
matthiasm@0
|
43 string
|
Chris@35
|
44 Chordino::getIdentifier() const
|
matthiasm@0
|
45 {
|
Chris@23
|
46 if (debug_on) cerr << "--> getIdentifier" << endl;
|
Chris@35
|
47 return "chordino";
|
matthiasm@0
|
48 }
|
matthiasm@0
|
49
|
matthiasm@0
|
50 string
|
Chris@35
|
51 Chordino::getName() const
|
matthiasm@0
|
52 {
|
Chris@23
|
53 if (debug_on) cerr << "--> getName" << endl;
|
Chris@35
|
54 return "Chordino";
|
matthiasm@0
|
55 }
|
matthiasm@0
|
56
|
matthiasm@0
|
57 string
|
Chris@35
|
58 Chordino::getDescription() const
|
matthiasm@0
|
59 {
|
Chris@23
|
60 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@58
|
61 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
62 }
|
matthiasm@0
|
63
|
matthiasm@50
|
64 Chordino::ParameterList
|
matthiasm@50
|
65 Chordino::getParameterDescriptors() const
|
matthiasm@50
|
66 {
|
matthiasm@50
|
67 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@50
|
68 ParameterList list;
|
matthiasm@50
|
69
|
matthiasm@50
|
70 ParameterDescriptor d;
|
matthiasm@50
|
71 d.identifier = "useNNLS";
|
matthiasm@50
|
72 d.name = "use approximate transcription (NNLS)";
|
matthiasm@50
|
73 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@50
|
74 d.unit = "";
|
matthiasm@50
|
75 d.minValue = 0.0;
|
matthiasm@50
|
76 d.maxValue = 1.0;
|
matthiasm@50
|
77 d.defaultValue = 1.0;
|
matthiasm@50
|
78 d.isQuantized = true;
|
matthiasm@50
|
79 d.quantizeStep = 1.0;
|
matthiasm@50
|
80 list.push_back(d);
|
matthiasm@50
|
81
|
matthiasm@50
|
82 ParameterDescriptor d4;
|
matthiasm@50
|
83 d4.identifier = "useHMM";
|
matthiasm@53
|
84 d4.name = "HMM (Viterbi decoding)";
|
matthiasm@50
|
85 d4.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
|
matthiasm@50
|
86 d4.unit = "";
|
matthiasm@50
|
87 d4.minValue = 0.0;
|
matthiasm@50
|
88 d4.maxValue = 1.0;
|
matthiasm@50
|
89 d4.defaultValue = 1.0;
|
matthiasm@50
|
90 d4.isQuantized = true;
|
matthiasm@50
|
91 d4.quantizeStep = 1.0;
|
matthiasm@50
|
92 list.push_back(d4);
|
matthiasm@50
|
93
|
matthiasm@50
|
94 ParameterDescriptor d0;
|
matthiasm@50
|
95 d0.identifier = "rollon";
|
matthiasm@50
|
96 d0.name = "spectral roll-on";
|
matthiasm@58
|
97 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
98 d0.unit = "%";
|
matthiasm@50
|
99 d0.minValue = 0;
|
mail@76
|
100 d0.maxValue = 5;
|
matthiasm@50
|
101 d0.defaultValue = 0;
|
matthiasm@50
|
102 d0.isQuantized = true;
|
mail@76
|
103 d0.quantizeStep = 0.5;
|
matthiasm@50
|
104 list.push_back(d0);
|
matthiasm@50
|
105
|
matthiasm@50
|
106 ParameterDescriptor d1;
|
matthiasm@50
|
107 d1.identifier = "tuningmode";
|
matthiasm@50
|
108 d1.name = "tuning mode";
|
matthiasm@50
|
109 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@50
|
110 d1.unit = "";
|
matthiasm@50
|
111 d1.minValue = 0;
|
matthiasm@50
|
112 d1.maxValue = 1;
|
matthiasm@50
|
113 d1.defaultValue = 0;
|
matthiasm@50
|
114 d1.isQuantized = true;
|
matthiasm@50
|
115 d1.valueNames.push_back("global tuning");
|
matthiasm@50
|
116 d1.valueNames.push_back("local tuning");
|
matthiasm@50
|
117 d1.quantizeStep = 1.0;
|
matthiasm@50
|
118 list.push_back(d1);
|
matthiasm@50
|
119
|
matthiasm@50
|
120 ParameterDescriptor d2;
|
matthiasm@50
|
121 d2.identifier = "whitening";
|
matthiasm@50
|
122 d2.name = "spectral whitening";
|
matthiasm@50
|
123 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
matthiasm@50
|
124 d2.unit = "";
|
matthiasm@50
|
125 d2.isQuantized = true;
|
matthiasm@50
|
126 d2.minValue = 0.0;
|
matthiasm@50
|
127 d2.maxValue = 1.0;
|
matthiasm@50
|
128 d2.defaultValue = 1.0;
|
matthiasm@50
|
129 d2.isQuantized = false;
|
matthiasm@50
|
130 list.push_back(d2);
|
matthiasm@50
|
131
|
matthiasm@50
|
132 ParameterDescriptor d3;
|
matthiasm@50
|
133 d3.identifier = "s";
|
matthiasm@50
|
134 d3.name = "spectral shape";
|
matthiasm@50
|
135 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
matthiasm@50
|
136 d3.unit = "";
|
matthiasm@50
|
137 d3.minValue = 0.5;
|
matthiasm@50
|
138 d3.maxValue = 0.9;
|
matthiasm@50
|
139 d3.defaultValue = 0.7;
|
matthiasm@50
|
140 d3.isQuantized = false;
|
matthiasm@50
|
141 list.push_back(d3);
|
matthiasm@50
|
142
|
matthiasm@50
|
143 // ParameterDescriptor d4;
|
matthiasm@50
|
144 // d4.identifier = "chromanormalize";
|
matthiasm@50
|
145 // d4.name = "chroma normalization";
|
matthiasm@50
|
146 // d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@50
|
147 // d4.unit = "";
|
matthiasm@50
|
148 // d4.minValue = 0;
|
matthiasm@50
|
149 // d4.maxValue = 3;
|
matthiasm@50
|
150 // d4.defaultValue = 0;
|
matthiasm@50
|
151 // d4.isQuantized = true;
|
matthiasm@50
|
152 // d4.valueNames.push_back("none");
|
matthiasm@50
|
153 // d4.valueNames.push_back("maximum norm");
|
matthiasm@50
|
154 // d4.valueNames.push_back("L1 norm");
|
matthiasm@50
|
155 // d4.valueNames.push_back("L2 norm");
|
matthiasm@50
|
156 // d4.quantizeStep = 1.0;
|
matthiasm@50
|
157 // list.push_back(d4);
|
matthiasm@50
|
158
|
matthiasm@50
|
159 return list;
|
matthiasm@50
|
160 }
|
matthiasm@50
|
161
|
Chris@35
|
162 Chordino::OutputList
|
Chris@35
|
163 Chordino::getOutputDescriptors() const
|
matthiasm@0
|
164 {
|
Chris@23
|
165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
166 OutputList list;
|
matthiasm@0
|
167
|
Chris@35
|
168 int index = 0;
|
matthiasm@0
|
169
|
matthiasm@0
|
170 OutputDescriptor d7;
|
matthiasm@0
|
171 d7.identifier = "simplechord";
|
Chris@36
|
172 d7.name = "Chord Estimate";
|
matthiasm@58
|
173 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
174 d7.unit = "";
|
matthiasm@0
|
175 d7.hasFixedBinCount = true;
|
matthiasm@0
|
176 d7.binCount = 0;
|
matthiasm@0
|
177 d7.hasKnownExtents = false;
|
matthiasm@0
|
178 d7.isQuantized = false;
|
matthiasm@0
|
179 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
180 d7.hasDuration = false;
|
matthiasm@0
|
181 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
182 list.push_back(d7);
|
Chris@35
|
183 m_outputChords = index++;
|
matthiasm@0
|
184
|
Chris@23
|
185 OutputDescriptor d8;
|
mail@60
|
186 d8.identifier = "harmonicchange";
|
Chris@36
|
187 d8.name = "Harmonic Change Value";
|
matthiasm@58
|
188 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
|
matthiasm@17
|
189 d8.unit = "";
|
matthiasm@17
|
190 d8.hasFixedBinCount = true;
|
matthiasm@17
|
191 d8.binCount = 1;
|
mail@60
|
192 d8.hasKnownExtents = false;
|
mail@60
|
193 // d8.minValue = 0.0;
|
mail@60
|
194 // d8.maxValue = 0.999;
|
matthiasm@17
|
195 d8.isQuantized = false;
|
matthiasm@17
|
196 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
197 d8.hasDuration = false;
|
matthiasm@17
|
198 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
199 list.push_back(d8);
|
Chris@35
|
200 m_outputHarmonicChange = index++;
|
matthiasm@1
|
201
|
matthiasm@0
|
202 return list;
|
matthiasm@0
|
203 }
|
matthiasm@0
|
204
|
matthiasm@0
|
205 bool
|
Chris@35
|
206 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
207 {
|
Chris@23
|
208 if (debug_on) {
|
Chris@23
|
209 cerr << "--> initialise";
|
Chris@23
|
210 }
|
mail@76
|
211
|
Chris@35
|
212 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
213 return false;
|
Chris@35
|
214 }
|
matthiasm@1
|
215
|
matthiasm@0
|
216 return true;
|
matthiasm@0
|
217 }
|
matthiasm@0
|
218
|
matthiasm@0
|
219 void
|
Chris@35
|
220 Chordino::reset()
|
matthiasm@0
|
221 {
|
Chris@23
|
222 if (debug_on) cerr << "--> reset";
|
Chris@35
|
223 NNLSBase::reset();
|
matthiasm@0
|
224 }
|
matthiasm@0
|
225
|
Chris@35
|
226 Chordino::FeatureSet
|
Chris@35
|
227 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
228 {
|
Chris@23
|
229 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
230
|
Chris@35
|
231 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
232
|
Chris@35
|
233 return FeatureSet();
|
matthiasm@0
|
234 }
|
matthiasm@0
|
235
|
Chris@35
|
236 Chordino::FeatureSet
|
Chris@35
|
237 Chordino::getRemainingFeatures()
|
matthiasm@0
|
238 {
|
mail@76
|
239 cerr << hw[0] << hw[1] << endl;
|
Chris@23
|
240 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
241 FeatureSet fsOut;
|
Chris@35
|
242 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
243 int nChord = m_chordnames.size();
|
Chris@23
|
244 //
|
Chris@23
|
245 /** Calculate Tuning
|
Chris@23
|
246 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
247 cumulative mean real and imag values)
|
Chris@23
|
248 **/
|
mail@80
|
249 float meanTuningImag = 0;
|
mail@80
|
250 float meanTuningReal = 0;
|
mail@80
|
251 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
252 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
253 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
254 }
|
Chris@23
|
255 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
256 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
257 int intShift = floor(normalisedtuning * 3);
|
mail@80
|
258 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
matthiasm@1
|
259
|
Chris@23
|
260 char buffer0 [50];
|
matthiasm@1
|
261
|
Chris@23
|
262 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
263
|
matthiasm@1
|
264
|
Chris@23
|
265 /** Tune Log-Frequency Spectrogram
|
matthiasm@43
|
266 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
|
matthiasm@43
|
267 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectum).
|
Chris@23
|
268 **/
|
Chris@35
|
269 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
270
|
Chris@23
|
271 float tempValue = 0;
|
Chris@23
|
272 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
273 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
274 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
275 int count = 0;
|
matthiasm@1
|
276
|
Chris@35
|
277 FeatureList tunedSpec;
|
matthiasm@43
|
278 int nFrame = m_logSpectrum.size();
|
matthiasm@43
|
279
|
matthiasm@43
|
280 vector<Vamp::RealTime> timestamps;
|
Chris@35
|
281
|
Chris@35
|
282 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
matthiasm@43
|
283 Feature currentLogSpectum = *i;
|
matthiasm@43
|
284 Feature currentTunedSpec; // tuned log-frequency spectrum
|
matthiasm@43
|
285 currentTunedSpec.hasTimestamp = true;
|
matthiasm@43
|
286 currentTunedSpec.timestamp = currentLogSpectum.timestamp;
|
matthiasm@43
|
287 timestamps.push_back(currentLogSpectum.timestamp);
|
matthiasm@43
|
288 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
289
|
Chris@23
|
290 if (m_tuneLocal) {
|
Chris@23
|
291 intShift = floor(m_localTuning[count] * 3);
|
mail@80
|
292 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
Chris@23
|
293 }
|
matthiasm@1
|
294
|
mail@80
|
295 // cerr << intShift << " " << floatShift << endl;
|
matthiasm@1
|
296
|
matthiasm@43
|
297 for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins
|
mail@80
|
298 tempValue = currentLogSpectum.values[k + intShift] * (1-floatShift) + currentLogSpectum.values[k+intShift+1] * floatShift;
|
matthiasm@43
|
299 currentTunedSpec.values.push_back(tempValue);
|
Chris@23
|
300 }
|
matthiasm@1
|
301
|
matthiasm@43
|
302 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
|
matthiasm@43
|
303 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
|
Chris@23
|
304 vector<float> runningstd;
|
mail@77
|
305 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
matthiasm@43
|
306 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
|
Chris@23
|
307 }
|
Chris@23
|
308 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
309 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
310 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
311 if (runningstd[i] > 0) {
|
matthiasm@43
|
312 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
|
matthiasm@43
|
313 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
matthiasm@43
|
314 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
|
matthiasm@43
|
315 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
316 }
|
matthiasm@43
|
317 if (currentTunedSpec.values[i] < 0) {
|
Chris@23
|
318 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
319 }
|
Chris@23
|
320 }
|
matthiasm@43
|
321 tunedSpec.push_back(currentTunedSpec);
|
Chris@23
|
322 count++;
|
Chris@23
|
323 }
|
Chris@23
|
324 cerr << "done." << endl;
|
matthiasm@1
|
325
|
Chris@23
|
326 /** Semitone spectrum and chromagrams
|
Chris@23
|
327 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
328 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
329 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
330 bass and treble stacked onto each other).
|
Chris@23
|
331 **/
|
matthiasm@42
|
332 if (m_useNNLS == 0) {
|
Chris@35
|
333 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
334 } else {
|
Chris@35
|
335 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
336 }
|
matthiasm@13
|
337
|
matthiasm@1
|
338
|
matthiasm@43
|
339 vector<vector<double> > chordogram;
|
Chris@23
|
340 vector<vector<int> > scoreChordogram;
|
Chris@35
|
341 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
|
Chris@23
|
342 count = 0;
|
matthiasm@9
|
343
|
Chris@35
|
344 FeatureList chromaList;
|
matthiasm@43
|
345
|
matthiasm@43
|
346
|
Chris@35
|
347
|
Chris@35
|
348 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
|
matthiasm@43
|
349 Feature currentTunedSpec = *it; // logfreq spectrum
|
matthiasm@43
|
350 Feature currentChromas; // treble and bass chromagram
|
Chris@35
|
351
|
matthiasm@43
|
352 currentChromas.hasTimestamp = true;
|
matthiasm@43
|
353 currentChromas.timestamp = currentTunedSpec.timestamp;
|
Chris@35
|
354
|
mail@77
|
355 float b[nNote];
|
matthiasm@1
|
356
|
Chris@23
|
357 bool some_b_greater_zero = false;
|
Chris@23
|
358 float sumb = 0;
|
mail@77
|
359 for (int i = 0; i < nNote; i++) {
|
mail@77
|
360 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
matthiasm@43
|
361 b[i] = currentTunedSpec.values[i];
|
Chris@23
|
362 sumb += b[i];
|
Chris@23
|
363 if (b[i] > 0) {
|
Chris@23
|
364 some_b_greater_zero = true;
|
Chris@23
|
365 }
|
Chris@23
|
366 }
|
matthiasm@1
|
367
|
Chris@23
|
368 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
369
|
Chris@23
|
370 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
371 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
372 float currval;
|
Chris@23
|
373 unsigned iSemitone = 0;
|
matthiasm@1
|
374
|
Chris@23
|
375 if (some_b_greater_zero) {
|
matthiasm@42
|
376 if (m_useNNLS == 0) {
|
Chris@23
|
377 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
378 currval = 0;
|
Chris@35
|
379 currval += b[iNote + 1 + -1] * 0.5;
|
Chris@35
|
380 currval += b[iNote + 1 + 0] * 1.0;
|
Chris@35
|
381 currval += b[iNote + 1 + 1] * 0.5;
|
Chris@23
|
382 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
383 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
384 iSemitone++;
|
Chris@23
|
385 }
|
matthiasm@1
|
386
|
Chris@23
|
387 } else {
|
Chris@35
|
388 float x[84+1000];
|
Chris@23
|
389 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
390 vector<int> signifIndex;
|
Chris@23
|
391 int index=0;
|
Chris@23
|
392 sumb /= 84.0;
|
Chris@23
|
393 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
394 float currval = 0;
|
Chris@23
|
395 currval += b[iNote + 1 + -1];
|
Chris@23
|
396 currval += b[iNote + 1 + 0];
|
Chris@23
|
397 currval += b[iNote + 1 + 1];
|
Chris@23
|
398 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
399 index++;
|
Chris@23
|
400 }
|
Chris@35
|
401 float rnorm;
|
Chris@35
|
402 float w[84+1000];
|
Chris@35
|
403 float zz[84+1000];
|
Chris@23
|
404 int indx[84+1000];
|
Chris@23
|
405 int mode;
|
mail@77
|
406 int dictsize = nNote*signifIndex.size();
|
Chris@35
|
407 float *curr_dict = new float[dictsize];
|
Chris@23
|
408 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
mail@77
|
409 for (unsigned iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
410 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
411 }
|
Chris@23
|
412 }
|
Chris@35
|
413 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
414 delete [] curr_dict;
|
Chris@23
|
415 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
416 // cerr << mode << endl;
|
Chris@23
|
417 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
418 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
419 }
|
Chris@23
|
420 }
|
Chris@23
|
421 }
|
Chris@35
|
422
|
Chris@35
|
423 vector<float> origchroma = chroma;
|
Chris@23
|
424 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@43
|
425 currentChromas.values = chroma;
|
Chris@35
|
426
|
Chris@23
|
427 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
428 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
429 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
430 case 0: // should never end up here
|
Chris@23
|
431 break;
|
Chris@23
|
432 case 1:
|
Chris@35
|
433 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
|
Chris@35
|
434 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
|
Chris@23
|
435 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
436 break;
|
Chris@23
|
437 case 2:
|
Chris@35
|
438 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
439 chromanorm[2] += *it;
|
Chris@23
|
440 }
|
Chris@23
|
441 break;
|
Chris@23
|
442 case 3:
|
Chris@35
|
443 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
444 chromanorm[2] += pow(*it,2);
|
Chris@23
|
445 }
|
Chris@23
|
446 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
447 break;
|
Chris@23
|
448 }
|
Chris@23
|
449 if (chromanorm[2] > 0) {
|
Chris@35
|
450 for (int i = 0; i < chroma.size(); i++) {
|
matthiasm@43
|
451 currentChromas.values[i] /= chromanorm[2];
|
Chris@23
|
452 }
|
Chris@23
|
453 }
|
Chris@23
|
454 }
|
Chris@35
|
455
|
matthiasm@43
|
456 chromaList.push_back(currentChromas);
|
Chris@35
|
457
|
Chris@23
|
458 // local chord estimation
|
matthiasm@43
|
459 vector<double> currentChordSalience;
|
matthiasm@43
|
460 double tempchordvalue = 0;
|
matthiasm@43
|
461 double sumchordvalue = 0;
|
matthiasm@9
|
462
|
Chris@23
|
463 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
464 tempchordvalue = 0;
|
Chris@23
|
465 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@44
|
466 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
467 }
|
Chris@23
|
468 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
469 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
470 }
|
matthiasm@48
|
471 if (iChord == nChord-1) tempchordvalue *= .7;
|
matthiasm@48
|
472 if (tempchordvalue < 0) tempchordvalue = 0.0;
|
matthiasm@50
|
473 tempchordvalue = pow(1.3,tempchordvalue);
|
Chris@23
|
474 sumchordvalue+=tempchordvalue;
|
Chris@23
|
475 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
476 }
|
Chris@23
|
477 if (sumchordvalue > 0) {
|
Chris@23
|
478 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
479 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
480 }
|
Chris@23
|
481 } else {
|
Chris@23
|
482 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
483 }
|
Chris@23
|
484 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
485
|
Chris@23
|
486 count++;
|
Chris@23
|
487 }
|
Chris@23
|
488 cerr << "done." << endl;
|
matthiasm@13
|
489
|
matthiasm@10
|
490
|
matthiasm@50
|
491 // bool m_useHMM = true; // this will go into the chordino header file.
|
matthiasm@50
|
492 if (m_useHMM == 1.0) {
|
matthiasm@44
|
493 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
|
matthiasm@43
|
494 int oldchord = nChord-1;
|
matthiasm@48
|
495 double selftransprob = 0.99;
|
matthiasm@43
|
496
|
matthiasm@48
|
497 // vector<double> init = vector<double>(nChord,1.0/nChord);
|
matthiasm@48
|
498 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
|
matthiasm@48
|
499
|
matthiasm@50
|
500 double *delta;
|
matthiasm@50
|
501 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
|
matthiasm@50
|
502
|
matthiasm@43
|
503 vector<vector<double> > trans;
|
matthiasm@43
|
504 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
505 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
|
matthiasm@43
|
506 temp[iChord] = selftransprob;
|
matthiasm@43
|
507 trans.push_back(temp);
|
matthiasm@43
|
508 }
|
matthiasm@50
|
509 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta);
|
matthiasm@48
|
510
|
matthiasm@48
|
511
|
matthiasm@48
|
512 Feature chord_feature; // chord estimate
|
matthiasm@48
|
513 chord_feature.hasTimestamp = true;
|
matthiasm@48
|
514 chord_feature.timestamp = timestamps[0];
|
matthiasm@48
|
515 chord_feature.label = m_chordnames[chordpath[0]];
|
mail@60
|
516 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
517
|
mail@60
|
518 chordchange[0] = 0;
|
matthiasm@50
|
519 for (int iFrame = 1; iFrame < chordpath.size(); ++iFrame) {
|
matthiasm@43
|
520 // cerr << chordpath[iFrame] << endl;
|
matthiasm@48
|
521 if (chordpath[iFrame] != oldchord ) {
|
matthiasm@43
|
522 Feature chord_feature; // chord estimate
|
matthiasm@43
|
523 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
524 chord_feature.timestamp = timestamps[iFrame];
|
matthiasm@43
|
525 chord_feature.label = m_chordnames[chordpath[iFrame]];
|
mail@60
|
526 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
527 oldchord = chordpath[iFrame];
|
Chris@23
|
528 }
|
matthiasm@50
|
529 /* calculating simple chord change prob */
|
matthiasm@50
|
530 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@50
|
531 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
|
matthiasm@50
|
532 }
|
Chris@23
|
533 }
|
matthiasm@43
|
534
|
matthiasm@43
|
535 // cerr << chordpath[0] << endl;
|
matthiasm@43
|
536 } else {
|
matthiasm@43
|
537 /* Simple chord estimation
|
matthiasm@43
|
538 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@43
|
539 take the maximum. Very simple, don't do this at home...
|
matthiasm@43
|
540 */
|
matthiasm@44
|
541 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
|
matthiasm@43
|
542 count = 0;
|
matthiasm@43
|
543 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@43
|
544 vector<int> chordSequence;
|
matthiasm@43
|
545 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
|
matthiasm@43
|
546 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@43
|
547 scoreChordogram.push_back(temp);
|
matthiasm@43
|
548 }
|
matthiasm@43
|
549 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
|
matthiasm@43
|
550 int startIndex = count + 1;
|
matthiasm@43
|
551 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@43
|
552
|
matthiasm@43
|
553 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@43
|
554
|
matthiasm@43
|
555 vector<int> chordCandidates;
|
matthiasm@43
|
556 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
matthiasm@43
|
557 // float currsum = 0;
|
matthiasm@43
|
558 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
559 // currsum += chordogram[iFrame][iChord];
|
matthiasm@43
|
560 // }
|
matthiasm@43
|
561 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
matthiasm@43
|
562 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
563 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@43
|
564 chordCandidates.push_back(iChord);
|
matthiasm@43
|
565 break;
|
matthiasm@43
|
566 }
|
Chris@23
|
567 }
|
Chris@23
|
568 }
|
matthiasm@43
|
569 chordCandidates.push_back(nChord-1);
|
matthiasm@43
|
570 // cerr << chordCandidates.size() << endl;
|
matthiasm@43
|
571
|
matthiasm@43
|
572 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@43
|
573 float maxindex = 0; //... and the index thereof
|
matthiasm@43
|
574 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
matthiasm@43
|
575 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@43
|
576
|
matthiasm@43
|
577 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@43
|
578 // now find the max values on both sides of iWF
|
matthiasm@43
|
579 // left side:
|
matthiasm@43
|
580 float maxL = 0;
|
matthiasm@43
|
581 unsigned maxindL = nChord-1;
|
matthiasm@43
|
582 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@43
|
583 unsigned iChord = chordCandidates[kChord];
|
matthiasm@43
|
584 float currsum = 0;
|
matthiasm@43
|
585 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@43
|
586 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
587 }
|
matthiasm@43
|
588 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
589 if (currsum > maxL) {
|
matthiasm@43
|
590 maxL = currsum;
|
matthiasm@43
|
591 maxindL = iChord;
|
matthiasm@43
|
592 }
|
matthiasm@43
|
593 }
|
matthiasm@43
|
594 // right side:
|
matthiasm@43
|
595 float maxR = 0;
|
matthiasm@43
|
596 unsigned maxindR = nChord-1;
|
matthiasm@43
|
597 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@43
|
598 unsigned iChord = chordCandidates[kChord];
|
matthiasm@43
|
599 float currsum = 0;
|
matthiasm@43
|
600 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
601 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
602 }
|
matthiasm@43
|
603 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
604 if (currsum > maxR) {
|
matthiasm@43
|
605 maxR = currsum;
|
matthiasm@43
|
606 maxindR = iChord;
|
matthiasm@43
|
607 }
|
matthiasm@43
|
608 }
|
matthiasm@43
|
609 if (maxL+maxR > maxval) {
|
matthiasm@43
|
610 maxval = maxL+maxR;
|
matthiasm@43
|
611 maxindex = iWF;
|
matthiasm@43
|
612 bestchordL = maxindL;
|
matthiasm@43
|
613 bestchordR = maxindR;
|
matthiasm@43
|
614 }
|
matthiasm@43
|
615
|
Chris@23
|
616 }
|
matthiasm@43
|
617 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@43
|
618 // add a score to every chord-frame-point that was part of a maximum
|
matthiasm@43
|
619 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@43
|
620 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@43
|
621 }
|
matthiasm@43
|
622 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
623 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@43
|
624 }
|
matthiasm@50
|
625 if (bestchordL != bestchordR) {
|
matthiasm@50
|
626 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
matthiasm@50
|
627 }
|
matthiasm@43
|
628 count++;
|
Chris@23
|
629 }
|
matthiasm@43
|
630 // cerr << "******* agent finished *******" << endl;
|
matthiasm@43
|
631 count = 0;
|
matthiasm@43
|
632 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
633 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@43
|
634 float maxindex = 0; //... and the index thereof
|
matthiasm@43
|
635 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
636 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@43
|
637 maxval = scoreChordogram[count][iChord];
|
matthiasm@43
|
638 maxindex = iChord;
|
matthiasm@43
|
639 // cerr << iChord << endl;
|
matthiasm@43
|
640 }
|
matthiasm@43
|
641 }
|
matthiasm@43
|
642 chordSequence.push_back(maxindex);
|
matthiasm@43
|
643 count++;
|
Chris@23
|
644 }
|
matthiasm@43
|
645
|
matthiasm@43
|
646
|
matthiasm@43
|
647 // mode filter on chordSequence
|
matthiasm@43
|
648 count = 0;
|
matthiasm@43
|
649 string oldChord = "";
|
matthiasm@43
|
650 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
651 Feature chord_feature; // chord estimate
|
matthiasm@43
|
652 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
653 chord_feature.timestamp = *it;
|
matthiasm@43
|
654 // Feature currentChord; // chord estimate
|
matthiasm@43
|
655 // currentChord.hasTimestamp = true;
|
matthiasm@43
|
656 // currentChord.timestamp = currentChromas.timestamp;
|
matthiasm@43
|
657
|
matthiasm@43
|
658 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@43
|
659 int maxChordCount = 0;
|
matthiasm@43
|
660 int maxChordIndex = nChord-1;
|
matthiasm@43
|
661 string maxChord;
|
matthiasm@43
|
662 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@43
|
663 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@43
|
664 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@43
|
665 chordCount[chordSequence[i]]++;
|
matthiasm@43
|
666 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@43
|
667 // cerr << "start index " << startIndex << endl;
|
matthiasm@43
|
668 maxChordCount++;
|
matthiasm@43
|
669 maxChordIndex = chordSequence[i];
|
matthiasm@43
|
670 maxChord = m_chordnames[maxChordIndex];
|
matthiasm@43
|
671 }
|
matthiasm@43
|
672 }
|
matthiasm@43
|
673 // chordSequence[count] = maxChordIndex;
|
matthiasm@43
|
674 // cerr << maxChordIndex << endl;
|
matthiasm@50
|
675 // cerr << chordchange[count] << endl;
|
matthiasm@43
|
676 if (oldChord != maxChord) {
|
matthiasm@43
|
677 oldChord = maxChord;
|
matthiasm@43
|
678 chord_feature.label = m_chordnames[maxChordIndex];
|
mail@60
|
679 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
680 }
|
matthiasm@43
|
681 count++;
|
Chris@23
|
682 }
|
Chris@23
|
683 }
|
matthiasm@43
|
684 Feature chord_feature; // last chord estimate
|
matthiasm@43
|
685 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
686 chord_feature.timestamp = timestamps[timestamps.size()-1];
|
matthiasm@43
|
687 chord_feature.label = "N";
|
mail@60
|
688 fsOut[m_outputChords].push_back(chord_feature);
|
Chris@23
|
689 cerr << "done." << endl;
|
matthiasm@50
|
690
|
matthiasm@50
|
691 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
|
matthiasm@50
|
692 Feature chordchange_feature;
|
matthiasm@50
|
693 chordchange_feature.hasTimestamp = true;
|
matthiasm@50
|
694 chordchange_feature.timestamp = timestamps[iFrame];
|
matthiasm@50
|
695 chordchange_feature.values.push_back(chordchange[iFrame]);
|
mail@60
|
696 // cerr << chordchange[iFrame] << endl;
|
mail@60
|
697 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
|
matthiasm@50
|
698 }
|
matthiasm@50
|
699
|
mail@60
|
700 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
|
matthiasm@50
|
701
|
matthiasm@50
|
702
|
Chris@23
|
703 return fsOut;
|
matthiasm@0
|
704 }
|