Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "Chordino.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
matthiasm@43
|
22 #include "viterbi.h"
|
Chris@27
|
23
|
Chris@27
|
24 #include <cstdlib>
|
Chris@27
|
25 #include <fstream>
|
matthiasm@0
|
26 #include <cmath>
|
matthiasm@9
|
27
|
Chris@27
|
28 #include <algorithm>
|
matthiasm@0
|
29
|
matthiasm@0
|
// Compile-time switch for diagnostic tracing: when set to true, the plugin
// entry points in this file write a "--> name" line to cerr as they are entered.
const bool debug_on = false;
|
matthiasm@0
|
31
|
Chris@35
|
32 Chordino::Chordino(float inputSampleRate) :
|
Chris@35
|
33 NNLSBase(inputSampleRate)
|
matthiasm@0
|
34 {
|
Chris@35
|
35 if (debug_on) cerr << "--> Chordino" << endl;
|
matthiasm@0
|
36 }
|
matthiasm@0
|
37
|
Chris@35
|
38 Chordino::~Chordino()
|
matthiasm@0
|
39 {
|
Chris@35
|
40 if (debug_on) cerr << "--> ~Chordino" << endl;
|
matthiasm@0
|
41 }
|
matthiasm@0
|
42
|
matthiasm@0
|
43 string
|
Chris@35
|
44 Chordino::getIdentifier() const
|
matthiasm@0
|
45 {
|
Chris@23
|
46 if (debug_on) cerr << "--> getIdentifier" << endl;
|
Chris@35
|
47 return "chordino";
|
matthiasm@0
|
48 }
|
matthiasm@0
|
49
|
matthiasm@0
|
50 string
|
Chris@35
|
51 Chordino::getName() const
|
matthiasm@0
|
52 {
|
Chris@23
|
53 if (debug_on) cerr << "--> getName" << endl;
|
Chris@35
|
54 return "Chordino";
|
matthiasm@0
|
55 }
|
matthiasm@0
|
56
|
matthiasm@0
|
57 string
|
Chris@35
|
58 Chordino::getDescription() const
|
matthiasm@0
|
59 {
|
Chris@23
|
60 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@58
|
61 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
62 }
|
matthiasm@0
|
63
|
matthiasm@50
|
64 Chordino::ParameterList
|
matthiasm@50
|
65 Chordino::getParameterDescriptors() const
|
matthiasm@50
|
66 {
|
matthiasm@50
|
67 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@50
|
68 ParameterList list;
|
matthiasm@50
|
69
|
matthiasm@50
|
70 ParameterDescriptor d;
|
matthiasm@50
|
71 d.identifier = "useNNLS";
|
matthiasm@50
|
72 d.name = "use approximate transcription (NNLS)";
|
matthiasm@50
|
73 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@50
|
74 d.unit = "";
|
matthiasm@50
|
75 d.minValue = 0.0;
|
matthiasm@50
|
76 d.maxValue = 1.0;
|
matthiasm@50
|
77 d.defaultValue = 1.0;
|
matthiasm@50
|
78 d.isQuantized = true;
|
matthiasm@50
|
79 d.quantizeStep = 1.0;
|
matthiasm@50
|
80 list.push_back(d);
|
matthiasm@50
|
81
|
matthiasm@50
|
82 ParameterDescriptor d4;
|
matthiasm@50
|
83 d4.identifier = "useHMM";
|
matthiasm@53
|
84 d4.name = "HMM (Viterbi decoding)";
|
matthiasm@50
|
85 d4.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
|
matthiasm@50
|
86 d4.unit = "";
|
matthiasm@50
|
87 d4.minValue = 0.0;
|
matthiasm@50
|
88 d4.maxValue = 1.0;
|
matthiasm@50
|
89 d4.defaultValue = 1.0;
|
matthiasm@50
|
90 d4.isQuantized = true;
|
matthiasm@50
|
91 d4.quantizeStep = 1.0;
|
matthiasm@50
|
92 list.push_back(d4);
|
matthiasm@50
|
93
|
matthiasm@50
|
94 ParameterDescriptor d0;
|
matthiasm@50
|
95 d0.identifier = "rollon";
|
matthiasm@50
|
96 d0.name = "spectral roll-on";
|
matthiasm@58
|
97 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
98 d0.unit = "%";
|
matthiasm@50
|
99 d0.minValue = 0;
|
mail@76
|
100 d0.maxValue = 5;
|
matthiasm@50
|
101 d0.defaultValue = 0;
|
matthiasm@50
|
102 d0.isQuantized = true;
|
mail@76
|
103 d0.quantizeStep = 0.5;
|
matthiasm@50
|
104 list.push_back(d0);
|
matthiasm@50
|
105
|
matthiasm@50
|
106 ParameterDescriptor d1;
|
matthiasm@50
|
107 d1.identifier = "tuningmode";
|
matthiasm@50
|
108 d1.name = "tuning mode";
|
matthiasm@50
|
109 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@50
|
110 d1.unit = "";
|
matthiasm@50
|
111 d1.minValue = 0;
|
matthiasm@50
|
112 d1.maxValue = 1;
|
matthiasm@50
|
113 d1.defaultValue = 0;
|
matthiasm@50
|
114 d1.isQuantized = true;
|
matthiasm@50
|
115 d1.valueNames.push_back("global tuning");
|
matthiasm@50
|
116 d1.valueNames.push_back("local tuning");
|
matthiasm@50
|
117 d1.quantizeStep = 1.0;
|
matthiasm@50
|
118 list.push_back(d1);
|
matthiasm@50
|
119
|
matthiasm@50
|
120 ParameterDescriptor d2;
|
matthiasm@50
|
121 d2.identifier = "whitening";
|
matthiasm@50
|
122 d2.name = "spectral whitening";
|
matthiasm@50
|
123 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
matthiasm@50
|
124 d2.unit = "";
|
matthiasm@50
|
125 d2.isQuantized = true;
|
matthiasm@50
|
126 d2.minValue = 0.0;
|
matthiasm@50
|
127 d2.maxValue = 1.0;
|
matthiasm@50
|
128 d2.defaultValue = 1.0;
|
matthiasm@50
|
129 d2.isQuantized = false;
|
matthiasm@50
|
130 list.push_back(d2);
|
matthiasm@50
|
131
|
matthiasm@50
|
132 ParameterDescriptor d3;
|
matthiasm@50
|
133 d3.identifier = "s";
|
matthiasm@50
|
134 d3.name = "spectral shape";
|
matthiasm@50
|
135 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
matthiasm@50
|
136 d3.unit = "";
|
matthiasm@50
|
137 d3.minValue = 0.5;
|
matthiasm@50
|
138 d3.maxValue = 0.9;
|
matthiasm@50
|
139 d3.defaultValue = 0.7;
|
matthiasm@50
|
140 d3.isQuantized = false;
|
matthiasm@50
|
141 list.push_back(d3);
|
matthiasm@50
|
142
|
matthiasm@50
|
143 // ParameterDescriptor d4;
|
matthiasm@50
|
144 // d4.identifier = "chromanormalize";
|
matthiasm@50
|
145 // d4.name = "chroma normalization";
|
matthiasm@50
|
146 // d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@50
|
147 // d4.unit = "";
|
matthiasm@50
|
148 // d4.minValue = 0;
|
matthiasm@50
|
149 // d4.maxValue = 3;
|
matthiasm@50
|
150 // d4.defaultValue = 0;
|
matthiasm@50
|
151 // d4.isQuantized = true;
|
matthiasm@50
|
152 // d4.valueNames.push_back("none");
|
matthiasm@50
|
153 // d4.valueNames.push_back("maximum norm");
|
matthiasm@50
|
154 // d4.valueNames.push_back("L1 norm");
|
matthiasm@50
|
155 // d4.valueNames.push_back("L2 norm");
|
matthiasm@50
|
156 // d4.quantizeStep = 1.0;
|
matthiasm@50
|
157 // list.push_back(d4);
|
matthiasm@50
|
158
|
matthiasm@50
|
159 return list;
|
matthiasm@50
|
160 }
|
matthiasm@50
|
161
|
Chris@35
|
162 Chordino::OutputList
|
Chris@35
|
163 Chordino::getOutputDescriptors() const
|
matthiasm@0
|
164 {
|
Chris@23
|
165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
166 OutputList list;
|
matthiasm@0
|
167
|
Chris@35
|
168 int index = 0;
|
matthiasm@0
|
169
|
matthiasm@0
|
170 OutputDescriptor d7;
|
matthiasm@0
|
171 d7.identifier = "simplechord";
|
Chris@36
|
172 d7.name = "Chord Estimate";
|
matthiasm@58
|
173 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
174 d7.unit = "";
|
matthiasm@0
|
175 d7.hasFixedBinCount = true;
|
matthiasm@0
|
176 d7.binCount = 0;
|
matthiasm@0
|
177 d7.hasKnownExtents = false;
|
matthiasm@0
|
178 d7.isQuantized = false;
|
matthiasm@0
|
179 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
180 d7.hasDuration = false;
|
matthiasm@0
|
181 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
182 list.push_back(d7);
|
Chris@35
|
183 m_outputChords = index++;
|
matthiasm@0
|
184
|
Chris@23
|
185 OutputDescriptor d8;
|
mail@60
|
186 d8.identifier = "harmonicchange";
|
Chris@36
|
187 d8.name = "Harmonic Change Value";
|
matthiasm@58
|
188 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
|
matthiasm@17
|
189 d8.unit = "";
|
matthiasm@17
|
190 d8.hasFixedBinCount = true;
|
matthiasm@17
|
191 d8.binCount = 1;
|
mail@60
|
192 d8.hasKnownExtents = false;
|
mail@60
|
193 // d8.minValue = 0.0;
|
mail@60
|
194 // d8.maxValue = 0.999;
|
matthiasm@17
|
195 d8.isQuantized = false;
|
matthiasm@17
|
196 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
197 d8.hasDuration = false;
|
matthiasm@17
|
198 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
199 list.push_back(d8);
|
Chris@35
|
200 m_outputHarmonicChange = index++;
|
matthiasm@1
|
201
|
matthiasm@0
|
202 return list;
|
matthiasm@0
|
203 }
|
matthiasm@0
|
204
|
matthiasm@0
|
205 bool
|
Chris@35
|
206 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
207 {
|
Chris@23
|
208 if (debug_on) {
|
Chris@23
|
209 cerr << "--> initialise";
|
Chris@23
|
210 }
|
mail@76
|
211
|
Chris@35
|
212 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
213 return false;
|
Chris@35
|
214 }
|
matthiasm@1
|
215
|
matthiasm@0
|
216 return true;
|
matthiasm@0
|
217 }
|
matthiasm@0
|
218
|
matthiasm@0
|
219 void
|
Chris@35
|
220 Chordino::reset()
|
matthiasm@0
|
221 {
|
Chris@23
|
222 if (debug_on) cerr << "--> reset";
|
Chris@35
|
223 NNLSBase::reset();
|
matthiasm@0
|
224 }
|
matthiasm@0
|
225
|
Chris@35
|
226 Chordino::FeatureSet
|
Chris@35
|
227 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
228 {
|
Chris@23
|
229 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
230
|
Chris@35
|
231 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
232
|
Chris@35
|
233 return FeatureSet();
|
matthiasm@0
|
234 }
|
matthiasm@0
|
235
|
Chris@35
|
236 Chordino::FeatureSet
|
Chris@35
|
237 Chordino::getRemainingFeatures()
|
matthiasm@0
|
238 {
|
mail@76
|
239 cerr << hw[0] << hw[1] << endl;
|
Chris@23
|
240 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
241 FeatureSet fsOut;
|
Chris@35
|
242 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
243 int nChord = m_chordnames.size();
|
Chris@23
|
244 //
|
Chris@23
|
245 /** Calculate Tuning
|
Chris@23
|
246 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
247 cumulative mean real and imag values)
|
Chris@23
|
248 **/
|
Chris@23
|
249 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
|
Chris@23
|
250 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
|
Chris@23
|
251 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
252 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
253 int intShift = floor(normalisedtuning * 3);
|
Chris@23
|
254 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
|
matthiasm@1
|
255
|
Chris@23
|
256 char buffer0 [50];
|
matthiasm@1
|
257
|
Chris@23
|
258 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
259
|
matthiasm@1
|
260
|
Chris@23
|
261 /** Tune Log-Frequency Spectrogram
|
matthiasm@43
|
262 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
|
matthiasm@43
|
263 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectum).
|
Chris@23
|
264 **/
|
Chris@35
|
265 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
266
|
Chris@23
|
267 float tempValue = 0;
|
Chris@23
|
268 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
269 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
270 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
271 int count = 0;
|
matthiasm@1
|
272
|
Chris@35
|
273 FeatureList tunedSpec;
|
matthiasm@43
|
274 int nFrame = m_logSpectrum.size();
|
matthiasm@43
|
275
|
matthiasm@43
|
276 vector<Vamp::RealTime> timestamps;
|
Chris@35
|
277
|
Chris@35
|
278 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
matthiasm@43
|
279 Feature currentLogSpectum = *i;
|
matthiasm@43
|
280 Feature currentTunedSpec; // tuned log-frequency spectrum
|
matthiasm@43
|
281 currentTunedSpec.hasTimestamp = true;
|
matthiasm@43
|
282 currentTunedSpec.timestamp = currentLogSpectum.timestamp;
|
matthiasm@43
|
283 timestamps.push_back(currentLogSpectum.timestamp);
|
matthiasm@43
|
284 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
285
|
Chris@23
|
286 if (m_tuneLocal) {
|
Chris@23
|
287 intShift = floor(m_localTuning[count] * 3);
|
Chris@23
|
288 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
|
Chris@23
|
289 }
|
matthiasm@1
|
290
|
Chris@23
|
291 // cerr << intShift << " " << intFactor << endl;
|
matthiasm@1
|
292
|
matthiasm@43
|
293 for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins
|
matthiasm@43
|
294 tempValue = currentLogSpectum.values[k + intShift] * (1-intFactor) + currentLogSpectum.values[k+intShift+1] * intFactor;
|
matthiasm@43
|
295 currentTunedSpec.values.push_back(tempValue);
|
Chris@23
|
296 }
|
matthiasm@1
|
297
|
matthiasm@43
|
298 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
|
matthiasm@43
|
299 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
|
Chris@23
|
300 vector<float> runningstd;
|
mail@77
|
301 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
matthiasm@43
|
302 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
|
Chris@23
|
303 }
|
Chris@23
|
304 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
305 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
306 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
307 if (runningstd[i] > 0) {
|
matthiasm@43
|
308 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
|
matthiasm@43
|
309 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
matthiasm@43
|
310 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
|
matthiasm@43
|
311 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
312 }
|
matthiasm@43
|
313 if (currentTunedSpec.values[i] < 0) {
|
Chris@23
|
314 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
315 }
|
Chris@23
|
316 }
|
matthiasm@43
|
317 tunedSpec.push_back(currentTunedSpec);
|
Chris@23
|
318 count++;
|
Chris@23
|
319 }
|
Chris@23
|
320 cerr << "done." << endl;
|
matthiasm@1
|
321
|
Chris@23
|
322 /** Semitone spectrum and chromagrams
|
Chris@23
|
323 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
324 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
325 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
326 bass and treble stacked onto each other).
|
Chris@23
|
327 **/
|
matthiasm@42
|
328 if (m_useNNLS == 0) {
|
Chris@35
|
329 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
330 } else {
|
Chris@35
|
331 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
332 }
|
matthiasm@13
|
333
|
matthiasm@1
|
334
|
matthiasm@43
|
335 vector<vector<double> > chordogram;
|
Chris@23
|
336 vector<vector<int> > scoreChordogram;
|
Chris@35
|
337 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
|
Chris@23
|
338 count = 0;
|
matthiasm@9
|
339
|
Chris@35
|
340 FeatureList chromaList;
|
matthiasm@43
|
341
|
matthiasm@43
|
342
|
Chris@35
|
343
|
Chris@35
|
344 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
|
matthiasm@43
|
345 Feature currentTunedSpec = *it; // logfreq spectrum
|
matthiasm@43
|
346 Feature currentChromas; // treble and bass chromagram
|
Chris@35
|
347
|
matthiasm@43
|
348 currentChromas.hasTimestamp = true;
|
matthiasm@43
|
349 currentChromas.timestamp = currentTunedSpec.timestamp;
|
Chris@35
|
350
|
mail@77
|
351 float b[nNote];
|
matthiasm@1
|
352
|
Chris@23
|
353 bool some_b_greater_zero = false;
|
Chris@23
|
354 float sumb = 0;
|
mail@77
|
355 for (int i = 0; i < nNote; i++) {
|
mail@77
|
356 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
matthiasm@43
|
357 b[i] = currentTunedSpec.values[i];
|
Chris@23
|
358 sumb += b[i];
|
Chris@23
|
359 if (b[i] > 0) {
|
Chris@23
|
360 some_b_greater_zero = true;
|
Chris@23
|
361 }
|
Chris@23
|
362 }
|
matthiasm@1
|
363
|
Chris@23
|
364 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
365
|
Chris@23
|
366 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
367 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
368 float currval;
|
Chris@23
|
369 unsigned iSemitone = 0;
|
matthiasm@1
|
370
|
Chris@23
|
371 if (some_b_greater_zero) {
|
matthiasm@42
|
372 if (m_useNNLS == 0) {
|
Chris@23
|
373 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
374 currval = 0;
|
Chris@35
|
375 currval += b[iNote + 1 + -1] * 0.5;
|
Chris@35
|
376 currval += b[iNote + 1 + 0] * 1.0;
|
Chris@35
|
377 currval += b[iNote + 1 + 1] * 0.5;
|
Chris@23
|
378 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
379 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
380 iSemitone++;
|
Chris@23
|
381 }
|
matthiasm@1
|
382
|
Chris@23
|
383 } else {
|
Chris@35
|
384 float x[84+1000];
|
Chris@23
|
385 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
386 vector<int> signifIndex;
|
Chris@23
|
387 int index=0;
|
Chris@23
|
388 sumb /= 84.0;
|
Chris@23
|
389 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
|
Chris@23
|
390 float currval = 0;
|
Chris@23
|
391 currval += b[iNote + 1 + -1];
|
Chris@23
|
392 currval += b[iNote + 1 + 0];
|
Chris@23
|
393 currval += b[iNote + 1 + 1];
|
Chris@23
|
394 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
395 index++;
|
Chris@23
|
396 }
|
Chris@35
|
397 float rnorm;
|
Chris@35
|
398 float w[84+1000];
|
Chris@35
|
399 float zz[84+1000];
|
Chris@23
|
400 int indx[84+1000];
|
Chris@23
|
401 int mode;
|
mail@77
|
402 int dictsize = nNote*signifIndex.size();
|
Chris@35
|
403 float *curr_dict = new float[dictsize];
|
Chris@23
|
404 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
mail@77
|
405 for (unsigned iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
406 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
407 }
|
Chris@23
|
408 }
|
Chris@35
|
409 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
410 delete [] curr_dict;
|
Chris@23
|
411 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
412 // cerr << mode << endl;
|
Chris@23
|
413 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
414 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
415 }
|
Chris@23
|
416 }
|
Chris@23
|
417 }
|
Chris@35
|
418
|
Chris@35
|
419 vector<float> origchroma = chroma;
|
Chris@23
|
420 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@43
|
421 currentChromas.values = chroma;
|
Chris@35
|
422
|
Chris@23
|
423 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
424 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
425 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
426 case 0: // should never end up here
|
Chris@23
|
427 break;
|
Chris@23
|
428 case 1:
|
Chris@35
|
429 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
|
Chris@35
|
430 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
|
Chris@23
|
431 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
432 break;
|
Chris@23
|
433 case 2:
|
Chris@35
|
434 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
435 chromanorm[2] += *it;
|
Chris@23
|
436 }
|
Chris@23
|
437 break;
|
Chris@23
|
438 case 3:
|
Chris@35
|
439 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
440 chromanorm[2] += pow(*it,2);
|
Chris@23
|
441 }
|
Chris@23
|
442 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
443 break;
|
Chris@23
|
444 }
|
Chris@23
|
445 if (chromanorm[2] > 0) {
|
Chris@35
|
446 for (int i = 0; i < chroma.size(); i++) {
|
matthiasm@43
|
447 currentChromas.values[i] /= chromanorm[2];
|
Chris@23
|
448 }
|
Chris@23
|
449 }
|
Chris@23
|
450 }
|
Chris@35
|
451
|
matthiasm@43
|
452 chromaList.push_back(currentChromas);
|
Chris@35
|
453
|
Chris@23
|
454 // local chord estimation
|
matthiasm@43
|
455 vector<double> currentChordSalience;
|
matthiasm@43
|
456 double tempchordvalue = 0;
|
matthiasm@43
|
457 double sumchordvalue = 0;
|
matthiasm@9
|
458
|
Chris@23
|
459 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
460 tempchordvalue = 0;
|
Chris@23
|
461 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@44
|
462 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
463 }
|
Chris@23
|
464 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
465 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
466 }
|
matthiasm@48
|
467 if (iChord == nChord-1) tempchordvalue *= .7;
|
matthiasm@48
|
468 if (tempchordvalue < 0) tempchordvalue = 0.0;
|
matthiasm@50
|
469 tempchordvalue = pow(1.3,tempchordvalue);
|
Chris@23
|
470 sumchordvalue+=tempchordvalue;
|
Chris@23
|
471 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
472 }
|
Chris@23
|
473 if (sumchordvalue > 0) {
|
Chris@23
|
474 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
475 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
476 }
|
Chris@23
|
477 } else {
|
Chris@23
|
478 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
479 }
|
Chris@23
|
480 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
481
|
Chris@23
|
482 count++;
|
Chris@23
|
483 }
|
Chris@23
|
484 cerr << "done." << endl;
|
matthiasm@13
|
485
|
matthiasm@10
|
486
|
matthiasm@50
|
487 // bool m_useHMM = true; // this will go into the chordino header file.
|
matthiasm@50
|
488 if (m_useHMM == 1.0) {
|
matthiasm@44
|
489 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
|
matthiasm@43
|
490 int oldchord = nChord-1;
|
matthiasm@48
|
491 double selftransprob = 0.99;
|
matthiasm@43
|
492
|
matthiasm@48
|
493 // vector<double> init = vector<double>(nChord,1.0/nChord);
|
matthiasm@48
|
494 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
|
matthiasm@48
|
495
|
matthiasm@50
|
496 double *delta;
|
matthiasm@50
|
497 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
|
matthiasm@50
|
498
|
matthiasm@43
|
499 vector<vector<double> > trans;
|
matthiasm@43
|
500 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
501 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
|
matthiasm@43
|
502 temp[iChord] = selftransprob;
|
matthiasm@43
|
503 trans.push_back(temp);
|
matthiasm@43
|
504 }
|
matthiasm@50
|
505 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta);
|
matthiasm@48
|
506
|
matthiasm@48
|
507
|
matthiasm@48
|
508 Feature chord_feature; // chord estimate
|
matthiasm@48
|
509 chord_feature.hasTimestamp = true;
|
matthiasm@48
|
510 chord_feature.timestamp = timestamps[0];
|
matthiasm@48
|
511 chord_feature.label = m_chordnames[chordpath[0]];
|
mail@60
|
512 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
513
|
mail@60
|
514 chordchange[0] = 0;
|
matthiasm@50
|
515 for (int iFrame = 1; iFrame < chordpath.size(); ++iFrame) {
|
matthiasm@43
|
516 // cerr << chordpath[iFrame] << endl;
|
matthiasm@48
|
517 if (chordpath[iFrame] != oldchord ) {
|
matthiasm@43
|
518 Feature chord_feature; // chord estimate
|
matthiasm@43
|
519 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
520 chord_feature.timestamp = timestamps[iFrame];
|
matthiasm@43
|
521 chord_feature.label = m_chordnames[chordpath[iFrame]];
|
mail@60
|
522 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
523 oldchord = chordpath[iFrame];
|
Chris@23
|
524 }
|
matthiasm@50
|
525 /* calculating simple chord change prob */
|
matthiasm@50
|
526 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@50
|
527 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
|
matthiasm@50
|
528 }
|
Chris@23
|
529 }
|
matthiasm@43
|
530
|
matthiasm@43
|
531 // cerr << chordpath[0] << endl;
|
matthiasm@43
|
532 } else {
|
matthiasm@43
|
533 /* Simple chord estimation
|
matthiasm@43
|
534 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@43
|
535 take the maximum. Very simple, don't do this at home...
|
matthiasm@43
|
536 */
|
matthiasm@44
|
537 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
|
matthiasm@43
|
538 count = 0;
|
matthiasm@43
|
539 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@43
|
540 vector<int> chordSequence;
|
matthiasm@43
|
541 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
|
matthiasm@43
|
542 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@43
|
543 scoreChordogram.push_back(temp);
|
matthiasm@43
|
544 }
|
matthiasm@43
|
545 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
|
matthiasm@43
|
546 int startIndex = count + 1;
|
matthiasm@43
|
547 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@43
|
548
|
matthiasm@43
|
549 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@43
|
550
|
matthiasm@43
|
551 vector<int> chordCandidates;
|
matthiasm@43
|
552 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
matthiasm@43
|
553 // float currsum = 0;
|
matthiasm@43
|
554 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
555 // currsum += chordogram[iFrame][iChord];
|
matthiasm@43
|
556 // }
|
matthiasm@43
|
557 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
matthiasm@43
|
558 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
559 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@43
|
560 chordCandidates.push_back(iChord);
|
matthiasm@43
|
561 break;
|
matthiasm@43
|
562 }
|
Chris@23
|
563 }
|
Chris@23
|
564 }
|
matthiasm@43
|
565 chordCandidates.push_back(nChord-1);
|
matthiasm@43
|
566 // cerr << chordCandidates.size() << endl;
|
matthiasm@43
|
567
|
matthiasm@43
|
568 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@43
|
569 float maxindex = 0; //... and the index thereof
|
matthiasm@43
|
570 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
matthiasm@43
|
571 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@43
|
572
|
matthiasm@43
|
573 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@43
|
574 // now find the max values on both sides of iWF
|
matthiasm@43
|
575 // left side:
|
matthiasm@43
|
576 float maxL = 0;
|
matthiasm@43
|
577 unsigned maxindL = nChord-1;
|
matthiasm@43
|
578 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@43
|
579 unsigned iChord = chordCandidates[kChord];
|
matthiasm@43
|
580 float currsum = 0;
|
matthiasm@43
|
581 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@43
|
582 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
583 }
|
matthiasm@43
|
584 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
585 if (currsum > maxL) {
|
matthiasm@43
|
586 maxL = currsum;
|
matthiasm@43
|
587 maxindL = iChord;
|
matthiasm@43
|
588 }
|
matthiasm@43
|
589 }
|
matthiasm@43
|
590 // right side:
|
matthiasm@43
|
591 float maxR = 0;
|
matthiasm@43
|
592 unsigned maxindR = nChord-1;
|
matthiasm@43
|
593 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@43
|
594 unsigned iChord = chordCandidates[kChord];
|
matthiasm@43
|
595 float currsum = 0;
|
matthiasm@43
|
596 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
597 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
598 }
|
matthiasm@43
|
599 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
600 if (currsum > maxR) {
|
matthiasm@43
|
601 maxR = currsum;
|
matthiasm@43
|
602 maxindR = iChord;
|
matthiasm@43
|
603 }
|
matthiasm@43
|
604 }
|
matthiasm@43
|
605 if (maxL+maxR > maxval) {
|
matthiasm@43
|
606 maxval = maxL+maxR;
|
matthiasm@43
|
607 maxindex = iWF;
|
matthiasm@43
|
608 bestchordL = maxindL;
|
matthiasm@43
|
609 bestchordR = maxindR;
|
matthiasm@43
|
610 }
|
matthiasm@43
|
611
|
Chris@23
|
612 }
|
matthiasm@43
|
613 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@43
|
614 // add a score to every chord-frame-point that was part of a maximum
|
matthiasm@43
|
615 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@43
|
616 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@43
|
617 }
|
matthiasm@43
|
618 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
619 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@43
|
620 }
|
matthiasm@50
|
621 if (bestchordL != bestchordR) {
|
matthiasm@50
|
622 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
matthiasm@50
|
623 }
|
matthiasm@43
|
624 count++;
|
Chris@23
|
625 }
|
matthiasm@43
|
626 // cerr << "******* agent finished *******" << endl;
|
matthiasm@43
|
627 count = 0;
|
matthiasm@43
|
628 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
629 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@43
|
630 float maxindex = 0; //... and the index thereof
|
matthiasm@43
|
631 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
632 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@43
|
633 maxval = scoreChordogram[count][iChord];
|
matthiasm@43
|
634 maxindex = iChord;
|
matthiasm@43
|
635 // cerr << iChord << endl;
|
matthiasm@43
|
636 }
|
matthiasm@43
|
637 }
|
matthiasm@43
|
638 chordSequence.push_back(maxindex);
|
matthiasm@43
|
639 count++;
|
Chris@23
|
640 }
|
matthiasm@43
|
641
|
matthiasm@43
|
642
|
matthiasm@43
|
643 // mode filter on chordSequence
|
matthiasm@43
|
644 count = 0;
|
matthiasm@43
|
645 string oldChord = "";
|
matthiasm@43
|
646 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
647 Feature chord_feature; // chord estimate
|
matthiasm@43
|
648 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
649 chord_feature.timestamp = *it;
|
matthiasm@43
|
650 // Feature currentChord; // chord estimate
|
matthiasm@43
|
651 // currentChord.hasTimestamp = true;
|
matthiasm@43
|
652 // currentChord.timestamp = currentChromas.timestamp;
|
matthiasm@43
|
653
|
matthiasm@43
|
654 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@43
|
655 int maxChordCount = 0;
|
matthiasm@43
|
656 int maxChordIndex = nChord-1;
|
matthiasm@43
|
657 string maxChord;
|
matthiasm@43
|
658 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@43
|
659 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@43
|
660 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@43
|
661 chordCount[chordSequence[i]]++;
|
matthiasm@43
|
662 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@43
|
663 // cerr << "start index " << startIndex << endl;
|
matthiasm@43
|
664 maxChordCount++;
|
matthiasm@43
|
665 maxChordIndex = chordSequence[i];
|
matthiasm@43
|
666 maxChord = m_chordnames[maxChordIndex];
|
matthiasm@43
|
667 }
|
matthiasm@43
|
668 }
|
matthiasm@43
|
669 // chordSequence[count] = maxChordIndex;
|
matthiasm@43
|
670 // cerr << maxChordIndex << endl;
|
matthiasm@50
|
671 // cerr << chordchange[count] << endl;
|
matthiasm@43
|
672 if (oldChord != maxChord) {
|
matthiasm@43
|
673 oldChord = maxChord;
|
matthiasm@43
|
674 chord_feature.label = m_chordnames[maxChordIndex];
|
mail@60
|
675 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
676 }
|
matthiasm@43
|
677 count++;
|
Chris@23
|
678 }
|
Chris@23
|
679 }
|
matthiasm@43
|
680 Feature chord_feature; // last chord estimate
|
matthiasm@43
|
681 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
682 chord_feature.timestamp = timestamps[timestamps.size()-1];
|
matthiasm@43
|
683 chord_feature.label = "N";
|
mail@60
|
684 fsOut[m_outputChords].push_back(chord_feature);
|
Chris@23
|
685 cerr << "done." << endl;
|
matthiasm@50
|
686
|
matthiasm@50
|
687 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
|
matthiasm@50
|
688 Feature chordchange_feature;
|
matthiasm@50
|
689 chordchange_feature.hasTimestamp = true;
|
matthiasm@50
|
690 chordchange_feature.timestamp = timestamps[iFrame];
|
matthiasm@50
|
691 chordchange_feature.values.push_back(chordchange[iFrame]);
|
mail@60
|
692 // cerr << chordchange[iFrame] << endl;
|
mail@60
|
693 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
|
matthiasm@50
|
694 }
|
matthiasm@50
|
695
|
mail@60
|
696 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
|
matthiasm@50
|
697
|
matthiasm@50
|
698
|
Chris@23
|
699 return fsOut;
|
matthiasm@0
|
700 }
|