Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "Chordino.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
matthiasm@43
|
22 #include "viterbi.h"
|
Chris@27
|
23
|
Chris@27
|
24 #include <cstdlib>
|
Chris@27
|
25 #include <fstream>
|
matthiasm@0
|
26 #include <cmath>
|
matthiasm@9
|
27
|
Chris@27
|
28 #include <algorithm>
|
matthiasm@0
|
29
|
// File-wide switch for the "--> ..." trace messages that the functions in
// this file write to cerr; tracing is disabled while this is false.
static const bool debug_on = false;
matthiasm@0
|
31
|
Chris@35
|
32 Chordino::Chordino(float inputSampleRate) :
|
matthiasm@86
|
33 NNLSBase(inputSampleRate),
|
matthiasm@86
|
34 m_chorddict(0),
|
matthiasm@86
|
35 m_chordnotes(0),
|
matthiasm@86
|
36 m_chordnames(0)
|
matthiasm@0
|
37 {
|
Chris@35
|
38 if (debug_on) cerr << "--> Chordino" << endl;
|
matthiasm@86
|
39 // get the *chord* dictionary from file (if the file exists)
|
matthiasm@86
|
40 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes);
|
matthiasm@86
|
41
|
matthiasm@0
|
42 }
|
matthiasm@0
|
43
|
Chris@35
|
44 Chordino::~Chordino()
|
matthiasm@0
|
45 {
|
Chris@35
|
46 if (debug_on) cerr << "--> ~Chordino" << endl;
|
matthiasm@0
|
47 }
|
matthiasm@0
|
48
|
matthiasm@0
|
49 string
|
Chris@35
|
50 Chordino::getIdentifier() const
|
matthiasm@0
|
51 {
|
Chris@23
|
52 if (debug_on) cerr << "--> getIdentifier" << endl;
|
Chris@35
|
53 return "chordino";
|
matthiasm@0
|
54 }
|
matthiasm@0
|
55
|
matthiasm@0
|
56 string
|
Chris@35
|
57 Chordino::getName() const
|
matthiasm@0
|
58 {
|
Chris@23
|
59 if (debug_on) cerr << "--> getName" << endl;
|
Chris@35
|
60 return "Chordino";
|
matthiasm@0
|
61 }
|
matthiasm@0
|
62
|
matthiasm@0
|
63 string
|
Chris@35
|
64 Chordino::getDescription() const
|
matthiasm@0
|
65 {
|
Chris@23
|
66 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@58
|
67 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
68 }
|
matthiasm@0
|
69
|
matthiasm@50
|
70 Chordino::ParameterList
|
matthiasm@50
|
71 Chordino::getParameterDescriptors() const
|
matthiasm@50
|
72 {
|
matthiasm@50
|
73 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@50
|
74 ParameterList list;
|
matthiasm@50
|
75
|
matthiasm@50
|
76 ParameterDescriptor d;
|
matthiasm@50
|
77 d.identifier = "useNNLS";
|
matthiasm@50
|
78 d.name = "use approximate transcription (NNLS)";
|
matthiasm@50
|
79 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@50
|
80 d.unit = "";
|
matthiasm@50
|
81 d.minValue = 0.0;
|
matthiasm@50
|
82 d.maxValue = 1.0;
|
matthiasm@50
|
83 d.defaultValue = 1.0;
|
matthiasm@50
|
84 d.isQuantized = true;
|
matthiasm@50
|
85 d.quantizeStep = 1.0;
|
matthiasm@50
|
86 list.push_back(d);
|
matthiasm@50
|
87
|
matthiasm@50
|
88 ParameterDescriptor d4;
|
matthiasm@50
|
89 d4.identifier = "useHMM";
|
matthiasm@53
|
90 d4.name = "HMM (Viterbi decoding)";
|
matthiasm@50
|
91 d4.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
|
matthiasm@50
|
92 d4.unit = "";
|
matthiasm@50
|
93 d4.minValue = 0.0;
|
matthiasm@50
|
94 d4.maxValue = 1.0;
|
matthiasm@50
|
95 d4.defaultValue = 1.0;
|
matthiasm@50
|
96 d4.isQuantized = true;
|
matthiasm@50
|
97 d4.quantizeStep = 1.0;
|
matthiasm@50
|
98 list.push_back(d4);
|
matthiasm@50
|
99
|
matthiasm@50
|
100 ParameterDescriptor d0;
|
matthiasm@50
|
101 d0.identifier = "rollon";
|
matthiasm@50
|
102 d0.name = "spectral roll-on";
|
matthiasm@58
|
103 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
104 d0.unit = "%";
|
matthiasm@50
|
105 d0.minValue = 0;
|
mail@76
|
106 d0.maxValue = 5;
|
matthiasm@50
|
107 d0.defaultValue = 0;
|
matthiasm@50
|
108 d0.isQuantized = true;
|
mail@76
|
109 d0.quantizeStep = 0.5;
|
matthiasm@50
|
110 list.push_back(d0);
|
matthiasm@50
|
111
|
matthiasm@50
|
112 ParameterDescriptor d1;
|
matthiasm@50
|
113 d1.identifier = "tuningmode";
|
matthiasm@50
|
114 d1.name = "tuning mode";
|
matthiasm@50
|
115 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@50
|
116 d1.unit = "";
|
matthiasm@50
|
117 d1.minValue = 0;
|
matthiasm@50
|
118 d1.maxValue = 1;
|
matthiasm@50
|
119 d1.defaultValue = 0;
|
matthiasm@50
|
120 d1.isQuantized = true;
|
matthiasm@50
|
121 d1.valueNames.push_back("global tuning");
|
matthiasm@50
|
122 d1.valueNames.push_back("local tuning");
|
matthiasm@50
|
123 d1.quantizeStep = 1.0;
|
matthiasm@50
|
124 list.push_back(d1);
|
matthiasm@50
|
125
|
matthiasm@50
|
126 ParameterDescriptor d2;
|
matthiasm@50
|
127 d2.identifier = "whitening";
|
matthiasm@50
|
128 d2.name = "spectral whitening";
|
matthiasm@50
|
129 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
matthiasm@50
|
130 d2.unit = "";
|
matthiasm@50
|
131 d2.isQuantized = true;
|
matthiasm@50
|
132 d2.minValue = 0.0;
|
matthiasm@50
|
133 d2.maxValue = 1.0;
|
matthiasm@50
|
134 d2.defaultValue = 1.0;
|
matthiasm@50
|
135 d2.isQuantized = false;
|
matthiasm@50
|
136 list.push_back(d2);
|
matthiasm@50
|
137
|
matthiasm@50
|
138 ParameterDescriptor d3;
|
matthiasm@50
|
139 d3.identifier = "s";
|
matthiasm@50
|
140 d3.name = "spectral shape";
|
matthiasm@50
|
141 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
matthiasm@50
|
142 d3.unit = "";
|
matthiasm@50
|
143 d3.minValue = 0.5;
|
matthiasm@50
|
144 d3.maxValue = 0.9;
|
matthiasm@50
|
145 d3.defaultValue = 0.7;
|
matthiasm@50
|
146 d3.isQuantized = false;
|
matthiasm@50
|
147 list.push_back(d3);
|
matthiasm@50
|
148
|
matthiasm@50
|
149 // ParameterDescriptor d4;
|
matthiasm@50
|
150 // d4.identifier = "chromanormalize";
|
matthiasm@50
|
151 // d4.name = "chroma normalization";
|
matthiasm@50
|
152 // d4.description = "How shall the chroma vector be normalized?";
|
matthiasm@50
|
153 // d4.unit = "";
|
matthiasm@50
|
154 // d4.minValue = 0;
|
matthiasm@50
|
155 // d4.maxValue = 3;
|
matthiasm@50
|
156 // d4.defaultValue = 0;
|
matthiasm@50
|
157 // d4.isQuantized = true;
|
matthiasm@50
|
158 // d4.valueNames.push_back("none");
|
matthiasm@50
|
159 // d4.valueNames.push_back("maximum norm");
|
matthiasm@50
|
160 // d4.valueNames.push_back("L1 norm");
|
matthiasm@50
|
161 // d4.valueNames.push_back("L2 norm");
|
matthiasm@50
|
162 // d4.quantizeStep = 1.0;
|
matthiasm@50
|
163 // list.push_back(d4);
|
matthiasm@50
|
164
|
matthiasm@50
|
165 return list;
|
matthiasm@50
|
166 }
|
matthiasm@50
|
167
|
Chris@35
|
168 Chordino::OutputList
|
Chris@35
|
169 Chordino::getOutputDescriptors() const
|
matthiasm@0
|
170 {
|
Chris@23
|
171 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
172 OutputList list;
|
matthiasm@0
|
173
|
Chris@35
|
174 int index = 0;
|
matthiasm@0
|
175
|
matthiasm@0
|
176 OutputDescriptor d7;
|
matthiasm@0
|
177 d7.identifier = "simplechord";
|
Chris@36
|
178 d7.name = "Chord Estimate";
|
matthiasm@58
|
179 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
180 d7.unit = "";
|
matthiasm@0
|
181 d7.hasFixedBinCount = true;
|
matthiasm@0
|
182 d7.binCount = 0;
|
matthiasm@0
|
183 d7.hasKnownExtents = false;
|
matthiasm@0
|
184 d7.isQuantized = false;
|
matthiasm@0
|
185 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
186 d7.hasDuration = false;
|
matthiasm@0
|
187 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
188 list.push_back(d7);
|
Chris@35
|
189 m_outputChords = index++;
|
matthiasm@0
|
190
|
matthiasm@86
|
191 OutputDescriptor chordnotes;
|
matthiasm@86
|
192 chordnotes.identifier = "chordnotes";
|
matthiasm@86
|
193 chordnotes.name = "Note Representation of Chord Estimate";
|
matthiasm@86
|
194 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
|
matthiasm@86
|
195 chordnotes.unit = "MIDI units";
|
matthiasm@86
|
196 chordnotes.hasFixedBinCount = true;
|
matthiasm@86
|
197 chordnotes.binCount = 1;
|
matthiasm@86
|
198 chordnotes.hasKnownExtents = true;
|
matthiasm@86
|
199 chordnotes.minValue = 0;
|
matthiasm@86
|
200 chordnotes.maxValue = 127;
|
matthiasm@86
|
201 chordnotes.isQuantized = true;
|
matthiasm@86
|
202 chordnotes.quantizeStep = 1;
|
matthiasm@86
|
203 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@86
|
204 chordnotes.hasDuration = true;
|
matthiasm@86
|
205 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@86
|
206 list.push_back(chordnotes);
|
matthiasm@86
|
207 m_outputChordnotes = index++;
|
matthiasm@86
|
208
|
Chris@23
|
209 OutputDescriptor d8;
|
mail@60
|
210 d8.identifier = "harmonicchange";
|
Chris@36
|
211 d8.name = "Harmonic Change Value";
|
matthiasm@58
|
212 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
|
matthiasm@17
|
213 d8.unit = "";
|
matthiasm@17
|
214 d8.hasFixedBinCount = true;
|
matthiasm@17
|
215 d8.binCount = 1;
|
mail@60
|
216 d8.hasKnownExtents = false;
|
mail@60
|
217 // d8.minValue = 0.0;
|
mail@60
|
218 // d8.maxValue = 0.999;
|
matthiasm@17
|
219 d8.isQuantized = false;
|
matthiasm@17
|
220 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
221 d8.hasDuration = false;
|
matthiasm@17
|
222 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
223 list.push_back(d8);
|
Chris@35
|
224 m_outputHarmonicChange = index++;
|
matthiasm@1
|
225
|
matthiasm@0
|
226 return list;
|
matthiasm@0
|
227 }
|
matthiasm@0
|
228
|
matthiasm@0
|
229 bool
|
Chris@35
|
230 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
231 {
|
Chris@23
|
232 if (debug_on) {
|
Chris@23
|
233 cerr << "--> initialise";
|
Chris@23
|
234 }
|
mail@76
|
235
|
Chris@35
|
236 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
237 return false;
|
Chris@35
|
238 }
|
matthiasm@1
|
239
|
matthiasm@0
|
240 return true;
|
matthiasm@0
|
241 }
|
matthiasm@0
|
242
|
matthiasm@0
|
243 void
|
Chris@35
|
244 Chordino::reset()
|
matthiasm@0
|
245 {
|
Chris@23
|
246 if (debug_on) cerr << "--> reset";
|
Chris@35
|
247 NNLSBase::reset();
|
matthiasm@0
|
248 }
|
matthiasm@0
|
249
|
Chris@35
|
250 Chordino::FeatureSet
|
Chris@35
|
251 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
252 {
|
Chris@23
|
253 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
254
|
Chris@35
|
255 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
256
|
Chris@35
|
257 return FeatureSet();
|
matthiasm@0
|
258 }
|
matthiasm@0
|
259
|
Chris@35
|
260 Chordino::FeatureSet
|
Chris@35
|
261 Chordino::getRemainingFeatures()
|
matthiasm@0
|
262 {
|
mail@76
|
263 cerr << hw[0] << hw[1] << endl;
|
Chris@23
|
264 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
265 FeatureSet fsOut;
|
Chris@35
|
266 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
267 int nChord = m_chordnames.size();
|
Chris@23
|
268 //
|
Chris@23
|
269 /** Calculate Tuning
|
Chris@23
|
270 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
271 cumulative mean real and imag values)
|
Chris@23
|
272 **/
|
mail@80
|
273 float meanTuningImag = 0;
|
mail@80
|
274 float meanTuningReal = 0;
|
mail@80
|
275 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
276 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
277 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
278 }
|
Chris@23
|
279 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
280 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
281 int intShift = floor(normalisedtuning * 3);
|
mail@80
|
282 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
matthiasm@1
|
283
|
Chris@23
|
284 char buffer0 [50];
|
matthiasm@1
|
285
|
Chris@23
|
286 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
287
|
matthiasm@1
|
288
|
Chris@23
|
289 /** Tune Log-Frequency Spectrogram
|
matthiasm@43
|
290 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
|
matthiasm@43
|
291 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectum).
|
Chris@23
|
292 **/
|
Chris@35
|
293 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
294
|
Chris@23
|
295 float tempValue = 0;
|
Chris@23
|
296 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
297 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
298 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
299 int count = 0;
|
matthiasm@1
|
300
|
Chris@35
|
301 FeatureList tunedSpec;
|
matthiasm@43
|
302 int nFrame = m_logSpectrum.size();
|
matthiasm@43
|
303
|
matthiasm@43
|
304 vector<Vamp::RealTime> timestamps;
|
Chris@35
|
305
|
Chris@35
|
306 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
matthiasm@43
|
307 Feature currentLogSpectum = *i;
|
matthiasm@43
|
308 Feature currentTunedSpec; // tuned log-frequency spectrum
|
matthiasm@43
|
309 currentTunedSpec.hasTimestamp = true;
|
matthiasm@43
|
310 currentTunedSpec.timestamp = currentLogSpectum.timestamp;
|
matthiasm@43
|
311 timestamps.push_back(currentLogSpectum.timestamp);
|
matthiasm@43
|
312 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
313
|
Chris@23
|
314 if (m_tuneLocal) {
|
Chris@23
|
315 intShift = floor(m_localTuning[count] * 3);
|
mail@80
|
316 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
Chris@23
|
317 }
|
matthiasm@1
|
318
|
mail@80
|
319 // cerr << intShift << " " << floatShift << endl;
|
matthiasm@1
|
320
|
matthiasm@43
|
321 for (unsigned k = 2; k < currentLogSpectum.values.size() - 3; ++k) { // interpolate all inner bins
|
mail@80
|
322 tempValue = currentLogSpectum.values[k + intShift] * (1-floatShift) + currentLogSpectum.values[k+intShift+1] * floatShift;
|
matthiasm@43
|
323 currentTunedSpec.values.push_back(tempValue);
|
Chris@23
|
324 }
|
matthiasm@1
|
325
|
matthiasm@43
|
326 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
|
matthiasm@43
|
327 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
|
Chris@23
|
328 vector<float> runningstd;
|
mail@77
|
329 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
matthiasm@43
|
330 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
|
Chris@23
|
331 }
|
Chris@23
|
332 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
333 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
334 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
335 if (runningstd[i] > 0) {
|
matthiasm@43
|
336 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
|
matthiasm@43
|
337 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
matthiasm@43
|
338 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
|
matthiasm@43
|
339 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
340 }
|
matthiasm@43
|
341 if (currentTunedSpec.values[i] < 0) {
|
Chris@23
|
342 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
343 }
|
Chris@23
|
344 }
|
matthiasm@43
|
345 tunedSpec.push_back(currentTunedSpec);
|
Chris@23
|
346 count++;
|
Chris@23
|
347 }
|
Chris@23
|
348 cerr << "done." << endl;
|
matthiasm@1
|
349
|
Chris@23
|
350 /** Semitone spectrum and chromagrams
|
Chris@23
|
351 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
352 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
353 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
354 bass and treble stacked onto each other).
|
Chris@23
|
355 **/
|
matthiasm@42
|
356 if (m_useNNLS == 0) {
|
Chris@35
|
357 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
358 } else {
|
Chris@35
|
359 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
360 }
|
matthiasm@13
|
361
|
matthiasm@1
|
362
|
matthiasm@43
|
363 vector<vector<double> > chordogram;
|
Chris@23
|
364 vector<vector<int> > scoreChordogram;
|
Chris@35
|
365 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
|
Chris@23
|
366 count = 0;
|
matthiasm@9
|
367
|
Chris@35
|
368 FeatureList chromaList;
|
matthiasm@43
|
369
|
matthiasm@43
|
370
|
Chris@35
|
371
|
Chris@35
|
372 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
|
matthiasm@43
|
373 Feature currentTunedSpec = *it; // logfreq spectrum
|
matthiasm@43
|
374 Feature currentChromas; // treble and bass chromagram
|
Chris@35
|
375
|
matthiasm@43
|
376 currentChromas.hasTimestamp = true;
|
matthiasm@43
|
377 currentChromas.timestamp = currentTunedSpec.timestamp;
|
Chris@35
|
378
|
mail@77
|
379 float b[nNote];
|
matthiasm@1
|
380
|
Chris@23
|
381 bool some_b_greater_zero = false;
|
Chris@23
|
382 float sumb = 0;
|
mail@77
|
383 for (int i = 0; i < nNote; i++) {
|
mail@77
|
384 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
matthiasm@43
|
385 b[i] = currentTunedSpec.values[i];
|
Chris@23
|
386 sumb += b[i];
|
Chris@23
|
387 if (b[i] > 0) {
|
Chris@23
|
388 some_b_greater_zero = true;
|
Chris@23
|
389 }
|
Chris@23
|
390 }
|
matthiasm@1
|
391
|
Chris@23
|
392 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
393
|
Chris@23
|
394 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
395 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
396 float currval;
|
Chris@23
|
397 unsigned iSemitone = 0;
|
matthiasm@1
|
398
|
Chris@23
|
399 if (some_b_greater_zero) {
|
matthiasm@42
|
400 if (m_useNNLS == 0) {
|
mail@81
|
401 for (unsigned iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
402 currval = 0;
|
mail@81
|
403 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
404 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
|
mail@81
|
405 }
|
Chris@23
|
406 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
407 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
408 iSemitone++;
|
Chris@23
|
409 }
|
matthiasm@1
|
410
|
Chris@23
|
411 } else {
|
Chris@35
|
412 float x[84+1000];
|
Chris@23
|
413 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
414 vector<int> signifIndex;
|
Chris@23
|
415 int index=0;
|
Chris@23
|
416 sumb /= 84.0;
|
mail@81
|
417 for (unsigned iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
418 float currval = 0;
|
mail@81
|
419 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
420 currval += b[iNote + iBPS];
|
mail@81
|
421 }
|
Chris@23
|
422 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
423 index++;
|
Chris@23
|
424 }
|
Chris@35
|
425 float rnorm;
|
Chris@35
|
426 float w[84+1000];
|
Chris@35
|
427 float zz[84+1000];
|
Chris@23
|
428 int indx[84+1000];
|
Chris@23
|
429 int mode;
|
mail@77
|
430 int dictsize = nNote*signifIndex.size();
|
mail@81
|
431 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@35
|
432 float *curr_dict = new float[dictsize];
|
Chris@23
|
433 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
mail@77
|
434 for (unsigned iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
435 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
436 }
|
Chris@23
|
437 }
|
Chris@35
|
438 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
439 delete [] curr_dict;
|
Chris@23
|
440 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
|
Chris@23
|
441 // cerr << mode << endl;
|
Chris@23
|
442 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
443 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
444 }
|
Chris@23
|
445 }
|
Chris@23
|
446 }
|
Chris@35
|
447
|
Chris@35
|
448 vector<float> origchroma = chroma;
|
Chris@23
|
449 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@43
|
450 currentChromas.values = chroma;
|
Chris@35
|
451
|
Chris@23
|
452 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
453 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
454 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
455 case 0: // should never end up here
|
Chris@23
|
456 break;
|
Chris@23
|
457 case 1:
|
Chris@35
|
458 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
|
Chris@35
|
459 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
|
Chris@23
|
460 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
461 break;
|
Chris@23
|
462 case 2:
|
Chris@35
|
463 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
464 chromanorm[2] += *it;
|
Chris@23
|
465 }
|
Chris@23
|
466 break;
|
Chris@23
|
467 case 3:
|
Chris@35
|
468 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
469 chromanorm[2] += pow(*it,2);
|
Chris@23
|
470 }
|
Chris@23
|
471 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
472 break;
|
Chris@23
|
473 }
|
Chris@23
|
474 if (chromanorm[2] > 0) {
|
Chris@35
|
475 for (int i = 0; i < chroma.size(); i++) {
|
matthiasm@43
|
476 currentChromas.values[i] /= chromanorm[2];
|
Chris@23
|
477 }
|
Chris@23
|
478 }
|
Chris@23
|
479 }
|
Chris@35
|
480
|
matthiasm@43
|
481 chromaList.push_back(currentChromas);
|
Chris@35
|
482
|
Chris@23
|
483 // local chord estimation
|
matthiasm@43
|
484 vector<double> currentChordSalience;
|
matthiasm@43
|
485 double tempchordvalue = 0;
|
matthiasm@43
|
486 double sumchordvalue = 0;
|
matthiasm@9
|
487
|
Chris@23
|
488 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
489 tempchordvalue = 0;
|
Chris@23
|
490 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@44
|
491 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
492 }
|
Chris@23
|
493 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
494 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
495 }
|
matthiasm@48
|
496 if (iChord == nChord-1) tempchordvalue *= .7;
|
matthiasm@48
|
497 if (tempchordvalue < 0) tempchordvalue = 0.0;
|
matthiasm@50
|
498 tempchordvalue = pow(1.3,tempchordvalue);
|
Chris@23
|
499 sumchordvalue+=tempchordvalue;
|
Chris@23
|
500 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
501 }
|
Chris@23
|
502 if (sumchordvalue > 0) {
|
Chris@23
|
503 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
504 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
505 }
|
Chris@23
|
506 } else {
|
Chris@23
|
507 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
508 }
|
Chris@23
|
509 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
510
|
Chris@23
|
511 count++;
|
Chris@23
|
512 }
|
Chris@23
|
513 cerr << "done." << endl;
|
matthiasm@13
|
514
|
matthiasm@86
|
515 vector<Feature> oldnotes;
|
matthiasm@10
|
516
|
matthiasm@50
|
517 // bool m_useHMM = true; // this will go into the chordino header file.
|
matthiasm@50
|
518 if (m_useHMM == 1.0) {
|
matthiasm@44
|
519 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
|
matthiasm@43
|
520 int oldchord = nChord-1;
|
matthiasm@48
|
521 double selftransprob = 0.99;
|
matthiasm@43
|
522
|
matthiasm@48
|
523 // vector<double> init = vector<double>(nChord,1.0/nChord);
|
matthiasm@48
|
524 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
|
matthiasm@48
|
525
|
matthiasm@50
|
526 double *delta;
|
matthiasm@50
|
527 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
|
matthiasm@50
|
528
|
matthiasm@43
|
529 vector<vector<double> > trans;
|
matthiasm@43
|
530 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
531 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
|
matthiasm@43
|
532 temp[iChord] = selftransprob;
|
matthiasm@43
|
533 trans.push_back(temp);
|
matthiasm@43
|
534 }
|
matthiasm@50
|
535 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta);
|
matthiasm@48
|
536
|
matthiasm@48
|
537
|
matthiasm@48
|
538 Feature chord_feature; // chord estimate
|
matthiasm@48
|
539 chord_feature.hasTimestamp = true;
|
matthiasm@48
|
540 chord_feature.timestamp = timestamps[0];
|
matthiasm@48
|
541 chord_feature.label = m_chordnames[chordpath[0]];
|
mail@60
|
542 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
543
|
mail@60
|
544 chordchange[0] = 0;
|
matthiasm@50
|
545 for (int iFrame = 1; iFrame < chordpath.size(); ++iFrame) {
|
matthiasm@43
|
546 // cerr << chordpath[iFrame] << endl;
|
matthiasm@48
|
547 if (chordpath[iFrame] != oldchord ) {
|
matthiasm@86
|
548 // chord
|
matthiasm@43
|
549 Feature chord_feature; // chord estimate
|
matthiasm@43
|
550 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
551 chord_feature.timestamp = timestamps[iFrame];
|
matthiasm@43
|
552 chord_feature.label = m_chordnames[chordpath[iFrame]];
|
mail@60
|
553 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
554 oldchord = chordpath[iFrame];
|
matthiasm@86
|
555 // chord notes
|
matthiasm@86
|
556 for (int iNote = 0; iNote < oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
557 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
|
matthiasm@86
|
558 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
559 }
|
matthiasm@86
|
560 oldnotes.clear();
|
matthiasm@86
|
561 for (int iNote = 0; iNote < m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
562 Feature chordnote_feature;
|
matthiasm@86
|
563 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
564 chordnote_feature.timestamp = timestamps[iFrame];
|
matthiasm@86
|
565 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
|
matthiasm@86
|
566 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
567 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
|
matthiasm@86
|
568 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
569 }
|
Chris@23
|
570 }
|
matthiasm@50
|
571 /* calculating simple chord change prob */
|
matthiasm@50
|
572 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@50
|
573 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
|
matthiasm@50
|
574 }
|
Chris@23
|
575 }
|
matthiasm@43
|
576
|
matthiasm@43
|
577 // cerr << chordpath[0] << endl;
|
matthiasm@43
|
578 } else {
|
matthiasm@43
|
579 /* Simple chord estimation
|
matthiasm@43
|
580 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@43
|
581 take the maximum. Very simple, don't do this at home...
|
matthiasm@43
|
582 */
|
matthiasm@44
|
583 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
|
matthiasm@43
|
584 count = 0;
|
matthiasm@43
|
585 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@43
|
586 vector<int> chordSequence;
|
matthiasm@43
|
587 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
|
matthiasm@43
|
588 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@43
|
589 scoreChordogram.push_back(temp);
|
matthiasm@43
|
590 }
|
matthiasm@43
|
591 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
|
matthiasm@43
|
592 int startIndex = count + 1;
|
matthiasm@43
|
593 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@43
|
594
|
matthiasm@43
|
595 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@43
|
596
|
matthiasm@43
|
597 vector<int> chordCandidates;
|
matthiasm@43
|
598 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
|
matthiasm@43
|
599 // float currsum = 0;
|
matthiasm@43
|
600 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
601 // currsum += chordogram[iFrame][iChord];
|
matthiasm@43
|
602 // }
|
matthiasm@43
|
603 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
matthiasm@43
|
604 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
605 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@43
|
606 chordCandidates.push_back(iChord);
|
matthiasm@43
|
607 break;
|
matthiasm@43
|
608 }
|
Chris@23
|
609 }
|
Chris@23
|
610 }
|
matthiasm@43
|
611 chordCandidates.push_back(nChord-1);
|
matthiasm@43
|
612 // cerr << chordCandidates.size() << endl;
|
matthiasm@43
|
613
|
matthiasm@43
|
614 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@43
|
615 float maxindex = 0; //... and the index thereof
|
matthiasm@43
|
616 unsigned bestchordL = nChord-1; // index of the best "left" chord
|
matthiasm@43
|
617 unsigned bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@43
|
618
|
matthiasm@43
|
619 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@43
|
620 // now find the max values on both sides of iWF
|
matthiasm@43
|
621 // left side:
|
matthiasm@43
|
622 float maxL = 0;
|
matthiasm@43
|
623 unsigned maxindL = nChord-1;
|
matthiasm@43
|
624 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@43
|
625 unsigned iChord = chordCandidates[kChord];
|
matthiasm@43
|
626 float currsum = 0;
|
matthiasm@43
|
627 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@43
|
628 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
629 }
|
matthiasm@43
|
630 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
631 if (currsum > maxL) {
|
matthiasm@43
|
632 maxL = currsum;
|
matthiasm@43
|
633 maxindL = iChord;
|
matthiasm@43
|
634 }
|
matthiasm@43
|
635 }
|
matthiasm@43
|
636 // right side:
|
matthiasm@43
|
637 float maxR = 0;
|
matthiasm@43
|
638 unsigned maxindR = nChord-1;
|
matthiasm@43
|
639 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
|
matthiasm@43
|
640 unsigned iChord = chordCandidates[kChord];
|
matthiasm@43
|
641 float currsum = 0;
|
matthiasm@43
|
642 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
643 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
644 }
|
matthiasm@43
|
645 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
646 if (currsum > maxR) {
|
matthiasm@43
|
647 maxR = currsum;
|
matthiasm@43
|
648 maxindR = iChord;
|
matthiasm@43
|
649 }
|
matthiasm@43
|
650 }
|
matthiasm@43
|
651 if (maxL+maxR > maxval) {
|
matthiasm@43
|
652 maxval = maxL+maxR;
|
matthiasm@43
|
653 maxindex = iWF;
|
matthiasm@43
|
654 bestchordL = maxindL;
|
matthiasm@43
|
655 bestchordR = maxindR;
|
matthiasm@43
|
656 }
|
matthiasm@43
|
657
|
Chris@23
|
658 }
|
matthiasm@43
|
659 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@43
|
660 // add a score to every chord-frame-point that was part of a maximum
|
matthiasm@43
|
661 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@43
|
662 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@43
|
663 }
|
matthiasm@43
|
664 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
665 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@43
|
666 }
|
matthiasm@50
|
667 if (bestchordL != bestchordR) {
|
matthiasm@50
|
668 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
matthiasm@50
|
669 }
|
matthiasm@43
|
670 count++;
|
Chris@23
|
671 }
|
matthiasm@43
|
672 // cerr << "******* agent finished *******" << endl;
|
matthiasm@43
|
673 count = 0;
|
matthiasm@43
|
674 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
675 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@43
|
676 float maxindex = 0; //... and the index thereof
|
matthiasm@43
|
677 for (unsigned iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
678 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@43
|
679 maxval = scoreChordogram[count][iChord];
|
matthiasm@43
|
680 maxindex = iChord;
|
matthiasm@43
|
681 // cerr << iChord << endl;
|
matthiasm@43
|
682 }
|
matthiasm@43
|
683 }
|
matthiasm@43
|
684 chordSequence.push_back(maxindex);
|
matthiasm@43
|
685 count++;
|
Chris@23
|
686 }
|
matthiasm@43
|
687
|
matthiasm@43
|
688
|
matthiasm@43
|
689 // mode filter on chordSequence
|
matthiasm@43
|
690 count = 0;
|
matthiasm@43
|
691 string oldChord = "";
|
matthiasm@43
|
692 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
693 Feature chord_feature; // chord estimate
|
matthiasm@43
|
694 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
695 chord_feature.timestamp = *it;
|
matthiasm@43
|
696 // Feature currentChord; // chord estimate
|
matthiasm@43
|
697 // currentChord.hasTimestamp = true;
|
matthiasm@43
|
698 // currentChord.timestamp = currentChromas.timestamp;
|
matthiasm@43
|
699
|
matthiasm@43
|
700 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@43
|
701 int maxChordCount = 0;
|
matthiasm@43
|
702 int maxChordIndex = nChord-1;
|
matthiasm@43
|
703 string maxChord;
|
matthiasm@43
|
704 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@43
|
705 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@43
|
706 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@43
|
707 chordCount[chordSequence[i]]++;
|
matthiasm@43
|
708 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@43
|
709 // cerr << "start index " << startIndex << endl;
|
matthiasm@43
|
710 maxChordCount++;
|
matthiasm@43
|
711 maxChordIndex = chordSequence[i];
|
matthiasm@43
|
712 maxChord = m_chordnames[maxChordIndex];
|
matthiasm@43
|
713 }
|
matthiasm@43
|
714 }
|
matthiasm@43
|
715 // chordSequence[count] = maxChordIndex;
|
matthiasm@43
|
716 // cerr << maxChordIndex << endl;
|
matthiasm@50
|
717 // cerr << chordchange[count] << endl;
|
matthiasm@43
|
718 if (oldChord != maxChord) {
|
matthiasm@43
|
719 oldChord = maxChord;
|
matthiasm@43
|
720 chord_feature.label = m_chordnames[maxChordIndex];
|
mail@60
|
721 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@86
|
722 for (int iNote = 0; iNote < oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
723 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp;
|
matthiasm@86
|
724 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
725 }
|
matthiasm@86
|
726 oldnotes.clear();
|
matthiasm@86
|
727 for (int iNote = 0; iNote < m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
728 Feature chordnote_feature;
|
matthiasm@86
|
729 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
730 chordnote_feature.timestamp = chord_feature.timestamp;
|
matthiasm@86
|
731 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]);
|
matthiasm@86
|
732 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
733 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
|
matthiasm@86
|
734 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
735 }
|
matthiasm@43
|
736 }
|
matthiasm@43
|
737 count++;
|
Chris@23
|
738 }
|
Chris@23
|
739 }
|
matthiasm@43
|
740 Feature chord_feature; // last chord estimate
|
matthiasm@43
|
741 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
742 chord_feature.timestamp = timestamps[timestamps.size()-1];
|
matthiasm@43
|
743 chord_feature.label = "N";
|
mail@60
|
744 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@86
|
745
|
matthiasm@86
|
746 for (int iNote = 0; iNote < oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
747 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
|
matthiasm@86
|
748 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
749 }
|
matthiasm@86
|
750
|
Chris@23
|
751 cerr << "done." << endl;
|
matthiasm@50
|
752
|
matthiasm@50
|
753 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
|
matthiasm@50
|
754 Feature chordchange_feature;
|
matthiasm@50
|
755 chordchange_feature.hasTimestamp = true;
|
matthiasm@50
|
756 chordchange_feature.timestamp = timestamps[iFrame];
|
matthiasm@50
|
757 chordchange_feature.values.push_back(chordchange[iFrame]);
|
mail@60
|
758 // cerr << chordchange[iFrame] << endl;
|
mail@60
|
759 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
|
matthiasm@50
|
760 }
|
matthiasm@50
|
761
|
mail@60
|
762 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
|
matthiasm@50
|
763
|
matthiasm@50
|
764
|
Chris@23
|
765 return fsOut;
|
matthiasm@0
|
766 }
|