Chris@23
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
matthiasm@0
|
2
|
Chris@35
|
3 /*
|
Chris@35
|
4 NNLS-Chroma / Chordino
|
Chris@35
|
5
|
Chris@35
|
6 Audio feature extraction plugins for chromagram and chord
|
Chris@35
|
7 estimation.
|
Chris@35
|
8
|
Chris@35
|
9 Centre for Digital Music, Queen Mary University of London.
|
Chris@35
|
10 This file copyright 2008-2010 Matthias Mauch and QMUL.
|
Chris@35
|
11
|
Chris@35
|
12 This program is free software; you can redistribute it and/or
|
Chris@35
|
13 modify it under the terms of the GNU General Public License as
|
Chris@35
|
14 published by the Free Software Foundation; either version 2 of the
|
Chris@35
|
15 License, or (at your option) any later version. See the file
|
Chris@35
|
16 COPYING included with this distribution for more information.
|
Chris@35
|
17 */
|
Chris@35
|
18
|
Chris@35
|
19 #include "Chordino.h"
|
Chris@27
|
20
|
Chris@27
|
21 #include "chromamethods.h"
|
matthiasm@43
|
22 #include "viterbi.h"
|
Chris@27
|
23
|
Chris@27
|
24 #include <cstdlib>
|
Chris@27
|
25 #include <fstream>
|
matthiasm@0
|
26 #include <cmath>
|
matthiasm@9
|
27
|
Chris@27
|
28 #include <algorithm>
|
matthiasm@0
|
29
|
matthiasm@0
|
// Compile-time switch for the "--> <method>" trace messages written to cerr
// by the methods in this file. It is false here, so those traces are disabled.
static const bool debug_on = false;
|
matthiasm@0
|
31
|
Chris@35
|
32 Chordino::Chordino(float inputSampleRate) :
|
matthiasm@86
|
33 NNLSBase(inputSampleRate),
|
matthiasm@86
|
34 m_chorddict(0),
|
matthiasm@86
|
35 m_chordnotes(0),
|
matthiasm@86
|
36 m_chordnames(0)
|
matthiasm@0
|
37 {
|
Chris@35
|
38 if (debug_on) cerr << "--> Chordino" << endl;
|
matthiasm@86
|
39 // get the *chord* dictionary from file (if the file exists)
|
matthiasm@86
|
40
|
matthiasm@0
|
41 }
|
matthiasm@0
|
42
|
Chris@35
|
43 Chordino::~Chordino()
|
matthiasm@0
|
44 {
|
Chris@35
|
45 if (debug_on) cerr << "--> ~Chordino" << endl;
|
matthiasm@0
|
46 }
|
matthiasm@0
|
47
|
matthiasm@0
|
48 string
|
Chris@35
|
49 Chordino::getIdentifier() const
|
matthiasm@0
|
50 {
|
Chris@23
|
51 if (debug_on) cerr << "--> getIdentifier" << endl;
|
Chris@35
|
52 return "chordino";
|
matthiasm@0
|
53 }
|
matthiasm@0
|
54
|
matthiasm@0
|
55 string
|
Chris@35
|
56 Chordino::getName() const
|
matthiasm@0
|
57 {
|
Chris@23
|
58 if (debug_on) cerr << "--> getName" << endl;
|
Chris@35
|
59 return "Chordino";
|
matthiasm@0
|
60 }
|
matthiasm@0
|
61
|
matthiasm@0
|
62 string
|
Chris@35
|
63 Chordino::getDescription() const
|
matthiasm@0
|
64 {
|
Chris@23
|
65 if (debug_on) cerr << "--> getDescription" << endl;
|
matthiasm@58
|
66 return "Chordino provides a simple chord transcription based on NNLS Chroma (as in the NNLS Chroma plugin). Chord profiles given by the user in the file chord.dict are used to calculate frame-wise chord similarities. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
67 }
|
matthiasm@0
|
68
|
matthiasm@50
|
69 Chordino::ParameterList
|
matthiasm@50
|
70 Chordino::getParameterDescriptors() const
|
matthiasm@50
|
71 {
|
matthiasm@50
|
72 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
|
matthiasm@50
|
73 ParameterList list;
|
matthiasm@50
|
74
|
matthiasm@50
|
75 ParameterDescriptor d;
|
matthiasm@50
|
76 d.identifier = "useNNLS";
|
matthiasm@50
|
77 d.name = "use approximate transcription (NNLS)";
|
matthiasm@50
|
78 d.description = "Toggles approximate transcription (NNLS).";
|
matthiasm@50
|
79 d.unit = "";
|
matthiasm@50
|
80 d.minValue = 0.0;
|
matthiasm@50
|
81 d.maxValue = 1.0;
|
matthiasm@50
|
82 d.defaultValue = 1.0;
|
matthiasm@50
|
83 d.isQuantized = true;
|
matthiasm@50
|
84 d.quantizeStep = 1.0;
|
matthiasm@50
|
85 list.push_back(d);
|
matthiasm@50
|
86
|
matthiasm@50
|
87 ParameterDescriptor d4;
|
matthiasm@50
|
88 d4.identifier = "useHMM";
|
matthiasm@53
|
89 d4.name = "HMM (Viterbi decoding)";
|
matthiasm@50
|
90 d4.description = "Turns on Viterbi decoding (when off, the simple chord estimator is used).";
|
matthiasm@50
|
91 d4.unit = "";
|
matthiasm@50
|
92 d4.minValue = 0.0;
|
matthiasm@50
|
93 d4.maxValue = 1.0;
|
matthiasm@50
|
94 d4.defaultValue = 1.0;
|
matthiasm@50
|
95 d4.isQuantized = true;
|
matthiasm@50
|
96 d4.quantizeStep = 1.0;
|
matthiasm@50
|
97 list.push_back(d4);
|
matthiasm@50
|
98
|
matthiasm@50
|
99 ParameterDescriptor d0;
|
matthiasm@50
|
100 d0.identifier = "rollon";
|
matthiasm@50
|
101 d0.name = "spectral roll-on";
|
matthiasm@58
|
102 d0.description = "Consider the cumulative energy spectrum (from low to high frequencies). All bins below the first bin whose cumulative energy exceeds the quantile [spectral roll on] x [total energy] will be set to 0. A value of 0 means that no bins will be changed.";
|
matthiasm@59
|
103 d0.unit = "%";
|
matthiasm@50
|
104 d0.minValue = 0;
|
mail@76
|
105 d0.maxValue = 5;
|
matthiasm@92
|
106 d0.defaultValue = 0.0;
|
matthiasm@50
|
107 d0.isQuantized = true;
|
mail@76
|
108 d0.quantizeStep = 0.5;
|
matthiasm@50
|
109 list.push_back(d0);
|
matthiasm@50
|
110
|
matthiasm@50
|
111 ParameterDescriptor d1;
|
matthiasm@50
|
112 d1.identifier = "tuningmode";
|
matthiasm@50
|
113 d1.name = "tuning mode";
|
matthiasm@50
|
114 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
|
matthiasm@50
|
115 d1.unit = "";
|
matthiasm@50
|
116 d1.minValue = 0;
|
matthiasm@50
|
117 d1.maxValue = 1;
|
matthiasm@92
|
118 d1.defaultValue = 0.0;
|
matthiasm@50
|
119 d1.isQuantized = true;
|
matthiasm@50
|
120 d1.valueNames.push_back("global tuning");
|
matthiasm@50
|
121 d1.valueNames.push_back("local tuning");
|
matthiasm@50
|
122 d1.quantizeStep = 1.0;
|
matthiasm@50
|
123 list.push_back(d1);
|
matthiasm@50
|
124
|
matthiasm@50
|
125 ParameterDescriptor d2;
|
matthiasm@50
|
126 d2.identifier = "whitening";
|
matthiasm@50
|
127 d2.name = "spectral whitening";
|
matthiasm@50
|
128 d2.description = "Spectral whitening: no whitening - 0; whitening - 1.";
|
matthiasm@50
|
129 d2.unit = "";
|
matthiasm@50
|
130 d2.isQuantized = true;
|
matthiasm@50
|
131 d2.minValue = 0.0;
|
matthiasm@50
|
132 d2.maxValue = 1.0;
|
matthiasm@50
|
133 d2.defaultValue = 1.0;
|
matthiasm@50
|
134 d2.isQuantized = false;
|
matthiasm@50
|
135 list.push_back(d2);
|
matthiasm@50
|
136
|
matthiasm@50
|
137 ParameterDescriptor d3;
|
matthiasm@50
|
138 d3.identifier = "s";
|
matthiasm@50
|
139 d3.name = "spectral shape";
|
matthiasm@50
|
140 d3.description = "Determines how individual notes in the note dictionary look: higher values mean more dominant higher harmonics.";
|
matthiasm@50
|
141 d3.unit = "";
|
matthiasm@50
|
142 d3.minValue = 0.5;
|
matthiasm@50
|
143 d3.maxValue = 0.9;
|
matthiasm@50
|
144 d3.defaultValue = 0.7;
|
matthiasm@50
|
145 d3.isQuantized = false;
|
matthiasm@50
|
146 list.push_back(d3);
|
matthiasm@50
|
147
|
mail@89
|
148 ParameterDescriptor boostn;
|
mail@89
|
149 boostn.identifier = "boostn";
|
mail@89
|
150 boostn.name = "boost N";
|
matthiasm@95
|
151 boostn.description = "Boost likelihood of the N (no chord) label.";
|
mail@89
|
152 boostn.unit = "";
|
matthiasm@95
|
153 boostn.minValue = 0.0;
|
matthiasm@95
|
154 boostn.maxValue = 1.0;
|
matthiasm@95
|
155 boostn.defaultValue = 0.1;
|
mail@89
|
156 boostn.isQuantized = false;
|
mail@89
|
157 list.push_back(boostn);
|
matthiasm@50
|
158
|
matthiasm@50
|
159 return list;
|
matthiasm@50
|
160 }
|
matthiasm@50
|
161
|
Chris@35
|
162 Chordino::OutputList
|
Chris@35
|
163 Chordino::getOutputDescriptors() const
|
matthiasm@0
|
164 {
|
Chris@23
|
165 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
|
matthiasm@0
|
166 OutputList list;
|
matthiasm@0
|
167
|
Chris@35
|
168 int index = 0;
|
matthiasm@0
|
169
|
matthiasm@0
|
170 OutputDescriptor d7;
|
matthiasm@0
|
171 d7.identifier = "simplechord";
|
Chris@36
|
172 d7.name = "Chord Estimate";
|
matthiasm@58
|
173 d7.description = "Estimated chord times and labels. Two simple (non-state-of-the-art!) algorithms are available that smooth these to provide a chord transcription: a simple chord change method, and a standard HMM/Viterbi approach.";
|
matthiasm@0
|
174 d7.unit = "";
|
matthiasm@0
|
175 d7.hasFixedBinCount = true;
|
matthiasm@0
|
176 d7.binCount = 0;
|
matthiasm@0
|
177 d7.hasKnownExtents = false;
|
matthiasm@0
|
178 d7.isQuantized = false;
|
matthiasm@0
|
179 d7.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@0
|
180 d7.hasDuration = false;
|
matthiasm@0
|
181 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@0
|
182 list.push_back(d7);
|
Chris@35
|
183 m_outputChords = index++;
|
matthiasm@0
|
184
|
matthiasm@86
|
185 OutputDescriptor chordnotes;
|
matthiasm@86
|
186 chordnotes.identifier = "chordnotes";
|
matthiasm@86
|
187 chordnotes.name = "Note Representation of Chord Estimate";
|
matthiasm@86
|
188 chordnotes.description = "A simple represenation of the estimated chord with bass note (if applicable) and chord notes.";
|
matthiasm@86
|
189 chordnotes.unit = "MIDI units";
|
matthiasm@86
|
190 chordnotes.hasFixedBinCount = true;
|
matthiasm@86
|
191 chordnotes.binCount = 1;
|
matthiasm@86
|
192 chordnotes.hasKnownExtents = true;
|
matthiasm@86
|
193 chordnotes.minValue = 0;
|
matthiasm@86
|
194 chordnotes.maxValue = 127;
|
matthiasm@86
|
195 chordnotes.isQuantized = true;
|
matthiasm@86
|
196 chordnotes.quantizeStep = 1;
|
matthiasm@86
|
197 chordnotes.sampleType = OutputDescriptor::VariableSampleRate;
|
matthiasm@86
|
198 chordnotes.hasDuration = true;
|
matthiasm@86
|
199 chordnotes.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@86
|
200 list.push_back(chordnotes);
|
matthiasm@86
|
201 m_outputChordnotes = index++;
|
matthiasm@86
|
202
|
Chris@23
|
203 OutputDescriptor d8;
|
mail@60
|
204 d8.identifier = "harmonicchange";
|
Chris@36
|
205 d8.name = "Harmonic Change Value";
|
matthiasm@58
|
206 d8.description = "An indication of the likelihood of harmonic change. Depends on the chord dictionary. Calculation is different depending on whether the Viterbi algorithm is used for chord estimation, or the simple chord estimate.";
|
matthiasm@17
|
207 d8.unit = "";
|
matthiasm@17
|
208 d8.hasFixedBinCount = true;
|
matthiasm@17
|
209 d8.binCount = 1;
|
mail@60
|
210 d8.hasKnownExtents = false;
|
mail@60
|
211 // d8.minValue = 0.0;
|
mail@60
|
212 // d8.maxValue = 0.999;
|
matthiasm@17
|
213 d8.isQuantized = false;
|
matthiasm@17
|
214 d8.sampleType = OutputDescriptor::FixedSampleRate;
|
matthiasm@17
|
215 d8.hasDuration = false;
|
matthiasm@17
|
216 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
|
matthiasm@17
|
217 list.push_back(d8);
|
Chris@35
|
218 m_outputHarmonicChange = index++;
|
matthiasm@1
|
219
|
matthiasm@0
|
220 return list;
|
matthiasm@0
|
221 }
|
matthiasm@0
|
222
|
matthiasm@0
|
223 bool
|
Chris@35
|
224 Chordino::initialise(size_t channels, size_t stepSize, size_t blockSize)
|
matthiasm@0
|
225 {
|
Chris@23
|
226 if (debug_on) {
|
Chris@23
|
227 cerr << "--> initialise";
|
Chris@23
|
228 }
|
mail@76
|
229
|
Chris@35
|
230 if (!NNLSBase::initialise(channels, stepSize, blockSize)) {
|
Chris@35
|
231 return false;
|
Chris@35
|
232 }
|
mail@89
|
233 m_chordnames = chordDictionary(&m_chorddict, &m_chordnotes, m_boostN);
|
matthiasm@0
|
234 return true;
|
matthiasm@0
|
235 }
|
matthiasm@0
|
236
|
matthiasm@0
|
237 void
|
Chris@35
|
238 Chordino::reset()
|
matthiasm@0
|
239 {
|
Chris@23
|
240 if (debug_on) cerr << "--> reset";
|
Chris@35
|
241 NNLSBase::reset();
|
matthiasm@0
|
242 }
|
matthiasm@0
|
243
|
Chris@35
|
244 Chordino::FeatureSet
|
Chris@35
|
245 Chordino::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
|
matthiasm@0
|
246 {
|
Chris@23
|
247 if (debug_on) cerr << "--> process" << endl;
|
matthiasm@0
|
248
|
Chris@35
|
249 NNLSBase::baseProcess(inputBuffers, timestamp);
|
matthiasm@0
|
250
|
Chris@35
|
251 return FeatureSet();
|
matthiasm@0
|
252 }
|
matthiasm@0
|
253
|
Chris@35
|
254 Chordino::FeatureSet
|
Chris@35
|
255 Chordino::getRemainingFeatures()
|
matthiasm@0
|
256 {
|
mail@89
|
257 // cerr << hw[0] << hw[1] << endl;
|
mail@89
|
258 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
|
Chris@23
|
259 FeatureSet fsOut;
|
Chris@35
|
260 if (m_logSpectrum.size() == 0) return fsOut;
|
Chris@23
|
261 int nChord = m_chordnames.size();
|
Chris@23
|
262 //
|
Chris@23
|
263 /** Calculate Tuning
|
Chris@23
|
264 calculate tuning from (using the angle of the complex number defined by the
|
Chris@23
|
265 cumulative mean real and imag values)
|
Chris@23
|
266 **/
|
mail@80
|
267 float meanTuningImag = 0;
|
mail@80
|
268 float meanTuningReal = 0;
|
mail@80
|
269 for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
|
mail@80
|
270 meanTuningReal += m_meanTunings[iBPS] * cosvalues[iBPS];
|
mail@80
|
271 meanTuningImag += m_meanTunings[iBPS] * sinvalues[iBPS];
|
mail@80
|
272 }
|
Chris@23
|
273 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
|
Chris@23
|
274 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
|
Chris@23
|
275 int intShift = floor(normalisedtuning * 3);
|
mail@80
|
276 float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this
|
matthiasm@1
|
277
|
Chris@23
|
278 char buffer0 [50];
|
matthiasm@1
|
279
|
Chris@23
|
280 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
|
matthiasm@1
|
281
|
matthiasm@1
|
282
|
Chris@23
|
283 /** Tune Log-Frequency Spectrogram
|
matthiasm@43
|
284 calculate a tuned log-frequency spectrogram (currentTunedSpec): use the tuning estimated above (kinda f0) to
|
Chris@91
|
285 perform linear interpolation on the existing log-frequency spectrogram (kinda currentLogSpectrum).
|
Chris@23
|
286 **/
|
Chris@35
|
287 cerr << endl << "[Chordino Plugin] Tuning Log-Frequency Spectrogram ... ";
|
matthiasm@13
|
288
|
Chris@23
|
289 float tempValue = 0;
|
Chris@23
|
290 float dbThreshold = 0; // relative to the background spectrum
|
Chris@23
|
291 float thresh = pow(10,dbThreshold/20);
|
Chris@23
|
292 // cerr << "tune local ? " << m_tuneLocal << endl;
|
Chris@23
|
293 int count = 0;
|
matthiasm@1
|
294
|
Chris@35
|
295 FeatureList tunedSpec;
|
matthiasm@43
|
296 int nFrame = m_logSpectrum.size();
|
matthiasm@43
|
297
|
matthiasm@43
|
298 vector<Vamp::RealTime> timestamps;
|
Chris@35
|
299
|
Chris@35
|
300 for (FeatureList::iterator i = m_logSpectrum.begin(); i != m_logSpectrum.end(); ++i) {
|
Chris@91
|
301 Feature currentLogSpectrum = *i;
|
matthiasm@43
|
302 Feature currentTunedSpec; // tuned log-frequency spectrum
|
matthiasm@43
|
303 currentTunedSpec.hasTimestamp = true;
|
Chris@91
|
304 currentTunedSpec.timestamp = currentLogSpectrum.timestamp;
|
Chris@91
|
305 timestamps.push_back(currentLogSpectrum.timestamp);
|
matthiasm@43
|
306 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // set lower edge to zero
|
matthiasm@1
|
307
|
Chris@23
|
308 if (m_tuneLocal) {
|
Chris@23
|
309 intShift = floor(m_localTuning[count] * 3);
|
mail@80
|
310 floatShift = m_localTuning[count] * 3 - intShift; // floatShift is a really bad name for this
|
Chris@23
|
311 }
|
matthiasm@1
|
312
|
mail@80
|
313 // cerr << intShift << " " << floatShift << endl;
|
matthiasm@1
|
314
|
Chris@91
|
315 for (int k = 2; k < (int)currentLogSpectrum.values.size() - 3; ++k) { // interpolate all inner bins
|
Chris@91
|
316 tempValue = currentLogSpectrum.values[k + intShift] * (1-floatShift) + currentLogSpectrum.values[k+intShift+1] * floatShift;
|
matthiasm@43
|
317 currentTunedSpec.values.push_back(tempValue);
|
Chris@23
|
318 }
|
matthiasm@1
|
319
|
matthiasm@43
|
320 currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); currentTunedSpec.values.push_back(0.0); // upper edge
|
matthiasm@43
|
321 vector<float> runningmean = SpecialConvolution(currentTunedSpec.values,hw);
|
Chris@23
|
322 vector<float> runningstd;
|
mail@77
|
323 for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance)
|
matthiasm@43
|
324 runningstd.push_back((currentTunedSpec.values[i] - runningmean[i]) * (currentTunedSpec.values[i] - runningmean[i]));
|
Chris@23
|
325 }
|
Chris@23
|
326 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
|
mail@77
|
327 for (int i = 0; i < nNote; i++) {
|
Chris@23
|
328 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
|
Chris@23
|
329 if (runningstd[i] > 0) {
|
matthiasm@43
|
330 // currentTunedSpec.values[i] = (currentTunedSpec.values[i] / runningmean[i]) > thresh ?
|
matthiasm@43
|
331 // (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
matthiasm@43
|
332 currentTunedSpec.values[i] = (currentTunedSpec.values[i] - runningmean[i]) > 0 ?
|
matthiasm@43
|
333 (currentTunedSpec.values[i] - runningmean[i]) / pow(runningstd[i],m_whitening) : 0;
|
Chris@23
|
334 }
|
matthiasm@43
|
335 if (currentTunedSpec.values[i] < 0) {
|
Chris@23
|
336 cerr << "ERROR: negative value in logfreq spectrum" << endl;
|
Chris@23
|
337 }
|
Chris@23
|
338 }
|
matthiasm@43
|
339 tunedSpec.push_back(currentTunedSpec);
|
Chris@23
|
340 count++;
|
Chris@23
|
341 }
|
Chris@23
|
342 cerr << "done." << endl;
|
matthiasm@1
|
343
|
Chris@23
|
344 /** Semitone spectrum and chromagrams
|
Chris@23
|
345 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
|
Chris@23
|
346 is inferred using a non-negative least squares algorithm.
|
Chris@23
|
347 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
|
Chris@23
|
348 bass and treble stacked onto each other).
|
Chris@23
|
349 **/
|
matthiasm@42
|
350 if (m_useNNLS == 0) {
|
Chris@35
|
351 cerr << "[Chordino Plugin] Mapping to semitone spectrum and chroma ... ";
|
Chris@23
|
352 } else {
|
Chris@35
|
353 cerr << "[Chordino Plugin] Performing NNLS and mapping to chroma ... ";
|
Chris@23
|
354 }
|
matthiasm@13
|
355
|
matthiasm@1
|
356
|
matthiasm@43
|
357 vector<vector<double> > chordogram;
|
Chris@23
|
358 vector<vector<int> > scoreChordogram;
|
Chris@35
|
359 vector<float> chordchange = vector<float>(tunedSpec.size(),0);
|
Chris@23
|
360 count = 0;
|
matthiasm@9
|
361
|
Chris@35
|
362 FeatureList chromaList;
|
matthiasm@43
|
363
|
matthiasm@43
|
364
|
Chris@35
|
365
|
Chris@35
|
366 for (FeatureList::iterator it = tunedSpec.begin(); it != tunedSpec.end(); ++it) {
|
matthiasm@43
|
367 Feature currentTunedSpec = *it; // logfreq spectrum
|
matthiasm@43
|
368 Feature currentChromas; // treble and bass chromagram
|
Chris@35
|
369
|
matthiasm@43
|
370 currentChromas.hasTimestamp = true;
|
matthiasm@43
|
371 currentChromas.timestamp = currentTunedSpec.timestamp;
|
Chris@35
|
372
|
mail@77
|
373 float b[nNote];
|
matthiasm@1
|
374
|
Chris@23
|
375 bool some_b_greater_zero = false;
|
Chris@23
|
376 float sumb = 0;
|
mail@77
|
377 for (int i = 0; i < nNote; i++) {
|
mail@77
|
378 // b[i] = m_dict[(nNote * count + i) % (nNote * 84)];
|
matthiasm@43
|
379 b[i] = currentTunedSpec.values[i];
|
Chris@23
|
380 sumb += b[i];
|
Chris@23
|
381 if (b[i] > 0) {
|
Chris@23
|
382 some_b_greater_zero = true;
|
Chris@23
|
383 }
|
Chris@23
|
384 }
|
matthiasm@1
|
385
|
Chris@23
|
386 // here's where the non-negative least squares algorithm calculates the note activation x
|
matthiasm@1
|
387
|
Chris@23
|
388 vector<float> chroma = vector<float>(12, 0);
|
Chris@23
|
389 vector<float> basschroma = vector<float>(12, 0);
|
Chris@23
|
390 float currval;
|
Chris@91
|
391 int iSemitone = 0;
|
matthiasm@1
|
392
|
Chris@23
|
393 if (some_b_greater_zero) {
|
matthiasm@42
|
394 if (m_useNNLS == 0) {
|
Chris@91
|
395 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
396 currval = 0;
|
mail@81
|
397 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
398 currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1)));
|
mail@81
|
399 }
|
Chris@23
|
400 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
|
Chris@23
|
401 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
|
Chris@23
|
402 iSemitone++;
|
Chris@23
|
403 }
|
matthiasm@1
|
404
|
Chris@23
|
405 } else {
|
Chris@35
|
406 float x[84+1000];
|
Chris@23
|
407 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
|
Chris@23
|
408 vector<int> signifIndex;
|
Chris@23
|
409 int index=0;
|
Chris@23
|
410 sumb /= 84.0;
|
Chris@91
|
411 for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
|
Chris@23
|
412 float currval = 0;
|
mail@81
|
413 for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
|
mail@81
|
414 currval += b[iNote + iBPS];
|
mail@81
|
415 }
|
Chris@23
|
416 if (currval > 0) signifIndex.push_back(index);
|
Chris@23
|
417 index++;
|
Chris@23
|
418 }
|
Chris@35
|
419 float rnorm;
|
Chris@35
|
420 float w[84+1000];
|
Chris@35
|
421 float zz[84+1000];
|
Chris@23
|
422 int indx[84+1000];
|
Chris@23
|
423 int mode;
|
mail@77
|
424 int dictsize = nNote*signifIndex.size();
|
mail@81
|
425 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
|
Chris@35
|
426 float *curr_dict = new float[dictsize];
|
Chris@91
|
427 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@91
|
428 for (int iBin = 0; iBin < nNote; iBin++) {
|
mail@77
|
429 curr_dict[iNote * nNote + iBin] = 1.0 * m_dict[signifIndex[iNote] * nNote + iBin];
|
Chris@23
|
430 }
|
Chris@23
|
431 }
|
Chris@35
|
432 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
|
Chris@23
|
433 delete [] curr_dict;
|
Chris@91
|
434 for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
|
Chris@23
|
435 // cerr << mode << endl;
|
Chris@23
|
436 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
|
Chris@23
|
437 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
|
Chris@23
|
438 }
|
Chris@23
|
439 }
|
Chris@23
|
440 }
|
Chris@35
|
441
|
Chris@35
|
442 vector<float> origchroma = chroma;
|
Chris@23
|
443 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
|
matthiasm@43
|
444 currentChromas.values = chroma;
|
Chris@35
|
445
|
Chris@23
|
446 if (m_doNormalizeChroma > 0) {
|
Chris@23
|
447 vector<float> chromanorm = vector<float>(3,0);
|
Chris@23
|
448 switch (int(m_doNormalizeChroma)) {
|
Chris@23
|
449 case 0: // should never end up here
|
Chris@23
|
450 break;
|
Chris@23
|
451 case 1:
|
Chris@35
|
452 chromanorm[0] = *max_element(origchroma.begin(), origchroma.end());
|
Chris@35
|
453 chromanorm[1] = *max_element(basschroma.begin(), basschroma.end());
|
Chris@23
|
454 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
|
Chris@23
|
455 break;
|
Chris@23
|
456 case 2:
|
Chris@35
|
457 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
458 chromanorm[2] += *it;
|
Chris@23
|
459 }
|
Chris@23
|
460 break;
|
Chris@23
|
461 case 3:
|
Chris@35
|
462 for (vector<float>::iterator it = chroma.begin(); it != chroma.end(); ++it) {
|
Chris@23
|
463 chromanorm[2] += pow(*it,2);
|
Chris@23
|
464 }
|
Chris@23
|
465 chromanorm[2] = sqrt(chromanorm[2]);
|
Chris@23
|
466 break;
|
Chris@23
|
467 }
|
Chris@23
|
468 if (chromanorm[2] > 0) {
|
Chris@91
|
469 for (int i = 0; i < (int)chroma.size(); i++) {
|
matthiasm@43
|
470 currentChromas.values[i] /= chromanorm[2];
|
Chris@23
|
471 }
|
Chris@23
|
472 }
|
Chris@23
|
473 }
|
Chris@35
|
474
|
matthiasm@43
|
475 chromaList.push_back(currentChromas);
|
Chris@35
|
476
|
Chris@23
|
477 // local chord estimation
|
matthiasm@43
|
478 vector<double> currentChordSalience;
|
matthiasm@43
|
479 double tempchordvalue = 0;
|
matthiasm@43
|
480 double sumchordvalue = 0;
|
matthiasm@9
|
481
|
Chris@23
|
482 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
483 tempchordvalue = 0;
|
Chris@23
|
484 for (int iBin = 0; iBin < 12; iBin++) {
|
matthiasm@44
|
485 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
486 }
|
Chris@23
|
487 for (int iBin = 12; iBin < 24; iBin++) {
|
Chris@23
|
488 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
|
Chris@23
|
489 }
|
matthiasm@48
|
490 if (iChord == nChord-1) tempchordvalue *= .7;
|
matthiasm@48
|
491 if (tempchordvalue < 0) tempchordvalue = 0.0;
|
matthiasm@50
|
492 tempchordvalue = pow(1.3,tempchordvalue);
|
Chris@23
|
493 sumchordvalue+=tempchordvalue;
|
Chris@23
|
494 currentChordSalience.push_back(tempchordvalue);
|
Chris@23
|
495 }
|
Chris@23
|
496 if (sumchordvalue > 0) {
|
Chris@23
|
497 for (int iChord = 0; iChord < nChord; iChord++) {
|
Chris@23
|
498 currentChordSalience[iChord] /= sumchordvalue;
|
Chris@23
|
499 }
|
Chris@23
|
500 } else {
|
Chris@23
|
501 currentChordSalience[nChord-1] = 1.0;
|
Chris@23
|
502 }
|
Chris@23
|
503 chordogram.push_back(currentChordSalience);
|
matthiasm@1
|
504
|
Chris@23
|
505 count++;
|
Chris@23
|
506 }
|
Chris@23
|
507 cerr << "done." << endl;
|
matthiasm@13
|
508
|
matthiasm@86
|
509 vector<Feature> oldnotes;
|
matthiasm@10
|
510
|
matthiasm@50
|
511 // bool m_useHMM = true; // this will go into the chordino header file.
|
matthiasm@50
|
512 if (m_useHMM == 1.0) {
|
matthiasm@44
|
513 cerr << "[Chordino Plugin] HMM Chord Estimation ... ";
|
matthiasm@43
|
514 int oldchord = nChord-1;
|
matthiasm@48
|
515 double selftransprob = 0.99;
|
matthiasm@43
|
516
|
matthiasm@48
|
517 // vector<double> init = vector<double>(nChord,1.0/nChord);
|
matthiasm@48
|
518 vector<double> init = vector<double>(nChord,0); init[nChord-1] = 1;
|
matthiasm@48
|
519
|
matthiasm@50
|
520 double *delta;
|
matthiasm@50
|
521 delta = (double *)malloc(sizeof(double)*nFrame*nChord);
|
matthiasm@50
|
522
|
matthiasm@43
|
523 vector<vector<double> > trans;
|
matthiasm@43
|
524 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
525 vector<double> temp = vector<double>(nChord,(1-selftransprob)/(nChord-1));
|
matthiasm@43
|
526 temp[iChord] = selftransprob;
|
matthiasm@43
|
527 trans.push_back(temp);
|
matthiasm@43
|
528 }
|
matthiasm@50
|
529 vector<int> chordpath = ViterbiPath(init, trans, chordogram, delta);
|
matthiasm@48
|
530
|
matthiasm@48
|
531
|
matthiasm@48
|
532 Feature chord_feature; // chord estimate
|
matthiasm@48
|
533 chord_feature.hasTimestamp = true;
|
matthiasm@48
|
534 chord_feature.timestamp = timestamps[0];
|
matthiasm@48
|
535 chord_feature.label = m_chordnames[chordpath[0]];
|
mail@60
|
536 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
537
|
mail@60
|
538 chordchange[0] = 0;
|
Chris@91
|
539 for (int iFrame = 1; iFrame < (int)chordpath.size(); ++iFrame) {
|
matthiasm@43
|
540 // cerr << chordpath[iFrame] << endl;
|
matthiasm@48
|
541 if (chordpath[iFrame] != oldchord ) {
|
matthiasm@86
|
542 // chord
|
matthiasm@43
|
543 Feature chord_feature; // chord estimate
|
matthiasm@43
|
544 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
545 chord_feature.timestamp = timestamps[iFrame];
|
matthiasm@43
|
546 chord_feature.label = m_chordnames[chordpath[iFrame]];
|
mail@60
|
547 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@43
|
548 oldchord = chordpath[iFrame];
|
matthiasm@86
|
549 // chord notes
|
Chris@91
|
550 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
551 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[iFrame];
|
matthiasm@86
|
552 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
553 }
|
matthiasm@86
|
554 oldnotes.clear();
|
Chris@91
|
555 for (int iNote = 0; iNote < (int)m_chordnotes[chordpath[iFrame]].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
556 Feature chordnote_feature;
|
matthiasm@86
|
557 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
558 chordnote_feature.timestamp = timestamps[iFrame];
|
matthiasm@86
|
559 chordnote_feature.values.push_back(m_chordnotes[chordpath[iFrame]][iNote]);
|
matthiasm@86
|
560 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
561 chordnote_feature.duration = -timestamps[iFrame]; // this will be corrected at the next chord
|
matthiasm@86
|
562 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
563 }
|
Chris@23
|
564 }
|
matthiasm@50
|
565 /* calculating simple chord change prob */
|
matthiasm@50
|
566 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@50
|
567 chordchange[iFrame-1] += delta[(iFrame-1)*nChord + iChord] * log(delta[(iFrame-1)*nChord + iChord]/delta[iFrame*nChord + iChord]);
|
matthiasm@50
|
568 }
|
Chris@23
|
569 }
|
matthiasm@43
|
570
|
matthiasm@43
|
571 // cerr << chordpath[0] << endl;
|
matthiasm@43
|
572 } else {
|
matthiasm@43
|
573 /* Simple chord estimation
|
matthiasm@43
|
574 I just take the local chord estimates ("currentChordSalience") and average them over time, then
|
matthiasm@43
|
575 take the maximum. Very simple, don't do this at home...
|
matthiasm@43
|
576 */
|
matthiasm@44
|
577 cerr << "[Chordino Plugin] Simple Chord Estimation ... ";
|
matthiasm@43
|
578 count = 0;
|
matthiasm@43
|
579 int halfwindowlength = m_inputSampleRate / m_stepSize;
|
matthiasm@43
|
580 vector<int> chordSequence;
|
matthiasm@43
|
581 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) { // initialise the score chordogram
|
matthiasm@43
|
582 vector<int> temp = vector<int>(nChord,0);
|
matthiasm@43
|
583 scoreChordogram.push_back(temp);
|
matthiasm@43
|
584 }
|
matthiasm@43
|
585 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it < timestamps.end()-2*halfwindowlength-1; ++it) {
|
matthiasm@43
|
586 int startIndex = count + 1;
|
matthiasm@43
|
587 int endIndex = count + 2 * halfwindowlength;
|
matthiasm@43
|
588
|
matthiasm@43
|
589 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
|
matthiasm@43
|
590
|
matthiasm@43
|
591 vector<int> chordCandidates;
|
Chris@91
|
592 for (int iChord = 0; iChord+1 < nChord; iChord++) {
|
matthiasm@43
|
593 // float currsum = 0;
|
Chris@91
|
594 // for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
595 // currsum += chordogram[iFrame][iChord];
|
matthiasm@43
|
596 // }
|
matthiasm@43
|
597 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
|
Chris@91
|
598 for (int iFrame = startIndex; iFrame < endIndex; ++iFrame) {
|
matthiasm@43
|
599 if (chordogram[iFrame][iChord] > chordThreshold) {
|
matthiasm@43
|
600 chordCandidates.push_back(iChord);
|
matthiasm@43
|
601 break;
|
matthiasm@43
|
602 }
|
Chris@23
|
603 }
|
Chris@23
|
604 }
|
matthiasm@43
|
605 chordCandidates.push_back(nChord-1);
|
matthiasm@43
|
606 // cerr << chordCandidates.size() << endl;
|
matthiasm@43
|
607
|
matthiasm@43
|
608 float maxval = 0; // will be the value of the most salient *chord change* in this frame
|
matthiasm@43
|
609 float maxindex = 0; //... and the index thereof
|
Chris@91
|
610 int bestchordL = nChord-1; // index of the best "left" chord
|
Chris@91
|
611 int bestchordR = nChord-1; // index of the best "right" chord
|
matthiasm@43
|
612
|
matthiasm@43
|
613 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
|
matthiasm@43
|
614 // now find the max values on both sides of iWF
|
matthiasm@43
|
615 // left side:
|
matthiasm@43
|
616 float maxL = 0;
|
Chris@91
|
617 int maxindL = nChord-1;
|
Chris@91
|
618 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
|
Chris@91
|
619 int iChord = chordCandidates[kChord];
|
matthiasm@43
|
620 float currsum = 0;
|
Chris@91
|
621 for (int iFrame = 0; iFrame < iWF-1; ++iFrame) {
|
matthiasm@43
|
622 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
623 }
|
matthiasm@43
|
624 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
625 if (currsum > maxL) {
|
matthiasm@43
|
626 maxL = currsum;
|
matthiasm@43
|
627 maxindL = iChord;
|
matthiasm@43
|
628 }
|
matthiasm@43
|
629 }
|
matthiasm@43
|
630 // right side:
|
matthiasm@43
|
631 float maxR = 0;
|
Chris@91
|
632 int maxindR = nChord-1;
|
Chris@91
|
633 for (int kChord = 0; kChord < (int)chordCandidates.size(); kChord++) {
|
Chris@91
|
634 int iChord = chordCandidates[kChord];
|
matthiasm@43
|
635 float currsum = 0;
|
Chris@91
|
636 for (int iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
637 currsum += chordogram[count+iFrame][iChord];
|
matthiasm@43
|
638 }
|
matthiasm@43
|
639 if (iChord == nChord-1) currsum *= 0.8;
|
matthiasm@43
|
640 if (currsum > maxR) {
|
matthiasm@43
|
641 maxR = currsum;
|
matthiasm@43
|
642 maxindR = iChord;
|
matthiasm@43
|
643 }
|
matthiasm@43
|
644 }
|
matthiasm@43
|
645 if (maxL+maxR > maxval) {
|
matthiasm@43
|
646 maxval = maxL+maxR;
|
matthiasm@43
|
647 maxindex = iWF;
|
matthiasm@43
|
648 bestchordL = maxindL;
|
matthiasm@43
|
649 bestchordR = maxindR;
|
matthiasm@43
|
650 }
|
matthiasm@43
|
651
|
Chris@23
|
652 }
|
matthiasm@43
|
653 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
|
matthiasm@43
|
654 // add a score to every chord-frame-point that was part of a maximum
|
Chris@91
|
655 for (int iFrame = 0; iFrame < maxindex-1; ++iFrame) {
|
matthiasm@43
|
656 scoreChordogram[iFrame+count][bestchordL]++;
|
matthiasm@43
|
657 }
|
Chris@91
|
658 for (int iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
|
matthiasm@43
|
659 scoreChordogram[iFrame+count][bestchordR]++;
|
matthiasm@43
|
660 }
|
matthiasm@50
|
661 if (bestchordL != bestchordR) {
|
matthiasm@50
|
662 chordchange[maxindex+count] += (halfwindowlength - abs(maxindex-halfwindowlength)) * 2.0 / halfwindowlength;
|
matthiasm@50
|
663 }
|
matthiasm@43
|
664 count++;
|
Chris@23
|
665 }
|
matthiasm@43
|
666 // cerr << "******* agent finished *******" << endl;
|
matthiasm@43
|
667 count = 0;
|
matthiasm@43
|
668 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
669 float maxval = 0; // will be the value of the most salient chord in this frame
|
matthiasm@43
|
670 float maxindex = 0; //... and the index thereof
|
Chris@91
|
671 for (int iChord = 0; iChord < nChord; iChord++) {
|
matthiasm@43
|
672 if (scoreChordogram[count][iChord] > maxval) {
|
matthiasm@43
|
673 maxval = scoreChordogram[count][iChord];
|
matthiasm@43
|
674 maxindex = iChord;
|
matthiasm@43
|
675 // cerr << iChord << endl;
|
matthiasm@43
|
676 }
|
matthiasm@43
|
677 }
|
matthiasm@43
|
678 chordSequence.push_back(maxindex);
|
matthiasm@43
|
679 count++;
|
Chris@23
|
680 }
|
matthiasm@43
|
681
|
matthiasm@43
|
682
|
matthiasm@43
|
683 // mode filter on chordSequence
|
matthiasm@43
|
684 count = 0;
|
matthiasm@43
|
685 string oldChord = "";
|
matthiasm@43
|
686 for (vector<Vamp::RealTime>::iterator it = timestamps.begin(); it != timestamps.end(); ++it) {
|
matthiasm@43
|
687 Feature chord_feature; // chord estimate
|
matthiasm@43
|
688 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
689 chord_feature.timestamp = *it;
|
matthiasm@43
|
690 // Feature currentChord; // chord estimate
|
matthiasm@43
|
691 // currentChord.hasTimestamp = true;
|
matthiasm@43
|
692 // currentChord.timestamp = currentChromas.timestamp;
|
matthiasm@43
|
693
|
matthiasm@43
|
694 vector<int> chordCount = vector<int>(nChord,0);
|
matthiasm@43
|
695 int maxChordCount = 0;
|
matthiasm@43
|
696 int maxChordIndex = nChord-1;
|
matthiasm@43
|
697 string maxChord;
|
matthiasm@43
|
698 int startIndex = max(count - halfwindowlength/2,0);
|
matthiasm@43
|
699 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
|
matthiasm@43
|
700 for (int i = startIndex; i < endIndex; i++) {
|
matthiasm@43
|
701 chordCount[chordSequence[i]]++;
|
matthiasm@43
|
702 if (chordCount[chordSequence[i]] > maxChordCount) {
|
matthiasm@43
|
703 // cerr << "start index " << startIndex << endl;
|
matthiasm@43
|
704 maxChordCount++;
|
matthiasm@43
|
705 maxChordIndex = chordSequence[i];
|
matthiasm@43
|
706 maxChord = m_chordnames[maxChordIndex];
|
matthiasm@43
|
707 }
|
matthiasm@43
|
708 }
|
matthiasm@43
|
709 // chordSequence[count] = maxChordIndex;
|
matthiasm@43
|
710 // cerr << maxChordIndex << endl;
|
matthiasm@50
|
711 // cerr << chordchange[count] << endl;
|
matthiasm@43
|
712 if (oldChord != maxChord) {
|
matthiasm@43
|
713 oldChord = maxChord;
|
matthiasm@43
|
714 chord_feature.label = m_chordnames[maxChordIndex];
|
mail@60
|
715 fsOut[m_outputChords].push_back(chord_feature);
|
Chris@91
|
716 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
717 oldnotes[iNote].duration = oldnotes[iNote].duration + chord_feature.timestamp;
|
matthiasm@86
|
718 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
719 }
|
matthiasm@86
|
720 oldnotes.clear();
|
Chris@91
|
721 for (int iNote = 0; iNote < (int)m_chordnotes[maxChordIndex].size(); ++iNote) { // prepare notes of current chord
|
matthiasm@86
|
722 Feature chordnote_feature;
|
matthiasm@86
|
723 chordnote_feature.hasTimestamp = true;
|
matthiasm@86
|
724 chordnote_feature.timestamp = chord_feature.timestamp;
|
matthiasm@86
|
725 chordnote_feature.values.push_back(m_chordnotes[maxChordIndex][iNote]);
|
matthiasm@86
|
726 chordnote_feature.hasDuration = true;
|
matthiasm@86
|
727 chordnote_feature.duration = -chord_feature.timestamp; // this will be corrected at the next chord
|
matthiasm@86
|
728 oldnotes.push_back(chordnote_feature);
|
matthiasm@86
|
729 }
|
matthiasm@43
|
730 }
|
matthiasm@43
|
731 count++;
|
Chris@23
|
732 }
|
Chris@23
|
733 }
|
matthiasm@43
|
734 Feature chord_feature; // last chord estimate
|
matthiasm@43
|
735 chord_feature.hasTimestamp = true;
|
matthiasm@43
|
736 chord_feature.timestamp = timestamps[timestamps.size()-1];
|
matthiasm@43
|
737 chord_feature.label = "N";
|
mail@60
|
738 fsOut[m_outputChords].push_back(chord_feature);
|
matthiasm@86
|
739
|
Chris@91
|
740 for (int iNote = 0; iNote < (int)oldnotes.size(); ++iNote) { // finish duration of old chord
|
matthiasm@86
|
741 oldnotes[iNote].duration = oldnotes[iNote].duration + timestamps[timestamps.size()-1];
|
matthiasm@86
|
742 fsOut[m_outputChordnotes].push_back(oldnotes[iNote]);
|
matthiasm@86
|
743 }
|
matthiasm@86
|
744
|
Chris@23
|
745 cerr << "done." << endl;
|
matthiasm@50
|
746
|
matthiasm@50
|
747 for (int iFrame = 0; iFrame < nFrame; iFrame++) {
|
matthiasm@50
|
748 Feature chordchange_feature;
|
matthiasm@50
|
749 chordchange_feature.hasTimestamp = true;
|
matthiasm@50
|
750 chordchange_feature.timestamp = timestamps[iFrame];
|
matthiasm@50
|
751 chordchange_feature.values.push_back(chordchange[iFrame]);
|
mail@60
|
752 // cerr << chordchange[iFrame] << endl;
|
mail@60
|
753 fsOut[m_outputHarmonicChange].push_back(chordchange_feature);
|
matthiasm@50
|
754 }
|
matthiasm@50
|
755
|
mail@60
|
756 // for (int iFrame = 0; iFrame < nFrame; iFrame++) cerr << fsOut[m_outputHarmonicChange][iFrame].values[0] << endl;
|
matthiasm@50
|
757
|
matthiasm@50
|
758
|
Chris@23
|
759 return fsOut;
|
matthiasm@0
|
760 }
|